In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from functools import reduce
import re
sns.set_style('dark')

In [2]:
def pd_counts_plotter(data, title, x_title, y_title, bi=0):
    if bi > 0:
        ax = data.value_counts().plot.hist(bins=bi, color="teal", figsize=(15,8), fontsize=14)
    else:
        ax = data.value_counts().plot.bar(color="teal", figsize=(15,8), fontsize=14)

    ax.set_title(title, fontsize=18)
    ax.set_xlabel(x_title, fontsize=18);
    ax.set_ylabel(y_title, fontsize=18);


    for i in ax.patches:
        ax.text(i.xy[0]+0.2, (i.get_height()+0.2), i.get_height(), fontsize=15, color='black')

In [3]:
def ensure_dates(feature, frame, regexp, compare, both=False, castType="int32"):
    '''This will replace the weird/null values as "UNKNOWN" and then ensure the data can be processed as whatever
    type we want to use. Takes a series or feature, the dataframe with that feature, a complied regex expression, 
    a comparison value, and the final cast type of the series/feature.'''
    frame[feature] = frame[feature].fillna("UNKNOWN")
    frame[feature] = frame[feature].apply(lambda x : re.findall(regexp, str(x))[0] \
                                          if re.findall(regexp, str(x)) else "UNKNOWN")
    
    df = pd.DataFrame({"Object ID": frame["Object ID"], feature: frame[feature]})
    idx = df[df[feature] == "UNKNOWN"].index
    df.drop(idx, inplace=True)
    
    if both:
        idx = df[df[feature] > compare].index
        df.drop(idx, inplace=True)
        compare = int(compare)

    if isinstance(compare, int):
        df[feature] = pd.to_numeric(df[feature], errors='coerce')

    idx = df[df[feature] > compare].index
    df.drop(idx, inplace=True)

    df = df.loc[df[feature].notnull()]
    df[feature] = df[feature].astype(castType)
    print("Unique " + feature + "s: " + str(len(df[feature].unique())))

    return df

In [4]:
def key_val_plotter(dictionary, df, title="Selected Text Features", xTitle="Features", yTitle="Number of Records", orient="vertical"):
    for key, val in dictionary.items():
        dictionary[key]= df[key].notna().sum()
        print(str(key) + " <" + str(dictionary[key]) + ">")
        
    f, ax = plt.subplots(figsize=(25,15))
    plt.bar(range(len(dictionary)), list(dictionary.values()), align='center', color='teal',orientation=orient)
    plt.xticks(range(len(dictionary)), list(dictionary.keys()), fontsize=14)
    plt.yticks(fontsize=14)
    
    plt.title(title, fontsize=40)
    plt.xlabel(xTitle, fontsize=40)
    plt.ylabel(yTitle, fontsize=40)
    plt.show()
    
# featCount = { i : 0 for i in textFeatures.keys() }
# key_val_plotter(featCount, textFeatures)

In [5]:
dirty = pd.read_csv("NVDABatch2-25--3-16.csv")

In [6]:
dirty

Unnamed: 0,id,user_description,orig_text,text,user_name,user_followers,created,symbolval
0,56,"Swing Positon trader , trading setups , high p...",Crazy intraday trades on both sides with calls...,Crazy intraday trades on both sides with calls...,jaybees12,238,2020-02-25 20:42:44.000000,nosymbol
1,88,"Run by investors, for investors.\n\nReal-time ...",Coronavirus fears pull U.S. chip index into a ...,Coronavirus fears pull US chip index into a co...,newsfilterio,667,2020-02-25 20:45:33.000000,NVDA
2,126,"Singer-songwriter/musician, environmentalist, ...",If people think that last two days of the stoc...,If people think that last two days of the stoc...,maddenifico,121396,2020-02-25 20:49:17.000000,nosymbol
3,158,Investor. Trader. Thinker. Trading Strategist....,📈 Tuesday Morning's #Top10 Most Actively-Trade...,Tuesday Mornings #Top10 Most ActivelyTraded St...,MarcoDaCostaFX,4120,2020-02-25 20:53:07.000000,NIO
4,161,,Nibbled on some small common $ULTA $NVDA $GOOG...,Nibbled on some small common ULTA NVDA GOOGL A...,ThomasPhilipp6,95,2020-02-25 20:53:26.000000,ULTA
...,...,...,...,...,...,...,...,...
3268,93019,"🔊 Trade alerts, trade-ideas and crypto (All st...",Trading Ideas: +$2125 on 1 trade in $NVDA http...,Trading Ideas 2125 on 1 trade in NVDA FREE t...,AlertTrade,136015,2020-03-16 15:00:03.000000,NVDA
3269,93021,Squawk 🔲\nNOT INVESTMENT ADVICE. TRADE MY TWEE...,Anyone else looking at markets today and going...,Anyone else looking at markets today and going...,squawksquare,1246,2020-03-16 15:00:12.000000,nosymbol
3270,93079,Options Traders - NOT Investment Advice - Trad...,$NFLX - Break above 325 room to 346.21 \n\n$NV...,NFLX Break above 325 room to 34621 \n\nNVDA ...,ManicTrading,3739,2020-03-16 15:05:45.000000,NFLX
3271,93081,Position trader buying low risk setups -i.e. p...,$SMH 40 mth SMA .\n\n$SOXX $IPHI $NVDA https:/...,SMH 40 mth SMA \n\nSOXX IPHI NVDA,FlynancialA,513,2020-03-16 15:05:59.000000,SMH


In [7]:
data = dirty[["id","orig_text", "text"]]
data

Unnamed: 0,id,orig_text,text
0,56,Crazy intraday trades on both sides with calls...,Crazy intraday trades on both sides with calls...
1,88,Coronavirus fears pull U.S. chip index into a ...,Coronavirus fears pull US chip index into a co...
2,126,If people think that last two days of the stoc...,If people think that last two days of the stoc...
3,158,📈 Tuesday Morning's #Top10 Most Actively-Trade...,Tuesday Mornings #Top10 Most ActivelyTraded St...
4,161,Nibbled on some small common $ULTA $NVDA $GOOG...,Nibbled on some small common ULTA NVDA GOOGL A...
...,...,...,...
3268,93019,Trading Ideas: +$2125 on 1 trade in $NVDA http...,Trading Ideas 2125 on 1 trade in NVDA FREE t...
3269,93021,Anyone else looking at markets today and going...,Anyone else looking at markets today and going...
3270,93079,$NFLX - Break above 325 room to 346.21 \n\n$NV...,NFLX Break above 325 room to 34621 \n\nNVDA ...
3271,93081,$SMH 40 mth SMA .\n\n$SOXX $IPHI $NVDA https:/...,SMH 40 mth SMA \n\nSOXX IPHI NVDA


In [11]:
first = data.iloc[0]
first["orig_text"]

'Crazy intraday trades on both sides with calls and puts , action range bound ThAts when you trade like ninja . Lock profits And mange you’re postion $NVDA puts then calls . $AMZN calls paired with $TQQQ . Awesome 💰🙏🏻🦏'

In [45]:
regexp = re.compile(r'\$([\w]{1,})')
data["stock"] = [", ".join(re.findall(regexp, ele.upper())) if regexp.search(ele) else "none" for ele in data["orig_text"]]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [46]:
data

Unnamed: 0,id,orig_text,text,stock
0,56,Crazy intraday trades on both sides with calls...,Crazy intraday trades on both sides with calls...,"NVDA, AMZN, TQQQ"
1,88,Coronavirus fears pull U.S. chip index into a ...,Coronavirus fears pull US chip index into a co...,"NVDA, QCOM, TSM"
2,126,If people think that last two days of the stoc...,If people think that last two days of the stoc...,"AMZN, GOOGL, NVDA, CRM, MSFT"
3,158,📈 Tuesday Morning's #Top10 Most Actively-Trade...,Tuesday Mornings #Top10 Most ActivelyTraded St...,"NIO, AMD, MU, MSFT, AAPL, NVDA, BABA, PANW, TS..."
4,161,Nibbled on some small common $ULTA $NVDA $GOOG...,Nibbled on some small common ULTA NVDA GOOGL A...,"ULTA, NVDA, GOOGL, AMZN, MSFT"
...,...,...,...,...
3268,93019,Trading Ideas: +$2125 on 1 trade in $NVDA http...,Trading Ideas 2125 on 1 trade in NVDA FREE t...,"2125, NVDA"
3269,93021,Anyone else looking at markets today and going...,Anyone else looking at markets today and going...,"TSLA, AAPL, SPY, FB, NVDA, GOOG, NFLX, BA"
3270,93079,$NFLX - Break above 325 room to 346.21 \n\n$NV...,NFLX Break above 325 room to 34621 \n\nNVDA ...,"NFLX, NVDA"
3271,93081,$SMH 40 mth SMA .\n\n$SOXX $IPHI $NVDA https:/...,SMH 40 mth SMA \n\nSOXX IPHI NVDA,"SMH, SOXX, IPHI, NVDA"


In [49]:
dataNVDA = data[data["stock"] == "NVDA"]
dataNVDA

Unnamed: 0,id,orig_text,text,stock
5,177,"If you think this is a drop, wait till Monday ...",If you think this is a drop wait till Monday a...,NVDA
28,663,$NVDA breaks below 20-day moving average for t...,NVDA breaks below 20day moving average for the...,NVDA
42,1017,$NVDA 3 Lower lows Pattern \nDaily appearances...,NVDA 3 Lower lows Pattern \nDaily appearances ...,NVDA
43,1018,$NVDA Bearish Engulfing Pattern \nDaily appear...,NVDA Bearish Engulfing Pattern \nDaily appeara...,NVDA
44,1019,$NVDA Dark Cloud Cover Pattern \nDaily appeara...,NVDA Dark Cloud Cover Pattern \nDaily appearan...,NVDA
...,...,...,...,...
3255,92669,$NVDA I expect Mr. Mnuchin may have a bag of g...,NVDA I expect Mr Mnuchin may have a bag of goo...,NVDA
3257,92740,$NVDA Expiration:03/20/2020|MaxPain:240.0|High...,NVDA Expiration03202020|MaxPain2400|High Put O...,NVDA
3258,92742,$NVDA is a coiled spring. This stock is ready...,NVDA is a coiled spring This stock is ready t...,NVDA
3262,92854,.@nvidia postpones #GTC20 product announcement...,postpones #GTC20 product announcements due to ...,NVDA


In [51]:
data.iloc[0]["stock"]

'NVDA, AMZN, TQQQ'

In [50]:
data["stock"].str.contains('NVDA, AMZN, TQQQ', regex=False)

0        True
1       False
2       False
3       False
4       False
        ...  
3268    False
3269    False
3270    False
3271    False
3272    False
Name: stock, Length: 3273, dtype: bool

In [61]:
print(dataNVDA.iloc[5]["orig_text"])

NVIDIA | $NVDA

Autonomous A.I Trading Slashes NVIDIA Estimates Downgrading ...

Long or short it with BTC on Trade8: https://t.co/jVybFiVltW https://t.co/N3YDwhf2UX


In [58]:
print(dataNVDA.iloc[2]["text"])

NVDA 3 Lower lows Pattern 
Daily appearances since 20100104 121
Performance Close 3 days later 
Avg 024
Worst 2976
Best 1910 

5 days later
Avg 092
Worst 3088
Best 2740 

10 days later
Avg 294
Min 4069
Max 3912


In [63]:
len(dataNVDA)

483

In [66]:
for i in range(0, len(dataNVDA)):
    print(dataNVDA.iloc[i]["orig_text"])
    print("❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁\n❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁")

If you think this is a drop, wait till Monday after #coronavirusus numbers are updated this weekend. $nvda #goodtimes
❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁
❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁
$NVDA breaks below 20-day moving average for the first time  since February 3rd: https://t.co/Ogl0RgKQaj
Sentiment: Very bearish #NVDA #NVIDIA
❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁
❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁
$NVDA 3 Lower lows Pattern 
Daily appearances since 2010-01-04: 121
Performance (Close) 3 days later ±:
Avg: 0.24
Worst: -29.76
Best: 19.10 

5 days later:
Avg: 0.92
Worst: -30.88
Best: 27.40 

10 days later:
Avg: 2.94
Min: -40.69
Max: 39.12
❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁
❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁
$NVDA Bearish Engulfing Pattern 
Daily appearances since 2010-01-04: 90
Performance (Close) 3 days later ±:
Avg: 0.69
Worst: -15.12
Best: 23.73 

5 days later:
Avg: 0.38
Worst: -27.48
Best: 31.54 

10 days later:
Avg: 1.18
Min: -22.94
Max: 36.13
❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁
❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁❁
$NVDA Dark Cloud Cover Pattern 
Daily appearances s