In [28]:
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import nlp

In [29]:
f = Path("Data/nio_tweets.csv")
df = pd.read_csv(f, parse_dates = True, infer_datetime_format = True)
df.head(2)


Unnamed: 0,Text,Created,Likes,Sentiment
0,$CLF $X $TSLA $NIO $VALE,2021-01-16T17:55:13Z,805,{'basic': 'Bullish'}
1,$XPEV $NIO $LI Hands down XPEV.\nBest bang pe...,2021-01-16T17:53:37Z,300,


In [30]:
n = nlp.NLT()
nltk_df = n.make_sentiment_df(df,"twits")
nltk_df.head(2)

Unnamed: 0,Created,Likes,NLTK_Compound,NLTK_Neg,NLTK_Neu,NLTK_Pos,Sentiment,Text
0,2021-01-16,805,0.0,0.0,1.0,0.0,Bullish,$CLF $X $TSLA $NIO $VALE
1,2021-01-16,300,0.8519,0.0,0.751,0.249,,$XPEV $NIO $LI Hands down XPEV.\nBest bang pe...


In [31]:
b = nlp.Blobby()

nltk_blob_df = b.add_blob(nltk_df, "Text")
nltk_blob_df.head(2)

Unnamed: 0,Created,Likes,NLTK_Compound,NLTK_Neg,NLTK_Neu,NLTK_Pos,Sentiment,Text,Blob Class,Blob Pos,Blob Neg
0,2021-01-16,805,0.0,0.0,1.0,0.0,Bullish,$CLF $X $TSLA $NIO $VALE,pos,0.5,0.5
1,2021-01-16,300,0.8519,0.0,0.751,0.249,,$XPEV $NIO $LI Hands down XPEV.\nBest bang pe...,pos,0.682225,0.317775


In [32]:
df_final = nltk_blob_df[["Created", "NLTK_Compound","Blob Pos", "Blob Neg"]]
df_final["Blob Score"] = np.where(df_final["Blob Pos"] >0.5, df_final["Blob Pos"], df_final["Blob Neg"])
df_final = df_final[["Created", "NLTK_Compound", "Blob Score"]]

df_final.sort_index(inplace = True)
df_final = df_final.groupby(["Created"]).mean()
df_final.round({"NLTK_Compound": 4,
                "Blob Score" : 4
               })
df_final.head(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0_level_0,NLTK_Compound,Blob Score
Created,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-09-21,-0.012772,0.745363
2020-09-22,-0.045475,0.704881


In [33]:
df_final.to_csv("Data/tweets_nio_sentiment.csv", header = True, index = True)


In [None]:
# l = ml.LSTM()

# l.num_units
# plot_df = new_new_df[["Text_Compound","Text_Neg", "Text_Pos"]]
# l.plot_me(plot_df)

In [None]:
sent_df = nlp.make_sentiment_df(df, "Reddit")

In [None]:
sent_df.head()

In [None]:
nlp.show_stats(sent_df)

In [None]:
def window_data(df, window, feature_col_number, target_col_number):
    """
    This function accepts the column number for the features (X) and the target (y).
    It chunks the data up with a rolling window of Xt - window to predict Xt.
    It returns two numpy arrays of X and y.
    """
    X = []
    y = []
    for i in range(len(df) - window):
        features = df.iloc[i : (i + window), feature_col_number]
        target = df.iloc[(i + window), target_col_number]
        X.append(features)
        y.append(target)
    return np.array(X), np.array(y).reshape(-1, 1)

In [None]:
        X, y = window_data(df, 2,1, 1)
        
        split = int(0.7 * len(X))
        X_train = X[: split -1]
        X_test = X[split:]
        y_train = y[: split -1]
        y_test = y[split:]
        
        scaler = MinMaxScaler().fit(X_train)
        X_train = scaler.transform(X_train)
        X_test = scaler.transform(X_test)
       # scaler.fit(y_train)
       # y_train = scaler.transform(y_train)
       # y_test = scaler.transform(y_test)

        X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
        X_test = X_test.reshape((X_test.shape[0], X_test.shape[1],1))
        
        
        #Might be tricky to separate this out right now due to coding for layers
        model = Sequential()
        number_units = 10
        dropout_fraction = 0.2

        #Layer1
        model.add(LSTM(
        units = number_units,
        return_sequences = True,
        input_shape = (X_train.shape[1], 1))
        )

        model.add(Dropout(dropout_fraction))
        #Layer2
        model.add(LSTM(units = number_units, return_sequences = True))
        model.add(Dropout(dropout_fraction))
        #Layer3
        model.add(LSTM(units=number_units))
        model.add(Dropout(dropout_fraction))
        #Output
        model.add(Dense(1))

        model.compile(optimizer = "adam", loss = "mean_squared_error")

        model.fit(
            X_train, y_train,
            epochs = 20,
            shuffle = False,
            batch_size = 2,
            verbose = 1
        )

        predicted = model.predict(X_test)
        scaler = MinMaxScaler().fit(y_train)
        predicted_prices = scaler.inverse_transform(predicted)
        real_prices = scaler.inverse_transform(y_test.reshape(-1, 1))

        stocks = pd.DataFrame({
            "Real": real_prices.ravel(),
            "Predicted": predicted_prices.ravel()
            }, index = df.index[-len(real_prices): ]) 



In [None]:
stocks.tail()