In [11]:
!pip install tensorflow datasets transformers pandas scikit-learn



In [12]:
import pandas as pd

splits = {'train': 'sent_train.csv', 'validation': 'sent_valid.csv'}

df_train = pd.read_csv("hf://datasets/zeroshot/twitter-financial-news-sentiment/" + splits["train"])
df_valid = pd.read_csv("hf://datasets/zeroshot/twitter-financial-news-sentiment/" + splits["validation"])

df_train.head()

Unnamed: 0,text,label
0,$BYND - JPMorgan reels in expectations on Beyo...,0
1,$CCL $RCL - Nomura points to bookings weakness...,0
2,"$CX - Cemex cut at Credit Suisse, J.P. Morgan ...",0
3,$ESS: BTIG Research cuts to Neutral https://t....,0
4,$FNKO - Funko slides after Piper Jaffray PT cu...,0


In [13]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np

label_map = {'Bearish': 0, 'Bullish': 1, 'Neutral': 2}

df_train.columns = ['text', 'label']
df_valid.columns = ['text', 'label']

df_train['label'] = df_train['label'].map(label_map)
df_valid['label'] = df_valid['label'].map(label_map)

tokenizer = Tokenizer(num_words=5000, oov_token='<OOV>')
tokenizer.fit_on_texts(df_train['text'])

X_train = tokenizer.texts_to_sequences(df_train['text'])
X_valid = tokenizer.texts_to_sequences(df_valid['text'])

X_train = pad_sequences(X_train, maxlen=100, padding='post', truncating='post')
X_valid = pad_sequences(X_valid, maxlen=100, padding='post', truncating='post')

y_train = np.array(df_train['label'])
y_valid = np.array(df_valid['label'])

In [14]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

model = Sequential([
    Embedding(input_dim=5000, output_dim=64, input_length=100),
    LSTM(128, return_sequences=True),
    Dropout(0.5),
    LSTM(64),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='softmax')
])

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()



In [15]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_valid, y_valid),
    epochs=10,
    batch_size=32
)

Epoch 1/10
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 17ms/step - accuracy: 0.0000e+00 - loss: nan - val_accuracy: 0.0000e+00 - val_loss: nan
Epoch 2/10
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.0000e+00 - loss: nan - val_accuracy: 0.0000e+00 - val_loss: nan
Epoch 3/10
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.0000e+00 - loss: nan - val_accuracy: 0.0000e+00 - val_loss: nan
Epoch 4/10
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.0000e+00 - loss: nan - val_accuracy: 0.0000e+00 - val_loss: nan
Epoch 5/10
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.0000e+00 - loss: nan - val_accuracy: 0.0000e+00 - val_loss: nan
Epoch 6/10
[1m299/299[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.0000e+00 - loss: nan - val_accuracy: 0.0000e+00 - val_loss: nan
Epoch 7/10

In [18]:
import pickle

with open('tokenizer.pkl', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

model.save('3681_sentiment_model_Kshitiz_Sharma.h5')

with open('3681_sentiment_model_Kshitiz_Sharma.pkl', 'wb') as f:
    pickle.dump(model, f)



In [17]:
from google.colab import files
files.download('3681_sentiment_model_Kshitiz_Sharma.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [19]:
from tensorflow.keras.models import load_model

loaded_model = load_model('3681_sentiment_model_Kshitiz_Sharma.h5')

with open('tokenizer.pkl', 'rb') as handle:
    loaded_tokenizer = pickle.load(handle)

def predict_sentiment(text):
    sequence = loaded_tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=100, padding='post', truncating='post')
    prediction = loaded_model.predict(padded_sequence)
    label_map_reverse = {0: "Bearish", 1: "Bullish", 2: "Neutral"}
    return label_map_reverse[np.argmax(prediction)]

test_text = input("Enter a Tweet: ")



Enter a Tweet: The stock market is expected to rise tomorrow.


In [20]:
print(predict_sentiment(test_text))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 194ms/step
Bearish
