**ICP - 10**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


**1. Save the model and use the saved model to predict on new text data (ex, “A lot of good things are happening. We are respected again throughout the world, and that's a great thing.@realDonaldTrump”) **

In [2]:
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import re

from sklearn.preprocessing import LabelEncoder

data = pd.read_csv('/content/drive/MyDrive/N-N-D-L/Sentiment.csv')
# Keeping only the neccessary columns
data = data[['text','sentiment']]

data['text'] = data['text'].apply(lambda x: x.lower())
data['text'] = data['text'].apply((lambda x: re.sub('[^a-zA-z0-9\s]', '', x)))

for idx, row in data.iterrows():
    row[0] = row[0].replace('rt', ' ')

max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts(data['text'].values)
X = tokenizer.texts_to_sequences(data['text'].values)

X = pad_sequences(X)

embed_dim = 128
lstm_out = 196
def createmodel():
    model = Sequential()
    model.add(Embedding(max_fatures, embed_dim,input_length = X.shape[1]))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(3,activation='softmax'))
    model.compile(loss = 'categorical_crossentropy', optimizer='adam',metrics = ['accuracy'])
    return model
# print(model.summary())

labelencoder = LabelEncoder()
integer_encoded = labelencoder.fit_transform(data['sentiment'])
y = to_categorical(integer_encoded)
X_train, X_test, Y_train, Y_test = train_test_split(X,y, test_size = 0.33, random_state = 42)

batch_size = 32
model = createmodel()
model.fit(X_train, Y_train, epochs = 1, batch_size=batch_size, verbose = 2)
score,acc = model.evaluate(X_test,Y_test,verbose=2,batch_size=batch_size)
print(score)
print(acc)
print(model.metrics_names)



291/291 - 38s - loss: 0.8143 - accuracy: 0.6408 - 38s/epoch - 132ms/step
144/144 - 2s - loss: 0.7557 - accuracy: 0.6632 - 2s/epoch - 11ms/step
0.7556701898574829
0.6631717085838318
['loss', 'accuracy']


In [3]:
model.save("sentiment_model.h5")

  saving_api.save_model(


In [4]:
import tweepy
from keras.models import load_model
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import re

# Load the saved model
model = load_model("/content/sentiment_model.h5")

# Define a function for preprocessing text
def preprocess_text(text):
    text = text.lower()
    text = re.sub('[^a-zA-z0-9\s]', '', text)
    return text

# Example new text data
new_text = "A lot of good things are happening. We are respected again throughout the world, and that's a great thing. @realDonaldTrump"

# Preprocess the new text data
new_text = preprocess_text(new_text)

# Tokenize and pad the new text data
max_fatures = 2000
tokenizer = Tokenizer(num_words=max_fatures, split=' ')
tokenizer.fit_on_texts([new_text])
X_new = tokenizer.texts_to_sequences([new_text])
X_new = pad_sequences(X_new, maxlen=model.input_shape[1])

# Make predictions
predictions = model.predict(X_new)

# Determine the sentiment based on the prediction
sentiments = ['Negative', 'Neutral', 'Positive']
predicted_sentiment = sentiments[predictions.argmax()]

# Print the result
print("Predicted Sentiment: " + predicted_sentiment)




Predicted Sentiment: Negative


**2. Apply GridSearchCV on the source code provided in the class**

In [5]:
!pip install scikeras

Collecting scikeras
  Downloading scikeras-0.12.0-py3-none-any.whl (27 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.12.0


In [6]:
from scikeras.wrappers import KerasClassifier

In [7]:
import pandas as pd
import re
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, SpatialDropout1D
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from scikeras.wrappers import KerasClassifier

# Assuming the data loading and preprocessing steps are the same

max_features = 2000
tokenizer = Tokenizer(num_words=max_features, split=' ')
# Assuming tokenizer fitting and text preprocessing is done here

def createmodel(optimizer='adam'):
    model = Sequential()
    model.add(Embedding(max_features, embed_dim, input_length=X.shape[1]))
    model.add(SpatialDropout1D(0.2))
    model.add(LSTM(lstm_out, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(3, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# Define the KerasClassifier with the build_fn as our model creation function
model = KerasClassifier(model=createmodel, verbose=2)

# Define hyperparameters to tune
param_grid = {
    'batch_size': [32, 64],
    'epochs': [1, 2],
    'optimizer': ['adam', 'rmsprop']
}

# Initialize GridSearchCV
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=3)
# Fit GridSearchCV
grid_result = grid.fit(X_train, Y_train)

# Summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))




194/194 - 34s - loss: 0.8560 - accuracy: 0.6326 - 34s/epoch - 173ms/step
97/97 - 1s - 1s/epoch - 10ms/step




194/194 - 29s - loss: 0.8564 - accuracy: 0.6331 - 29s/epoch - 148ms/step
97/97 - 1s - 1s/epoch - 14ms/step




194/194 - 25s - loss: 0.8698 - accuracy: 0.6251 - 25s/epoch - 130ms/step
97/97 - 1s - 1s/epoch - 15ms/step




194/194 - 26s - loss: 0.8646 - accuracy: 0.6300 - 26s/epoch - 136ms/step
97/97 - 1s - 1s/epoch - 12ms/step




194/194 - 28s - loss: 0.8620 - accuracy: 0.6268 - 28s/epoch - 144ms/step
97/97 - 1s - 1s/epoch - 11ms/step




194/194 - 27s - loss: 0.8574 - accuracy: 0.6257 - 27s/epoch - 138ms/step
97/97 - 1s - 1s/epoch - 11ms/step




Epoch 1/2
194/194 - 27s - loss: 0.8660 - accuracy: 0.6318 - 27s/epoch - 137ms/step
Epoch 2/2
194/194 - 19s - loss: 0.7136 - accuracy: 0.6967 - 19s/epoch - 98ms/step
97/97 - 1s - 1s/epoch - 10ms/step




Epoch 1/2
194/194 - 28s - loss: 0.8539 - accuracy: 0.6312 - 28s/epoch - 143ms/step
Epoch 2/2
194/194 - 18s - loss: 0.6965 - accuracy: 0.7041 - 18s/epoch - 91ms/step
97/97 - 1s - 1s/epoch - 15ms/step




Epoch 1/2
194/194 - 27s - loss: 0.8563 - accuracy: 0.6296 - 27s/epoch - 139ms/step
Epoch 2/2
194/194 - 18s - loss: 0.7037 - accuracy: 0.6992 - 18s/epoch - 91ms/step
97/97 - 1s - 1s/epoch - 14ms/step




Epoch 1/2
194/194 - 25s - loss: 0.8519 - accuracy: 0.6370 - 25s/epoch - 130ms/step
Epoch 2/2
194/194 - 18s - loss: 0.7005 - accuracy: 0.7010 - 18s/epoch - 95ms/step
97/97 - 1s - 1s/epoch - 10ms/step




Epoch 1/2
194/194 - 26s - loss: 0.8488 - accuracy: 0.6295 - 26s/epoch - 135ms/step
Epoch 2/2
194/194 - 19s - loss: 0.6925 - accuracy: 0.6991 - 19s/epoch - 98ms/step
97/97 - 1s - 1s/epoch - 11ms/step




Epoch 1/2
194/194 - 26s - loss: 0.8578 - accuracy: 0.6290 - 26s/epoch - 134ms/step
Epoch 2/2
194/194 - 19s - loss: 0.7023 - accuracy: 0.6964 - 19s/epoch - 99ms/step
97/97 - 1s - 1s/epoch - 10ms/step




97/97 - 19s - loss: 0.8878 - accuracy: 0.6179 - 19s/epoch - 195ms/step
49/49 - 1s - 624ms/epoch - 13ms/step




97/97 - 16s - loss: 0.8702 - accuracy: 0.6216 - 16s/epoch - 168ms/step
49/49 - 1s - 623ms/epoch - 13ms/step




97/97 - 17s - loss: 0.8901 - accuracy: 0.6173 - 17s/epoch - 174ms/step
49/49 - 1s - 626ms/epoch - 13ms/step




97/97 - 16s - loss: 0.8773 - accuracy: 0.6192 - 16s/epoch - 167ms/step
49/49 - 1s - 598ms/epoch - 12ms/step




97/97 - 18s - loss: 0.8844 - accuracy: 0.6203 - 18s/epoch - 181ms/step
49/49 - 1s - 617ms/epoch - 13ms/step




97/97 - 18s - loss: 0.8915 - accuracy: 0.6189 - 18s/epoch - 182ms/step
49/49 - 1s - 905ms/epoch - 18ms/step




Epoch 1/2
97/97 - 18s - loss: 0.8953 - accuracy: 0.6216 - 18s/epoch - 183ms/step
Epoch 2/2
97/97 - 10s - loss: 0.7299 - accuracy: 0.6883 - 10s/epoch - 98ms/step
49/49 - 1s - 634ms/epoch - 13ms/step




Epoch 1/2
97/97 - 18s - loss: 0.8734 - accuracy: 0.6216 - 18s/epoch - 183ms/step
Epoch 2/2
97/97 - 10s - loss: 0.7169 - accuracy: 0.6960 - 10s/epoch - 103ms/step
49/49 - 1s - 626ms/epoch - 13ms/step




Epoch 1/2
97/97 - 17s - loss: 0.8782 - accuracy: 0.6177 - 17s/epoch - 171ms/step
Epoch 2/2
97/97 - 11s - loss: 0.7142 - accuracy: 0.6958 - 11s/epoch - 111ms/step
49/49 - 1s - 1s/epoch - 23ms/step




Epoch 1/2
97/97 - 20s - loss: 0.8787 - accuracy: 0.6176 - 20s/epoch - 202ms/step
Epoch 2/2
97/97 - 10s - loss: 0.7277 - accuracy: 0.6852 - 10s/epoch - 106ms/step
49/49 - 1s - 941ms/epoch - 19ms/step




Epoch 1/2
97/97 - 17s - loss: 0.8704 - accuracy: 0.6178 - 17s/epoch - 174ms/step
Epoch 2/2
97/97 - 11s - loss: 0.7198 - accuracy: 0.6888 - 11s/epoch - 113ms/step
49/49 - 1s - 654ms/epoch - 13ms/step




Epoch 1/2
97/97 - 18s - loss: 0.8837 - accuracy: 0.6196 - 18s/epoch - 181ms/step
Epoch 2/2
97/97 - 10s - loss: 0.7218 - accuracy: 0.6875 - 10s/epoch - 102ms/step
49/49 - 1s - 629ms/epoch - 13ms/step




Epoch 1/2
291/291 - 39s - loss: 0.8290 - accuracy: 0.6418 - 39s/epoch - 134ms/step
Epoch 2/2
291/291 - 27s - loss: 0.6836 - accuracy: 0.7081 - 27s/epoch - 94ms/step
Best: 0.679758 using {'batch_size': 32, 'epochs': 2, 'optimizer': 'rmsprop'}
