# Multi-Domain Sentiment Analysis

## Importing the dataset

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
# Hide every GPU from TensorFlow by making the list of visible GPU devices empty.
tf.config.set_visible_devices(devices=[], device_type='GPU')

In [2]:
import os
from pathlib import Path

# Folder holding the two drugs.com TSV files. The default is the original
# author's local folder; set the DRUGS_DATA_DIR environment variable to load
# the same files from another location without editing this cell.
DATA_DIR = Path(os.environ.get('DRUGS_DATA_DIR', '/Users/mohmmadmusaddique/Sentimental Analysis/Datasets'))

# The files are tab-separated. NOTE: on_bad_lines='skip' drops malformed lines
# without any warning, so the loaded row count can be lower than the file's.
train_data = pd.read_csv(DATA_DIR / 'drugsComTrain_raw.tsv', on_bad_lines='skip', delimiter='\t')
test_data = pd.read_csv(DATA_DIR / 'drugsComTest_raw.tsv', on_bad_lines='skip', delimiter='\t')

## Data preprocessing

In [3]:
# Remove the columns the model does not use. The frames are rebound instead of
# dropped with inplace=True, and errors='ignore' tolerates columns that are
# already gone, so this cell can be re-run safely; the original in-place drop
# raised KeyError on a second run.
UNUSED_COLUMNS = ['Unnamed: 0', 'date']
train_data = train_data.drop(columns=UNUSED_COLUMNS, errors='ignore')
test_data = test_data.drop(columns=UNUSED_COLUMNS, errors='ignore')

In [4]:
# Preview the first rows of the training frame after the column drop.
train_data.head()

Unnamed: 0,drugName,condition,review,rating,usefulCount
0,Valsartan,Left Ventricular Dysfunction,"""It has no side effect, I take it in combinati...",9.0,27
1,Guanfacine,ADHD,"""My son is halfway through his fourth week of ...",8.0,192
2,Lybrel,Birth Control,"""I used to take another oral contraceptive, wh...",5.0,17
3,Ortho Evra,Birth Control,"""This is my first time using any form of birth...",8.0,10
4,Buprenorphine / naloxone,Opiate Dependence,"""Suboxone has completely turned my life around...",9.0,37


In [5]:
# Bin the ratings into 2 classes: 1 for ratings >= 7, 0 otherwise.
# The labels are derived without overwriting the 'rating' column (which keeps
# its original scores), so this cell gives the same result every time it runs.
# The original replaced the column in place, and a second run then mapped
# every already-binned value (0 or 1) to 0.
POSITIVE_RATING_THRESHOLD = 7
train_y = (train_data['rating'] >= POSITIVE_RATING_THRESHOLD).astype('int64').values
test_y = (test_data['rating'] >= POSITIVE_RATING_THRESHOLD).astype('int64').values

In [6]:
# Pull the 'review' column out of each split; these are the texts that get
# tokenized in the next section.
train_corpus, test_corpus = (frame['review'] for frame in (train_data, test_data))

## Tokenizing the reviews

In [7]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [8]:
# Fit the vocabulary on the training reviews only, then encode both splits as
# lists of integer word indices with that same fitted tokenizer.
# NOTE(review): oov_token is the empty string — possibly an '<OOV>' placeholder
# that was lost somewhere along the way; confirm before changing it.
tokenizer = Tokenizer(oov_token='', num_words=10000)
tokenizer.fit_on_texts(train_corpus)
X_train, X_test = (tokenizer.texts_to_sequences(corpus) for corpus in (train_corpus, test_corpus))

In [9]:
# Pad or truncate every sequence to exactly 120 tokens.
# Both splits must be cut on the same side: the original truncated training
# reviews at the end ('post') but test reviews at the start (the default,
# 'pre'), so long test reviews were scored on a different part of the text
# than the model was trained on.
X_train = pad_sequences(X_train, maxlen=120, truncating='post')
X_test = pad_sequences(X_test, maxlen=120, truncating='post')

## Defining the LSTM model

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, Dropout

In [11]:
# Initializing the model: an empty Sequential container; the cells below add
# its layers one at a time.
model = Sequential()

In [12]:
# Embedding layer: looks up a 16-dimensional vector for each of the 10000
# possible token indices, for input sequences of 120 tokens.
model.add(Embedding(input_dim=10000, output_dim=16, input_length=120))

In [13]:
# Hidden fully connected layer with 128 ReLU units, placed before the LSTM
# (its output keeps the sequence axis: (None, 120, 128) in the summary).
model.add(Dense(units=128, activation='relu'))

In [14]:
# Dropout with rate 0.5 on the dense activations, to limit overfitting.
model.add(Dropout(rate=0.5))

In [15]:
# LSTM layer with 128 units; dropout=0.2 is applied to its inputs and
# recurrent_dropout=0.2 to its recurrent state.
model.add(LSTM(units=128, dropout=0.2, recurrent_dropout=0.2))

In [16]:
# Dropout with rate 0.5 on the LSTM output, to limit overfitting.
model.add(Dropout(rate=0.5))

In [17]:
# Output layer: a single sigmoid unit, giving one score between 0 and 1 per
# review for the binary label.
model.add(Dense(units=1, activation='sigmoid'))

In [18]:
# Configure training: Adam optimizer, binary cross-entropy loss, accuracy as
# the reported metric. Then print the layer-by-layer summary.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 120, 16)           160000    
                                                                 
 dense (Dense)               (None, 120, 128)          2176      
                                                                 
 dropout (Dropout)           (None, 120, 128)          0         
                                                                 
 lstm (LSTM)                 (None, 128)               131584    
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 1)                 129       
                                                                 
Total params: 293,889
Trainable params: 293,889
Non-trai

### Training the model

In [19]:
# Train for 5 epochs, reporting loss/accuracy on the test split after each one.
# validation_split=0.2 was removed: Keras ignores it whenever validation_data
# is supplied, so it only suggested a hold-out that never took place.
# NOTE(review): validating on the test split leaves no untouched data for a
# final evaluation — consider carving a separate validation set out of the
# training data instead.
model.fit(X_train, train_y, batch_size=32, epochs=5, validation_data=(X_test, test_y))

Epoch 1/5


2023-03-23 19:26:19.926530: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


   1/5041 [..............................] - ETA: 3:38:46 - loss: 0.6931 - accuracy: 0.4688

2023-03-23 19:26:22.372048: I tensorflow/compiler/xla/service/service.cc:173] XLA service 0x17f01a9a0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2023-03-23 19:26:22.372068: I tensorflow/compiler/xla/service/service.cc:181]   StreamExecutor device (0): Host, Default Version
2023-03-23 19:26:22.379705: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-03-23 19:26:22.410142: I tensorflow/compiler/jit/xla_compilation_cache.cc:477] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x17c0879a0>

## Testing new results

In [20]:
def predict_new_review(review):
    """Print the predicted sentiment of one or more reviews.

    Args:
        review: A single review string, or an iterable (e.g. a list) of
            review strings.

    Prints 'Positive Review' when the model's output for a review is above
    0.5 and 'Negative Review' otherwise, one line per review in input order.

    Raises:
        ValueError: If no review is supplied.
    """
    # texts_to_sequences iterates over its argument, so a bare string would be
    # treated as one "text" per character; wrap it into a one-element batch.
    texts = [review] if isinstance(review, str) else list(review)
    if not texts:
        raise ValueError('predict_new_review needs at least one review')

    encoded = tokenizer.texts_to_sequences(texts)
    # Same length and truncation side as the training sequences, so long
    # reviews are cut the way the model saw them during training.
    encoded = pad_sequences(encoded, maxlen=120, truncating='post')
    scores = model.predict(encoded)

    # Report every review, not just the first row of the prediction batch.
    for score in scores:
        if score[0] > 0.5:
            print('Positive Review')
        else:
            print('Negative Review')

### Negative review

In [21]:
# Sanity check with a hand-written, clearly negative review.
predict_new_review(['I am very unhappy with this product. It is very ineffective. I have been using it for 2 months now and I am very unsatisfied with the results. I would not recommend this product to anyone who is looking for a good product.'])

Negative Review


### Positive review

In [22]:
# Sanity check with a hand-written, clearly positive review.
predict_new_review(['I am very happy with this product. It is very effective. I have been using it for 2 months now and I am very satisfied with the results. I would recommend this product to anyone who is looking for a good product.'])

Positive Review


## Export model using pickle

In [23]:
import pickle

filename = 'Sentimental_Analysis_Model.sav'
# Write through a context manager so the file is flushed and closed even if
# pickling fails; the original never closed the handle returned by open().
# NOTE(review): only the model is saved here. The fitted tokenizer is needed
# to encode new reviews the same way, so persist it too if the model is going
# to be reloaded in another session.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers
......dense
.........vars
............0
............1
......dense_1
.........vars
............0
............1
......dropout
.........vars
......dropout_1
.........vars
......embedding
.........vars
............0
......lstm
.........cell
............vars
...............0
...............1
...............2
.........vars
...metrics
......mean
.........vars
............0
............1
......mean_metric_wrapper
.........vars
............0
............1
...optimizer
......vars
.........0
.........1
.........10
.........11
.........12
.........13
.........14
.........15
.........16
.........2
.........3
.........4
.........5
.........6
.........7
.........8
.........9
...vars
Keras model archive saving:
File Name                                             Modified             Size
config.json                                    2023-03-23 21:18:48         2921
metadata.json                                  2023-03-23 2

In [24]:
# Open the saved model and use it to predict the sentiment of a new review
# import joblib
# model = joblib.load('Sentimental_Analysis_Model.sav')
# predict_new_review(['positive'])