# Sentiment analysis

# LSTM Normal 

# Import necessary libraries

In [None]:
import numpy as np 
import pandas as pd 

from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
import tensorflow as tf
from tensorflow.keras.layers import Dropout

import nltk
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

# Preprocessing Training Data

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Walk the Kaggle input directory and print the full path of every file in it.
kaggle_files = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in kaggle_files:
    print(path)

In [None]:
# Load the raw dataset; the four column names are supplied here rather than
# being read from the file.
data = pd.read_csv(
    '/kaggle/input/original/nepaliorg/nepalitext.csv',
    names=['label', 'type', 'sentence1', 'sentence2'],
)

# Join the two text columns into one. A missing `sentence2` is treated as an
# empty string: in pandas, str + NaN is NaN, so without the fill every row
# lacking a second text field would lose its whole sentence.
# NOTE(review): the parts are joined with no separator, as before - confirm
# whether a space or comma should be restored between them.
data['sentence'] = data['sentence1'] + data['sentence2'].fillna('')

# Keep only `label` and the combined `sentence`.
data = data.drop(columns=['sentence1', 'sentence2', 'type'])

# Viewing the data

In [None]:
# Show how many rows carry each distinct label value.
data.loc[:, 'label'].value_counts()

In [None]:
# Load the second CSV into X.
twitter_csv = '/kaggle/input/aspectiment/aspectsentiment/twitter.csv'
X = pd.read_csv(twitter_csv)

# Copy the labels over from `data`; pandas matches rows by index here, so any
# X row without a counterpart in `data` ends up with a NaN label.
X['label'] = data['label']

# Data Preprocessing

In [None]:
# Download every NLTK resource this notebook relies on, not just the stop
# words: nltk.word_tokenize and nltk.pos_tag (used in a later cell) need the
# tokenizer and POS-tagger models as well.
# NOTE(review): both the older and the newer resource names are requested
# because they differ between NLTK releases; a name unknown to the installed
# index is expected to only report an error and return False - confirm on the
# target NLTK version.
for resource in (
    'stopwords',
    'punkt',
    'punkt_tab',
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
):
    nltk.download(resource)

In [None]:
# Preview the first five rows of X.
X.head(n=5)

In [None]:
# English stop words from NLTK, held in a set.
english_stopwords = stopwords.words('english')
stopword = set(english_stopwords)

In [None]:
# Plain Python list of the `sentence_english` column, one entry per row of X.
corpus = X['sentence_english'].to_list()

In [None]:
# For every sentence, collect its nouns (aspect terms) and its adjectives
# (sentiment terms) as space-joined strings. A sentence with no matching word
# contributes an empty string, so both lists stay aligned with `corpus`.
aspect = []
sentiment = []

for sentence in corpus:
    # Tokenise and POS-tag once per sentence and reuse the result for both
    # filters (previously each sentence was tagged twice, plus one unused
    # call that tagged the raw string).
    tagged = nltk.pos_tag(nltk.word_tokenize(sentence))

    # NOTE(review): endswith('NN') / endswith('JJ') keep only tags that end in
    # those letters; tags with a trailing suffix (e.g. 'NNS', 'JJR') are not
    # matched. Kept as-is - confirm whether startswith was intended.
    aspect.append(' '.join(word for word, pos in tagged if pos.endswith('NN')))
    sentiment.append(' '.join(word for word, pos in tagged if pos.endswith('JJ')))

In [None]:
# Attach the adjective strings to X as a new `sentiment` column.
# The matching assignment for `aspect` is disabled.
#X['aspect'] = aspect
X['sentiment'] = pd.Series(sentiment, index=X.index)

In [None]:
# `df` is a second name for X (no copy is made), so the in-place replace
# below changes X as well.
df = X
nan_value = float("NaN")

# Turn every empty-string cell into NaN.
df.replace(to_replace="", value=nan_value, inplace=True)

**`label` is the target variable and `data` is the input data.**

In [None]:
# Drop Nan Values
X = df.dropna()

# Get training data
X_data = df['sentiment']

#  Get target label
y = df['label']

# LSTM

In [None]:
# vocabulray size
voc_size = 5000

# One Hot Encoding
onehot_repr = [one_hot(words, voc_size) for words in corpus]

# Embedding Representation

In [None]:
# Bring every encoded sentence to one fixed length, padding at the front.
sent_length = 20
embedded_docs = pad_sequences(
    sequences=onehot_repr,
    maxlen=sent_length,
    padding='pre',
)

In [None]:
# Count the distinct label values in y.
num_labels = len({label for label in y})

# Constructing LSTM model

In [None]:
# Length of the vector the Embedding layer produces for each vocabulary index
embedding_vector_features = 40

## Creating model
# Embedding -> LSTM(10) -> softmax layer with one unit per label.
model = Sequential()
model.add(Embedding(voc_size, embedding_vector_features, input_length=sent_length))
model.add(LSTM(10))
model.add(Dense(num_labels, activation='softmax'))

# categorical_crossentropy is the loss that matches a softmax output trained
# on one-hot targets (the labels go through to_categorical in a later cell).
# binary_crossentropy would score each output unit as an independent yes/no
# decision, which distorts both the loss and the reported accuracy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that only added a stray "None" line).
model.summary()

In [None]:
from sklearn import preprocessing
# Use the Keras bundled with TensorFlow, like every other Keras import in
# this notebook, instead of the separately installed `keras` package.
from tensorflow.keras.utils import to_categorical

# encode label to int
# `le` is kept because le.classes_ is needed later to name the classes.
le = preprocessing.LabelEncoder()
y = le.fit_transform(y)

# Model inputs and integer targets as NumPy arrays.
X_final = np.array(embedded_docs)
y_final = np.array(y)

# One-hot encode the integer targets.
y_final = to_categorical(y_final)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed makes the split
# reproducible.
split = train_test_split(X_final, y_final, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split

In [None]:
# Train for 3 epochs with batches of 64, using the held-out split as
# validation data.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=3,
    validation_data=(X_test, y_test),
)

# Test

In [None]:
# making prediction
# Sequential.predict_classes() was removed in TensorFlow 2.6. predict()
# returns one probability per class for each sample, so the predicted class
# is the index of the largest value along the last axis.
y_pred_test = np.argmax(model.predict(X_test), axis=-1)

In [None]:
# Collapse the one-hot test targets back to class indices.
# Note: this rebinds y_test, so the cell should be run only once per split.
y_test = y_test.argmax(axis=1)

# Test accuracy

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test samples whose predicted class equals the true class.
accuracy_score(y_true=y_test, y_pred=y_pred_test)

# Model : Confusion Matrix

In [None]:
#Creating a confusion matrix
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_curve,auc
import seaborn as sns
import matplotlib.pyplot as plt

# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_test, y_pred_test)

# Normalise each row by its total so a cell holds the share of that true
# class assigned to each prediction. The built-in float replaces np.float,
# an alias that was removed in NumPy 1.24.
cm = cm.astype(float) / cm.sum(axis=1, keepdims=True)

In [None]:
# Wrap the matrix in a DataFrame whose rows and columns are named after the
# original label values, which makes the plot easier to read.
class_names = le.classes_
final_cm = pd.DataFrame(data=cm, index=class_names, columns=class_names)

# Visualizing Confusion matrix

In [None]:
# Draw the confusion matrix as an annotated greyscale heatmap on a 5x5 figure.
plt.figure(figsize=(5, 5))
heatmap_ax = sns.heatmap(final_cm, annot=True, cmap='Greys', cbar=False)
heatmap_ax.set_title('Sentiment Classify')
heatmap_ax.set_ylabel('True class')
heatmap_ax.set_xlabel('Prediction class')
plt.show()

In [None]:
from sklearn.metrics import classification_report

# Build the per-class metrics report for the test set, then print it.
report = classification_report(y_true=y_test, y_pred=y_pred_test)
print(report)