In [3]:
import pandas as pd
df = pd.read_csv('feedback.csv')
df.head()

Unnamed: 0,Id,Review,Label
0,0,good and interesting,5
1,1,"This class is very helpful to me. Currently, I...",5
2,2,like!Prof and TAs are helpful and the discussi...,5
3,3,Easy to follow and includes a lot basic and im...,5
4,4,Really nice teacher!I could got the point eazl...,4


In [4]:
df = df[df['Label'] != 3]
df['sentiment'] = df['Label'].apply(lambda rating : +1 if rating > 3 else -1)

In [5]:
df.head()

Unnamed: 0,Id,Review,Label,sentiment
0,0,good and interesting,5,1
1,1,"This class is very helpful to me. Currently, I...",5,1
2,2,like!Prof and TAs are helpful and the discussi...,5,1
3,3,Easy to follow and includes a lot basic and im...,5,1
4,4,Really nice teacher!I could got the point eazl...,4,1


In [6]:
positive = df[df['sentiment'] == 1]
negative = df[df['sentiment'] == -1]

In [7]:
def remove_punctuation(text):
    final = "".join(u for u in text if u not in ("?", ".", ";", ":",  "!",'"',",","'"))
    return final
df['Review'] = df['Review'].apply(remove_punctuation)

In [8]:
df

Unnamed: 0,Id,Review,Label,sentiment
0,0,good and interesting,5,1
1,1,This class is very helpful to me Currently Im ...,5,1
2,2,likeProf and TAs are helpful and the discussio...,5,1
3,3,Easy to follow and includes a lot basic and im...,5,1
4,4,Really nice teacherI could got the point eazli...,4,1
...,...,...,...,...
107013,107013,Trendy topic with talks from expertises in the...,4,1
107014,107014,Wonderful Simple and clear language good instr...,5,1
107015,107015,an interesting and fun course thanks dr quincy,5,1
107016,107016,very broad perspective up to date information ...,4,1


In [9]:
dfNew = df[['Review','sentiment']]
dfNew.head()

Unnamed: 0,Review,sentiment
0,good and interesting,1
1,This class is very helpful to me Currently Im ...,1
2,likeProf and TAs are helpful and the discussio...,1
3,Easy to follow and includes a lot basic and im...,1
4,Really nice teacherI could got the point eazli...,1


In [10]:
import numpy as np
index = df.index
df['random_number'] = np.random.randn(len(index))
train = df[df['random_number'] <= 0.8]
test = df[df['random_number'] > 0.8]

In [11]:
df["sentiment"].value_counts()

 1    97227
-1     4720
Name: sentiment, dtype: int64

In [12]:
sentiment_label = df.sentiment.factorize()
sentiment_label

(array([0, 0, 0, ..., 0, 0, 0], dtype=int64),
 Int64Index([1, -1], dtype='int64'))

In [13]:
fback = df.Review.values

In [14]:
from tensorflow.keras.preprocessing.text import Tokenizer
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(fback)

In [15]:
encoded_docs = tokenizer.texts_to_sequences(fback)

In [16]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
padded_sequence = pad_sequences(encoded_docs, maxlen=200)

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM,Dense, Dropout, SpatialDropout1D
from tensorflow.keras.layers import Embedding
embedding_vector_length = 32
vocab_size = len(tokenizer.word_index) + 1
model = Sequential()
model.add(Embedding(vocab_size, embedding_vector_length, input_length=200))
model.add(SpatialDropout1D(0.25))
model.add(LSTM(50, dropout=0.5, recurrent_dropout=0.5))
model.add(Dropout(0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',optimizer='adam', metrics=['accuracy'])
print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 200, 32)           1406016   
                                                                 
 spatial_dropout1d (SpatialD  (None, 200, 32)          0         
 ropout1D)                                                       
                                                                 
 lstm (LSTM)                 (None, 50)                16600     
                                                                 
 dropout (Dropout)           (None, 50)                0         
                                                                 
 dense (Dense)               (None, 1)                 51        
                                                                 
Total params: 1,422,667
Trainable params: 1,422,667
Non-trainable params: 0
______________________________________________

In [18]:
history = model.fit(padded_sequence,sentiment_label[0],validation_split=0.2, epochs=5, batch_size=98)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [19]:
def predict_sentiment(text):
    tw = tokenizer.texts_to_sequences([text])
    tw = pad_sequences(tw,maxlen=200)
    prediction = int(model.predict(tw).round().item())
    return sentiment_label[1][prediction]
test_sentence1 = "It was a good course"
predict_sentiment(test_sentence1)
test_sentence2 = "This is the worst course"
predict_sentiment(test_sentence2)



-1

In [20]:
predict_sentiment("I felt safe. it was like talking to a friend. thank you for this")



1

In [21]:
predict_sentiment("Worst experience")



-1

In [22]:
import gradio as gr
def analyzeSentiment(text):
    return {1: "Glad you felt comfortable with our therapist. Thanks for this!", -1: "Very sorry about this. We'll look into it"}[predict_sentiment(text)]
interface=gr.Interface(fn=analyzeSentiment, inputs=["text"], outputs=['text'])
interface.launch(share=True)

Running on local URL:  http://127.0.0.1:7860/
Running on public URL: https://18773.gradio.app

This share link expires in 72 hours. For free permanent hosting, check out Spaces (https://huggingface.co/spaces)


(<gradio.routes.App at 0x1c814eb9940>,
 'http://127.0.0.1:7860/',
 'https://18773.gradio.app')

