In [1]:
# Importing essential libraries
import numpy as np
import pandas as pd

In [2]:
# Loading the dataset
messages = pd.read_csv('feedback_dataset.csv')

In [3]:
messages.shape

(5200, 2)

In [4]:
messages.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5200 entries, 0 to 5199
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   text       5200 non-null   object
 1   sentiment  5200 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 81.4+ KB


In [5]:
messages.columns

Index(['text', 'sentiment'], dtype='object')

In [6]:
messages.head()

Unnamed: 0,text,sentiment
0,Display is excellent and camera is as good as ...,1
1,Battery life is also great!,1
2,Protects the phone on all sides.,1
3,"Clear Skype Calls, Long Battery Life, Long Range.",1
4,Great Hands Free Device.,1


# Data Cleaning & Preprocessing

In [7]:
messages.sentiment.unique()

array([1, 0], dtype=int64)

In [8]:
def to_sentiment(sentiment):
  """Map a numeric sentiment label to its string class name.

  Parameters
  ----------
  sentiment : int
      Numeric label; 0 denotes negative feedback.

  Returns
  -------
  str
      'negative' when ``sentiment == 0``, otherwise 'positive'.
  """
  # Any non-zero value falls through to 'positive', exactly as before.
  if sentiment == 0:
    return 'negative'
  return 'positive'
# Add a string label column ('negative' / 'positive') derived from the numeric
# `sentiment` column; this is the column later used as the training target.
messages['target'] = messages.sentiment.apply(to_sentiment)

In [9]:
messages['target']

0       positive
1       positive
2       positive
3       positive
4       positive
          ...   
5195    negative
5196    negative
5197    negative
5198    negative
5199    negative
Name: target, Length: 5200, dtype: object

In [10]:
import re
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\user\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [11]:
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer #to find the root word

In [12]:
ps = PorterStemmer()
corpus = []

In [13]:
# Build the stop-word set once. The original rebuilt it for every single word,
# which is loop-invariant work repeated across the whole dataset.
english_stopwords = set(stopwords.words('english'))

# Start from an empty list so that re-running this cell does not append a
# second copy of every document to `corpus`.
corpus = []

# Iterate over the column values directly (positional order) instead of
# label-indexing with messages['text'][i], which only works when the index is
# exactly 0..n-1.
for raw_text in messages['text']:
    # Replace every non-letter character with a space; str() guards against
    # non-string cells such as NaN.
    feedback = re.sub('[^a-zA-Z]', ' ', str(raw_text))

    # Convert the entire feedback to lower case
    feedback = feedback.lower()

    # Tokenize the feedback on whitespace
    feedback_words = feedback.split()

    # Remove the stop words and stem the remaining words
    feedback = [ps.stem(word) for word in feedback_words if word not in english_stopwords]

    # Join the stemmed words back into a single string
    feedback = ' '.join(feedback)

    # Add the cleaned document to the corpus
    corpus.append(feedback)

In [14]:
corpus[0:10]

['display excel camera good year',
 'batteri life also great',
 'protect phone side',
 'clear skype call long batteri life long rang',
 'great hand free devic',
 'even take self portrait outsid exterior display cool',
 'tri mani mani handsfre gadget one final work well',
 'magic help',
 'best phone market',
 'work well']

In [15]:
# Creating the Bag of Words model
from sklearn.feature_extraction.text import CountVectorizer
# Limit the vocabulary to at most 1500 terms.
cv = CountVectorizer(max_features=1500)
# NOTE(review): the vectorizer is fitted on the whole corpus here, before the
# train/test split performed in a later cell, so the vocabulary is selected
# using the test documents as well. Consider fitting on the training portion
# only and calling cv.transform() on the test portion.
# .toarray() converts the document-term matrix to a dense NumPy array.
X = cv.fit_transform(corpus).toarray()
# String class labels ('negative' / 'positive') as a NumPy array.
y = messages['target'].values

In [16]:
X.shape

(5200, 1500)

In [17]:
y.shape

(5200,)

# Training Model

In [18]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

In [19]:
# Fitting Support Vector Machine Classifier to the Training set
from sklearn.svm import SVC
# C and gamma are hard-coded; the kernel is left at the scikit-learn default.
# NOTE(review): no hyper-parameter search that produced C=10 / gamma=0.1 is
# visible in this notebook — confirm where these values come from.
model = SVC(C=10, gamma=0.1)
model.fit(X_train, y_train)

SVC(C=10, gamma=0.1)

In [20]:
# Predicting the Test set results
y_pred = model.predict(X_test)

In [21]:
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

    negative       0.84      0.88      0.86       529
    positive       0.87      0.82      0.85       511

    accuracy                           0.85      1040
   macro avg       0.85      0.85      0.85      1040
weighted avg       0.85      0.85      0.85      1040



In [22]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
# Rows are the true classes and columns the predicted classes, ordered
# 'negative' then 'positive'; each row sums to that class's support in the
# classification report.
cm = confusion_matrix(y_test, y_pred)
cm

array([[465,  64],
       [ 90, 421]], dtype=int64)

In [23]:
# NOTE(review): this cell repeats the earlier SVC fit with the same
# hyper-parameters and the same training split, so it only rebinds `model` to a
# freshly trained classifier of the same configuration. If the intent was to
# refit on the full dataset before making predictions, that is not what happens
# here — confirm, then remove or change this cell.
model = SVC(C=10, gamma=0.1)
model.fit(X_train, y_train)

SVC(C=10, gamma=0.1)

# Predictions

In [24]:
def predict_sentiment(sample_feedback):
  """Clean a raw feedback string and classify its sentiment.

  The text goes through the same steps used to build the training corpus:
  non-letters are replaced by spaces, the text is lower-cased and split on
  whitespace, English stop words are dropped and the remaining words are
  Porter-stemmed. The cleaned text is then vectorized with the notebook-level
  ``cv`` and classified with the notebook-level ``model``.

  Parameters
  ----------
  sample_feedback : str
      Raw feedback text.

  Returns
  -------
  The result of ``model.predict`` for the single cleaned document.

  Side effects
  ------------
  Prints the cleaned text and the prediction.
  """
  # Build the stop-word set once per call instead of once per word.
  english_stopwords = set(stopwords.words('english'))
  stemmer = PorterStemmer()

  cleaned = re.sub(pattern='[^a-zA-Z]', repl=' ', string=sample_feedback)
  words = cleaned.lower().split()
  final_feedback = ' '.join(
      stemmer.stem(word) for word in words if word not in english_stopwords
  )
  print(final_feedback)

  temp = cv.transform([final_feedback]).toarray()
  # Predict once and reuse the result for both the printout and the return
  # value; the original ran the classifier twice on the same input.
  prediction = model.predict(temp)
  print(prediction)
  return prediction

In [25]:
# Predicting values
sample_feedback = 'online learning is really worst it made me anxious.'
predict_sentiment(sample_feedback)

onlin learn realli worst made anxiou
['negative']


array(['negative'], dtype=object)

In [26]:
# Predicting values
sample_feedback = 'I find online learning best suited for me.'
predict_sentiment(sample_feedback)

find onlin learn best suit
['positive']


array(['positive'], dtype=object)