In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('Datafiniti_Amazon_Consumer_Reviews_of_Amazon_Products.csv', usecols = ['reviews.text', 'reviews.rating'])

In [3]:
df.sample(5)

Unnamed: 0,reviews.rating,reviews.text
2842,5,They had the tablet in stock and it is nice th...
3809,5,Bought this for my daughter. She loves it. Wif...
2329,4,Nice reader. Light weight. Clear display! Port...
918,5,"This is a must have is great for recepies, whi..."
1047,5,Bought the Amazon - Echo Plus + Philips Hue Bu...


In [4]:
df.head()

Unnamed: 0,reviews.rating,reviews.text
0,3,I thought it would be as big as small paper bu...
1,5,This kindle is light and easy to use especiall...
2,4,Didnt know how much i'd use a kindle so went f...
3,5,I am 100 happy with my purchase. I caught it o...
4,5,Solid entry level Kindle. Great for kids. Gift...


In [5]:
df['reviews.rating'].value_counts()

5    3478
4    1208
3     197
1      63
2      54
Name: reviews.rating, dtype: int64

In [6]:
import re
df['reviews.text']=df['reviews.text'].apply(str)
df['reviews.text'] = df['reviews.text'].str.lower()
df['reviews.text'] = df['reviews.text'].apply(lambda elem: re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?", "", elem))  
# remove numbers
df['reviews.text'] = df['reviews.text'].apply(lambda elem: re.sub(r"\d+", "", elem))
    
df.head() 

Unnamed: 0,reviews.rating,reviews.text
0,3,i thought it would be as big as small paper bu...
1,5,this kindle is light and easy to use especiall...
2,4,didnt know how much id use a kindle so went fo...
3,5,i am happy with my purchase i caught it on sa...
4,5,solid entry level kindle great for kids gifted...


In [7]:
#TFIDF and Linear SVM
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import classification_report

In [8]:
tfidf = TfidfVectorizer(max_features=20000, ngram_range=(1,5), analyzer='char')

In [9]:
X = tfidf.fit_transform(df['reviews.text'])
y = df['reviews.rating']

In [10]:
X.shape, y.shape

((5000, 20000), (5000,))

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [12]:
X_train.shape

(4000, 20000)

In [13]:
clf = LinearSVC(C = 30, class_weight = 'balanced')
clf.fit(X_train, y_train)



LinearSVC(C=30, class_weight='balanced')

In [14]:
y_pred = clf.predict(X_test)

In [15]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           1       0.75      0.50      0.60         6
           2       0.00      0.00      0.00        10
           3       0.57      0.32      0.41        41
           4       0.55      0.52      0.54       272
           5       0.80      0.85      0.82       671

    accuracy                           0.73      1000
   macro avg       0.53      0.44      0.47      1000
weighted avg       0.71      0.73      0.72      1000



In [16]:
x = 'this is a bad product i returned it'
vec = tfidf.transform([x])
clf.predict(vec)

array([1], dtype=int64)

In [17]:
m = 'this is bad product'
vec = tfidf.transform([m])
clf.predict(vec)

array([1], dtype=int64)

In [18]:
m = 'this is good product'
vec = tfidf.transform([m])
clf.predict(vec)

array([4], dtype=int64)

In [19]:
m = 'this is an excellent product'
vec = tfidf.transform([m])
clf.predict(vec)

array([5], dtype=int64)

In [20]:
m = 'this is nice product'
vec = tfidf.transform([m])
clf.predict(vec)

array([5], dtype=int64)

In [None]:
# Build a python program to create GUI in python using tkinter.

import tkinter as tk
import tkinter.messagebox
# Top level window
frame = tk.Tk()
frame.title("NLP sentiment Analysis")
frame.geometry('500x420')
frame.configure(bg='yellow',padx = 50, pady = 50)
# Function for getting Input
# from textbox and printing it 
# at label widget

l=tk.Label(frame, text="E Commerce Product Rating Based On", fg='green',bg='yellow', font=("Helvetica", 14))
l.pack() 
l=tk.Label(frame, text="Customer Review Mining", fg='green',bg='yellow', font=("Helvetica", 14))
l.pack() 
# just added to give space between them
l=tk.Label(frame, text=" ", fg='yellow',bg='yellow', font=("Helvetica", 18))
l.pack()

l=tk.Label(frame, text="Enter Your Review", fg='blue',bg='yellow', font=("Helvetica", 18))
l.pack()
l=tk.Label(frame, text="", fg='yellow',bg='yellow', font=("Helvetica", 4))
l.pack()
def onClick():
    inp = inputtxt.get(1.0, "end-1c")
    vec = tfidf.transform([inp])
    msg = clf.predict(vec)
    tkinter.messagebox.showinfo('Rating',msg) 
# TextBox Creation
inputtxt = tk.Text(frame,
                   height = 3,
                   width = 30, bd=3, font=("Helvetica", 13))
  
inputtxt.pack()
# just added to give space between them
l=tk.Label(frame, text=" ", fg='yellow',bg='yellow', font=("Helvetica", 18))
l.pack() 
# Button Creation
printButton = tk.Button(frame,
                        text = "submit", 
                        command = onClick, height=1, width = 15, bg="blue", fg='white', font = ("Helvetica", 13))
printButton.pack()
  
# Label Creation
frame.mainloop()