<a href="https://colab.research.google.com/github/meghsha1400/MAJOR-PROJECT-Sentiment-Analysis-/blob/main/ML_Final_MAJOR_PROJECT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
!pip install nltk



In [15]:
import pickle
import re

import nltk
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline

# --- Load data ---------------------------------------------------------------
# NOTE: the absolute path assumes Google Drive is already mounted at
# /content/drive in the Colab runtime.
df = pd.read_csv('/content/drive/My Drive/Restaurant_Reviews.csv')
# Kept from the original cell; the stop-word list is NOT applied below.
nltk.download('stopwords')

# --- Clean text --------------------------------------------------------------
# Each review is reduced to letters only, lower-cased, split on whitespace and
# Porter-stemmed. The original loop also built a stop-word-filtered token list
# but immediately overwrote it with the unfiltered one, so stop words were
# never actually removed. That effective behaviour is preserved (dropping
# English stop words would also drop negations such as "not"); only the dead,
# per-word `set(stopwords.words(...))` computation is gone.
ps = PorterStemmer()  # one stemmer is enough; it was re-created per review
corpus = []
# Iterate over the column itself instead of a hard-coded range(0, 1000) so the
# corpus always has exactly one entry per row of `df` (and per label in `y`).
for review in df['Review']:
    letters_only = re.sub(pattern='[^a-zA-Z]', repl=' ', string=review)
    tokens = letters_only.lower().split()
    corpus.append(' '.join(ps.stem(token) for token in tokens))

# --- Vectorize ---------------------------------------------------------------
# Keep at most 1500 TF-IDF features; the dense array is what the model is fed.
tfidf = TfidfVectorizer(max_features=1500)
x = tfidf.fit_transform(corpus).toarray()
y = df.iloc[:, 1].values  # second column holds the labels

# --- Train / evaluate split --------------------------------------------------
# random_state is fixed so the split (and the metrics below) are reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

text_model = MultinomialNB(alpha=0.2)
text_model.fit(x_train, y_train)
y_pred = text_model.predict(x_test)

# --- Persist artefacts for app.py ---------------------------------------------
# Context managers close the files deterministically, so the pickles are fully
# flushed to disk before the Streamlit app tries to read them.
with open('tfidf-transform.pkl', 'wb') as vectorizer_file:
    pickle.dump(tfidf, vectorizer_file)
filename = 'restaurant-review.pkl'
with open(filename, 'wb') as model_file:
    pickle.dump(text_model, model_file)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [16]:
# Summarise the frame: row count, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Review  1000 non-null   object
 1   Liked   1000 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 15.8+ KB


In [18]:
# List the distinct values present in the `Liked` label column.
df['Liked'].unique()

array([1, 0])

In [None]:
# Preview the first rows of the reviews table.
df.head()

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [19]:
# Count occurrences of each exact review text; counts above 1 are duplicates.
df["Review"].value_counts()

I won't be back.                                                                     2
I would not recommend this place.                                                    2
The food was terrible.                                                               2
I love this place.                                                                   2
I consider this theft.                                                               1
                                                                                    ..
Not a weekly haunt, but definitely a place to come back to every once in a while.    1
This place lacked style!!                                                            1
I loved the grilled pizza, reminded me of legit Italian pizza.                       1
The pancake was also really good and pretty large at that.                           1
Their daily specials are always a hit with my group.                                 1
Name: Review, Length: 996, dtype: int64

In [20]:
# (rows, columns) of the loaded dataset.
df.shape

(1000, 2)

In [21]:
# Inspect the first 20 cleaned (lower-cased, stemmed) reviews.
corpus[0:20]

['wow love thi place',
 'crust is not good',
 'not tasti and the textur wa just nasti',
 'stop by dure the late may bank holiday off rick steve recommend and love it',
 'the select on the menu wa great and so were the price',
 'now i am get angri and i want my damn pho',
 'honeslti it didn t tast that fresh',
 'the potato were like rubber and you could tell they had been made up ahead of time be kept under a warmer',
 'the fri were great too',
 'a great touch',
 'servic wa veri prompt',
 'would not go back',
 'the cashier had no care what so ever on what i had to say it still end up be wayyy overpr',
 'i tri the cape cod ravoli chicken with cranberri mmmm',
 'i wa disgust becaus i wa pretti sure that wa human hair',
 'i wa shock becaus no sign indic cash onli',
 'highli recommend',
 'waitress wa a littl slow in servic',
 'thi place is not worth your time let alon vega',
 'did not like at all']

In [22]:
# Fraction of held-out reviews whose predicted label equals the true label.
accuracy_score(y_test,y_pred)

0.772

In [23]:
# Counts of (true label, predicted label) pairs on the test split;
# true labels are passed first, predictions second.
confusion_matrix(y_test,y_pred)

array([[ 89,  28],
       [ 29, 104]])

In [24]:
# Cross-tabulate actual vs. predicted labels with named axes;
# margins=True appends the "All" row/column totals.
pd.crosstab(y_test,y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True)

Predicted,0,1,All
Actual,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,89,28,117
1,29,104,133
All,118,132,250


In [25]:
# Per-class precision / recall / F1 on the test split. The report is a
# multi-line string, so print() is used to render its line breaks.
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.75      0.76      0.76       117
           1       0.79      0.78      0.78       133

    accuracy                           0.77       250
   macro avg       0.77      0.77      0.77       250
weighted avg       0.77      0.77      0.77       250



In [26]:

%%writefile app.py
import numpy as np
import pickle
import pandas as pd
#from flasgger import Swagger
import streamlit as st 

from PIL import Image

#app=Flask(__name__)
#Swagger(app)
# Load the fitted classifier and TF-IDF vectorizer from the working directory.
# Context managers close the file handles as soon as unpickling finishes.
# NOTE(security): pickle.load can execute arbitrary code from the file; only
# load pickles that you produced yourself.
filename = 'restaurant-review.pkl'
with open(filename, 'rb') as model_file:
    classifier = pickle.load(model_file)
with open('tfidf-transform.pkl', 'rb') as vectorizer_file:
    tfidf = pickle.load(vectorizer_file)


#@app.route('/')
def welcome():
    """Return the fixed greeting string."""
    greeting = "Welcome All"
    return greeting

#@app.route('/predict',methods=["Get"])
def predict_note(message):
    """Classify one review with the pickled TF-IDF vectorizer and classifier.

    Before vectorizing, the message gets the same normalisation the training
    corpus received: every non-letter becomes a space, the text is
    lower-cased, and each token is Porter-stemmed. The vectorizer was fitted
    on stemmed text, so its vocabulary consists of stems (e.g. "tasti");
    passing raw text would silently miss those features.

    Args:
        message: Raw review text (str) entered by the user.

    Returns:
        The array produced by ``classifier.predict`` for the single-row input.
    """
    # Local imports: app.py does not import these at module level.
    import re
    from nltk.stem.porter import PorterStemmer

    stemmer = PorterStemmer()
    letters_only = re.sub('[^a-zA-Z]', ' ', message)
    cleaned = ' '.join(stemmer.stem(token) for token in letters_only.lower().split())

    vect = tfidf.transform([cleaned]).toarray()
    prediction = classifier.predict(vect)
    return prediction



def main():
    """Render the Streamlit page and show a prediction when one is requested.

    The page has a title, a short description, a styled HTML banner, a text
    area for the review and a "Predict" button. A result is displayed only
    after the button is pressed with non-blank text.
    """
    st.title("Restaurant Review Classifier")
    st.subheader('TF-IDF Vectorizer')
    st.write('This project is based on Naive Bayes Classifier')
    html_temp = """
    <div style="background-color:tomato;padding:10px">
    <h2 style="color:white;text-align:center;">Restaurant Review Classifier ML App </h2>
    </div>
    """
    # unsafe_allow_html is needed for the raw <div>; the markup is a constant,
    # not user input.
    st.markdown(html_temp,unsafe_allow_html=True)
    message = st.text_area("Enter Text","Type Here ..")

    # Report a result only once the user asks for one. Previously the success
    # box was drawn on every render, showing "The output is " with no value
    # before any prediction had been made.
    if st.button("Predict"):
        if message.strip():
            result = predict_note(message)
            st.success('The output is {}'.format(result))
        else:
            st.warning('Please enter a review to classify.')


if __name__=='__main__':
    main()

Writing app.py


In [1]:
!pip install streamlit



In [5]:
!streamlit run app.py

[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.2:8501[0m
[34m  External URL: [0m[1mhttp://104.155.226.57:8501[0m
[0m
[1]
[34m  Stopping...[0m


In [3]:
!pip install pyngrok

Collecting pyngrok
  Downloading https://files.pythonhosted.org/packages/1e/6d/61ab40903e9337f87faf68d06ada6694f627c554b2f23e6e73b3e0271bbd/pyngrok-4.2.1.tar.gz
Building wheels for collected packages: pyngrok
  Building wheel for pyngrok (setup.py) ... [?25l[?25hdone
  Created wheel for pyngrok: filename=pyngrok-4.2.1-cp36-none-any.whl size=18022 sha256=581763717c3f6246dbe298b5f14d8b4d454655471693537542e6670f7ffd847c
  Stored in directory: /root/.cache/pip/wheels/5c/5d/83/e50a6ff70c80588b4fd970e8bf09936ecd460f0562676207aa
Successfully built pyngrok
Installing collected packages: pyngrok
Successfully installed pyngrok-4.2.1


In [4]:
from pyngrok import ngrok
# Open an ngrok tunnel to local port 8501, the port the Streamlit server
# reports in its startup output. The app must already be running for the
# tunnel to serve anything.
url = ngrok.connect(port='8501')
# Bare expression so the notebook displays the value returned by connect().
url



  


'http://1bdec660c811.ngrok.io'