In [None]:
# Mount Google Drive (Colab only) so files under /content/drive can be read.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Reading CSV file

In [None]:
import pandas as pd
import numpy as np

# Load the review dataset from the mounted Drive folder.
data = pd.read_csv('/content/drive/MyDrive/Review.csv')


In [None]:
data # Reviews

Unnamed: 0,sentiment,review
0,Negative,I had no background knowledge of this movie be...
1,Negative,I am a huge Jane Austen fan and I ordered the ...
2,Negative,Nothing to say but Wow! Has anyone actually ha...
3,Negative,i like Jane Austin novels. I love Pride and Pr...
4,Negative,In this day and age of incredible special movi...
...,...,...
9995,Positive,I first saw this movie back in the early'90's ...
9996,Positive,"NYC, 2022: The Greenhouse effect, vanished oce..."
9997,Positive,"Those individuals familiar with Asian cinema, ..."
9998,Positive,"The kids, aged 7 to 14, got such a huge kick o..."


In [None]:
# Encode both labels in a single, non-mutating step and pin the dtype
# explicitly: the column never sits in a mixed str/int state, and the result
# does not depend on replace() downcasting an object column to integers.
# Re-running this cell is harmless (already-encoded values are left alone).
data = data.replace({'sentiment': {'Positive': 1, 'Negative': 0}}).astype({'sentiment': 'int64'})

In [None]:
# Map the remaining 'Negative' label to 0 without mutating in place, then pin
# the dtype explicitly instead of relying on replace() to downcast the object
# column. astype() fails loudly if any unencoded label is still present.
data = data.replace({'sentiment': {'Negative': 0}}).astype({'sentiment': 'int64'})

In [None]:
data

Unnamed: 0,sentiment,review
0,0,I had no background knowledge of this movie be...
1,0,I am a huge Jane Austen fan and I ordered the ...
2,0,Nothing to say but Wow! Has anyone actually ha...
3,0,i like Jane Austin novels. I love Pride and Pr...
4,0,In this day and age of incredible special movi...
...,...,...
9995,1,I first saw this movie back in the early'90's ...
9996,1,"NYC, 2022: The Greenhouse effect, vanished oce..."
9997,1,"Those individuals familiar with Asian cinema, ..."
9998,1,"The kids, aged 7 to 14, got such a huge kick o..."


The dataset is roughly balanced between the two labels

In [None]:
data['sentiment'].value_counts()

0    5081
1    4919
Name: sentiment, dtype: int64

Creating separate Series for the reviews and the labels

In [None]:
# Features: the raw review text, one string per row.
X_data = data['review']
X_data

0       I had no background knowledge of this movie be...
1       I am a huge Jane Austen fan and I ordered the ...
2       Nothing to say but Wow! Has anyone actually ha...
3       i like Jane Austin novels. I love Pride and Pr...
4       In this day and age of incredible special movi...
                              ...                        
9995    I first saw this movie back in the early'90's ...
9996    NYC, 2022: The Greenhouse effect, vanished oce...
9997    Those individuals familiar with Asian cinema, ...
9998    The kids, aged 7 to 14, got such a huge kick o...
9999    I so love this movie! The animation is great (...
Name: review, Length: 10000, dtype: object

In [None]:
# Target: the encoded sentiment label (0 = Negative, 1 = Positive).
y = data['sentiment']
y

0       0
1       0
2       0
3       0
4       0
       ..
9995    1
9996    1
9997    1
9998    1
9999    1
Name: sentiment, Length: 10000, dtype: int64

# Cleaning of the data

In [None]:
# Tokenize
# "I am a python dev" -> ["I", "am", "a", "python", "dev"]

In [None]:
from nltk.tokenize import RegexpTokenizer
# NLTK -> Tokenize -> RegexpTokenizer

In [None]:
# Stemming
# "Playing" -> "Play"
# "Working" -> "Work"

In [None]:
from nltk.stem import PorterStemmer
# NLTK -> Stem -> Porter -> PorterStemmer
import string
from nltk.corpus import stopwords
# NLTK -> Corpus -> stopwords

In [None]:
# Downloading the stopwords
import nltk
nltk.download('stopwords')
! pip install emoji==1.7

import emoji
from textblob import TextBlob

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Collecting emoji==1.7
  Downloading emoji-1.7.0.tar.gz (175 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m175.4/175.4 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: emoji
  Building wheel for emoji (setup.py) ... [?25l[?25hdone
  Created wheel for emoji: filename=emoji-1.7.0-py3-none-any.whl size=171033 sha256=112547b61d89f776ff213313fbb85e39d5d8cf2e5670f2625f5950c97e558a41
  Stored in directory: /root/.cache/pip/wheels/31/8a/8c/315c9e5d7773f74b33d5ed33f075b49c6eaeb7cedbb86e2cf8
Successfully built emoji
Installing collected packages: emoji
Successfully installed emoji-1.7.0


In [None]:
# Shared text-cleaning helpers, built once and used by getCleanedText.
tokenizer = RegexpTokenizer(r"\w+")  # a token is a run of word characters; everything else separates tokens
en_stopwords = set(stopwords.words('english'))  # a set, for fast membership tests
ps = PorterStemmer()

In [None]:
def getCleanedText(text):
  """Normalise one piece of text into a space-separated string of stemmed tokens.

  Steps: blank out emoji characters, split into word-character tokens,
  lowercase, drop English stopwords, then Porter-stem what is left.

  Args:
    text: Raw text. ``None`` and NaN yield ``""``; any other non-string value
      is converted with ``str()`` first.

  Returns:
    The cleaned text, or ``""`` when the input is missing or cleaning fails
    (the failure is printed rather than raised).
  """
  try:
    # Missing values: None, or the float NaN pandas uses for empty cells
    # (NaN is the only float that is not equal to itself).
    if text is None or (isinstance(text, float) and text != text):
      return ""

    if not isinstance(text, str):
      text = str(text)

    # In emoji 1.x UNICODE_EMOJI is keyed by language code ({'en': {...}, ...}),
    # so testing a character against the top-level dict never matches.
    # Fall back to the dict itself for releases where it was a flat mapping.
    emoji_chars = emoji.UNICODE_EMOJI.get('en', emoji.UNICODE_EMOJI)
    # Replace emoji with a space (not nothing) so "good😂movie" still splits
    # into two tokens instead of fusing into "goodmovie".
    text_without_emojis = ''.join(' ' if c in emoji_chars else c for c in text)

    # Lowercase before the stopword lookup; otherwise capitalised stopwords
    # such as "I" or "The" slip past the filter and end up as features.
    tokens = [token.lower() for token in tokenizer.tokenize(text_without_emojis)]
    kept_tokens = [
        token for token in tokens
        if token not in string.punctuation and token not in en_stopwords
    ]
    return " ".join(ps.stem(token) for token in kept_tokens)
  except Exception as e:
    # Deliberate best effort: one bad row is reported and skipped so it does
    # not abort cleaning of the whole corpus.
    print(f"Error processing text: {text}")
    print(f"Error message: {str(e)}")
    return ""


In [None]:
# Example sentence used for the sample prediction further down.
pred = ["i have happy news"]
# Run the cleaning pipeline over every training review and over the example.
X_Clean = list(map(getCleanedText, X_data))
xt_clean = list(map(getCleanedText, pred))

In [None]:
# Preview a few cleaned reviews instead of dumping the whole list into the output.
X_Clean[:3]

['i background knowledg movi i bought sound cool i want see realli kick butt vike movi awhil ala film i look i hope best instead deliv bore nordic soap opera seem drag long despit 84 minut run time the film premis intrigu enough it vike warlord defi god odin enrag curs warlord son name barek death rebirth berserk thi barek guy forc live enrag insan violent lifetim lifetim the movi film compet enough rich cinematographi quasi good perform actor i found bore question dribbl would end the filmmak chanc make someth rather entertain semi uniqu drop ball perhap could improv cheap exploit tactic thrown gratuit nuditi lot gore i mean talk berserk vike suppos bad enough rape pillag berserk suppos even extrem all unless fan the young restless etc fact insan berserk like self tortur i probabl steer clear drab piec celluloid',
 'i huge jane austen fan i order movi amazon uk i could see without wait forev come u s i realli save money what ann run wentworth the whole point ann elliot charact quiet r

# Vectorize

In [None]:
from sklearn.feature_extraction.text import CountVectorizer

In [None]:
cv = CountVectorizer()
# Default settings build a unigram bag-of-words (lowercased, tokens of 2+ word characters):
# "I am PyDev" -> "am", "pydev"

In [None]:
# Keep the document-term matrix sparse: almost every entry is 0, and a dense
# (n_reviews x vocabulary) integer array can take gigabytes of RAM.
# MultinomialNB.fit accepts the sparse matrix directly.
X = cv.fit_transform(X_Clean)

In [None]:
X

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [None]:
print(cv.get_feature_names_out())

['00' '000' '0000000000001' ... 'â¾' 'âžiâžek' 'ã¼ber']


In [None]:
# transform (not fit_transform): encode the example with the vocabulary learned from the training reviews.
Xt_vect = cv.transform(xt_clean).toarray()

In [None]:
Xt_vect

array([[0, 0, 0, ..., 0, 0, 0]])

# Multinomial Naive Bayes

In [None]:
from sklearn.naive_bayes import MultinomialNB

In [None]:
mn = MultinomialNB()

In [None]:
# NOTE(review): the classifier is fitted on every row; the cells shown here make
# no train/test split, so the model's accuracy is never measured.
mn.fit(X, y)

Sample prediction

In [None]:
y_pred = mn.predict(Xt_vect)

In [None]:
y_pred

array([1])

# Saving the model

In [None]:
import pickle

In [None]:
filename = 'trained_model.sav'
# A context manager guarantees the file is flushed and closed once the dump finishes.
# NOTE(review): only the classifier is saved. The fitted CountVectorizer `cv` is
# also needed to vectorise new text and would have to be persisted separately.
with open(filename, 'wb') as model_file:
    pickle.dump(mn, model_file)

In [None]:
# loading saved model
# Only unpickle files you created yourself: pickle.load can execute arbitrary code.
# NOTE(review): this absolute path matches the relative save path only when the
# working directory is /content (presumably the Colab default; confirm).
with open('/content/trained_model.sav', "rb") as model_file:
    loaded_model = pickle.load(model_file)

In [None]:
def predict(text):
  """Classify text with the loaded model and return a sentiment label.

  Args:
    text: A list of strings, or a single string (treated as a one-item list).
      Only the prediction for the first item is reported.

  Returns:
    'Negative tweet' if the first item is predicted as class 0, otherwise
    'Positive tweet'.
  """
  # A bare string would otherwise be iterated character by character, so the
  # returned label would describe its first letter instead of the whole text.
  if isinstance(text, str):
    text = [text]

  X_new = [getCleanedText(i) for i in text]
  X_pred = cv.transform(X_new).toarray()
  prediction = loaded_model.predict(X_pred)

  if prediction[0] == 0:
    return 'Negative tweet'
  return 'Positive tweet'

In [None]:
predict(['Leo has wonderful bgm score'])

'Positive tweet'

In [None]:
predict(['his performance is legendary'])

'Positive tweet'

In [None]:
predict(['its unbelievable that in the 21st century wed need something like this. again. #neverump  #xenophobia '])

'Negative tweet'

In [None]:
predict(['The Government Media Office in Gaza says over 700 Palestinians were killed by Israeli airstrikes in the besieged Gaza Strip over the last 24 hours'])

'Negative tweet'

In [None]:
predict(['He is a Poor child lost his father and his brother, after IOF bombed their house in Gaza'])

'Negative tweet'

In [None]:
predict(["desai is a sore loser ,😂😂🤣🤣🤜🏼🤥😞😋😔🙁 he cannot surviv"])

'Negative tweet'

In [None]:
predict(['Yamazaki gives G-fans plenty of reasons to see “Godzilla Minus One” in theaters. He’s got a clear eye for action and a firm grasp on feel-good, saber-rattling melodrama. '])

'Positive tweet'

In [None]:
predict(['not satisfied'])

'Positive tweet'

In [None]:
predict(['I am not feeling well'])

'Positive tweet'

In [None]:
predict(['i dont want to live anymore'])

'Negative tweet'

In [None]:
predict