In [1]:
import numpy as np
import pandas as pd

import nltk
import string
pd.options.mode.chained_assignment = None

from bs4 import BeautifulSoup
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.externals import joblib

In [2]:
train_df = pd. read_csv('train.csv', header=None, delimiter=',', skiprows=1, names=['text','label'])
train_df.shape

(40000, 2)

In [3]:
df1 = train_df[["text"]]
df1["text"] = df1["text"].astype(str)
train_df.head()

Unnamed: 0,text,label
0,I grew up (b. 1965) watching and loving the Th...,0
1,"When I put this movie in my DVD player, and sa...",0
2,Why do people who do not know what a particula...,0
3,Even though I have great interest in Biblical ...,0
4,Im a die hard Dads Army fan and nothing will e...,1


In [4]:
df1["text"] = df1["text"].str.lower()
print(df1["text"])

0        i grew up (b. 1965) watching and loving the th...
1        when i put this movie in my dvd player, and sa...
2        why do people who do not know what a particula...
3        even though i have great interest in biblical ...
4        im a die hard dads army fan and nothing will e...
5        a terrible movie as everyone has said. what ma...
6        finally watched this shocking movie last night...
7        i caught this film on azn on cable. it sounded...
8        it may be the remake of 1987 autumn's tale aft...
9        my super ex girlfriend turned out to be a plea...
10       i can't believe people are looking for a plot ...
11       if you haven't seen the gong show tv series th...
12       i have always been a huge fan of "homicide: li...
13       greg davis and bryan daly take some crazed sta...
14       a half-hearted attempt to bring elvis presley ...
15       if you want a fun romp with loads of subtle hu...
16       i really wanted to be able to give this film a.

In [5]:
def remove_html_tags(text):
    return BeautifulSoup(text, "lxml").text

df1["text"] = df1["text"].apply(lambda text: remove_html_tags(text))
print(df1["text"])

0        i grew up (b. 1965) watching and loving the th...
1        when i put this movie in my dvd player, and sa...
2        why do people who do not know what a particula...
3        even though i have great interest in biblical ...
4        im a die hard dads army fan and nothing will e...
5        a terrible movie as everyone has said. what ma...
6        finally watched this shocking movie last night...
7        i caught this film on azn on cable. it sounded...
8        it may be the remake of 1987 autumn's tale aft...
9        my super ex girlfriend turned out to be a plea...
10       i can't believe people are looking for a plot ...
11       if you haven't seen the gong show tv series th...
12       i have always been a huge fan of "homicide: li...
13       greg davis and bryan daly take some crazed sta...
14       a half-hearted attempt to bring elvis presley ...
15       if you want a fun romp with loads of subtle hu...
16       i really wanted to be able to give this film a.

In [6]:
from nltk.corpus import stopwords
", ".join(stopwords.words('english'))

"i, me, my, myself, we, our, ours, ourselves, you, you're, you've, you'll, you'd, your, yours, yourself, yourselves, he, him, his, himself, she, she's, her, hers, herself, it, it's, its, itself, they, them, their, theirs, themselves, what, which, who, whom, this, that, that'll, these, those, am, is, are, was, were, be, been, being, have, has, had, having, do, does, did, doing, a, an, the, and, but, if, or, because, as, until, while, of, at, by, for, with, about, against, between, into, through, during, before, after, above, below, to, from, up, down, in, out, on, off, over, under, again, further, then, once, here, there, when, where, why, how, all, any, both, each, few, more, most, other, some, such, no, nor, not, only, own, same, so, than, too, very, s, t, can, will, just, don, don't, should, should've, now, d, ll, m, o, re, ve, y, ain, aren, aren't, couldn, couldn't, didn, didn't, doesn, doesn't, hadn, hadn't, hasn, hasn't, haven, haven't, isn, isn't, ma, mightn, mightn't, mustn, mus

In [7]:
STOPWORDS = set(stopwords.words('english'))
def remove_stopwords(text):
    
    return " ".join([word for word in str(text).split() if word not in STOPWORDS])

df1["text"] = df1["text"].apply(lambda text: remove_stopwords(text))
df1["text"]

0        grew (b. 1965) watching loving thunderbirds. m...
1        put movie dvd player, sat coke chips, expectat...
2        people know particular time past like feel nee...
3        even though great interest biblical movies, bo...
4        im die hard dads army fan nothing ever change ...
5        terrible movie everyone said. made laugh cameo...
6        finally watched shocking movie last night, dis...
7        caught film azn cable. sounded like would good...
8        may remake 1987 autumn's tale eleven years, di...
9        super ex girlfriend turned pleasant surprise m...
10       can't believe people looking plot film. laural...
11       seen gong show tv series like movie much all, ...
12       always huge fan "homicide: life street" heard ...
13       greg davis bryan daly take crazed statements t...
14       half-hearted attempt bring elvis presley moder...
15       want fun romp loads subtle humor, enjoy flick....
16       really wanted able give film 10. i've long tho.

In [8]:
PUNCT_TO_REMOVE = string.punctuation

def remove_punctuation(text):
    return text.translate(str.maketrans(' ', ' ', PUNCT_TO_REMOVE))

df1["text"] = df1["text"].apply(lambda text: remove_punctuation(text))
df1["text"]

0        grew b 1965 watching loving thunderbirds mates...
1        put movie dvd player sat coke chips expectatio...
2        people know particular time past like feel nee...
3        even though great interest biblical movies bor...
4        im die hard dads army fan nothing ever change ...
5        terrible movie everyone said made laugh cameo ...
6        finally watched shocking movie last night dist...
7        caught film azn cable sounded like would good ...
8        may remake 1987 autumns tale eleven years dire...
9        super ex girlfriend turned pleasant surprise m...
10       cant believe people looking plot film laural h...
11       seen gong show tv series like movie much all k...
12       always huge fan homicide life street heard reu...
13       greg davis bryan daly take crazed statements t...
14       halfhearted attempt bring elvis presley modern...
15       want fun romp loads subtle humor enjoy flicki ...
16       really wanted able give film 10 ive long thoug.

In [9]:
def remove_numbers(text):
    
    return " ".join([line for line in str(text).split() if not line.isdigit()])

df1["text"] = df1["text"].apply(lambda text: remove_numbers(text))
df1["text"]

0        grew b watching loving thunderbirds mates scho...
1        put movie dvd player sat coke chips expectatio...
2        people know particular time past like feel nee...
3        even though great interest biblical movies bor...
4        im die hard dads army fan nothing ever change ...
5        terrible movie everyone said made laugh cameo ...
6        finally watched shocking movie last night dist...
7        caught film azn cable sounded like would good ...
8        may remake autumns tale eleven years director ...
9        super ex girlfriend turned pleasant surprise m...
10       cant believe people looking plot film laural h...
11       seen gong show tv series like movie much all k...
12       always huge fan homicide life street heard reu...
13       greg davis bryan daly take crazed statements t...
14       halfhearted attempt bring elvis presley modern...
15       want fun romp loads subtle humor enjoy flicki ...
16       really wanted able give film ive long thought .

In [10]:
lemmatizer = WordNetLemmatizer()
wordnet_map = {"N":wordnet.NOUN, "V":wordnet.VERB, "J":wordnet.ADJ, "R":wordnet.ADV}

def lemmatize_words(text):
    pos_tagged_text = nltk.pos_tag(text.split())
    return " ".join([lemmatizer.lemmatize(word, wordnet_map.get(pos[0], wordnet.NOUN)) for word, pos in pos_tagged_text])

df1["text"] = df1["text"].apply(lambda text: lemmatize_words(text))
df1["text"]

0        grow b watch loving thunderbird mat school wat...
1        put movie dvd player sit coke chip expectation...
2        people know particular time past like feel nee...
3        even though great interest biblical movie bore...
4        im die hard dad army fan nothing ever change t...
5        terrible movie everyone say make laugh cameo a...
6        finally watch shock movie last night disturb m...
7        caught film azn cable sound like would good fi...
8        may remake autumns tale eleven year director m...
9        super ex girlfriend turn pleasant surprise me ...
10       cant believe people look plot film laural hard...
11       see gong show tv series like movie much all kn...
12       always huge fan homicide life street heard reu...
13       greg davis bryan daly take crazed statement te...
14       halfhearted attempt bring elvis presley modern...
15       want fun romp load subtle humor enjoy flicki u...
16       really want able give film ive long think favo.

In [11]:
vectorizer1 = TfidfVectorizer(ngram_range=(1,3), min_df=0.0015)

In [12]:
fitted_vectorizer1 = vectorizer1.fit(df1["text"])

In [2]:
tf = joblib.load('tfidf.joblib')

In [14]:
train_transform = tf.transform(df1['text'])
y1 = train_df['label']
print(train_transform)
y1

  (0, 10136)	0.14492936911205515
  (0, 10116)	0.050681302832840294
  (0, 10008)	0.09090907384025766
  (0, 9791)	0.09727953853658737
  (0, 9782)	0.1723366696007372
  (0, 9781)	0.0860486949807661
  (0, 9738)	0.12405859640011765
  (0, 9639)	0.09092944602752799
  (0, 9601)	0.13216692585732512
  (0, 9404)	0.12875022046074963
  (0, 9007)	0.10005711102126379
  (0, 8979)	0.1405541320523677
  (0, 8932)	0.10829820140620439
  (0, 8862)	0.0564046874204292
  (0, 8568)	0.06545813888284828
  (0, 8324)	0.17468501364470862
  (0, 8056)	0.0854916646741642
  (0, 7958)	0.09571357427633882
  (0, 7921)	0.040716571124056904
  (0, 7872)	0.12616965164675467
  (0, 7870)	0.09897276181919287
  (0, 7855)	0.28324460150297526
  (0, 7766)	0.13886830219332008
  (0, 7687)	0.133734911896795
  (0, 7500)	0.1341754630052528
  :	:
  (39999, 3877)	0.11611977202466821
  (39999, 3773)	0.04726456861693279
  (39999, 3381)	0.13497436327782442
  (39999, 3105)	0.13808091528038888
  (39999, 3022)	0.038378941897958305
  (39999, 2815)	

0        0
1        0
2        0
3        0
4        1
5        0
6        1
7        0
8        1
9        1
10       1
11       0
12       1
13       0
14       0
15       1
16       1
17       0
18       0
19       0
20       1
21       0
22       1
23       0
24       1
25       0
26       1
27       1
28       0
29       1
        ..
39970    1
39971    1
39972    0
39973    0
39974    0
39975    0
39976    1
39977    0
39978    0
39979    0
39980    0
39981    1
39982    1
39983    1
39984    0
39985    0
39986    1
39987    1
39988    1
39989    1
39990    1
39991    1
39992    1
39993    0
39994    0
39995    1
39996    1
39997    0
39998    1
39999    1
Name: label, Length: 40000, dtype: int64

In [15]:
train_array = train_transform.toarray()
train_array

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [10]:
class Logistic_Regression:
    
    # Defining hyper parameters
    def __init__(self, learning_rate, no_of_iterations):
        
        self.learning_rate = learning_rate
        self.no_of_iterations = no_of_iterations
    
    # Sigmoid activation function
    def sigmoid(self, z):
        
          return 1/ (1 + np.exp(-z))
      
    # Fit function for model
    def fit(self, X, Y):
        
        # m -> number of rows in dataset
        # n -> number of columns in dataset
        self.m, self.n = X.shape
        
        # initializing weights and bias model parameters
        self.weights = np.zeros(self.n)
        self.bias = 0
        
        self.X = X
        self.Y = Y

        for i in range(self.no_of_iterations):
              self.update_model_parameters()
    
    # Gradient descent for optimizing model parameters
    def update_model_parameters(self):

        Y_hat = self.sigmoid( self.X.dot(self.weights) + self.bias )  
        
        # using gradient descent optimization algorithm
        dw = (1/self.m)*np.dot(self.X.T, (Y_hat - self.Y))
        db = (1/self.m)*np.sum(Y_hat - self.Y)
        
        # updating weights and bias using learning rate and gradient descent
        self.weights = self.weights - dw * self.learning_rate
        self.bias = self.bias - self.learning_rate * db           

    # Predict function for model
    def predict(self, X):

        Y_pred = 1 / (1 + np.exp( - (X.dot(self.weights) + self.bias ) ))      
        
        # if >0.5 --> positive else negative
        Y_pred1 = np.where( Y_pred > 0.5, 1, 0)
        return Y_pred, Y_pred1

In [4]:
saved_model = joblib.load('final-log-model-09.joblib')

In [5]:
text = input("Review: ")
text = [text]

Review: This film starts out with all the moody promise of a great contemporary noir Western - after the ill-conceived opening flashback sequence anyway. The scenery is beautifully desolate, the characters achingly isolated. While some of the acting is less than believable, the plot ultimately delivers enough tension and twists to make this movie worth a look.


In [6]:
vec = TfidfVectorizer(min_df=0.0015, ngram_range=(1,3), vocabulary = tf.vocabulary_)
text_transform = vec .fit_transform(text)

In [7]:
text_array = text_transform .toarray()
text_array

array([[0., 0., 0., ..., 0., 0., 0.]])

In [8]:
pre, sentiment = saved_model .predict(text_array)
print(pre)

[0.24610154 0.32586837 0.21221989 ... 0.17531206 0.93136337 0.76484966]


In [9]:
if sentiment == 0 and pre <= 0.1 :
    print("Negative sentiment", "Score: 1", emoji.emojize(":angry_face:") )
elif sentiment == 0 and (pre > 0.2 or pre <= 0.4):
    print("Negative sentiment", "Score: 2", emoji.emojize(":disappointed_face:") )
elif sentiment == 0 and (pre <= 0.5):
    print("Negative sentiment", "Score: 3", emoji.emojize(":neutral_face:") )
elif sentiment == 1 and (pre > 0.5 or pre <= 0.7):
    print("Positive sentiment", "Score: 4", emoji.emojize(":slightly_smiling_face:") )
elif sentiment == 1 and (pre > 0.7 or pre <= 0.9):
    print("Positive sentiment", "Score: 5", emoji.emojize(":grinning_face_with_smiling_eyes:") )
else:
    print("Positive sentiment", "Score: 6", emoji.emojize(":smiling_face_with_heart_eyes:"))

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [25]:
# !pip install emoji

Collecting emoji
  Downloading https://files.pythonhosted.org/packages/17/f0/7db2f4d8651951ff4a51ee77f0ffe0b3d015cd963c941c7390b3ba9fb302/emoji-1.6.1.tar.gz (170kB)
Building wheels for collected packages: emoji
  Building wheel for emoji (setup.py): started
  Building wheel for emoji (setup.py): finished with status 'done'
  Stored in directory: C:\Users\Dell\AppData\Local\pip\Cache\wheels\84\b8\d1\9f6225ae43fac1910166a0f53f5679a0f01295513637fe2331
Successfully built emoji
Installing collected packages: emoji
Successfully installed emoji-1.6.1


In [28]:
import emoji