# Part 3: Applying Model to Siskel & Ebert Reviews

Applying model from Part 2 to predict "Rotten" or "Fresh" reviews from reknown critics, Robert Ebert and Gene Siskel.

Data copied from Rotten Tomatoes site.

In [6]:
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn import svm

## Recreate Best Model - SVM

In [3]:
df = pd.read_csv('../data/rt_new_features.csv')
df.head(3)

Unnamed: 0,rt_review,rt_target,char_length,word_count,rt_label,afinn_score,afinn_type
0,"simplistic , silly and tedious .",0,33,6,Rotten,-1.0,Negative
1,"it's so laddish and juvenile , only teenage bo...",0,80,15,Rotten,4.0,Positive
2,exploitative and largely devoid of the depth o...,0,138,22,Rotten,-3.0,Negative


In [4]:
X = df['rt_review']
y = df['rt_label']
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=42, stratify=y, test_size=0.15)

In [7]:
pipe_tfv_svm = Pipeline([
    ('tfv',TfidfVectorizer()),
    ('svm', svm.SVC(C=1.0, kernel='linear', degree=3, gamma='auto'))]
)

pipe_tfv_svm.fit(X_train,y_train)

Pipeline(steps=[('tfv', TfidfVectorizer()),
                ('svm', SVC(gamma='auto', kernel='linear'))])

## Applying the Model to Siskel & Ebert Review

In [9]:
sisbert = pd.read_csv('../data/siskel_ebert.csv', encoding="ISO-8859-1")

In [10]:
sisbert.head(5)

Unnamed: 0,rt_label,critic,tomato_meter,tomato_perc,title_year,review
0,Rotten,Robert Ebert,1.0,--,"Like Father, Like SonÊ(1987)",[This] is one of the most desperate comedies I...
1,Rotten,Robert Ebert,1.0,0.81,Real LifeÊ(1979),A great idea. But the movie that Albert Brooks...
2,Rotten,Robert Ebert,1.0,0.57,Garbo TalksÊ(1984),Garbo Talks started out as a great idea for a ...
3,Rotten,Robert Ebert,1.0,--,"I, A Woman, Part IIÊ(1968)","If you can miss only one movie this year, make..."
4,Rotten,Robert Ebert,1.0,--,The Immortal BachelorÊ(1975),It assembles characters of such incredible stu...


In [11]:
X_sisbert = sisbert['review']
y_sisbsert = sisbert['rt_label']

In [15]:
sisbert_preds = pipe_tfv_svm.predict(X_sisbert)
sisbert['predict'] = sisbert_preds

In [17]:
sisbert['outcome'] = sisbert['predict'] == sisbert['rt_label']
sisbert.head(5)

Unnamed: 0,rt_label,critic,tomato_meter,tomato_perc,title_year,review,predict,outcome
0,Rotten,Robert Ebert,1.0,--,"Like Father, Like SonÊ(1987)",[This] is one of the most desperate comedies I...,Rotten,True
1,Rotten,Robert Ebert,1.0,0.81,Real LifeÊ(1979),A great idea. But the movie that Albert Brooks...,Rotten,True
2,Rotten,Robert Ebert,1.0,0.57,Garbo TalksÊ(1984),Garbo Talks started out as a great idea for a ...,Rotten,True
3,Rotten,Robert Ebert,1.0,--,"I, A Woman, Part IIÊ(1968)","If you can miss only one movie this year, make...",Rotten,True
4,Rotten,Robert Ebert,1.0,--,The Immortal BachelorÊ(1975),It assembles characters of such incredible stu...,Rotten,True


In [18]:
sisbert.outcome.value_counts()

True     186
False     96
Name: outcome, dtype: int64

In [19]:
sisbert.outcome.value_counts(normalize=True)

True     0.659574
False    0.340426
Name: outcome, dtype: float64