# Importing Libraries

In [43]:
%%capture
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pdb
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

# Importing Dataset

In [2]:
dataset = pd.read_csv('all_reviews.tsv', delimiter = '\t', quoting = 3)

# Data Pre-processing

In [3]:
corpus = []

for i in range(0, len(dataset)):
    try:
        review = re.sub('[^a-zA-Z]', ' ', dataset['Reivew'][i])
        review = review.lower()
        review = review.split()
        ps = PorterStemmer()
        review = [ps.stem(word) for word in review if not word in set(stopwords.words('english'))]
        review = ' '.join(review)
    except:
        review = str(dataset['Reivew'][i])

    corpus.append(review)

# Splitting the Dataset

In [4]:
cv = CountVectorizer()
X = cv.fit_transform(corpus).toarray()
y = dataset.iloc[:, 1].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

# Applying ML Classification Algorithms

## Decision Tree Classifier

In [5]:
dt_classifier = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
dt_classifier.fit(X_train, y_train)
dt_y_pred = dt_classifier.predict(X_test)

## Random Forest Classifier

In [6]:
rf_classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
rf_classifier.fit(X_train, y_train)
rf_y_pred = rf_classifier.predict(X_test)

## Support Vector Machine Classifier

In [7]:
svm_classifier = SVC(kernel = 'linear', random_state = 0)
svm_classifier.fit(X_train, y_train)
svm_y_pred = svm_classifier.predict(X_test)

## Naive Bayes Classifier

In [8]:
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)
nb_y_pred = nb_classifier.predict(X_test)

## KNN Classifier

In [9]:
knn_classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
knn_classifier.fit(X_train, y_train)
knn_y_pred = knn_classifier.predict(X_test)

# Results

In [10]:
dt_cm = confusion_matrix(y_test, dt_y_pred)
accuracy_score(y_test, dt_y_pred)
precision_score(y_test, dt_y_pred)
recall_score(y_test, dt_y_pred)
f1_score(y_test, dt_y_pred)

In [25]:
rf_cm = confusion_matrix(y_test, rf_y_pred)
accuracy_score(y_test, rf_y_pred)
precision_score(y_test, rf_y_pred)
recall_score(y_test, rf_y_pred)
f1_score(y_test, rf_y_pred)

In [None]:
svm_cm = confusion_matrix(y_test, svm_y_pred)
accuracy_score(y_test, svm_y_pred)
precision_score(y_test, svm_y_pred)
recall_score(y_test, svm_y_pred)
f1_score(y_test, svm_y_pred)

In [None]:
nb_m = confusion_matrix(y_test, nb_y_pred)
accuracy_score(y_test, nb_y_pred)
precision_score(y_test, nb_y_pred)
recall_score(y_test, nb_y_pred)
f1_score(y_test, nb_y_pred)

In [45]:
knn_cm = confusion_matrix(y_test, knn_y_pred)
accuracy_score(y_test, knn_y_pred)
precision_score(y_test, knn_y_pred)
recall_score(y_test, knn_y_pred)
f1_score(y_test, knn_y_pred)

0.8733087330873308