<a href="https://colab.research.google.com/github/mimuruth-msft/NLP/blob/main/Text_Classification_1/TextClassification1_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
import nltk
#nltk.download('all')
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC
from nltk.corpus import stopwords

# Program to Classify Amazon Reviews


def pre_process_data():
    # Read csv data in to a pandas Data frame.
    data = pd.read_csv('/content/sample_data/amazon_reviews.csv')
    # Print first five results
    print(data.head())
    print(data.shape)

    # Remove stop words from the data
    stop_words = stopwords.words('english')
    vectorizer = TfidfVectorizer(stop_words=stop_words, binary=True)

    # set up X and y
    X = vectorizer.fit_transform(data.Review)
    y = data.Rating

    # Create a Test/ train set with 80% test and 20% test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8, random_state=1234)

    return X_train, X_test, y_train, y_test


def evaluate_model(model, X_test, y_test):
    # make predictions on the test data
    pred = model.predict(X_test)
    print('accuracy score: ', accuracy_score(y_test, pred))
    print('precision score: ', precision_score(y_test, pred))
    print('recall score: ', recall_score(y_test, pred))
    print('f1 score: ', f1_score(y_test, pred))


def naive_bayes(X_train, X_test, y_train, y_test):
    model = MultinomialNB()
    model.fit(X_train, y_train)
    evaluate_model(model, X_test, y_test)


def logistic_regression(X_train, X_test, y_train, y_test):
    model = LogisticRegression(class_weight='balanced')
    model.fit(X_train, y_train)
    evaluate_model(model, X_test, y_test)


def random_forest(X_train, X_test, y_train, y_test):
    model = RandomForestClassifier()
    model.fit(X_train, y_train)
    evaluate_model(model, X_test, y_test)


def svm_linear(X_train, X_test, y_train, y_test):
    model = SVC(kernel='linear', degree=8)
    model.fit(X_train, y_train)
    evaluate_model(model, X_test, y_test)


def svm_radial(X_train, X_test, y_train, y_test):
    model = SVC(kernel='poly', degree=8)
    model.fit(X_train, y_train)
    evaluate_model(model, X_test, y_test)


def keras(X_train, X_test, y_train, y_test):
    pass


if __name__ == "__main__":
    # Read Data and Analyze
    X_train, X_test, y_train, y_test = pre_process_data()

    # Step 1: Naive Bayes
    print()
    print("*********************************")
    print("Naive Bayes Algorithm")
    naive_bayes(X_train, X_test, y_train, y_test)

    # Step 2: Logistic Regression
    print()
    print("*********************************")
    print("Logistic Regression Algorithm")
    logistic_regression(X_train, X_test, y_train, y_test)

    # Step 3: Random Forest
    print()
    print("*********************************")
    print("Random Forest Algorithm")
    random_forest(X_train, X_test, y_train, y_test)
    
    # Step 4: SVM Linear Algorithm
    print()
    print("*********************************")
    print("SVM Linear Algorithm")
    svm_linear(X_train, X_test, y_train, y_test)
    
    # Step 4: SVM Radial Algorithm
    print()
    print("*********************************")
    print("SVM Radial Algorithm")
    svm_radial(X_train, X_test, y_train, y_test)
    
    # Step 4: SVM Radial Algorithm
    print()
    print("*********************************")
    print("keras")
    keras(X_train, X_test, y_train, y_test)
    
    print('\nCompleted')

   Rating                                             Review
0       2  Stuning even for the non-gamer: This sound tra...
1       2  The best soundtrack ever to anything.: I'm rea...
2       2  Amazing!: This soundtrack is my favorite music...
3       2  Excellent Soundtrack: I truly like this soundt...
4       2  Remember, Pull Your Jaw Off The Floor After He...
(3209, 2)

*********************************
Naive Bayes Algorithm
accuracy score:  0.7414330218068536
precision score:  0.6825726141078838
recall score:  0.9619883040935673
f1 score:  0.7985436893203884

*********************************
Logistic Regression Algorithm
accuracy score:  0.8769470404984424
precision score:  0.8902077151335311
recall score:  0.8771929824561403
f1 score:  0.8836524300441826

*********************************
Random Forest Algorithm
accuracy score:  0.8068535825545171
precision score:  0.7780612244897959
recall score:  0.8918128654970761
f1 score:  0.8310626702997276

*******************************