In [12]:
# Install the packages listed in requirements.txt; the %pip magic targets the running kernel's environment.
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [18]:
from sklearn.metrics import precision_score,recall_score,f1_score,roc_auc_score,confusion_matrix,classification_report,accuracy_score
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Reading features

In [14]:
# Load features.csv (resolved relative to the working directory) into the `train` DataFrame.
train = pd.read_csv(filepath_or_buffer="features.csv")

In [15]:
# Preview the first five rows; DataFrame.head() defaults to n=5.
train.head()

Unnamed: 0.1,Unnamed: 0,author,content,poem_name,age,type,clean_content,tokens,average_token_length,token_count,longest_token_length,empath_poem,emapth_poem_name,polarity_poem,polarity_poem_name
0,0,WILLIAM SHAKESPEARE,Let the bird of loudest lay\r\nOn the sole Ara...,The Phoenix and the Turtle,Renaissance,Mythology & Folklore,let bird loudest lay sole arabian tree herald ...,"[['let', 'bird', 'loudest', 'lay', 'sole', 'ar...",206.0,1,"['let', 'bird', 'loudest', 'lay', 'sole', 'ara...","{'help': 0.0, 'office': 0.0, 'dance': 0.009708...","{'help': 0.0, 'office': 0.0, 'dance': 0.0, 'mo...",0.132051,0.0
1,1,DUCHESS OF NEWCASTLE MARGARET CAVENDISH,"Sir Charles into my chamber coming in,\r\nWhen...",An Epilogue to the Above,Renaissance,Mythology & Folklore,sir charles chamber coming writing fairy queen...,"[['sir', 'charles', 'chamber', 'coming', 'writ...",58.0,1,"['sir', 'charles', 'chamber', 'coming', 'writi...","{'help': 0.0, 'office': 0.0, 'dance': 0.0, 'mo...","{'help': 0.0, 'office': 0.0, 'dance': 0.0, 'mo...",-0.0875,0.0
2,2,THOMAS BASTARD,"Our vice runs beyond all that old men saw,\r\n...","Book 7, Epigram 42",Renaissance,Mythology & Folklore,vice runs beyond old men saw far authentically...,"[['vice', 'runs', 'beyond', 'old', 'men', 'saw...",35.0,1,"['vice', 'runs', 'beyond', 'old', 'men', 'saw'...","{'help': 0.0, 'office': 0.0, 'dance': 0.0, 'mo...","{'help': 0.0, 'office': 0.0, 'dance': 0.0, 'mo...",0.134063,0.0
3,3,EDMUND SPENSER,"Lo I the man, whose Muse whilome did maske,\r\...","from The Faerie Queene: Book I, Canto I",Renaissance,Mythology & Folklore,lo man whose muse whilome maske time taught lo...,"[['lo', 'man', 'whose', 'muse', 'whilome', 'ma...",2481.0,1,"['lo', 'man', 'whose', 'muse', 'whilome', 'mas...","{'help': 0.0012091898428053204, 'office': 0.00...","{'help': 0.0, 'office': 0.0, 'dance': 0.0, 'mo...",0.080762,0.0
4,4,RICHARD BARNFIELD,"Long have I longd to see my love againe,\r\nSt...",Sonnet 16,Renaissance,Mythology & Folklore,long longd see love againe still wisht never c...,"[['long', 'longd', 'see', 'love', 'againe', 's...",64.0,1,"['long', 'longd', 'see', 'love', 'againe', 'st...","{'help': 0.0, 'office': 0.0, 'dance': 0.015625...","{'help': 0.0, 'office': 0.0, 'dance': 0.0, 'mo...",0.3,0.0


In [17]:
# Numeric feature columns fed to the classifiers.
train_cols = ['polarity_poem', 'polarity_poem_name', 'average_token_length', 'token_count']

# Coerce the feature columns to numbers FIRST: blanks and any other
# non-numeric text become NaN. (The previous `replace('', np.nan, regex=True)`
# used an empty regex, which matches every string, and it ran after dropna so
# the NaNs it produced were never removed.)
numeric_features = train[train_cols].apply(pd.to_numeric, errors='coerce')

# Then drop every row that still has a missing value in any column, so no NaN
# reaches model fitting. Rebinding `train` (instead of inplace=True) keeps this
# cell idempotent: re-running it on already-clean data changes nothing.
train = train.assign(
    **{col: numeric_features[col] for col in train_cols}
).dropna(how='any')

In [None]:
# Split the data into training (80%) and testing (20%) sets with a fixed seed.
# NOTE(review): the features.csv preview shown earlier in this notebook does not
# list a 'SENTIMENT_Positive' column. Fail early with an actionable message
# rather than an opaque KeyError from the column lookup; confirm where the
# label column is supposed to be created.
target_col = 'SENTIMENT_Positive'
if target_col not in train.columns:
    raise KeyError(
        "Target column {!r} not found in `train`; available columns: {}".format(
            target_col, list(train.columns)
        )
    )

train_x, test_x, train_y, test_y = train_test_split(
    train[train_cols], train[target_col], train_size=0.8, random_state=42
)

# Define the classifiers to use, keyed by the display name used in reports/plots.
# The random forest is seeded (same seed as the train/test split) so that
# repeated runs of the notebook produce the same metrics.
classifiers = {
    'SVM': svm.SVC(),
    'Random Forest': RandomForestClassifier(random_state=42)
}

# Define an empty dictionary to store the performance metrics:
# classifier name -> {metric name -> value}.
results = {}

# Train and test each classifier and store the performance metrics in the dictionary.
# precision/recall/f1 are called with their default (binary) averaging, so the
# target is assumed to have exactly two classes.
for clf_name, clf in classifiers.items():
    clf.fit(train_x, train_y)
    y_pred = clf.predict(test_x)

    # ROC AUC needs continuous scores for the positive class; feeding it the
    # hard 0/1 predictions collapses the ROC curve to a single operating point.
    # SVC exposes decision_function; RandomForestClassifier exposes
    # predict_proba (column 1 = the class with the greater label).
    if hasattr(clf, 'decision_function'):
        y_score = clf.decision_function(test_x)
    else:
        y_score = clf.predict_proba(test_x)[:, 1]

    accuracy = accuracy_score(test_y, y_pred)
    precision = precision_score(test_y, y_pred)
    recall = recall_score(test_y, y_pred)
    f1 = f1_score(test_y, y_pred)
    roc_auc = roc_auc_score(test_y, y_score)
    conf_matrix = confusion_matrix(test_y, y_pred)
    class_report = classification_report(test_y, y_pred)
    results[clf_name] = {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'roc_auc_score': roc_auc,
        'confusion_matrix': conf_matrix,
        'classification_report': class_report
    }

# Print the performance metrics for each classifier.
# Each entry pairs an output template with the key it reads from the
# per-classifier metrics dictionary; entries are printed in this order.
report_templates = (
    ("Accuracy: {:.2f}", 'accuracy'),
    ("Precision: {:.2f}", 'precision'),
    ("Recall: {:.2f}", 'recall'),
    ("F1 Score: {:.2f}", 'f1_score'),
    ("AUC-ROC: {:.2f}", 'roc_auc_score'),
    ("Confusion Matrix:\n{}", 'confusion_matrix'),
    ("Classification Report:\n{}", 'classification_report'),
)
for clf_name, clf_results in results.items():
    print("Classifier: {}".format(clf_name))
    for template, metric_key in report_templates:
        print(template.format(clf_results[metric_key]))
    # Blank lines separate one classifier's report from the next.
    print("\n\n")

# Plot the performance metrics for each classifier: one bar chart per metric,
# with one bar per classifier.
metrics = ['accuracy', 'precision', 'recall', 'f1_score', 'roc_auc_score']
classifier_names = list(results.keys())
for metric in metrics:
    values = [results[name][metric] for name in classifier_names]
    fig, ax = plt.subplots()
    ax.bar(classifier_names, values)
    ax.set_xlabel('Classifier')
    ax.set_ylabel(metric)
    ax.set_title(metric + ' Comparison')
    plt.show()