In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [2]:
train = pd.read_csv("Wiki_LiguisticFeatures_Train.csv")
test = pd.read_csv("Wiki_LiguisticFeatures_Test.csv")

In [3]:
print(" === TRAIN DATA ===")
print(train.shape)
print(" === TEST DATA ===")
print(test.shape)

 === TRAIN DATA ===
(510467, 29)
 === TEST DATA ===
(199992, 29)


In [4]:
# Context free features used
# columns = ['POS', 'Hedge', 'Factive', 'Assertive',
#        'Implicative', 'Report',
#        'Entailment', 'StrongSub',
#        'WeakSub', 'Polarity',
#        'Positive', 'Negative',
#        'Bias_Lexicon']

# x_train = train[columns]
# x_test = test[columns]

x_train = train.iloc[:,2:28]
x_test = test.iloc[:,2:28]
y_train = train['Label']
y_test = test['Label']

In [5]:
print("Train X: ",x_train.shape, " Train Y: ",y_train.shape)
print("Test X: ",x_test.shape, " Test Y: ",y_test.shape)

Train X:  (510467, 26)  Train Y:  (510467,)
Test X:  (199992, 26)  Test Y:  (199992,)


In [6]:
def train_LR():
    filename = "LR_Task2_TrainedModel.pkl"
    #Class Weights 
    w = {0:40, 1:60}
    logisticRegr = LogisticRegression(class_weight=w)
    logisticRegr.fit(x_train, y_train)
    with open(filename, 'wb') as f:
        pickle.dump(logisticRegr, f)

In [7]:
def test_LR():
    with open('LR_Task2_TrainedModel.pkl', 'rb') as f:
        logisticRegr = pickle.load(f)
    predictions = logisticRegr.predict(x_test)
    score = logisticRegr.score(x_test, y_test)
    print("Accuracy :",score*100)
    matrix = metrics.confusion_matrix(y_test, predictions)
    print("========= CONFUSION MATRIX =========")
    print(matrix)
    print("================ CLASSIFICATION REPORT ===============")
    classes=['Objective/0','Subjective/1']
    print(metrics.classification_report(y_test, predictions,target_names=classes))

In [8]:
train_LR()



In [9]:
test_LR()

Accuracy : 84.03786151446057
[[166677   2419]
 [ 29504   1392]]
              precision    recall  f1-score   support

 Objective/0       0.85      0.99      0.91    169096
Subjective/1       0.37      0.05      0.08     30896

   micro avg       0.84      0.84      0.84    199992
   macro avg       0.61      0.52      0.50    199992
weighted avg       0.77      0.84      0.78    199992



In [10]:
def predict_LR(sentence):
    subjectivity_scores = dict()
    words = sentence['word']
    features = sentence.iloc[:,1:27]
    with open('LR_Task2_TrainedModel.pkl', 'rb') as f:
        logisticRegr = pickle.load(f)
    predictions = logisticRegr.predict_proba(x_test)
    for i in range(0,len(words)):
        subjectivity_scores[words[i]] = predictions[i][1]
    return subjectivity_scores

In [11]:
sent_emb = []
sent_emb.append({'word':'jim','POS':11, 'POS_Prev':31, 'POS_Next':26, 'Sent_Position':0, 'Hedge':0, 'Hedge_Context':0, 'Factive':0, 'Factive_Context':0, 'Assertive':0, 'Assertive_Context':0, 'Implicative':0, 'Implicative_Context':0, 'Report':0, 'Report_Context':0, 'Entailment':0, 'Entailment_Context':0, 'StrongSub':0, 'StrongSub_Context':0, 'WeakSub':0, 'WeakSub_Context':0, 'Polarity':2, 'Positive':0, 'Positive_Context':0, 'Negative':0, 'Negative_Context':0, 'Bias_Lexicon':0})
sent_emb.append({'word':'makes','POS':30, 'POS_Prev':12, 'POS_Next':6, 'Sent_Position':0, 'Hedge':0, 'Hedge_Context':0, 'Factive':0, 'Factive_Context':0, 'Assertive':0, 'Assertive_Context':0, 'Implicative':0, 'Implicative_Context':0, 'Report':0, 'Report_Context':0, 'Entailment':0, 'Entailment_Context':0, 'StrongSub':0, 'StrongSub_Context':0, 'WeakSub':0, 'WeakSub_Context':0, 'Polarity':2, 'Positive':0, 'Positive_Context':0, 'Negative':0, 'Negative_Context':0, 'Bias_Lexicon':0})
sent_emb.append({'word':'best','POS':8, 'POS_Prev':27, 'POS_Next':11, 'Sent_Position':1, 'Hedge':0, 'Hedge_Context':0, 'Factive':0, 'Factive_Context':0, 'Assertive':0, 'Assertive_Context':0, 'Implicative':0, 'Implicative_Context':0, 'Report':0, 'Report_Context':0, 'Entailment':0, 'Entailment_Context':0, 'StrongSub':0, 'StrongSub_Context':0, 'WeakSub':0, 'WeakSub_Context':0, 'Polarity':1, 'Positive':0, 'Positive_Context':0, 'Negative':0, 'Negative_Context':0, 'Bias_Lexicon':0})
sent_emb.append({'word':'cake','POS':11, 'POS_Prev':11, 'POS_Next':11, 'Sent_Position':1, 'Hedge':0, 'Hedge_Context':0, 'Factive':0, 'Factive_Context':0, 'Assertive':0, 'Assertive_Context':0, 'Implicative':0, 'Implicative_Context':0, 'Report':0, 'Report_Context':0, 'Entailment':0, 'Entailment_Context':0, 'StrongSub':0, 'StrongSub_Context':0, 'WeakSub':0, 'WeakSub_Context':0, 'Polarity':2, 'Positive':0, 'Positive_Context':0, 'Negative':0, 'Negative_Context':0, 'Bias_Lexicon':0})
test_df = pd.DataFrame(sent_emb)
ss = predict_LR(test_df)
print(ss)

{'jim': 0.1345213103620577, 'makes': 0.15760014545373743, 'best': 0.1578157431297637, 'cake': 0.15221833688781594}
