In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [2]:
# Load the train and test splits; each CSV is read into its own DataFrame.
train, test = (pd.read_csv(csv_path) for csv_path in ("train_emb.csv", "test_emb.csv"))

In [3]:
def get_embeddings(emb):
    """Parse stringified embedding vectors into float arrays.

    Parameters
    ----------
    emb : iterable of str
        Each item is a comma-separated list of numbers wrapped in square
        brackets, e.g. ``"[0.12, -0.5, 3.0]"``.

    Returns
    -------
    list of numpy.ndarray
        One 1-D float array per input string, in input order.

    Raises
    ------
    ValueError
        If any token of a vector cannot be converted to a float.
    """
    embedding = []
    for raw in emb:
        # Strip the enclosing brackets from the whole string rather than from
        # fixed positions 0 and 99, so vectors of any dimensionality parse.
        tokens = [tok.strip() for tok in raw.strip().strip('[]').split(',')]
        # The builtin float replaces the np.float alias, which was removed
        # from NumPy in 1.24.
        embedding.append(np.array(tokens).astype(float))
    return embedding

In [4]:
# Feature matrices are parsed from the 'embedding' column of each split;
# the 'label' column is used directly as the target.
x_train, x_test = (
    np.array(get_embeddings(split['embedding'])) for split in (train, test)
)
y_train, y_test = train['label'], test['label']

In [5]:
# Sanity check: print the feature and label shapes of each split so a row
# mismatch between X and Y is visible before training.
print("Train X: ",x_train.shape, " Train Y: ",y_train.shape)
print("Test X: ",x_test.shape, " Test Y: ",y_test.shape)

Train X:  (234311, 100)  Train Y:  (234311,)
Test X:  (58579, 100)  Test Y:  (58579,)


In [6]:
def train_LR():
    """Fit a logistic-regression classifier and pickle it to disk.

    Reads the notebook-level ``x_train`` and ``y_train`` and writes the fitted
    estimator to ``LR_Task1_TrainedModel.pkl`` in the working directory.
    Returns nothing.
    """
    # fit() returns the estimator itself, so construction and fitting chain.
    model = LogisticRegression().fit(x_train, y_train)
    with open("LR_Task1_TrainedModel.pkl", 'wb') as out_file:
        pickle.dump(model, out_file)

In [7]:
def test_LR():
    """Evaluate the pickled logistic-regression model on the test split.

    Loads ``LR_Task1_TrainedModel.pkl``, predicts on the notebook-level
    ``x_test`` and prints the accuracy, the confusion matrix and the
    per-class classification report against ``y_test``. Returns nothing.
    """
    # NOTE: pickle.load executes arbitrary code from the file; only load a
    # model file that this notebook (or another trusted source) produced.
    with open('LR_Task1_TrainedModel.pkl', 'rb') as f:
        logisticRegr = pickle.load(f)
    predictions = logisticRegr.predict(x_test)
    # Derive accuracy from the predictions already computed; the estimator's
    # score() would run predict() over the whole test set a second time.
    score = metrics.accuracy_score(y_test, predictions)
    print("Accuracy :",score*100, ' %')
    cm = metrics.confusion_matrix(y_test, predictions)
    print("========= CONFUSION MATRIX =========")
    print(cm)
    print("================ CLASSIFICATION REPORT ===============")
    classes=['Objective/0','Subjective/1']
    print(metrics.classification_report(y_test, predictions,target_names=classes))

In [8]:
# Fit the classifier on the training split and write the pickled model to disk.
train_LR()



In [9]:
# Reload the pickled model and print its accuracy, confusion matrix and
# classification report on the test split.
test_LR()

Accuracy : 66.97792724355143  %
[[ 4332 16639]
 [ 2705 34903]]
              precision    recall  f1-score   support

 Objective/0       0.62      0.21      0.31     20971
Subjective/1       0.68      0.93      0.78     37608

   micro avg       0.67      0.67      0.67     58579
   macro avg       0.65      0.57      0.55     58579
weighted avg       0.66      0.67      0.61     58579



In [10]:
def predict_LR(sent_embedding):
    """Predict the class of one or more stringified sentence embeddings.

    Parameters
    ----------
    sent_embedding : str or iterable of str
        A bracketed, comma-separated embedding string, or a collection of
        them, in the format accepted by ``get_embeddings``.

    Returns
    -------
    numpy.ndarray
        The predicted label for each supplied embedding; also printed.
    """
    # A bare string would be iterated character by character by the parser,
    # so wrap a single embedding into a one-element list first.
    if isinstance(sent_embedding, str):
        sent_embedding = [sent_embedding]
    # The parsing helper is get_embeddings; the former get_embedding call
    # referenced an undefined name and raised NameError.
    embedding = np.array(get_embeddings(sent_embedding))
    # NOTE: pickle.load executes arbitrary code from the file; only load a
    # model file that this notebook (or another trusted source) produced.
    with open('LR_Task1_TrainedModel.pkl', 'rb') as f:
        logisticRegr = pickle.load(f)
    predicted_class = logisticRegr.predict(embedding)
    print("Sentence Class is : ", predicted_class)
    return predicted_class