In [7]:
import pickle
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

df = pd.read_csv('output2.csv')

vectoriser = CountVectorizer()
X_vectorized = vectoriser.fit_transform(df['processed_statement'])

transformer = TfidfTransformer()
tfidf_data = transformer.fit_transform(X_vectorized)

X_train, X_test, Y_train, Y_test = train_test_split(tfidf_data, df['target'], test_size=0.2, random_state=42)

lr = LogisticRegression()
lr.fit(X_train, Y_train)

Y_pred = lr.predict(X_test)
acc = accuracy_score(Y_pred, Y_test)
pre = precision_score(Y_pred, Y_test, average='weighted')
rec = recall_score(Y_pred, Y_test, average='weighted')

print(f"Accuracy: {acc}")
print(f"Precision: {pre}")
print(f"Recall: {rec}")

with open('model.pkl', 'wb') as model_file:
    pickle.dump(lr, model_file)

with open('count_vectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(vectoriser, vectorizer_file)

Accuracy: 0.8199329983249581
Precision: 0.9186892027082558
Recall: 0.8199329983249581
