In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score, recall_score, f1_score, cohen_kappa_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB, BernoulliNB, CategoricalNB
from sklearn.neighbors import KNeighborsClassifier

In [2]:
# Load the preprocessed reviews and discard every row that has a missing value.
# The path uses a forward slash: the previous 'data\preprocessed.csv' relied on
# the invalid escape sequence '\p' (a SyntaxWarning on recent Python versions)
# and only resolved on Windows, whereas 'data/...' works on every platform.
data = pd.read_csv('data/preprocessed.csv').dropna()

In [3]:
# Model input (X) is the review-text column; the prediction target (y) is the
# category label column of the same frame.
X, y = data["Reviews_cleaned"], data["Category"]

In [4]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both partitions, and the fixed seed makes the split
# reproducible across runs.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# lowercase=False: the vectorizer performs no case folding of its own
# (presumably "Reviews_cleaned" is already normalised upstream -- TODO confirm).
vectorizer = TfidfVectorizer(lowercase=False)

# Learn the vocabulary and IDF weights from the training text only, so nothing
# from the held-out set influences the features. fit_transform does the fit and
# the transform in a single pass instead of tokenizing the training text twice.
# The raw text stays available under *_text; X_train / X_test hold the TF-IDF
# matrices that the model cells consume.
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

In [5]:
# Logistic-regression baseline trained on the TF-IDF features.
model_logreg = LogisticRegression(random_state=0)
model_logreg.fit(X_train, y_train)

y_true_logreg = y_test
y_pred_logreg = model_logreg.predict(X_test)

# Compute the scalar metrics up front so the reporting below is print-only.
# f1_score is called with its defaults, i.e. it scores the positive class.
kappa_logreg = cohen_kappa_score(y_true_logreg, y_pred_logreg)
f1_logreg = f1_score(y_true_logreg, y_pred_logreg)

print('Wyniki dla regresji logistycznej:')
print(classification_report(y_true_logreg, y_pred_logreg))
print(f'cohen_kappa_score: {kappa_logreg}')
print(f'f1_score: {f1_logreg}')

Wyniki dla regresji logistycznej:
              precision    recall  f1-score   support

         0.0       0.81      0.48      0.61      2509
         1.0       0.80      0.95      0.87      5631

    accuracy                           0.81      8140
   macro avg       0.81      0.72      0.74      8140
weighted avg       0.81      0.81      0.79      8140

cohen_kappa_score: 0.4878045687718262
f1_score: 0.871498371335505


In [6]:
# k-nearest-neighbours classifier using the 20 nearest training samples.
# fit() returns the estimator itself, so construction and fitting are chained.
model_knn = KNeighborsClassifier(n_neighbors=20).fit(X_train, y_train)
y_true_knn, y_pred_knn = y_test, model_knn.predict(X_test)

# Header, per-class report and the two scalar metrics, one item per line.
print(
    'Wyniki dla KNN:',
    classification_report(y_true_knn, y_pred_knn),
    f'cohen_kappa_score: {cohen_kappa_score(y_true_knn, y_pred_knn)}',
    f'f1_score: {f1_score(y_true_knn, y_pred_knn)}',
    sep='\n',
)

Wyniki dla KNN:
              precision    recall  f1-score   support

         0.0       0.58      0.53      0.55      2509
         1.0       0.80      0.83      0.81      5631

    accuracy                           0.74      8140
   macro avg       0.69      0.68      0.68      8140
weighted avg       0.73      0.74      0.73      8140

cohen_kappa_score: 0.36633637006145825
f1_score: 0.8121836271600629


In [7]:
# Bernoulli naive Bayes baseline. With its default settings BernoulliNB
# binarizes the input at 0.0, so the TF-IDF weights are effectively treated as
# "term present / term absent" indicators.
model_nb = BernoulliNB().fit(X_train, y_train)

y_true_nb = y_test
y_pred_nb = model_nb.predict(X_test)

# Assemble the report first, then emit it with a single print call.
report_lines_nb = [
    'Wyniki dla NB:',
    classification_report(y_true_nb, y_pred_nb),
    f'cohen_kappa_score: {cohen_kappa_score(y_true_nb, y_pred_nb)}',
    f'f1_score: {f1_score(y_true_nb, y_pred_nb)}',
]
print('\n'.join(report_lines_nb))

Wyniki dla NB:
              precision    recall  f1-score   support

         0.0       0.81      0.46      0.59      2509
         1.0       0.80      0.95      0.87      5631

    accuracy                           0.80      8140
   macro avg       0.81      0.71      0.73      8140
weighted avg       0.80      0.80      0.78      8140

cohen_kappa_score: 0.4719161640950392
f1_score: 0.8691141907772104
