In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB, GaussianNB
from sklearn.metrics import roc_auc_score

For the accompanying Django code, see https://github.com/myothida/django_restapi_aionline.git


Target result (what I want this project to look like): https://monkeylearn.com/nps-feedback-analysis-online/

In [4]:
# Read the CSV without a header row (header=None) and assign the two
# column names explicitly.
names = ['label', 'news']
df = pd.read_csv(
    './news_data.csv',
    names=names,
    header=None,
    encoding='ISO-8859-1',
)

In [5]:
# Features are the values of the 'news' column; targets are the 'label' column.
X = df['news'].values
y = df['label'].values

# Hold out 40% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                    test_size = 0.40,
                                    random_state=1)

# Show the class proportions. This must be printed explicitly: a notebook
# only displays the *last* expression of a cell, so a bare expression here
# would be evaluated and silently thrown away.
print(df['label'].value_counts(normalize=True))

# TF-IDF text features feeding a multinomial Naive Bayes classifier
# (alpha is the smoothing parameter passed to MultinomialNB).
steps = [('tfidf', TfidfVectorizer()),
         ('mnb', MultinomialNB(alpha=1e-02))]

model = Pipeline(steps)
# Last expression of the cell, so the fitted pipeline is displayed.
model.fit(X_train, y_train)

In [6]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Score the fitted model on the held-out split.
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
# Precision, recall and F1 all use the same 'weighted' averaging.
precision, recall, f1 = (
    scorer(y_test, y_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# One "<name>: <value>" line per metric, in a fixed order.
print("\n".join(
    f"{title}: {value}"
    for title, value in (
        ("Accuracy", accuracy),
        ("Precision", precision),
        ("Recall", recall),
        ("F1 Score", f1),
    )
))

Accuracy: 0.6915936049510056
Precision: 0.6781906316787346
Recall: 0.6915936049510056
F1 Score: 0.6776677016391992


In [34]:
import os
import pickle

# Persist the fitted pipeline under ./models/ so that the path written here
# is the same path the loading cell reads ('./models/model_C=1.0.bin').
# NOTE(review): `C` only tags the file name; it is not passed to the model
# in this cell.
C = 1.0
output_file = f'./models/model_C={C}.bin'

# Create the target directory on a fresh checkout; no-op if it already exists.
os.makedirs(os.path.dirname(output_file), exist_ok=True)

with open(output_file, 'wb') as f_out:
    pickle.dump(model, f_out)

In [35]:
# Reload the pickled pipeline from disk, replacing the in-memory `model`.
# SECURITY: unpickling can execute arbitrary code -- only load files that
# you produced yourself or otherwise trust.
input_file = './models/model_C=1.0.bin'

with open(input_file, mode='rb') as f_in:
    model = pickle.loads(f_in.read())

# Last expression of the cell: display the restored pipeline.
model

In [36]:
# Classify a single example sentence with the reloaded pipeline.
X = ['Sales in Finland increased by 10.5 % in January']
y_label = model.predict(X)        # predicted class label for each input
y_pred = model.predict_proba(X)   # per-class probabilities for each input

print('Input:', X[0])
print('Category:', y_label[0])
# Confidence is the largest predicted probability, rounded to 2 decimals.
print('Confidence:', round(y_pred.max(), 2))

Input: Sales in Finland increased by 10.5 % in January
Category: positive
Confidence: 0.58
