# **Creating model**

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report


# Load the labelled fake-news CSV and discard rows that have no article text.
file_path = '/content/drive/MyDrive/notebooks/fakenews.csv'
df = pd.read_csv(file_path).dropna(subset=['text'])

# Features are the raw article strings; targets are the labels as an array.
x_text, y = df['text'], df['label'].values

# 70/15/15 split: hold out 30% first, then halve that hold-out into
# validation and test. The fixed random_state keeps the split reproducible.
x_train, x_temp, y_train, y_temp = train_test_split(
    x_text, y, test_size=0.3, random_state=42
)
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=42
)

In [2]:
# Bag-of-words features: learn the vocabulary on the training split only,
# then reuse that fitted vocabulary for the validation and test splits.
vectorizer = CountVectorizer(
    max_features=5000,     # cap the vocabulary size
    stop_words='english',  # scikit-learn's built-in English stop-word list
)
x_train_bow = vectorizer.fit_transform(x_train)
x_val_bow, x_test_bow = (vectorizer.transform(split) for split in (x_val, x_test))

# Fit a multinomial Naive Bayes classifier on the training term counts.
model = MultinomialNB()
model.fit(x_train_bow, y_train)

In [None]:
# Peek at the first raw training document as a sanity check on the text column.
print(x_train.iat[0])

FORT LAUDERDALE, Fla.  —   Signs of Esteban Santiago’s unraveling had mounted over the past year. But it was not until early November, when he walked into an F. B. I. office carrying an ammunition clip  —   leaving a pistol and his infant son in his car  —   to complain about a C. I. A. plot against him, that his behavior became disturbing enough to earn him a short stay in a psychiatric hospital unit. In the months before, the police were called repeatedly to his home about domestic disturbances, and the National Guard kicked him out because of “unsatisfactory performance” after nearly a decade of service. Mr. Santiago, an Iraq war veteran, increasingly spoke to relatives and associates about voices in his head that were tormenting him. Then, a little before 1 p. m. Friday, Mr. Santiago, 26, turned up far from his Alaska home, in Terminal 2 of the Fort   International Airport. There, law enforcement officials said, he retrieved his checked luggage, pulled a   handgun out of his suitca

# **Evaluate on validation set**

In [3]:
# Score the fitted classifier on the validation split.
y_val_pred = model.predict(x_val_bow)
val_accuracy = accuracy_score(y_val, y_val_pred)

# Headline accuracy to four decimals, followed by the per-class breakdown.
print(f'Validation Accuracy: {val_accuracy:.4f}')
print(
    'Validation Classification Report:',
    classification_report(y_val, y_val_pred),
    sep='\n',
)

Validation Accuracy: 0.8931
Validation Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.91      0.90      1564
           1       0.91      0.87      0.89      1550

    accuracy                           0.89      3114
   macro avg       0.89      0.89      0.89      3114
weighted avg       0.89      0.89      0.89      3114



# **Evaluate on test set**

In [4]:
# Score the fitted classifier on the held-out test split and show the
# per-class precision/recall/F1 breakdown.
y_test_pred = model.predict(x_test_bow)
print(
    'Test Classification Report:',
    classification_report(y_test, y_test_pred),
    sep='\n',
)

Test Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.92      0.89      1574
           1       0.91      0.86      0.88      1541

    accuracy                           0.89      3115
   macro avg       0.89      0.89      0.89      3115
weighted avg       0.89      0.89      0.89      3115



# **Evaluate on a different dataset**

In [5]:
# Evaluate the already-fitted model on a separate dataset to see how it
# performs on data that did not come from the training corpus.
file_path = '/content/drive/MyDrive/notebooks/new_dataset.csv'

# Apply the same cleaning as the training data (drop rows with no text) and
# also drop unlabelled rows: CountVectorizer.transform raises on NaN
# documents, and rows without a label cannot be scored.
df = pd.read_csv(file_path).dropna(subset=['text', 'label'])

x_new = df['text']
y_new = df['label']

# Reuse the vectorizer fitted on the training split (transform only, no refit)
# so the feature columns match what the model was trained on.
x_new_bow = vectorizer.transform(x_new)
y_new_pred = model.predict(x_new_bow)
print(classification_report(y_new, y_new_pred))

              precision    recall  f1-score   support

           0       0.63      0.82      0.71     21417
           1       0.77      0.57      0.66     23481

    accuracy                           0.69     44898
   macro avg       0.70      0.69      0.68     44898
weighted avg       0.71      0.69      0.68     44898



In [None]:
import joblib
import os


# Persist the fitted classifier and its vectorizer into the same directory,
# creating the directory first if it does not exist yet.
preprocessing_model_dir = '/content/drive/MyDrive/models/naive_bayes/'
os.makedirs(preprocessing_model_dir, exist_ok=True)

# Build both destination paths from the shared output directory.
model_path, vectorizer_path = (
    os.path.join(preprocessing_model_dir, filename)
    for filename in ('naive_bayes.joblib', 'vectorizer.joblib')
)

joblib.dump(model, model_path)
joblib.dump(vectorizer, vectorizer_path)

print(f'Model and vectorizer saved to {model_path} and {vectorizer_path}')


Model and vectorizer saved to /content/drive/MyDrive/models/naive_bayes/naive_bayes.joblib and /content/drive/MyDrive/models/naive_bayes/vectorizer.joblib
