### **Loading the data and creating the model**

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report


# Load the labelled articles and discard rows that have no article text.
file_path = '/content/drive/MyDrive/notebooks/fakenews.csv'
df = pd.read_csv(file_path).dropna(subset=['text'])

# Features are the raw text; targets are the label column as a NumPy array.
x_text, y = df['text'], df['label'].values

# 70 / 15 / 15 split: hold out 30% first, then halve that hold-out into
# validation and test. The same seed is used for both splits.
split_seed = 42
x_train, x_temp, y_train, y_temp = train_test_split(
    x_text, y, test_size=0.3, random_state=split_seed
)
x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=0.5, random_state=split_seed
)

In [7]:
# Bag-of-words features: the vocabulary (English stop words removed, capped
# at 5000 terms) is learned from the training split only, then reused
# unchanged to encode the validation and test splits.
vectorizer = CountVectorizer(max_features=5000, stop_words='english')
x_train_bow = vectorizer.fit_transform(x_train)
x_val_bow, x_test_bow = (
    vectorizer.transform(held_out) for held_out in (x_val, x_test)
)

# Fit a multinomial Naive Bayes classifier on the training term counts.
model = MultinomialNB()
model.fit(x_train_bow, y_train)

### **Evaluate on the validation set**

In [8]:
# Score the fitted model on the validation split.
y_val_pred = model.predict(x_val_bow)
val_accuracy = accuracy_score(y_val, y_val_pred)
val_report = classification_report(y_val, y_val_pred)

# One print call, newline-separated, emits the same three lines of output.
print(
    f'Validation Accuracy: {val_accuracy:.4f}',
    'Validation Classification Report:',
    val_report,
    sep='\n',
)

Validation Accuracy: 0.8927
Validation Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.91      0.90      1564
           1       0.91      0.87      0.89      1550

    accuracy                           0.89      3114
   macro avg       0.89      0.89      0.89      3114
weighted avg       0.89      0.89      0.89      3114



### **Evaluate on test set**

In [9]:
# Score the fitted model on the held-out test split.
y_test_pred = model.predict(x_test_bow)
test_accuracy = accuracy_score(y_test, y_test_pred)
test_report = classification_report(y_test, y_test_pred)

# One print call, newline-separated, emits the same three lines of output.
print(
    f'Test Accuracy: {test_accuracy:.4f}',
    'Test Classification Report:',
    test_report,
    sep='\n',
)

Test Accuracy: 0.8889
Test Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.92      0.89      1574
           1       0.91      0.86      0.88      1541

    accuracy                           0.89      3115
   macro avg       0.89      0.89      0.89      3115
weighted avg       0.89      0.89      0.89      3115



### **Saving the model and vectorizer**

In [5]:
import joblib
import os


# Persist the fitted classifier and the vectorizer side by side.
# NOTE: `model` and `vectorizer` are created in the training cell, so this
# cell must be run after that cell has been executed.
preprocessing_model_dir = '/content/drive/MyDrive/models/naive_bayes/'
os.makedirs(preprocessing_model_dir, exist_ok=True)

model_path, vectorizer_path = (
    os.path.join(preprocessing_model_dir, filename)
    for filename in ('naive_bayes.joblib', 'vectorizer.joblib')
)

# Write each artifact to its own .joblib file.
for artifact, destination in ((model, model_path), (vectorizer, vectorizer_path)):
    joblib.dump(artifact, destination)

print(f'Model and vectorizer saved to {model_path} and {vectorizer_path}')


Model and vectorizer saved to /content/drive/MyDrive/models/naive_bayes/naive_bayes.joblib and /content/drive/MyDrive/models/naive_bayes/vectorizer.joblib
