<a href="https://colab.research.google.com/github/soumya527/News-Feedback-System/blob/main/models/NaiveBayes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score


In [None]:
df = pd.read_csv("Random.csv")

In [None]:
import string
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')

def clean_text(text):
    text = text.translate(str.maketrans('', '', string.punctuation))
    text = ''.join(char for char in text if char.isalnum() or char.isspace())
    text = text.lower()
    stop_words = set(stopwords.words('english'))
    text = ' '.join(word for word in text.split() if word not in stop_words)
    return text

df['Headline'] = df['Headline'].apply(clean_text)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [None]:
sentiment_mapping = {'Negative': 0, 'Neutral': 1, 'Positive': 2}
df['Sentiment'] = df['Sentiment'].map(sentiment_mapping)

In [None]:
X = df['Headline']
y = df['Sentiment']

In [None]:
vectorizer = CountVectorizer()
X_vectorized = vectorizer.fit_transform(X)

In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)

In [None]:
nb_classifier = MultinomialNB()
nb_classifier.fit(X_train, y_train)

In [None]:
y_pred = nb_classifier.predict(X_test)

In [None]:
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score

print("Accuracy:", accuracy_score(y_test, y_pred))

# Calculate precision, recall, and F1-score
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Print precision, recall, and F1-score
print("Precision of the best model:", precision)
print("Recall of the best model:", recall)
print("F1-score of the best model:", f1)

# Print overall classification report
print("Classification Report:")
print(classification_report(y_test, y_pred, target_names=sentiment_mapping.keys()))


Accuracy: 0.6240601503759399
Precision of the best model: 0.6322191940350973
Recall of the best model: 0.6240601503759399
F1-score of the best model: 0.6236564874335772
Classification Report:
              precision    recall  f1-score   support

    Negative       0.70      0.57      0.63        28
     Neutral       0.57      0.70      0.63        50
    Positive       0.65      0.58      0.62        55

    accuracy                           0.62       133
   macro avg       0.64      0.62      0.62       133
weighted avg       0.63      0.62      0.62       133



In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from sklearn.naive_bayes import MultinomialNB

param_grid = {
    'alpha': [0.1, 0.5, 1.0, 1.5, 2.0, 5.0, 10.0],
    'fit_prior': [True, False]
}

grid_search = GridSearchCV(nb_classifier, param_grid, cv=5)
grid_search.fit(X_train, y_train)

best_params = grid_search.best_params_

print("Best Hyperparameters:")
print(best_params)

nb_classifier_best = MultinomialNB(**best_params)
nb_classifier_best.fit(X_train, y_train)

y_pred_best = nb_classifier_best.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred_best))

# Calculate weighted precision, recall, and F1-score
precision = precision_score(y_test, y_pred_best, average='weighted')
recall = recall_score(y_test, y_pred_best, average='weighted')
f1 = f1_score(y_test, y_pred_best, average='weighted')

# Print weighted precision, recall, and F1-score
print("Precision of the best model:", precision)
print("Recall of the best model:", recall)
print("F1-score of the best model:", f1)

# Print overall classification report
print("Classification Report:")
print(classification_report(y_test, y_pred_best, target_names=sentiment_mapping.keys()))


Best Hyperparameters:
{'alpha': 1.5, 'fit_prior': True}
Accuracy: 0.6090225563909775
Precision of the best model: 0.6161967404966098
Recall of the best model: 0.6090225563909775
F1-score of the best model: 0.6094090524706006
Classification Report:
              precision    recall  f1-score   support

    Negative       0.70      0.57      0.63        28
     Neutral       0.56      0.66      0.61        50
    Positive       0.63      0.58      0.60        55

    accuracy                           0.61       133
   macro avg       0.63      0.60      0.61       133
weighted avg       0.62      0.61      0.61       133

