In [40]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib

# Read the preprocessed dataset and discard rows that have no cleaned text.
# Rebinding `df` (rather than inplace=True) keeps this cell safe to re-run.
df = pd.read_csv('training_cleaned.csv', encoding='ISO-8859-1')
df = df.dropna(subset=['cleaned_text'])

# Select a reproducible subset of the dataset for training and testing.
# DataFrame.sample raises ValueError when n exceeds the number of rows
# (replace=False), so cap n at the rows that are actually available.
subset_df = df.sample(n=min(100000, len(df)), random_state=42)

# Split the dataset into training and testing sets (80% / 20%)
X_train, X_test, y_train, y_test = train_test_split(
    subset_df['cleaned_text'],
    subset_df['sentiment'],
    test_size=0.2,
    random_state=42,
)

# Vectorize the text data using TfidfVectorizer.
# The vectorizer is fitted on the training split only and then reused for the
# test split, so no test-set vocabulary leaks into training.
vectorizer = TfidfVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Train the Naive Bayes model
nb = MultinomialNB()
nb.fit(X_train_vectorized, y_train)

# Predict the sentiment of the test data
y_pred = nb.predict(X_test_vectorized)

# Evaluate the performance of the model
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

# Persist the fitted vectorizer next to the classifier: the saved model can
# only score text that was transformed with this exact vocabulary / IDF table,
# so without it the .joblib model is unusable from a fresh kernel.
joblib.dump(vectorizer, 'tfidf_vectorizer.joblib')
joblib.dump(nb,'naive_bayes_trained.joblib')

# # Read the local dataset from Excel file
# local_df = pd.read_excel('cleaned_local_dataset.xlsx')

# # Vectorize the text data using the same TfidfVectorizer object used in training
# X_local_vectorized = vectorizer.transform(local_df['Cleaned Text'])

# # Predict the sentiment of the local dataset
# y_local_pred = nb.predict(X_local_vectorized)

# # Add the predicted sentiment to the local dataset
# local_df['sentiment'] = y_local_pred

# # Save the local dataset with predicted sentiment to a new Excel file
# local_df.to_excel('local_dataset_with_sentiment_naivebayes.xlsx', index=False)


Accuracy: 0.7386


['naive_bayes_trained.joblib']

# Sentiment analysis of the individual cleaned text

In [64]:
import pandas as pd
import openpyxl
from sklearn.feature_extraction.text import TfidfVectorizer
import joblib

def sentiment_analysis(filename):
    """Write Naive Bayes sentiment predictions into every sheet of a workbook.

    For each sheet of the Excel file that is non-empty and has a
    'cleaned_text' column, the text is vectorized, classified, and the
    predictions are written to an 'nb_sentiment' column (row 1 holds the
    header, predictions start at row 2). Sheets that are empty or lack the
    'cleaned_text' column are skipped. The workbook is saved back to
    ``filename`` once, after all sheets are processed, and only if at least
    one sheet was updated.

    Relies on the notebook-level names ``vectorizer`` (fitted TfidfVectorizer)
    and ``nb`` (trained classifier); the training cell must have been run
    first.

    Args:
        filename: Path of the .xlsx workbook to annotate in place.
    """
    workbook = openpyxl.load_workbook(filename)
    modified = False

    for sheetname in workbook.sheetnames:
        df = pd.read_excel(filename, sheet_name=sheetname)
        if df.empty or 'cleaned_text' not in df.columns:
            continue

        # Blank cells arrive as NaN and numeric-looking cells as numbers;
        # the vectorizer needs plain strings for every row.
        texts = df['cleaned_text'].fillna('').astype(str)

        # Vectorize with the same TfidfVectorizer object used in training
        X_local_vectorized = vectorizer.transform(texts)

        # Predict the sentiment of the local dataset
        y_local_pred = nb.predict(X_local_vectorized)

        worksheet = workbook[sheetname]

        # Reuse an existing 'nb_sentiment' column so re-running the notebook
        # overwrites earlier predictions instead of appending a duplicate
        # column. The column index is resolved once here rather than reading
        # worksheet.max_column again for every row.
        header_values = [cell.value for cell in worksheet[1]]
        if 'nb_sentiment' in header_values:
            target_column = header_values.index('nb_sentiment') + 1
        else:
            target_column = worksheet.max_column + 1
        worksheet.cell(row=1, column=target_column, value='nb_sentiment')

        # .tolist() converts NumPy scalars to native Python values before
        # they are stored in the cells.
        for row_idx, sentiment in enumerate(y_local_pred.tolist(), start=2):
            worksheet.cell(row=row_idx, column=target_column, value=sentiment)

        modified = True

    # Save once at the end instead of rewriting the file after every sheet.
    if modified:
        workbook.save(filename)
 

In [65]:
import os
# Folder holding the cleaned tweet workbooks to annotate.
# NOTE(review): machine-specific absolute path; consider a configurable
# directory constant so the notebook runs on other machines.
directory='C:/Users/ektag/Minor2/sentiment_analysis/cleaned_filtered_and_sorted_Tweets/'
for filename in os.listdir(directory):
    # Excel creates "~$<name>.xlsx" lock files while a workbook is open; they
    # end in .xlsx but are not valid workbooks, so skip them.
    if filename.startswith('~$') or not filename.endswith('.xlsx'):
        continue
    # Build the path once with os.path.join and use it everywhere, so the loop
    # keeps working even if `directory` loses its trailing slash.
    f=os.path.join(directory,filename)
    if os.path.isfile(f):
        print(f)
        sentiment_analysis(f)

C:/Users/ektag/Minor2/sentiment_analysis/cleaned_filtered_and_sorted_Tweets/cleaned_Copy of BasedMikeLee.xlsx
C:/Users/ektag/Minor2/sentiment_analysis/cleaned_filtered_and_sorted_Tweets/cleaned_Copy of bennyjohnson.xlsx
C:/Users/ektag/Minor2/sentiment_analysis/cleaned_filtered_and_sorted_Tweets/cleaned_Copy of BillyM2k.xlsx
C:/Users/ektag/Minor2/sentiment_analysis/cleaned_filtered_and_sorted_Tweets/cleaned_Copy of charliekirk11.xlsx
C:/Users/ektag/Minor2/sentiment_analysis/cleaned_filtered_and_sorted_Tweets/cleaned_Copy of CliffordAsness.xlsx
C:/Users/ektag/Minor2/sentiment_analysis/cleaned_filtered_and_sorted_Tweets/cleaned_Copy of ellagirwin.xlsx
C:/Users/ektag/Minor2/sentiment_analysis/cleaned_filtered_and_sorted_Tweets/cleaned_Copy of EllaVogelsang.xlsx
C:/Users/ektag/Minor2/sentiment_analysis/cleaned_filtered_and_sorted_Tweets/cleaned_Copy of HansMahncke.xlsx
C:/Users/ektag/Minor2/sentiment_analysis/cleaned_filtered_and_sorted_Tweets/cleaned_Copy of jonesville.xlsx
C:/Users/ektag/