In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the CSV file into a DataFrame
df = pd.read_csv(r'/content/pre_processed_final.csv')
df.dropna(inplace=True)
# Split the data into a training set and a test set
train_df, test_df = train_test_split(df, test_size=0.8, random_state=42)

# Extract the sentiment and text columns from the training set
train_X = train_df['text']
train_y = train_df['sentiment']

# Extract the sentiment and text columns from the test set
test_X = test_df['text']
test_y = test_df['sentiment']

# Initialize the vectorizer
vectorizer = CountVectorizer()

# Convert the text data into numerical features
train_X = vectorizer.fit_transform(train_X)
test_X = vectorizer.transform(test_X)

# Initialize the classifiers
nb = MultinomialNB()
lr = LogisticRegression(max_iter=5000)
svm = SVC(probability=True)
dt = DecisionTreeClassifier()

# Create the ensemble classifier using majority voting
ensemble = VotingClassifier(estimators=[('nb', nb), ('lr', lr), ('svm', svm), ('dt', dt)], voting='soft',weights=[1, 1, 1, 1])

# Fit the ensemble classifier to the training data
ensemble.fit(train_X, train_y)

# Predict the sentiment on the test data
pred_y = ensemble.predict(test_X)

# Calculate the accuracy of the ensemble classifier
accuracy = accuracy_score(test_y, pred_y)

# Print accuracy
print("Accuracy: {:.2f}%".format(accuracy * 100))

# Print classification report
print("Classification Report:")
print(classification_report(test_y, pred_y))


Accuracy: 66.24%
Classification Report:
              precision    recall  f1-score   support

        -1.0       0.69      0.73      0.71      7517
         0.0       0.63      0.54      0.58      6971
         1.0       0.66      0.70      0.68      8231

    accuracy                           0.66     22719
   macro avg       0.66      0.66      0.66     22719
weighted avg       0.66      0.66      0.66     22719

