In [8]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
import nltk 
import re
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer 
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression 
from sklearn.tree import DecisionTreeClassifier 
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC 
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import train_test_split
import json
from sklearn.pipeline import Pipeline
import joblib

In [3]:
train_df = pd.read_csv("train.csv")
train_df.head(2)

Unnamed: 0,tweet,Airtel_Pos,Airtel_Neut,Airtel_Neg,Saf_Pos,Saf_Neut,Saf_Neg,clean_tweets
0,@emukala85 Apologies for the inconveniences ca...,0,1,0,0,0,1,apologies inconveniences caused notified recha...
1,Communications Authority: Airtel Kenya had the...,0,0,1,0,1,0,communications authority airtel kenya worst mo...


In [4]:
y = train_df.drop(columns=["tweet", "clean_tweets"], axis=1)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(train_df['clean_tweets'], y, test_size=0.3, random_state=2022)

In [6]:
tfidf_vect = TfidfVectorizer()
tfidf_vect.fit(train_df['clean_tweets'])

TfidfVectorizer()

In [7]:
#hybrid model
base_estimators = [
    ("log reg", OneVsRestClassifier(LogisticRegression())), 
    ("decision tree", OneVsRestClassifier(DecisionTreeClassifier())), 
    ("random forest", OneVsRestClassifier(RandomForestClassifier())), 
    ("MLP", OneVsRestClassifier(MLPClassifier()))
]
final_estimator = OneVsRestClassifier(StackingClassifier(estimators=base_estimators, final_estimator=OneVsRestClassifier(MLPClassifier())))

In [9]:
com_pipeline = Pipeline([
    ("vect", tfidf_vect), 
    ("hybrid_model", final_estimator)
])
com_pipeline.fit(X_train, y_train)
com_predictions = com_pipeline.predict(X_test)
com_f1 = f1_score(com_predictions, y_test, average="weighted")
outfile = "com_pipeline.sav"
joblib.dump(com_pipeline, outfile)
print(com_f1)

0.7175243599419387


In [24]:
sample_text = ["I enjoy airtel because of cheap airtime and bundles"]
predicted_sentiments = joblib.load("com_pipeline.sav").predict(sample_text)
predicted_sentiments

array([[0, 0, 0, 0, 0, 0]])