In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score as acc
from sklearn.ensemble import AdaBoostClassifier

In [None]:
# Load the tweet dataset. index_col=False tells pandas not to use the first
# column as the row index.
df = pd.read_csv('t_dataset.csv', index_col=False)

# Encode the textual 'Sarcasm' label as an integer target: 'yes' -> 1, 'no' -> 0.
sarcasm_label_map = {'yes': 1, 'no': 0}
sarcasm_encoded = df['Sarcasm'].map(sarcasm_label_map)

# Series.map yields NaN for any value that is not a key of the mapping (and for
# missing values). Fail here with a clear message instead of letting NaN
# targets surface later inside a model's fit().
unmapped_rows = sarcasm_encoded.isna()
if unmapped_rows.any():
    unexpected_labels = df.loc[unmapped_rows, 'Sarcasm'].unique().tolist()
    raise ValueError(
        f"'Sarcasm' column contains values other than 'yes'/'no': {unexpected_labels}"
    )

df['Sarcasm'] = sarcasm_encoded

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    df['Tweet'],
    df['Sarcasm'],
    test_size=0.2,
    random_state=42,
)

# TF-IDF over unigrams and bigrams, capped at 5000 features.
vectorizer = TfidfVectorizer(
    ngram_range=(1, 2),
    max_features=5000,
)

# The vocabulary and IDF weights are learned from the training tweets only;
# the test tweets are merely transformed with the already-fitted vectorizer.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [None]:
def acc(x, y):
    """Print the accuracy (as a percentage) and the classification report.

    Both arguments are forwarded, in this order, to ``accuracy_score`` and
    ``classification_report``; the cells in this notebook call it as
    ``acc(y_test, y_pred)``.

    Parameters
    ----------
    x : array-like
        Labels passed as the first argument (the true labels at the call sites).
    y : array-like
        Labels passed as the second argument (the predictions at the call sites).

    Returns
    -------
    None
        The results are only printed.

    Notes
    -----
    This definition rebinds the name ``acc``, which the import cell also binds
    to ``sklearn.metrics.accuracy_score``.
    """
    accuracy_pct = accuracy_score(x, y) * 100
    print(f'Accuracy: {accuracy_pct:.2f}%')

    report = classification_report(x, y)
    print("Classification Report:")
    print(report)

# Logistic Regression

In [None]:
# Logistic regression with default hyper-parameters on the TF-IDF features.
# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression().fit(X_train_tfidf, y_train)

# Hard class predictions, used for the printed metrics.
y_pred = model.predict(X_test_tfidf)

# Column 1 of predict_proba: the predicted probability of label 1.
y_pred_prob = model.predict_proba(X_test_tfidf)[:, 1]

# Prints accuracy and the per-class report.
acc(y_test, y_pred)

Accuracy: 77.89%
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.59      0.69       250
           1       0.76      0.91      0.83       347

    accuracy                           0.78       597
   macro avg       0.79      0.75      0.76       597
weighted avg       0.79      0.78      0.77       597



# SVM (Support Vector Machine) Model

In [None]:
# Linear-kernel support vector classifier on the TF-IDF features.
# NOTE(review): probability=True enables predict_proba, but this cell never
# calls it. Confirm a later cell needs it, since scikit-learn documents the
# option as slowing down fit().
model = SVC(
    kernel='linear',
    probability=True,
).fit(X_train_tfidf, y_train)

# Hard class predictions for the held-out tweets.
y_pred = model.predict(X_test_tfidf)

# Prints accuracy and the per-class report.
acc(y_test, y_pred)

Accuracy: 77.55%
Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.60      0.69       250
           1       0.76      0.90      0.82       347

    accuracy                           0.78       597
   macro avg       0.79      0.75      0.76       597
weighted avg       0.78      0.78      0.77       597



# Naive Bayes Model

In [None]:
# Multinomial naive Bayes with default settings, trained directly on the
# TF-IDF matrix. fit() returns the estimator, so the call is chained.
model = MultinomialNB().fit(X_train_tfidf, y_train)

# Hard class predictions for the held-out tweets.
y_pred = model.predict(X_test_tfidf)

# Prints accuracy and the per-class report.
acc(y_test, y_pred)

Accuracy: 78.22%
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.60      0.70       250
           1       0.76      0.91      0.83       347

    accuracy                           0.78       597
   macro avg       0.80      0.76      0.76       597
weighted avg       0.79      0.78      0.77       597



# Decision Tree Model

In [None]:
# Single decision tree; random_state is fixed so the fitted tree is
# reproducible between runs.
model = DecisionTreeClassifier(random_state=42).fit(X_train_tfidf, y_train)

# Hard class predictions for the held-out tweets.
y_pred = model.predict(X_test_tfidf)

# Prints accuracy and the per-class report.
acc(y_test, y_pred)

Accuracy: 76.72%
Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.74      0.73       250
           1       0.81      0.79      0.80       347

    accuracy                           0.77       597
   macro avg       0.76      0.76      0.76       597
weighted avg       0.77      0.77      0.77       597



# KNN Model

In [None]:
# Number of neighbours consulted for each prediction.
k = 5

# k-nearest-neighbours classifier on the TF-IDF features (default distance
# metric and weighting).
# NOTE(review): the recorded output for this cell shows recall 0.99 for class 0
# and 0.03 for class 1, i.e. the model predicts class 0 almost always. Worth
# investigating before comparing it with the other models.
model = KNeighborsClassifier(n_neighbors=k).fit(X_train_tfidf, y_train)

# Hard class predictions for the held-out tweets.
y_pred = model.predict(X_test_tfidf)

# Prints accuracy and the per-class report.
acc(y_test, y_pred)

Accuracy: 43.22%
Classification Report:
              precision    recall  f1-score   support

           0       0.42      0.99      0.59       250
           1       0.79      0.03      0.06       347

    accuracy                           0.43       597
   macro avg       0.60      0.51      0.33       597
weighted avg       0.63      0.43      0.28       597



# GBM (Gradient Boosting Machines)

In [None]:
# Gradient-boosted trees with default hyper-parameters.
# random_state is pinned (same seed as the train/test split and the decision
# tree) so that a Restart & Run All reproduces the same fitted model; without
# it the estimator draws a fresh seed on every run.
model = GradientBoostingClassifier(random_state=42)

model.fit(X_train_tfidf, y_train)

# Hard class predictions for the held-out tweets.
y_pred = model.predict(X_test_tfidf)

# Prints accuracy and the per-class report.
acc(y_test, y_pred)

Accuracy: 79.56%
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.56      0.70       250
           1       0.75      0.97      0.85       347

    accuracy                           0.80       597
   macro avg       0.84      0.76      0.77       597
weighted avg       0.82      0.80      0.78       597

