## Importing Libraries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB, MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer

## Disable Warnings

In [2]:
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter("ignore")

In [3]:
data = pd.read_json("Sarcasm.json", lines=True)

In [4]:
data

Unnamed: 0,article_link,headline,is_sarcastic
0,https://www.huffingtonpost.com/entry/versace-b...,former versace store clerk sues over secret 'b...,0
1,https://www.huffingtonpost.com/entry/roseanne-...,the 'roseanne' revival catches up to our thorn...,0
2,https://local.theonion.com/mom-starting-to-fea...,mom starting to fear son's web series closest ...,1
3,https://politics.theonion.com/boehner-just-wan...,"boehner just wants wife to listen, not come up...",1
4,https://www.huffingtonpost.com/entry/jk-rowlin...,j.k. rowling wishes snape happy birthday in th...,0
...,...,...,...
26704,https://www.huffingtonpost.com/entry/american-...,american politics in moral free-fall,0
26705,https://www.huffingtonpost.com/entry/americas-...,america's best 20 hikes,0
26706,https://www.huffingtonpost.com/entry/reparatio...,reparations and obama,0
26707,https://www.huffingtonpost.com/entry/israeli-b...,israeli ban targeting boycott supporters raise...,0


In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26709 entries, 0 to 26708
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   article_link  26709 non-null  object
 1   headline      26709 non-null  object
 2   is_sarcastic  26709 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 626.1+ KB


In [6]:
data.describe()

Unnamed: 0,is_sarcastic
count,26709.0
mean,0.438953
std,0.496269
min,0.0
25%,0.0
50%,0.0
75%,1.0
max,1.0


In [7]:
data.isna().sum

<bound method NDFrame._add_numeric_operations.<locals>.sum of        article_link  headline  is_sarcastic
0             False     False         False
1             False     False         False
2             False     False         False
3             False     False         False
4             False     False         False
...             ...       ...           ...
26704         False     False         False
26705         False     False         False
26706         False     False         False
26707         False     False         False
26708         False     False         False

[26709 rows x 3 columns]>

In [8]:
data.isnull().sum()

article_link    0
headline        0
is_sarcastic    0
dtype: int64

In [9]:
data["is_sarcastic"] = data["is_sarcastic"].map({0: "Not Sarcasm", 1: "Sarcasm"})

## Removing unnecessary columns

In [10]:
data = data[["headline", "is_sarcastic"]]

## Data Splitting

In [11]:
x = np.array(data["headline"])
y = np.array(data["is_sarcastic"])

In [12]:
cv = CountVectorizer()
X = cv.fit_transform(x)

In [13]:
Xtrain, Xtest, ytrain, ytest = train_test_split(X,
                                                y,
                                                test_size = 0.2,
                                                random_state = 42)

## Training the Sarcasm Catcher Model

In [14]:
m1 = BernoulliNB()
m2 = MultinomialNB()

m1.fit(Xtrain, ytrain)
m2.fit(Xtrain, ytrain)

print(f"Score of BernoulliNB: {m1.score(Xtrain, ytrain)}\nScore of MultinomialNB: {m2.score(Xtrain, ytrain)}")

Score of BernoulliNB: 0.930359900781579
Score of MultinomialNB: 0.9333083727242945


In [15]:
features = input("Enter a text: ")
data1 = cv.transform([features]).toarray()
print(f"Prediction of Bernoulli: {m1.predict(data1)}\nPrediction of Multinomial: {m2.predict(data1)}")

Enter a text:  Cows lose their jobs as milk prices drop


Prediction of Bernoulli: ['Sarcasm']
Prediction of Multinomial: ['Sarcasm']
