In [1]:
 !pip install catboost



In [2]:
import pandas as pd
import numpy as np
import re
import string
from warnings import filterwarnings

# Scikit-learn
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, cross_validate
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
# from xgboost import XGBClassifier

# NLTK
import nltk
from nltk.corpus import stopwords, wordnet
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.sentiment import SentimentIntensityAnalyzer

# TextBlob

# WordCloud

# Matplotlib
from matplotlib.ticker import MaxNLocator

# Download NLTK data
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')
nltk.download('wordnet')

# Filter warnings
filterwarnings('ignore')

# Display options for pandas
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 200)
pd.set_option('display.float_format', lambda x: '%.2f' % x)

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [3]:
train = pd.read_csv("datasets/train.csv", sep=",")
#train.head()
#train.shape
test = pd.read_csv("datasets/test.csv", sep=",")
#test.head()
#test.shape

In [4]:
df_c = pd.concat([train, test], ignore_index=True)

In [5]:
df = df_c[['text', 'target']]
df.head(50)

Unnamed: 0,text,target
0,Our Deeds are the Reason of this #earthquake M...,1.0
1,Forest fire near La Ronge Sask. Canada,1.0
2,All residents asked to 'shelter in place' are ...,1.0
3,"13,000 people receive #wildfires evacuation or...",1.0
4,Just got sent this photo from Ruby #Alaska as ...,1.0
5,#RockyFire Update => California Hwy. 20 closed...,1.0
6,#flood #disaster Heavy rain causes flash flood...,1.0
7,I'm on top of the hill and I can see a fire in...,1.0
8,There's an emergency evacuation happening now ...,1.0
9,I'm afraid that the tornado is coming to our a...,1.0


In [6]:
def remove_urls(text):
    url_pattern = re.compile(r'https?://\S+|www\.\S+')
    clean_text = re.sub(url_pattern, '', text)
    return clean_text
df['text_without_url'] = df['text'].apply(remove_urls)
df.head(50)  # You can see changings

Unnamed: 0,text,target,text_without_url
0,Our Deeds are the Reason of this #earthquake M...,1.0,Our Deeds are the Reason of this #earthquake M...
1,Forest fire near La Ronge Sask. Canada,1.0,Forest fire near La Ronge Sask. Canada
2,All residents asked to 'shelter in place' are ...,1.0,All residents asked to 'shelter in place' are ...
3,"13,000 people receive #wildfires evacuation or...",1.0,"13,000 people receive #wildfires evacuation or..."
4,Just got sent this photo from Ruby #Alaska as ...,1.0,Just got sent this photo from Ruby #Alaska as ...
5,#RockyFire Update => California Hwy. 20 closed...,1.0,#RockyFire Update => California Hwy. 20 closed...
6,#flood #disaster Heavy rain causes flash flood...,1.0,#flood #disaster Heavy rain causes flash flood...
7,I'm on top of the hill and I can see a fire in...,1.0,I'm on top of the hill and I can see a fire in...
8,There's an emergency evacuation happening now ...,1.0,There's an emergency evacuation happening now ...
9,I'm afraid that the tornado is coming to our a...,1.0,I'm afraid that the tornado is coming to our a...


In [7]:
def remove_special_chars(text):
    cleaned_text = re.sub(r'[^\w\s]', '', text)
    return cleaned_text
df['text_without_spec_char'] = df['text_without_url'].apply(remove_special_chars)
df.head(50)

Unnamed: 0,text,target,text_without_url,text_without_spec_char
0,Our Deeds are the Reason of this #earthquake M...,1.0,Our Deeds are the Reason of this #earthquake M...,Our Deeds are the Reason of this earthquake Ma...
1,Forest fire near La Ronge Sask. Canada,1.0,Forest fire near La Ronge Sask. Canada,Forest fire near La Ronge Sask Canada
2,All residents asked to 'shelter in place' are ...,1.0,All residents asked to 'shelter in place' are ...,All residents asked to shelter in place are be...
3,"13,000 people receive #wildfires evacuation or...",1.0,"13,000 people receive #wildfires evacuation or...",13000 people receive wildfires evacuation orde...
4,Just got sent this photo from Ruby #Alaska as ...,1.0,Just got sent this photo from Ruby #Alaska as ...,Just got sent this photo from Ruby Alaska as s...
5,#RockyFire Update => California Hwy. 20 closed...,1.0,#RockyFire Update => California Hwy. 20 closed...,RockyFire Update California Hwy 20 closed in ...
6,#flood #disaster Heavy rain causes flash flood...,1.0,#flood #disaster Heavy rain causes flash flood...,flood disaster Heavy rain causes flash floodin...
7,I'm on top of the hill and I can see a fire in...,1.0,I'm on top of the hill and I can see a fire in...,Im on top of the hill and I can see a fire in ...
8,There's an emergency evacuation happening now ...,1.0,There's an emergency evacuation happening now ...,Theres an emergency evacuation happening now i...
9,I'm afraid that the tornado is coming to our a...,1.0,I'm afraid that the tornado is coming to our a...,Im afraid that the tornado is coming to our area


In [8]:
def remove_emojis(text):
    emoji_pattern = re.compile(
        '['
        u'\U0001F600-\U0001F64F'  # emoticons
        u'\U0001F300-\U0001F5FF'  # symbols & pictographs
        u'\U0001F680-\U0001F6FF'  # transport & map symbols
        u'\U0001F1E0-\U0001F1FF'  # flags (iOS)
        u'\U00002702-\U000027B0'
        u'\U000024C2-\U0001F251'
        ']+',
        flags=re.UNICODE)
    clean_text = emoji_pattern.sub('', text)
    return clean_text
df['text_without_emojis'] = df['text_without_spec_char'].apply(remove_emojis)
df.head(50)  # You can see changings

Unnamed: 0,text,target,text_without_url,text_without_spec_char,text_without_emojis
0,Our Deeds are the Reason of this #earthquake M...,1.0,Our Deeds are the Reason of this #earthquake M...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...
1,Forest fire near La Ronge Sask. Canada,1.0,Forest fire near La Ronge Sask. Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada
2,All residents asked to 'shelter in place' are ...,1.0,All residents asked to 'shelter in place' are ...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...
3,"13,000 people receive #wildfires evacuation or...",1.0,"13,000 people receive #wildfires evacuation or...",13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...
4,Just got sent this photo from Ruby #Alaska as ...,1.0,Just got sent this photo from Ruby #Alaska as ...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...
5,#RockyFire Update => California Hwy. 20 closed...,1.0,#RockyFire Update => California Hwy. 20 closed...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...
6,#flood #disaster Heavy rain causes flash flood...,1.0,#flood #disaster Heavy rain causes flash flood...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...
7,I'm on top of the hill and I can see a fire in...,1.0,I'm on top of the hill and I can see a fire in...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...
8,There's an emergency evacuation happening now ...,1.0,There's an emergency evacuation happening now ...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...
9,I'm afraid that the tornado is coming to our a...,1.0,I'm afraid that the tornado is coming to our a...,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area


In [9]:
def remove_html(text):
    text = re.sub(r'<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});', '', text)
    return text
df['text_without_html'] = df['text_without_emojis'].apply(remove_emojis)
df.head(50)  # You can see changings

Unnamed: 0,text,target,text_without_url,text_without_spec_char,text_without_emojis,text_without_html
0,Our Deeds are the Reason of this #earthquake M...,1.0,Our Deeds are the Reason of this #earthquake M...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...
1,Forest fire near La Ronge Sask. Canada,1.0,Forest fire near La Ronge Sask. Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada
2,All residents asked to 'shelter in place' are ...,1.0,All residents asked to 'shelter in place' are ...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...
3,"13,000 people receive #wildfires evacuation or...",1.0,"13,000 people receive #wildfires evacuation or...",13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...
4,Just got sent this photo from Ruby #Alaska as ...,1.0,Just got sent this photo from Ruby #Alaska as ...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...
5,#RockyFire Update => California Hwy. 20 closed...,1.0,#RockyFire Update => California Hwy. 20 closed...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...
6,#flood #disaster Heavy rain causes flash flood...,1.0,#flood #disaster Heavy rain causes flash flood...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...
7,I'm on top of the hill and I can see a fire in...,1.0,I'm on top of the hill and I can see a fire in...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...
8,There's an emergency evacuation happening now ...,1.0,There's an emergency evacuation happening now ...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...
9,I'm afraid that the tornado is coming to our a...,1.0,I'm afraid that the tornado is coming to our a...,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area


In [10]:
def remove_punctuations(text):
    text = text.translate(str.maketrans('', '', string.punctuation))
    return text
df['text_without_punctuations'] = df['text_without_html'].apply(remove_emojis)
df.head(50)  # You can see changings

Unnamed: 0,text,target,text_without_url,text_without_spec_char,text_without_emojis,text_without_html,text_without_punctuations
0,Our Deeds are the Reason of this #earthquake M...,1.0,Our Deeds are the Reason of this #earthquake M...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...
1,Forest fire near La Ronge Sask. Canada,1.0,Forest fire near La Ronge Sask. Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada
2,All residents asked to 'shelter in place' are ...,1.0,All residents asked to 'shelter in place' are ...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...
3,"13,000 people receive #wildfires evacuation or...",1.0,"13,000 people receive #wildfires evacuation or...",13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...
4,Just got sent this photo from Ruby #Alaska as ...,1.0,Just got sent this photo from Ruby #Alaska as ...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...
5,#RockyFire Update => California Hwy. 20 closed...,1.0,#RockyFire Update => California Hwy. 20 closed...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...
6,#flood #disaster Heavy rain causes flash flood...,1.0,#flood #disaster Heavy rain causes flash flood...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...
7,I'm on top of the hill and I can see a fire in...,1.0,I'm on top of the hill and I can see a fire in...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...
8,There's an emergency evacuation happening now ...,1.0,There's an emergency evacuation happening now ...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...
9,I'm afraid that the tornado is coming to our a...,1.0,I'm afraid that the tornado is coming to our a...,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area


In [11]:
def remove_numbers(text):
    text = re.sub(r'\d', '', text)
    return text
df['text_without_numbers'] = df['text_without_punctuations'].apply(remove_emojis)
df.head(50)  # You can see changings

Unnamed: 0,text,target,text_without_url,text_without_spec_char,text_without_emojis,text_without_html,text_without_punctuations,text_without_numbers
0,Our Deeds are the Reason of this #earthquake M...,1.0,Our Deeds are the Reason of this #earthquake M...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...
1,Forest fire near La Ronge Sask. Canada,1.0,Forest fire near La Ronge Sask. Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada
2,All residents asked to 'shelter in place' are ...,1.0,All residents asked to 'shelter in place' are ...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...
3,"13,000 people receive #wildfires evacuation or...",1.0,"13,000 people receive #wildfires evacuation or...",13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...
4,Just got sent this photo from Ruby #Alaska as ...,1.0,Just got sent this photo from Ruby #Alaska as ...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...
5,#RockyFire Update => California Hwy. 20 closed...,1.0,#RockyFire Update => California Hwy. 20 closed...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...
6,#flood #disaster Heavy rain causes flash flood...,1.0,#flood #disaster Heavy rain causes flash flood...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...
7,I'm on top of the hill and I can see a fire in...,1.0,I'm on top of the hill and I can see a fire in...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...
8,There's an emergency evacuation happening now ...,1.0,There's an emergency evacuation happening now ...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...
9,I'm afraid that the tornado is coming to our a...,1.0,I'm afraid that the tornado is coming to our a...,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area


In [12]:
chat_words_mapping = {
    "lol": "laughing out loud",
    "brb": "be right back",
    "btw": "by the way",
    "afk": "away from keyboard",
    "rofl": "rolling on the floor laughing",
    "ttyl": "talk to you later",
    "np": "no problem",
    "thx": "thanks",
    "omg": "oh my god",
    "idk": "I don't know",
    "np": "no problem",
    "gg": "good game",
    "g2g": "got to go",
    "b4": "before",
    "cu": "see you",
    "yw": "you're welcome",
    "wtf": "what the f*ck",
    "imho": "in my humble opinion",
    "jk": "just kidding",
    "gf": "girlfriend",
    "bf": "boyfriend",
    "u": "you",
    "r": "are",
    "2": "to",
    "4": "for",
    "b": "be",
    "c": "see",
    "y": "why",
    "tho": "though",
    "smh": "shaking my head",
    "lolz": "laughing out loud",
    "h8": "hate",
    "luv": "love",
    "pls": "please",
    "sry": "sorry",
    "tbh": "to be honest",
    "omw": "on my way",
    "omw2syg": "on my way to see your girlfriend",
    "atb":  "all the best",
    "aka": "also known as",
    "adih": "another day in hell",
    "aymm": "are you my mother?",
    "ruok": "are you ok?",
    "aamof": "as a matter of fact",
    "afaict": "as far as i can tell",
    "afaik": "as far as i know",
    "afair": "as far as i remember",
    "afaic": "as far as i’m concerned",
    "asap": "as soon as possible",
    "ama": "ask me anything",
    "atm": "at the moment",
    "ayor": "at your own risk",
    "afk": "away from keyboard",
    "b@u": "back at you",
    "bbias": "be back in a sec",
    "brb": "be right back",
    "bc": "because",
    "b4": "before",
    "bae": "before anyone else",
    "bff": "best friends forever",
    "bsaaw": "big smile and a wink",
    "bf": "boyfriend",
    "bump": "bring up my post",
    "bro": "brother",
    "bwl": "bursting with laughter",
    "btw": "by the way",
    "bbbg": "bye bye be good",
    "csl": "can't stop laughing",
    "cip": "commercially important person",
    "cwot": "complete waste of time",
    "gratz": "congratulations",
    "qq": "crying",
    "d8": "date",
    "dm": "direct message",
    "diy": "do it yourself",
    "dbmib": "don't bother me i'm busy",
    "dwh": "during work hours",
    "emb": "early morning business meeting",
    "e123": "easy as one, two, three",
    "f2f": "face to face",
    "fomo": "fear of missing out",
    "4ao": "for adults only",
    "fawc": "for anyone who cares",
    "ftl": "for the loss",
    "fyi": "for your information",
    "4ever": "forever",
    "fimh": "forever in my heart",
    "fka": "formerly known as",
    "faq": "frequently asked questions",
    "gahoy": "get a hold of yourself",
    "goi": "get over it",
    "gf": "girlfriend",
    "gfn": "gone for now",
    "gg": "good game",
    "gl": "good luck",
    "gr8": "great",
    "gmta": "great minds think alike",
    "goat": "greatest of all time",
    "hb2u": "happy birthday to you",
    "hf": "have fun",
    "xoxo": "hugs and kisses",
    "idc": "i don't care",
    "idk": "i don't know",
    "ifyp": "i feel your pain",
    "ik": "i know",
    "ily/ilu": "i love you",
    "ilysm/lysm": "i love you so much",
    "imu": "i miss you",
    "iirc": "if i remember correctly",
    "icymi": "in case you missed it",
    "imo": "in my opinion",
    "irl": "in real life",
    "j4f": "just for fun",
    "jic": "just in case",
    "jk": "just kidding",
    "jsyk": "just so you know",
    "l8": "late",
    "l8r": "later",
    "lol": "laughing out loud",
    "lmk": "let me know",
    "mfw": "my face when",
    "nvm": "nevermind",
    "nmy": "nice meeting you",
    "np": "no problem",
    "nagi": "not a good idea",
    "n/a": "not available",
    "nbd": "not big deal",
    "nfs": "not for sale",
    "nm": "not much",
    "nsfl": "not safe for life",
    "nsfw": "not safe for work",
    "omg": "oh my god",
    "omw": "on my way",
    "oc": "original content",
    "omdb": "over my dead body",
    "oh": "overheard",
    "ppl": "people",
    "potd": "photo of the day",
    "pls": "please",
    "ptb": "please text back",
    "pov": "point of view",
    "ps": "post script",
    "rbtl": "read between the lines",
    "rsvp": "respondez s’il vous plaît (french)",
    "rofl": "rolling on the floor laughing",
    "sfw": "safe for work",
    "ssdd": "same stuff, different day",
    "c u": "see you",
    "cyt": "see you tomorrow",
    "srsly": "seriously",
    "smh": "shaking my head",
    "sis": "sister",
    "zzz": "sleep",
    "soml": "story of my life",
    "ttyl": "talk to you later",
    "time": "tears in my eyes",
    "tgif": "thank god, it’s friday",
    "thx": "thanks",
    "tia": "thanks in advance",
    "tbt": "throwback thursday",
    "tbc": "to be continued",
    "tbh": "to be honest",
    "til": "today i learned",
    "2nite": "tonight",
    "tl;dr": "too long; didn’t read",
    "tmi": "too much information",
    "tntl": "trying not to laugh",
    "vip": "very important person",
    "w8": "wait",
    "wyd": "what are you doing?",
    "sup?": "what’s up?",
    "wywh": "wish you were here",
    "wfm": "works for me",
    "u": "you",
    "ygtr": "you got that right",
    "ynk": "you never know",
    "hbd": "happy birthday",
    "smh": "shaking my head",
    "idk": "I don't know",
    "imho": "in my humble opinion",
    "tbh": "to be honest",
    "omg": "oh my god",
    "yolo": "you only live once",
    "fml": "fuck my life",
    "tl;dr": "too long; didn't read",
    "fyi": "for your information",
    "ttyl": "talk to you later",
    "bff": "best friends forever",
    "bday": "birthday",
    "gr8": "great",
    "omw": "on my way",
    "lmk": "let me know",
    "g2g": "got to go",
    "asap": "as soon as possible",
    "ttys": "talk to you soon",
    "gfy": "good for you",
    "tl;dr": "too long; didn't read",
    "bbl": "be back later",
    "fyi": "for your information",
    "plz": "please",
    "np": "no problem",
    "hmu": "hit me up",
    "imo": "in my opinion",
    "imho": "in my humble opinion",
    "icymi": "in case you missed it",
}
def expand_chat_words(text):
    """
    Verilen metindeki kısaltmaları, sözlükteki karşılıklarıyla değiştirir.

    Args:
        text (str): İşlenecek metin.
        chat_words_mapping (dict): Kısaltmalar ve anlamları içeren sözlük.

    Returns:
        str: Kısaltmaların açılmış hali.
    """

    words = text.split()
    expanded_words = [chat_words_mapping.get(word.lower(), word) for word in words]
    return ' '.join(expanded_words)
df['text_expanded'] = df['text_without_numbers'].apply(lambda x: expand_chat_words(x))
df.head(50)

Unnamed: 0,text,target,text_without_url,text_without_spec_char,text_without_emojis,text_without_html,text_without_punctuations,text_without_numbers,text_expanded
0,Our Deeds are the Reason of this #earthquake M...,1.0,Our Deeds are the Reason of this #earthquake M...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...
1,Forest fire near La Ronge Sask. Canada,1.0,Forest fire near La Ronge Sask. Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada
2,All residents asked to 'shelter in place' are ...,1.0,All residents asked to 'shelter in place' are ...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...
3,"13,000 people receive #wildfires evacuation or...",1.0,"13,000 people receive #wildfires evacuation or...",13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...
4,Just got sent this photo from Ruby #Alaska as ...,1.0,Just got sent this photo from Ruby #Alaska as ...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...
5,#RockyFire Update => California Hwy. 20 closed...,1.0,#RockyFire Update => California Hwy. 20 closed...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in b...
6,#flood #disaster Heavy rain causes flash flood...,1.0,#flood #disaster Heavy rain causes flash flood...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...
7,I'm on top of the hill and I can see a fire in...,1.0,I'm on top of the hill and I can see a fire in...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...
8,There's an emergency evacuation happening now ...,1.0,There's an emergency evacuation happening now ...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...
9,I'm afraid that the tornado is coming to our a...,1.0,I'm afraid that the tornado is coming to our a...,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area


In [13]:
# Tokenizing the tweet base texts.
df['tokenized'] = df['text_expanded'].apply(word_tokenize)
df.head(50)

Unnamed: 0,text,target,text_without_url,text_without_spec_char,text_without_emojis,text_without_html,text_without_punctuations,text_without_numbers,text_expanded,tokenized
0,Our Deeds are the Reason of this #earthquake M...,1.0,Our Deeds are the Reason of this #earthquake M...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,"[Our, Deeds, are, the, Reason, of, this, earth..."
1,Forest fire near La Ronge Sask. Canada,1.0,Forest fire near La Ronge Sask. Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,"[Forest, fire, near, La, Ronge, Sask, Canada]"
2,All residents asked to 'shelter in place' are ...,1.0,All residents asked to 'shelter in place' are ...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,"[All, residents, asked, to, shelter, in, place..."
3,"13,000 people receive #wildfires evacuation or...",1.0,"13,000 people receive #wildfires evacuation or...",13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,"[13000, people, receive, wildfires, evacuation..."
4,Just got sent this photo from Ruby #Alaska as ...,1.0,Just got sent this photo from Ruby #Alaska as ...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,"[Just, got, sent, this, photo, from, Ruby, Ala..."
5,#RockyFire Update => California Hwy. 20 closed...,1.0,#RockyFire Update => California Hwy. 20 closed...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in b...,"[RockyFire, Update, California, Hwy, 20, close..."
6,#flood #disaster Heavy rain causes flash flood...,1.0,#flood #disaster Heavy rain causes flash flood...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,"[flood, disaster, Heavy, rain, causes, flash, ..."
7,I'm on top of the hill and I can see a fire in...,1.0,I'm on top of the hill and I can see a fire in...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,"[Im, on, top, of, the, hill, and, I, can, see,..."
8,There's an emergency evacuation happening now ...,1.0,There's an emergency evacuation happening now ...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,"[Theres, an, emergency, evacuation, happening,..."
9,I'm afraid that the tornado is coming to our a...,1.0,I'm afraid that the tornado is coming to our a...,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,"[Im, afraid, that, the, tornado, is, coming, t..."


In [14]:
# Lower casing clean text.
df['lower'] = df['tokenized'].apply(lambda x: [word.lower() for word in x])

df.head()

Unnamed: 0,text,target,text_without_url,text_without_spec_char,text_without_emojis,text_without_html,text_without_punctuations,text_without_numbers,text_expanded,tokenized,lower
0,Our Deeds are the Reason of this #earthquake M...,1.0,Our Deeds are the Reason of this #earthquake M...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,"[Our, Deeds, are, the, Reason, of, this, earth...","[our, deeds, are, the, reason, of, this, earth..."
1,Forest fire near La Ronge Sask. Canada,1.0,Forest fire near La Ronge Sask. Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,"[Forest, fire, near, La, Ronge, Sask, Canada]","[forest, fire, near, la, ronge, sask, canada]"
2,All residents asked to 'shelter in place' are ...,1.0,All residents asked to 'shelter in place' are ...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,"[All, residents, asked, to, shelter, in, place...","[all, residents, asked, to, shelter, in, place..."
3,"13,000 people receive #wildfires evacuation or...",1.0,"13,000 people receive #wildfires evacuation or...",13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,"[13000, people, receive, wildfires, evacuation...","[13000, people, receive, wildfires, evacuation..."
4,Just got sent this photo from Ruby #Alaska as ...,1.0,Just got sent this photo from Ruby #Alaska as ...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,"[Just, got, sent, this, photo, from, Ruby, Ala...","[just, got, sent, this, photo, from, ruby, ala..."


In [15]:
# Removing stopwords.
stop = set(stopwords.words('english'))
df['stopwords_removed'] = df['lower'].apply(lambda x: [word for word in x if word not in stop])

df.head(50)

Unnamed: 0,text,target,text_without_url,text_without_spec_char,text_without_emojis,text_without_html,text_without_punctuations,text_without_numbers,text_expanded,tokenized,lower,stopwords_removed
0,Our Deeds are the Reason of this #earthquake M...,1.0,Our Deeds are the Reason of this #earthquake M...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,"[Our, Deeds, are, the, Reason, of, this, earth...","[our, deeds, are, the, reason, of, this, earth...","[deeds, reason, earthquake, may, allah, forgiv..."
1,Forest fire near La Ronge Sask. Canada,1.0,Forest fire near La Ronge Sask. Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,"[Forest, fire, near, La, Ronge, Sask, Canada]","[forest, fire, near, la, ronge, sask, canada]","[forest, fire, near, la, ronge, sask, canada]"
2,All residents asked to 'shelter in place' are ...,1.0,All residents asked to 'shelter in place' are ...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,"[All, residents, asked, to, shelter, in, place...","[all, residents, asked, to, shelter, in, place...","[residents, asked, shelter, place, notified, o..."
3,"13,000 people receive #wildfires evacuation or...",1.0,"13,000 people receive #wildfires evacuation or...",13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,"[13000, people, receive, wildfires, evacuation...","[13000, people, receive, wildfires, evacuation...","[13000, people, receive, wildfires, evacuation..."
4,Just got sent this photo from Ruby #Alaska as ...,1.0,Just got sent this photo from Ruby #Alaska as ...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,"[Just, got, sent, this, photo, from, Ruby, Ala...","[just, got, sent, this, photo, from, ruby, ala...","[got, sent, photo, ruby, alaska, smoke, wildfi..."
5,#RockyFire Update => California Hwy. 20 closed...,1.0,#RockyFire Update => California Hwy. 20 closed...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in b...,"[RockyFire, Update, California, Hwy, 20, close...","[rockyfire, update, california, hwy, 20, close...","[rockyfire, update, california, hwy, 20, close..."
6,#flood #disaster Heavy rain causes flash flood...,1.0,#flood #disaster Heavy rain causes flash flood...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,"[flood, disaster, Heavy, rain, causes, flash, ...","[flood, disaster, heavy, rain, causes, flash, ...","[flood, disaster, heavy, rain, causes, flash, ..."
7,I'm on top of the hill and I can see a fire in...,1.0,I'm on top of the hill and I can see a fire in...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,"[Im, on, top, of, the, hill, and, I, can, see,...","[im, on, top, of, the, hill, and, i, can, see,...","[im, top, hill, see, fire, woods]"
8,There's an emergency evacuation happening now ...,1.0,There's an emergency evacuation happening now ...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,"[Theres, an, emergency, evacuation, happening,...","[theres, an, emergency, evacuation, happening,...","[theres, emergency, evacuation, happening, bui..."
9,I'm afraid that the tornado is coming to our a...,1.0,I'm afraid that the tornado is coming to our a...,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,"[Im, afraid, that, the, tornado, is, coming, t...","[im, afraid, that, the, tornado, is, coming, t...","[im, afraid, tornado, coming, area]"


In [16]:
# Applying part of speech tags.
df['pos_tags'] = df['stopwords_removed'].apply(nltk.tag.pos_tag)
df.head(50)

Unnamed: 0,text,target,text_without_url,text_without_spec_char,text_without_emojis,text_without_html,text_without_punctuations,text_without_numbers,text_expanded,tokenized,lower,stopwords_removed,pos_tags
0,Our Deeds are the Reason of this #earthquake M...,1.0,Our Deeds are the Reason of this #earthquake M...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,"[Our, Deeds, are, the, Reason, of, this, earth...","[our, deeds, are, the, reason, of, this, earth...","[deeds, reason, earthquake, may, allah, forgiv...","[(deeds, NNS), (reason, NN), (earthquake, NN),..."
1,Forest fire near La Ronge Sask. Canada,1.0,Forest fire near La Ronge Sask. Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,"[Forest, fire, near, La, Ronge, Sask, Canada]","[forest, fire, near, la, ronge, sask, canada]","[forest, fire, near, la, ronge, sask, canada]","[(forest, JJS), (fire, NN), (near, IN), (la, J..."
2,All residents asked to 'shelter in place' are ...,1.0,All residents asked to 'shelter in place' are ...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,"[All, residents, asked, to, shelter, in, place...","[all, residents, asked, to, shelter, in, place...","[residents, asked, shelter, place, notified, o...","[(residents, NNS), (asked, VBD), (shelter, JJ)..."
3,"13,000 people receive #wildfires evacuation or...",1.0,"13,000 people receive #wildfires evacuation or...",13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,"[13000, people, receive, wildfires, evacuation...","[13000, people, receive, wildfires, evacuation...","[13000, people, receive, wildfires, evacuation...","[(13000, CD), (people, NNS), (receive, JJ), (w..."
4,Just got sent this photo from Ruby #Alaska as ...,1.0,Just got sent this photo from Ruby #Alaska as ...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,"[Just, got, sent, this, photo, from, Ruby, Ala...","[just, got, sent, this, photo, from, ruby, ala...","[got, sent, photo, ruby, alaska, smoke, wildfi...","[(got, VBD), (sent, JJ), (photo, NN), (ruby, N..."
5,#RockyFire Update => California Hwy. 20 closed...,1.0,#RockyFire Update => California Hwy. 20 closed...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in b...,"[RockyFire, Update, California, Hwy, 20, close...","[rockyfire, update, california, hwy, 20, close...","[rockyfire, update, california, hwy, 20, close...","[(rockyfire, NN), (update, NN), (california, N..."
6,#flood #disaster Heavy rain causes flash flood...,1.0,#flood #disaster Heavy rain causes flash flood...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,"[flood, disaster, Heavy, rain, causes, flash, ...","[flood, disaster, heavy, rain, causes, flash, ...","[flood, disaster, heavy, rain, causes, flash, ...","[(flood, NN), (disaster, NN), (heavy, JJ), (ra..."
7,I'm on top of the hill and I can see a fire in...,1.0,I'm on top of the hill and I can see a fire in...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,"[Im, on, top, of, the, hill, and, I, can, see,...","[im, on, top, of, the, hill, and, i, can, see,...","[im, top, hill, see, fire, woods]","[(im, NN), (top, NN), (hill, NN), (see, VBP), ..."
8,There's an emergency evacuation happening now ...,1.0,There's an emergency evacuation happening now ...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,"[Theres, an, emergency, evacuation, happening,...","[theres, an, emergency, evacuation, happening,...","[theres, emergency, evacuation, happening, bui...","[(theres, NNS), (emergency, NN), (evacuation, ..."
9,I'm afraid that the tornado is coming to our a...,1.0,I'm afraid that the tornado is coming to our a...,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,"[Im, afraid, that, the, tornado, is, coming, t...","[im, afraid, that, the, tornado, is, coming, t...","[im, afraid, tornado, coming, area]","[(im, NN), (afraid, JJ), (tornado, NN), (comin..."


In [17]:
# Converting part of speeches to wordnet format.

def get_wordnet_pos(tag):
    if tag.startswith('J'):
        return wordnet.ADJ
    elif tag.startswith('V'):
        return wordnet.VERB
    elif tag.startswith('N'):
        return wordnet.NOUN
    elif tag.startswith('R'):
        return wordnet.ADV
    else:
        return wordnet.NOUN


df['wordnet_pos'] = df['pos_tags'].apply(lambda x: [(word, get_wordnet_pos(pos_tag)) for (word, pos_tag) in x])

df.head(50)

Unnamed: 0,text,target,text_without_url,text_without_spec_char,text_without_emojis,text_without_html,text_without_punctuations,text_without_numbers,text_expanded,tokenized,lower,stopwords_removed,pos_tags,wordnet_pos
0,Our Deeds are the Reason of this #earthquake M...,1.0,Our Deeds are the Reason of this #earthquake M...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,"[Our, Deeds, are, the, Reason, of, this, earth...","[our, deeds, are, the, reason, of, this, earth...","[deeds, reason, earthquake, may, allah, forgiv...","[(deeds, NNS), (reason, NN), (earthquake, NN),...","[(deeds, n), (reason, n), (earthquake, n), (ma..."
1,Forest fire near La Ronge Sask. Canada,1.0,Forest fire near La Ronge Sask. Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,"[Forest, fire, near, La, Ronge, Sask, Canada]","[forest, fire, near, la, ronge, sask, canada]","[forest, fire, near, la, ronge, sask, canada]","[(forest, JJS), (fire, NN), (near, IN), (la, J...","[(forest, a), (fire, n), (near, n), (la, a), (..."
2,All residents asked to 'shelter in place' are ...,1.0,All residents asked to 'shelter in place' are ...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,"[All, residents, asked, to, shelter, in, place...","[all, residents, asked, to, shelter, in, place...","[residents, asked, shelter, place, notified, o...","[(residents, NNS), (asked, VBD), (shelter, JJ)...","[(residents, n), (asked, v), (shelter, a), (pl..."
3,"13,000 people receive #wildfires evacuation or...",1.0,"13,000 people receive #wildfires evacuation or...",13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,"[13000, people, receive, wildfires, evacuation...","[13000, people, receive, wildfires, evacuation...","[13000, people, receive, wildfires, evacuation...","[(13000, CD), (people, NNS), (receive, JJ), (w...","[(13000, n), (people, n), (receive, a), (wildf..."
4,Just got sent this photo from Ruby #Alaska as ...,1.0,Just got sent this photo from Ruby #Alaska as ...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,"[Just, got, sent, this, photo, from, Ruby, Ala...","[just, got, sent, this, photo, from, ruby, ala...","[got, sent, photo, ruby, alaska, smoke, wildfi...","[(got, VBD), (sent, JJ), (photo, NN), (ruby, N...","[(got, v), (sent, a), (photo, n), (ruby, n), (..."
5,#RockyFire Update => California Hwy. 20 closed...,1.0,#RockyFire Update => California Hwy. 20 closed...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in b...,"[RockyFire, Update, California, Hwy, 20, close...","[rockyfire, update, california, hwy, 20, close...","[rockyfire, update, california, hwy, 20, close...","[(rockyfire, NN), (update, NN), (california, N...","[(rockyfire, n), (update, n), (california, n),..."
6,#flood #disaster Heavy rain causes flash flood...,1.0,#flood #disaster Heavy rain causes flash flood...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,"[flood, disaster, Heavy, rain, causes, flash, ...","[flood, disaster, heavy, rain, causes, flash, ...","[flood, disaster, heavy, rain, causes, flash, ...","[(flood, NN), (disaster, NN), (heavy, JJ), (ra...","[(flood, n), (disaster, n), (heavy, a), (rain,..."
7,I'm on top of the hill and I can see a fire in...,1.0,I'm on top of the hill and I can see a fire in...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,"[Im, on, top, of, the, hill, and, I, can, see,...","[im, on, top, of, the, hill, and, i, can, see,...","[im, top, hill, see, fire, woods]","[(im, NN), (top, NN), (hill, NN), (see, VBP), ...","[(im, n), (top, n), (hill, n), (see, v), (fire..."
8,There's an emergency evacuation happening now ...,1.0,There's an emergency evacuation happening now ...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,"[Theres, an, emergency, evacuation, happening,...","[theres, an, emergency, evacuation, happening,...","[theres, emergency, evacuation, happening, bui...","[(theres, NNS), (emergency, NN), (evacuation, ...","[(theres, n), (emergency, n), (evacuation, n),..."
9,I'm afraid that the tornado is coming to our a...,1.0,I'm afraid that the tornado is coming to our a...,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,"[Im, afraid, that, the, tornado, is, coming, t...","[im, afraid, that, the, tornado, is, coming, t...","[im, afraid, tornado, coming, area]","[(im, NN), (afraid, JJ), (tornado, NN), (comin...","[(im, n), (afraid, a), (tornado, n), (coming, ..."


In [18]:
# Applying word lemmatizer.

wnl = WordNetLemmatizer()

df['lemmatized'] = df['wordnet_pos'].apply(lambda x: [wnl.lemmatize(word, tag) for word, tag in x])

df['lemmatized'] = df['lemmatized'].apply(lambda x: [word for word in x if word not in stop])

df['lemma_str'] = [' '.join(map(str, l)) for l in df['lemmatized']]
df['text_last'] = df['lemma_str']
df.head(50)

Unnamed: 0,text,target,text_without_url,text_without_spec_char,text_without_emojis,text_without_html,text_without_punctuations,text_without_numbers,text_expanded,tokenized,lower,stopwords_removed,pos_tags,wordnet_pos,lemmatized,lemma_str,text_last
0,Our Deeds are the Reason of this #earthquake M...,1.0,Our Deeds are the Reason of this #earthquake M...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,Our Deeds are the Reason of this earthquake Ma...,"[Our, Deeds, are, the, Reason, of, this, earth...","[our, deeds, are, the, reason, of, this, earth...","[deeds, reason, earthquake, may, allah, forgiv...","[(deeds, NNS), (reason, NN), (earthquake, NN),...","[(deeds, n), (reason, n), (earthquake, n), (ma...","[deed, reason, earthquake, may, allah, forgive...",deed reason earthquake may allah forgive u,deed reason earthquake may allah forgive u
1,Forest fire near La Ronge Sask. Canada,1.0,Forest fire near La Ronge Sask. Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,Forest fire near La Ronge Sask Canada,"[Forest, fire, near, La, Ronge, Sask, Canada]","[forest, fire, near, la, ronge, sask, canada]","[forest, fire, near, la, ronge, sask, canada]","[(forest, JJS), (fire, NN), (near, IN), (la, J...","[(forest, a), (fire, n), (near, n), (la, a), (...","[forest, fire, near, la, ronge, sask, canada]",forest fire near la ronge sask canada,forest fire near la ronge sask canada
2,All residents asked to 'shelter in place' are ...,1.0,All residents asked to 'shelter in place' are ...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,All residents asked to shelter in place are be...,"[All, residents, asked, to, shelter, in, place...","[all, residents, asked, to, shelter, in, place...","[residents, asked, shelter, place, notified, o...","[(residents, NNS), (asked, VBD), (shelter, JJ)...","[(residents, n), (asked, v), (shelter, a), (pl...","[resident, ask, shelter, place, notify, office...",resident ask shelter place notify officer evac...,resident ask shelter place notify officer evac...
3,"13,000 people receive #wildfires evacuation or...",1.0,"13,000 people receive #wildfires evacuation or...",13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,13000 people receive wildfires evacuation orde...,"[13000, people, receive, wildfires, evacuation...","[13000, people, receive, wildfires, evacuation...","[13000, people, receive, wildfires, evacuation...","[(13000, CD), (people, NNS), (receive, JJ), (w...","[(13000, n), (people, n), (receive, a), (wildf...","[13000, people, receive, wildfire, evacuation,...",13000 people receive wildfire evacuation order...,13000 people receive wildfire evacuation order...
4,Just got sent this photo from Ruby #Alaska as ...,1.0,Just got sent this photo from Ruby #Alaska as ...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,Just got sent this photo from Ruby Alaska as s...,"[Just, got, sent, this, photo, from, Ruby, Ala...","[just, got, sent, this, photo, from, ruby, ala...","[got, sent, photo, ruby, alaska, smoke, wildfi...","[(got, VBD), (sent, JJ), (photo, NN), (ruby, N...","[(got, v), (sent, a), (photo, n), (ruby, n), (...","[get, sent, photo, ruby, alaska, smoke, wildfi...",get sent photo ruby alaska smoke wildfires pou...,get sent photo ruby alaska smoke wildfires pou...
5,#RockyFire Update => California Hwy. 20 closed...,1.0,#RockyFire Update => California Hwy. 20 closed...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in ...,RockyFire Update California Hwy 20 closed in b...,"[RockyFire, Update, California, Hwy, 20, close...","[rockyfire, update, california, hwy, 20, close...","[rockyfire, update, california, hwy, 20, close...","[(rockyfire, NN), (update, NN), (california, N...","[(rockyfire, n), (update, n), (california, n),...","[rockyfire, update, california, hwy, 20, close...",rockyfire update california hwy 20 closed dire...,rockyfire update california hwy 20 closed dire...
6,#flood #disaster Heavy rain causes flash flood...,1.0,#flood #disaster Heavy rain causes flash flood...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,flood disaster Heavy rain causes flash floodin...,"[flood, disaster, Heavy, rain, causes, flash, ...","[flood, disaster, heavy, rain, causes, flash, ...","[flood, disaster, heavy, rain, causes, flash, ...","[(flood, NN), (disaster, NN), (heavy, JJ), (ra...","[(flood, n), (disaster, n), (heavy, a), (rain,...","[flood, disaster, heavy, rain, cause, flash, f...",flood disaster heavy rain cause flash flood st...,flood disaster heavy rain cause flash flood st...
7,I'm on top of the hill and I can see a fire in...,1.0,I'm on top of the hill and I can see a fire in...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,Im on top of the hill and I can see a fire in ...,"[Im, on, top, of, the, hill, and, I, can, see,...","[im, on, top, of, the, hill, and, i, can, see,...","[im, top, hill, see, fire, woods]","[(im, NN), (top, NN), (hill, NN), (see, VBP), ...","[(im, n), (top, n), (hill, n), (see, v), (fire...","[im, top, hill, see, fire, wood]",im top hill see fire wood,im top hill see fire wood
8,There's an emergency evacuation happening now ...,1.0,There's an emergency evacuation happening now ...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,Theres an emergency evacuation happening now i...,"[Theres, an, emergency, evacuation, happening,...","[theres, an, emergency, evacuation, happening,...","[theres, emergency, evacuation, happening, bui...","[(theres, NNS), (emergency, NN), (evacuation, ...","[(theres, n), (emergency, n), (evacuation, n),...","[emergency, evacuation, happen, building, acro...",emergency evacuation happen building across st...,emergency evacuation happen building across st...
9,I'm afraid that the tornado is coming to our a...,1.0,I'm afraid that the tornado is coming to our a...,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,Im afraid that the tornado is coming to our area,"[Im, afraid, that, the, tornado, is, coming, t...","[im, afraid, that, the, tornado, is, coming, t...","[im, afraid, tornado, coming, area]","[(im, NN), (afraid, JJ), (tornado, NN), (comin...","[(im, n), (afraid, a), (tornado, n), (coming, ...","[im, afraid, tornado, come, area]",im afraid tornado come area,im afraid tornado come area


In [19]:
train_df = df[df['target'].notnull()]
test_df = df[df['target'].isnull()]

In [20]:
# Vectorizers

# Count Vectorizer
count_vectorizer = CountVectorizer()
X_train_count = count_vectorizer.fit_transform(train_df['text_last'])
X_test_count = count_vectorizer.transform(test_df['text_last'])



# Tfidf Vectorizer 
tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(train_df['text_last'])
X_test_tfidf = tfidf_vectorizer.transform(test_df['text_last'])


# TfidfVectorizer ile vektörleştirme (n-gram ile)
tfidf_vectorizer_ngram = TfidfVectorizer(ngram_range=(1, 2))  # unigram ve bigram kullanımı
X_train_tfidf_ngram = tfidf_vectorizer_ngram.fit_transform(train_df['text_last'])
X_test_tfidf_ngram = tfidf_vectorizer_ngram.transform(test_df['text_last'])

In [21]:
y_train = train_df['target']

In [22]:
models = [('LR', LogisticRegression(random_state=12)),
          ('KNN', KNeighborsClassifier()),
          ('CART', DecisionTreeClassifier(random_state=12)),
          ('RF', RandomForestClassifier(random_state=12)),
          ('NaiveBayes', MultinomialNB()),
          ('SVM', SVC(gamma='auto', random_state=12)),
          # ("LightGBM", LGBMClassifier(random_state=12)),  # Low F1 score; commented out to avoid cluttering the output
          ("CatBoost", CatBoostClassifier(verbose=False, random_state=12)),
          #('XGB', XGBClassifier(random_state=12))
]

In [25]:
# Evaluate models with CountVectorizer
print("Evaluating models with CountVectorizer")
for name, model in models:
    # Perform cross-validation with CountVectorizer features
    cv_results = cross_validate(model, X_train_count, y_train, cv=10, scoring='f1')
    # Print the model name and its mean F1 score
    print(f"########## {name} ##########")
    print(f"F1: {round(cv_results['test_score'].mean(), 4)}")

Evaluating models with CountVectorizer
########## LR ##########
F1: 0.5562
########## KNN ##########
F1: 0.0956
########## CART ##########
F1: 0.4768
########## RF ##########
F1: 0.4988
########## NaiveBayes ##########
F1: 0.6364
########## SVM ##########
F1: 0.0
########## CatBoost ##########
F1: 0.4651


In [None]:
# Evaluate models with TfidfVectorizer
print("Evaluating models with TfidfVectorizer")
for name, model in models:
    # Perform cross-validation with TfidfVectorizer features
    cv_results = cross_validate(model, X_train_tfidf, y_train, cv=10, scoring='f1')
    # Print the model name and its mean F1 score
    print(f"########## {name} ##########")
    print(f"F1: {round(cv_results['test_score'].mean(), 4)}")

Evaluating models with TfidfVectorizer
########## LR ##########
F1: 0.557
########## KNN ##########
F1: 0.5729
########## CART ##########
F1: 0.4851
########## RF ##########
F1: 0.5014
########## NaiveBayes ##########
F1: 0.6243
########## SVM ##########
F1: 0.0


In [None]:
# Evaluate models with TfidfVectorizer (n-gram)
print("Evaluating models with TfidfVectorizer (n-gram)")
for name, model in models:
    # Perform cross-validation with TfidfVectorizer features including n-grams
    cv_results = cross_validate(model, X_train_tfidf_ngram, y_train, cv=10, scoring='f1')
    # Print the model name and its mean F1 score
    print(f"########## {name} ##########")
    print(f"F1: {round(cv_results['test_score'].mean(), 4)}")

In [23]:
 # Naive Bayes

# Create a Multinomial Naive Bayes model
nb_model = MultinomialNB()

# Define the hyperparameter space (we will tune the alpha parameter)
param_grid = {'alpha': [0.01, 0.1, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]}

In [24]:
# Perform Grid Search with CountVectorizer to find the best parameters
grid_search_count = GridSearchCV(nb_model, param_grid, cv=5, n_jobs=-1, verbose=True).fit(X_train_count, y_train)
# Print the best parameters and the best score
print(f"Best parameters: {grid_search_count.best_params_}")
print(f"Best score: {grid_search_count.best_score_}")

Fitting 5 folds for each of 8 candidates, totalling 40 fits
Best parameters: {'alpha': 1.0}
Best score: 0.7003893863950308


In [25]:
# Perform Grid Search with TfidfVectorizer to find the best parameters
grid_search_tfidf = GridSearchCV(nb_model, param_grid, cv=5, n_jobs=-1, verbose=True).fit(X_train_tfidf, y_train)
# Print the best parameters and the best score
print(f"Best parameters: {grid_search_tfidf.best_params_}")
print(f"Best score: {grid_search_tfidf.best_score_}")

Fitting 5 folds for each of 8 candidates, totalling 40 fits
Best parameters: {'alpha': 1.0}
Best score: 0.7221925223662062


In [26]:
# Perform Grid Search with TfidfVectorizer with n-grams to find the best parameters
grid_search_tfidf_ngram = GridSearchCV(nb_model, param_grid, cv=5, n_jobs=-1, verbose=True).fit(X_train_tfidf_ngram, y_train)
# Print the best parameters and the best score
print(f"Best parameters: {grid_search_tfidf_ngram.best_params_}")
print(f"Best score: {grid_search_tfidf_ngram.best_score_}")

Fitting 5 folds for each of 8 candidates, totalling 40 fits
Best parameters: {'alpha': 0.9}
Best score: 0.7257404855725136


In [27]:
# Train the Naive Bayes model with the best parameters found using TfidfVectorizer
nb_final = nb_model.set_params(**grid_search_count.best_params_)
nb_final.fit(X_train_count, y_train)

# Evaluate the model performance using cross-validation
cv_results = cross_validate(nb_final, X_train_count, y_train, cv=10, scoring="f1")
print(f"F1 Score: {cv_results['test_score'].mean()}")

F1 Score: 0.6363507774770503


In [38]:
def submission(submission_file_path, model, test_vectors):
    """
    Function to create a submission file.

    Parameters:
    submission_file_path (str): Path to the sample submission CSV file.
    model (sklearn model): Trained model to make predictions.
    test_vectors (array-like): Test feature vectors to predict.
    """
    # Read the sample submission file
    sample_submission = pd.read_csv(submission_file_path)
    # Predict target values using the trained model
    sample_submission["target"] = model.predict(test_vectors)
    # Convert predictions to integer type
    sample_submission["target"] = sample_submission["target"].astype(int)
    # Save the results to a new CSV file
    sample_submission.to_csv("ofa_submission.csv", index=False)


# Define file path and test vectors
submission_file_path = "datasets/sample_submission.csv"
test_vectors = X_test_count

In [39]:
# Create and save the submission file
submission(submission_file_path, nb_final, test_vectors)

In [None]:
# on kaggle competition f1_Score: 0.79742

In [29]:
import joblib
joblib.dump(count_vectorizer, 'count_vectorizer.pkl')
joblib.dump(nb_final, 'disaster_or_not.pkl')

['disaster_or_not.pkl']