In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score,precision_recall_fscore_support

import tensorflow as tf
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.layers import Embedding

In [None]:
!wget https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/helper_functions.py
from helper_functions import unzip_data

--2024-09-05 05:34:13--  https://raw.githubusercontent.com/mrdbourke/tensorflow-deep-learning/main/extras/helper_functions.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10246 (10K) [text/plain]
Saving to: ‘helper_functions.py.1’


2024-09-05 05:34:14 (86.3 MB/s) - ‘helper_functions.py.1’ saved [10246/10246]



In [None]:
!wget https://storage.googleapis.com/ztm_tf_course/nlp_getting_started.zip
unzip_data("nlp_getting_started.zip")

--2024-09-05 05:34:14--  https://storage.googleapis.com/ztm_tf_course/nlp_getting_started.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 142.251.8.207, 142.251.170.207, 173.194.174.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.251.8.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 607343 (593K) [application/zip]
Saving to: ‘nlp_getting_started.zip.1’


2024-09-05 05:34:16 (931 KB/s) - ‘nlp_getting_started.zip.1’ saved [607343/607343]



In [None]:
# Read the CSVs relative to the working directory (where the archive was
# downloaded) instead of the Colab-only absolute path "/content/...".
# Assumes unzip_data extracts into the current working directory - TODO confirm.
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

In [None]:
# Preview the first five rows of the training frame.
train_df.head(n=5)

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


In [None]:
# Shuffle every row of the training frame; the fixed seed keeps the order reproducible.
# Note: re-running this cell alone reshuffles the already-shuffled frame.
train_df = train_df.sample(random_state=10, frac=1)

# Preview the shuffled rows (the original index labels are kept).
train_df.head(n=5)

Unnamed: 0,id,keyword,location,text,target
6524,9332,survive,,@DDNewsLive @NitishKumar and @ArvindKejriwal ...,0
701,1009,blazing,,@ACOUSTICMALOLEY no he was blazing it,0
3119,4479,electrocuted,"Redondo Beach, CA",Do babies actually get electrocuted from wall ...,1
4204,5972,hazard,Massachusetts,Precious cargo onesie recalled for choking haz...,0
1651,2386,collapsed,,My portable closet has collapsed 3x and it fin...,0


In [None]:
# Hold out 10% of the labelled tweets for validation (seeded for reproducibility).
# Text and target columns are passed as NumPy arrays.
train_sen, validation_sen, train_labels, validation_labels = train_test_split(
    train_df["text"].to_numpy(),
    train_df["target"].to_numpy(),
    random_state=10,
    test_size=0.1,
)

In [None]:
# Sanity check: sizes of the train/validation sentences and labels, in that order.
tuple(map(len, (train_sen, validation_sen, train_labels, validation_labels)))

(6851, 762, 6851, 762)

In [None]:
# Baseline model: TF-IDF text features fed into a multinomial Naive Bayes classifier.
model = Pipeline(
    steps=[
        ("tfidf", TfidfVectorizer()),
        ("clf", MultinomialNB()),
    ]
)

# Fit on the training sentences; as the last expression, the result is displayed by the notebook.
model.fit(train_sen, train_labels)

In [None]:
# Predict a label for every validation sentence with the fitted baseline.
predictions = model.predict(validation_sen)

# Show only the first few predictions instead of dumping the whole array.
predictions[:20]

array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1,
       1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0,
       1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1,
       0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,

In [None]:
def cal_result(y_true,y_pred):
    """Compute accuracy, precision, recall and F1 for a set of predictions.

    Args:
        y_true: Ground-truth labels (array-like).
        y_pred: Predicted labels (array-like), aligned with ``y_true``.

    Returns:
        dict with the keys:
            "accuracy":  ``accuracy_score`` multiplied by 100 (a percentage).
            "precision": weighted-average precision, unscaled.
            "recall":    weighted-average recall, unscaled.
            "f1":        weighted-average F1 score, unscaled.
        Note that accuracy is on a different scale from the other three values.
    """
    mod_acc = accuracy_score(y_true, y_pred) * 100
    # The fourth returned item (support) is not needed here.
    mod_prec, mod_rec, mod_f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average="weighted"
    )
    return {
        "accuracy": mod_acc,
        # Report the precision value itself; this key used to be filled with recall.
        "precision": mod_prec,
        "recall": mod_rec,
        "f1": mod_f1,
    }

In [None]:
# Score the baseline's validation predictions against the true labels.
results = cal_result(y_true=validation_labels, y_pred=predictions)

# Display the metrics dictionary.
results

{'accuracy': 81.10236220472441,
 'precision': 0.8110236220472441,
 'recall': 0.8110236220472441,
 'f1': 0.8045920048681613}

In [None]:
# Classify a single sentence typed in by the user with the fitted baseline model.

user_sen = input("Enter a sentence: ")

# predict() expects a collection of documents, so wrap the sentence in a list.
predict = model.predict([user_sen])

# A truthy prediction is reported as a disaster, anything else as not a disaster.
print("Model Prediction: Disaster" if predict else "Model Prediction: Not disaster")

Enter a sentence: floods
Model Prediction: Disaster
