In [1]:
import joblib
import pandas as pd
import re
import nltk
#nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
import string

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, cohen_kappa_score, f1_score, classification_report
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.naive_bayes import MultinomialNB

In [2]:
# Human-readable class names, indexed by the integer label:
#   0 -> Fire, 1 -> Police, 2 -> Medical
categories = [
    "Fire",
    "Police",
    "Medical",
]


In [3]:
# Read data: tab-separated file whose first row holds the column names.
df = pd.read_csv("emergency_data_update.txt", header=0, sep="\t")

# Print how many rows carry each label to check the class balance.
x = df.label.value_counts()
print(x)

label
1    63
0    60
2    60
Name: count, dtype: int64


In [4]:
#Process Text

def process_text(text):
    """Normalize raw text for vectorization.

    The input is coerced to ``str`` and lower-cased, every ASCII punctuation
    character (``string.punctuation``) is replaced by a space, and runs of
    whitespace are collapsed to single spaces with the ends trimmed.

    Returns the cleaned string (possibly empty).
    """
    # Map each punctuation character to a single space.
    punct_to_space = str.maketrans(
        string.punctuation, " " * len(string.punctuation)
    )
    lowered = str(text).lower()
    spaced = lowered.translate(punct_to_space)
    # split() with no argument drops leading/trailing/repeated whitespace.
    tokens = spaced.split()
    return " ".join(tokens)

# Bag-of-words features over unigrams, bigrams and trigrams, with
# scikit-learn's built-in English stop-word list removed.
vec = CountVectorizer(stop_words="english", ngram_range=(1, 3))

In [5]:
# Lemmatize text data
lemmatizer = WordNetLemmatizer()


def lemmatize_text(word):
    """Clean a piece of text and lemmatize each of its tokens.

    Despite the parameter name, ``word`` is a whole text: it is first
    normalized with ``process_text`` and split on whitespace, then every
    token is passed to ``lemmatizer.lemmatize`` (called without a
    part-of-speech argument).

    Returns the lemmatized tokens joined by single spaces.
    """
    tokens = process_text(word).split()
    return " ".join(lemmatizer.lemmatize(token) for token in tokens)
    
# Quick sanity check of the lemmatizer on a comma-separated list of words.
jay = ", ".join([
    "fire", "burning", "tractors", "fires", "men", "women",
    "cars", "trackstars", "monkeys", "christians", "pimps",
])
lemmatize_text(jay)


#print(lemmatize("corpora"))


'fire burning tractor fire men woman car trackstars monkey christian pimp'

In [36]:
# Split training data and test data.
# Lemmatize every document once so both splits share the same preprocessing.
df["clean_text"] = df.text.map(lemmatize_text)

# Hold out 20% of the rows, stratified on the label so each class keeps its
# proportion in both splits. random_state is fixed so the split -- and the
# metrics reported below -- are reproducible on Restart & Run All.
df_train, df_test = train_test_split(
    df,
    test_size=0.20,
    stratify=df.label,
    random_state=42,
)

In [37]:
# Train model
# Targets: the integer class labels of each split.
y_train = df_train.label
y_test = df_test.label

# Fit the n-gram vocabulary on the training split only, then reuse that
# vocabulary to encode the test split.
X_train = vec.fit_transform(df_train.clean_text)
X_test = vec.transform(df_test.clean_text)

# Multinomial Naive Bayes on the n-gram count features.
nb = MultinomialNB()
nb.fit(X_train, y_train)

In [38]:
# Predict the held-out split and print per-class precision / recall / F1.
preds = nb.predict(X_test)
report = classification_report(y_test, preds)
print(report)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        12

    accuracy                           1.00        37
   macro avg       1.00      1.00      1.00        37
weighted avg       1.00      1.00      1.00        37



In [47]:
# Classify one line of text typed by the user.
sample_text = [input()]

# Apply the same preprocessing the model was trained on: lemmatize_text
# (which calls process_text internally). Clean each string individually --
# passing the list itself would clean the list's repr instead of its text.
clean_sample_text = [lemmatize_text(text) for text in sample_text]
sample_vec = vec.transform(clean_sample_text)

# Predict once and reuse the result for both the raw label and its name.
sample_pred = nb.predict(sample_vec)
print(sample_pred)
print(categories[sample_pred[0]])

 burning building at


[0]
Fire


In [45]:
#Save the model
# Persist the fitted classifier and the fitted vectorizer as separate joblib
# files; prediction needs both, since text is encoded with `vec` before it is
# passed to `nb`.
joblib.dump(nb, "emergency_nb3.joblib")
joblib.dump(vec, "emergency_vec3.joblib")

['emergency_vec3.joblib']

In [None]:
#Load the model
#Be sure to re-run the imports
# Load the artifacts under the same file names the save cell writes
# ("emergency_nb3.joblib" / "emergency_vec3.joblib"), so this cell picks up
# the model that was just trained.
# NOTE: joblib files are pickles -- only load files from a trusted source.
nb_saved = joblib.load("emergency_nb3.joblib")
vec_saved = joblib.load("emergency_vec3.joblib")

In [None]:
#Run the loaded model
# Requires `categories` and `lemmatize_text` to be defined in this kernel
# (re-run their cells after a restart).
sample_text = [input()]

# Apply the same preprocessing the model was trained on: lemmatize_text
# (which calls process_text internally). Clean each string individually --
# passing the list itself would clean the list's repr instead of its text.
clean_sample_text = [lemmatize_text(text) for text in sample_text]
sample_vec = vec_saved.transform(clean_sample_text)
print(categories[nb_saved.predict(sample_vec)[0]])