In [1]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import joblib
import warnings


# Load the labelled messages. read_csv already returns a DataFrame, so the
# extra pd.DataFrame(...) wrap is unnecessary; `data` is kept as a second name
# for the same frame.
data = pd.read_csv("clean_dataset.csv")
df = data

# Fail fast with a clear message if the columns read by the training cell
# ('message' for the text, 'label' for the target) are absent.
missing_columns = {"message", "label"} - set(df.columns)
if missing_columns:
    raise ValueError(
        f"clean_dataset.csv is missing required columns: {sorted(missing_columns)}"
    )

In [4]:
# Preview the first five rows as plain text to sanity-check the loaded columns.
print(df.head(n=5))

                                      message          label
0                     tree branch fell on car      emergency
1  heard a loud crash outside, sounds serious      emergency
2                appreciation for the teacher  non-emergency
3                  need help with a flat tire  non-emergency
4           booked a trip to Hawaii, excited!  non-emergency


In [5]:
# Bag-of-words features: learn the token vocabulary from the message column and
# encode every message as a row of token counts.
# NOTE(review): the vocabulary is learned from all rows, i.e. before the
# train/test split below, so it also contains tokens that occur only in
# test rows.
vectorizer = CountVectorizer().fit(df["message"])
X = vectorizer.transform(df["message"])
y = df["label"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
split = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split

# Fit a logistic-regression classifier with default settings on the training
# rows (fit() returns the estimator itself).
model = LogisticRegression().fit(X_train, y_train)

# Score the held-out rows and print per-class precision / recall / F1.
predictions = model.predict(X_test)
report = classification_report(y_test, predictions)
print(report)


               precision    recall  f1-score   support

    emergency       0.91      1.00      0.95        42
non-emergency       1.00      0.93      0.96        54

     accuracy                           0.96        96
    macro avg       0.96      0.96      0.96        96
 weighted avg       0.96      0.96      0.96        96



In [6]:
# Persist the fitted CountVectorizer alongside the classifier: the model only
# accepts rows encoded with this exact vocabulary, so without the vectorizer the
# saved model cannot be applied to new text.
joblib.dump(vectorizer, "Trained_vectorizer.pkl")

# Persist the trained classifier (kept as the cell's last expression so the
# cell still displays the list of written file names for the model artifact).
joblib.dump(model,"Trained_model.pkl")

['Trained_model.pkl']