# Load Dataset

In [3]:
from sklearn.model_selection import train_test_split
import pandas as pd


tweets = pd.read_csv('train.csv', sep=',')
selected_columns = ['text', 'target']
twitter_df = tweets[selected_columns]

In [4]:
twitter_df.head()

Unnamed: 0,text,target
0,Our Deeds are the Reason of this #earthquake M...,1
1,Forest fire near La Ronge Sask. Canada,1
2,All residents asked to 'shelter in place' are ...,1
3,"13,000 people receive #wildfires evacuation or...",1
4,Just got sent this photo from Ruby #Alaska as ...,1


In [5]:
twitter_df.count()

text      7613
target    7613
dtype: int64

# Vectorize the Data Points

In [6]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Initialize the vectorizer
tfidf_vectorizer = TfidfVectorizer()

# Fit and transform the cleaned text
X = tfidf_vectorizer.fit_transform(twitter_df['text'])

In [7]:
from sklearn.model_selection import train_test_split

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, twitter_df['target'], test_size=0.2, random_state=42)

# Logistic Regression

In [8]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Initialize the Logistic Regression model
logistic_regression_model = LogisticRegression()

# Train the model on the training data
logistic_regression_model.fit(X_train, y_train)

# Predict the labels on the test set
y_pred = logistic_regression_model.predict(X_test)

# Generate a classification report
report = classification_report(y_test, y_pred)

print(report)

              precision    recall  f1-score   support

           0       0.78      0.90      0.84       874
           1       0.83      0.67      0.74       649

    accuracy                           0.80      1523
   macro avg       0.81      0.78      0.79      1523
weighted avg       0.80      0.80      0.80      1523



# Naive Bayes

In [9]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report

# Initialize the Naive Bayes model
naive_bayes_model = MultinomialNB()

# Train the model on the training data
naive_bayes_model.fit(X_train, y_train)

# Predict the labels on the test set
y_pred = naive_bayes_model.predict(X_test)

# Generate a classification report
report = classification_report(y_test, y_pred)

print(report)

              precision    recall  f1-score   support

           0       0.77      0.93      0.84       874
           1       0.86      0.63      0.73       649

    accuracy                           0.80      1523
   macro avg       0.82      0.78      0.78      1523
weighted avg       0.81      0.80      0.79      1523



# XGBoost

In [10]:
from xgboost import XGBClassifier
from sklearn.metrics import classification_report

# Initialize the XGBoost classifier
xgboost_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')

# Train the model on the training data
xgboost_model.fit(X_train, y_train)

# Predict the labels on the test set
y_pred_xgb = xgboost_model.predict(X_test)

# Generate a classification report
report_xgb = classification_report(y_test, y_pred_xgb)
print(report_xgb)

              precision    recall  f1-score   support

           0       0.78      0.89      0.83       874
           1       0.81      0.66      0.73       649

    accuracy                           0.79      1523
   macro avg       0.80      0.77      0.78      1523
weighted avg       0.79      0.79      0.79      1523



# SVM

In [11]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report

# Initialize the Support Vector Classifier
svm_model = SVC()

# Train the model on the training data
svm_model.fit(X_train, y_train)

# Predict the labels on the test set
y_pred_svm = svm_model.predict(X_test)

# Generate a classification report
report_svm = classification_report(y_test, y_pred_svm)
print(report_svm)

              precision    recall  f1-score   support

           0       0.78      0.93      0.85       874
           1       0.87      0.65      0.75       649

    accuracy                           0.81      1523
   macro avg       0.83      0.79      0.80      1523
weighted avg       0.82      0.81      0.81      1523



# Random Forest

In [12]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Initialize the Random Forest classifier
random_forest_model = RandomForestClassifier()

# Train the model on the training data
random_forest_model.fit(X_train, y_train)

# Predict the labels on the test set
y_pred_rf = random_forest_model.predict(X_test)

# Generate a classification report
report_rf = classification_report(y_test, y_pred_rf)
print(report_rf)

              precision    recall  f1-score   support

           0       0.76      0.93      0.83       874
           1       0.86      0.59      0.70       649

    accuracy                           0.79      1523
   macro avg       0.81      0.76      0.77      1523
weighted avg       0.80      0.79      0.78      1523



# Gradio Demo

In [None]:
!pip install gradio

In [15]:
import gradio as gr

def ml_classify_tweet(tweet, model, vectorizer):
    tweet_vector = vectorizer.transform([tweet])
    prediction = model.predict(tweet_vector)
    if prediction[0] == 1:
      return 'Disaster'
    else:
      return 'Not Disaster'


def predict(model, tweet):
    return ml_classify_tweet(tweet, model, tfidf_vectorizer)


def classify_tweet(tweet, expected):
    results = {}
    justifications = {}

    # Classic ML Models
    models = {
        'Logistic Regression': logistic_regression_model,
        'Naive Bayes': naive_bayes_model,
        'XGBoost': xgboost_model,
        'SVM': svm_model,
        'Random Forest': random_forest_model,
    }
    for model_name, model in models.items():
        prediction = predict(model, tweet)
        match = 'green' if prediction == expected else 'red'
        results[model_name] = f"<div style='color: {match}; border:2px solid {match}; padding:5px; margin:2px;'>{model_name}: {prediction}/{expected}</div>"

    return list(results.values())

# Set up the Gradio interface
iface = gr.Interface(
    fn=classify_tweet,
    inputs=[
        gr.Textbox(lines=2, placeholder="Enter your tweet here..."),
        gr.Radio(choices=["Disaster", "Not Disaster"], label="Expected Classification")
    ],
    outputs=[
        gr.HTML(label=model_name) for model_name in [
            'Logistic Regression', 'Naive Bayes', 'XGBoost', 'SVM', 'Random Forest'
        ]

    ]
)

# Run the Gradio app
iface.launch()


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://8722360cc6d19c3e3d.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


