In [23]:
# ========== Imports ==========
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, f1_score, classification_report
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.utils import to_categorical
import os

# ========== Load Dataset ==========
file_path = "/kaggle/input/mergefile/Merged_Disasters(2).csv"
df = pd.read_csv(file_path)

# ========== Preprocess Labels ==========
df["Binary_class"] = df["Binary_class"].astype(str).str.strip().str.lower().map({"damage": 1, "non-damage": 0})
df["Processed_data"] = df["Processed_data"].fillna("")

# ========== TF-IDF Vectorizer (For later) ==========
vectorizer = TfidfVectorizer(max_features=5000)

# ========== TF-IDF Features ==========
X = vectorizer.fit_transform(df["Processed_data"]).toarray()
y = df["Binary_class"].values

# ========== Apply SMOTE to Full Data ==========
print("Before SMOTE:", np.bincount(y))
smote = SMOTE(random_state=42)
X_smote, y_smote = smote.fit_resample(X, y)
print("After SMOTE:", np.bincount(y_smote))

# ========== Train-Test Split AFTER SMOTE ==========
X_train, X_test, y_train, y_test = train_test_split(X_smote, y_smote, test_size=0.2, stratify=y_smote, random_state=42)

# ========== Reshape for CNN ==========
X_train_cnn = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test_cnn = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# ========== Build CNN Model ==========
model = models.Sequential([
    layers.Conv1D(128, 5, activation='relu', input_shape=(X_train.shape[1], 1)),
    layers.MaxPooling1D(2),
    layers.Conv1D(64, 3, activation='relu'),
    layers.GlobalMaxPooling1D(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# ========== Custom Callback to Track Best F1 on Test Set ==========
class TestF1Callback(callbacks.Callback):
    def __init__(self, X_test, y_test):
        self.X_test = X_test
        self.y_test = y_test
        self.best_f1 = 0
        self.best_weights = None
        self.best_epoch = 0

    def on_epoch_end(self, epoch, logs=None):
        y_pred_probs = self.model.predict(self.X_test, verbose=0).ravel()
        y_pred = (y_pred_probs > 0.5).astype(int)

        acc = accuracy_score(self.y_test, y_pred)
        f1 = f1_score(self.y_test, y_pred)
        loss = self.model.evaluate(self.X_test, self.y_test, verbose=0)[0]

        print(f"Epoch {epoch+1} - Accuracy: {acc:.4f} |  Loss: {loss:.4f} | F1 Score: {f1:.4f}")

        if f1 > self.best_f1:
            self.best_f1 = f1
            self.best_weights = self.model.get_weights()
            self.best_epoch = epoch + 1

    def on_train_end(self, logs=None):
        self.model.set_weights(self.best_weights)
        print(f"\n✅ Best Model from Epoch {self.best_epoch} with F1 Score: {self.best_f1:.4f}")

# ========== Train Model ==========
f1_callback = TestF1Callback(X_test_cnn, y_test)
model.fit(X_train_cnn, y_train, epochs=10, batch_size=64, verbose=0, callbacks=[f1_callback])

# ========== Final Evaluation ==========
final_preds = (model.predict(X_test_cnn) > 0.5).astype(int)
print("\n📊 Final Test Classification Report:\n")
print(classification_report(y_test, final_preds))

# ========== Save Model ==========
model.save("/kaggle/working/best_cnn_tfidf_model.h5")
print("✅ Model saved to /kaggle/working/best_cnn_tfidf_model.h5")


Before SMOTE: [15505  2577]
After SMOTE: [15505 15505]


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1 - Accuracy: 0.6849 |  Loss: 0.5833 | F1 Score: 0.6482
Epoch 2 - Accuracy: 0.7681 |  Loss: 0.4987 | F1 Score: 0.7266
Epoch 3 - Accuracy: 0.7743 |  Loss: 0.4530 | F1 Score: 0.7675
Epoch 4 - Accuracy: 0.7760 |  Loss: 0.4529 | F1 Score: 0.7167
Epoch 5 - Accuracy: 0.7989 |  Loss: 0.4268 | F1 Score: 0.7612
Epoch 6 - Accuracy: 0.8015 |  Loss: 0.4248 | F1 Score: 0.7637
Epoch 7 - Accuracy: 0.8023 |  Loss: 0.4205 | F1 Score: 0.7681
Epoch 8 - Accuracy: 0.7997 |  Loss: 0.4268 | F1 Score: 0.7812
Epoch 9 - Accuracy: 0.7944 |  Loss: 0.4423 | F1 Score: 0.7686
Epoch 10 - Accuracy: 0.8049 |  Loss: 0.4186 | F1 Score: 0.7664

✅ Best Model from Epoch 8 with F1 Score: 0.7812
[1m194/194[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step

📊 Final Test Classification Report:

              precision    recall  f1-score   support

           0       0.76      0.88      0.82      3101
           1       0.86      0.71      0.78      3101

    accuracy                           0.80      6202