<a href="https://colab.research.google.com/github/prafullahas/Samrakshak-Flood-Alert-System/blob/main/Untitled33.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [130]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

In [131]:
# --------------------------
# Step 1: Load CSV data
# --------------------------
df = pd.read_csv("database.csv")

In [132]:
print(df.columns)

Index(['Declaration Number', 'Declaration Type', 'Declaration Date', 'State',
       'County', 'Disaster Type', 'Disaster Title', 'Start Date', 'End Date',
       'Close Date', 'Individual Assistance Program',
       'Individuals & Households Program', 'Public Assistance Program',
       'Hazard Mitigation Program'],
      dtype='object')


In [133]:
# 2. Feature Engineering
# --------------------------
df['Start Date'] = pd.to_datetime(df['Start Date'])
df['End Date'] = pd.to_datetime(df['End Date'])
df['Duration_days'] = (df['End Date'] - df['Start Date']).dt.days

program_cols = [
    'Individual Assistance Program',
    'Individuals & Households Program',
    'Public Assistance Program',
    'Hazard Mitigation Program'
]

In [134]:

# Convert Yes/No to 1/0
for col in program_cols:
    df[col] = df[col].map({'Yes':1, 'No':0})

In [135]:
# 3. Features & Target
# --------------------------
X = df[['State', 'County', 'Declaration Type'] + program_cols + ['Duration_days']]
y = df['Disaster Type']

In [136]:
# 4. Encode categorical features (fit once)
# --------------------------
feature_encoders = {}
categorical_cols = ['State', 'County', 'Declaration Type']

for col in categorical_cols:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col].astype(str))
    feature_encoders[col] = le

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = le.fit_transform(X[col].astype(str))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = le.fit_transform(X[col].astype(str))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[col] = le.fit_transform(X[col].astype(str))


In [137]:
# Encode target
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)

# Fill missing values
X = X.fillna(X.median())

In [138]:
# 5. Train-Test Split
# --------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)


In [139]:
# 6. Train XGBoost
# --------------------------
model = XGBClassifier(
    n_estimators=300,
    max_depth=5,
    learning_rate=0.1,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
    use_label_encoder=False,
    eval_metric='mlogloss'
)
model.fit(X_train, y_train)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [140]:
# 7. Predictions & Metrics
# --------------------------
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='weighted')
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Weighted F1-score:", f1)
print("Confusion Matrix:\n", conf_matrix)

Accuracy: 0.8845945653350655
Weighted F1-score: 0.882874093005355
Confusion Matrix:
 [[   0    0    0    0    1    0    0    0    0    0    0    0    3    0
     0    0    0    0    0    0]
 [   0    0    0    0    0    1    0    0    0    0    0    0    0    0
     0    0    0    0    0    0]
 [   0    0  227    0    0   15    0    1    0    0    0   15    0    0
     0    0    0    0    0    0]
 [   0    0    0   13    1    2    0    0    0    0    1    0    1    0
     0    2    0    0    0    1]
 [   0    0    0    0  491   36    0    0    0    0    0    0    1    0
     0    0    1    0    0    0]
 [   0    0    2    0    0 1622    0   30    9    0    0    6  165    0
    26    0    0    3    0    1]
 [   0    0    1    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0]
 [   0    0    0    0    0   60    0 1617    2    0    6    1   56    0
     7    0    3    0    0    1]
 [   0    0    0    0    1   10    0    0  336    0    0    2   45    0
   

In [141]:
# 8. Predict Hazard for Single Input from Dataset
# --------------------------
sample_row = df.iloc[0]  # pick a row from dataset

sample_input = {
    'State': sample_row['State'],
    'County': sample_row['County'],
    'Declaration Type': sample_row['Declaration Type'],
    'Individual Assistance Program': sample_row['Individual Assistance Program'],
    'Individuals & Households Program': sample_row['Individuals & Households Program'],
    'Public Assistance Program': sample_row['Public Assistance Program'],
    'Hazard Mitigation Program': sample_row['Hazard Mitigation Program'],
    'Duration_days': sample_row['Duration_days']
}

def predict_hazard(input_dict, model, feature_encoders, target_encoder):
    input_df = pd.DataFrame([input_dict])

    # Encode categorical features
    for col, le in feature_encoders.items():
        input_df[col] = le.transform(input_df[col].astype(str))

    # Predict probabilities
    probs = model.predict_proba(input_df)[0]
    predicted_idx = np.argmax(probs)
    hazard_score = probs[predicted_idx]

    # Decode predicted class
    predicted_type = target_encoder.inverse_transform([predicted_idx])[0]
    return predicted_type, hazard_score

predicted_type, hazard_score = predict_hazard(sample_input, model, feature_encoders, target_encoder)
print("\nPredicted Hazard Type:", predicted_type)
print("Hazard Score (confidence):", round(hazard_score, 2))


Predicted Hazard Type: Tornado
Hazard Score (confidence): 0.4
