In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score


In [None]:
import pandas as pd

# Load the dataset.
# The CSV must contain the rainfall column 'Seasonal rainfall' and the four
# predictor columns 'Nino3.4', 'Nino4', 'Eastern_IOD', 'Western_IOD'.
FEATURE_COLUMNS = ['Nino3.4', 'Nino4', 'Eastern_IOD', 'Western_IOD']
RAINFALL_COLUMN = 'Seasonal rainfall'
# Rows whose rainfall value is below this threshold are labelled as drought.
# NOTE(review): assumes 'Seasonal rainfall' already holds rainfall anomalies
# (not raw totals) -- confirm against the data source.
DROUGHT_THRESHOLD = -1.5

data = pd.read_csv('droughtprediction.csv')

# Fail early, naming every absent column, instead of hitting a bare KeyError
# on the first missing column further down.
required_columns = FEATURE_COLUMNS + [RAINFALL_COLUMN]
missing_columns = [col for col in required_columns if col not in data.columns]
if missing_columns:
    raise KeyError(f"droughtprediction.csv is missing required columns: {missing_columns}")

# Drop rows with missing values only in the columns the model actually uses,
# so gaps in unrelated columns do not shrink the dataset.
data = data.dropna(subset=required_columns)

# Binary target: 1 when the rainfall value is below the threshold, 0 otherwise
data['Drought'] = (data[RAINFALL_COLUMN] < DROUGHT_THRESHOLD).astype(int)

# Feature matrix and target vector used by the following cells
X = data[FEATURE_COLUMNS]
y = data['Drought']  # Drought occurrence is the target




In [None]:
# Hold out 20% of the rows for testing, with a fixed seed for reproducibility.
# The split is stratified on the drought label so that both partitions keep
# the same drought/non-drought ratio; without it a random split can leave the
# test set with few or no drought cases.
# Stratification needs at least two rows per class and a test partition at
# least as large as the number of classes; otherwise fall back to the plain
# random split.
can_stratify = y.value_counts().min() >= 2 and int(len(y) * 0.2) >= y.nunique()
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y if can_stratify else None,
)


In [None]:
# Base estimator; the fixed seed keeps the fitted forest reproducible
rf = RandomForestClassifier(random_state=42)

# Hyperparameter grid: 3 * 4 * 3 * 3 = 108 candidate combinations
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Exhaustive 5-fold cross-validated search over the grid, using all CPU cores.
# scoring='balanced_accuracy' replaces the default plain accuracy, which can be
# maximised by always predicting the majority class when one class is rare.
# NOTE(review): assumes drought rows are the minority class -- confirm with
# y.value_counts().
# verbose=1 prints a one-line summary instead of one line per fit (540 fits).
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    scoring='balanced_accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

# Run the search on the training partition only
grid_search.fit(X_train, y_train)

# Estimator with the best mean cross-validation score
best_rf = grid_search.best_estimator_


In [None]:
# Predict drought labels for the held-out rows with the tuned forest
y_pred = best_rf.predict(X_test)

# Compute the hold-out metrics first, then print them:
# overall accuracy followed by the per-class classification report
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("Classification Report:\n", test_report)


In [None]:
# Pair each predictor column with the importance score of the tuned forest
feature_names = X.columns
importances = best_rf.feature_importances_

# Build the table and order it from most to least important in one expression
feature_importance_df = pd.DataFrame(
    {'Feature': feature_names, 'Importance': importances}
).sort_values(by='Importance', ascending=False)

# Show the ranked table
print("Feature Importances:\n", feature_importance_df)
