## cross_val_score

In [None]:
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

# --- Cross-validation ---------------------------------------------------
# Score the estimator on five folds of the RFE-reduced training data.
# NOTE(review): rf_rfe, X_train_rfe and y_train are expected to come from
# other cells that are not part of this section.
cv_scores = cross_val_score(estimator=rf_rfe, X=X_train_rfe, y=y_train, cv=5)
print("Cross-validation Scores:", cv_scores)
print("Mean Cross-validation Score:", cv_scores.mean())

# --- Feature importances ------------------------------------------------
# Pair each selected feature name with the importance reported by the model.
importances = rf_rfe.feature_importances_
feature_importances = pd.DataFrame(
    {'Feature': selected_features, 'Importance': importances}
)
# The printed table is sorted (highest importance first); the stored frame
# itself keeps the original feature order.
print(
    "Feature Importances:",
    feature_importances.sort_values(by='Importance', ascending=False),
    sep="\n",
)

# --- Horizontal bar chart of the importances ----------------------------
plt.figure(figsize=(10, 8))
plt.barh(
    y=feature_importances['Feature'],
    width=feature_importances['Importance'],
)
plt.title('Feature Importances')
plt.xlabel('Importance')
plt.ylabel('Feature')
# Flip the y axis so the first row of the frame is drawn at the top.
plt.gca().invert_yaxis()
plt.show()


## 1. Create DataFrame with Selected Features

In [None]:
import pandas as pd

# Feature columns kept for modelling.
# NOTE(review): `df` (the original DataFrame) must already be defined by an
# earlier cell; it is not created in this section.
selected_features = [
    'MonsoonIntensity',
    'WetlandLoss',
    'PopulationScore',
    'DeterioratingInfrastructure',
    'Landslides',
    'CoastalVulnerability',
    'PoliticalFactors',
    'IneffectiveDisasterPreparedness',
    'TopographyDrainage',
    'Siltation',
]

# Keep only the selected features followed by the target column 'occurrence'.
df_selected = df[[*selected_features, 'occurrence']]


In [None]:
# Write the selected-feature frame to disk without the index column.
df_selected.to_csv(path_or_buf='features.csv', index=False)

In [None]:
# Bare expression: shows the selected-feature frame as the cell's output.
df_selected

## 2. Normalize the Data

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Split the frame into the predictor columns and the target column.
X = df_selected[selected_features]
y = df_selected['occurrence']

# Min-max scale the predictors using the scaler's default settings.
# NOTE(review): the scaler is fitted on every row of df_selected, while the
# train/test split happens in a later cell, so held-out rows contribute to
# the fitted min/max. Consider fitting on the training split only.
scaler = MinMaxScaler()
X_normalized = scaler.fit(X).transform(X)

# Rebuild a labelled frame from the scaled array and append the untouched
# target values positionally (the new frame gets a fresh default index).
df_normalized = pd.DataFrame(
    X_normalized, columns=selected_features
).assign(occurrence=y.values)

In [None]:
# Write the normalized frame (features plus target) to disk without the index.
df_normalized.to_csv(path_or_buf='normalized_features.csv', index=False)

In [None]:
import pickle

# Persist the fitted scaler so the same scaling can be re-applied later.
file_name = 'scaler.sav'
# Use a context manager so the file handle is flushed and closed
# deterministically; the previous bare open() call was never closed.
with open(file_name, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [None]:
# Bare expression: shows the normalized frame as the cell's output.
df_normalized

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows as a test set; random_state=42 keeps the
# shuffle reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    df_normalized.loc[:, selected_features],
    df_normalized.loc[:, 'occurrence'],
    test_size=0.3,
    random_state=42,
)
