In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score

# Load the World Risk Index CSV into a pandas DataFrame.
# The code further down reads the 'Region', 'Exposure Category', 'WRI Category',
# 'Vulnerability Category' and 'Susceptibility Category' columns from it.
# NOTE(review): the absolute '/content/...' path looks like a Colab upload
# location -- confirm/adjust when running in another environment.
data = pd.read_csv('/content/world_risk_index.csv')

# Mapping criteria for Impact Level based on existing categories.
# Columns read from each row, in the order used by the lookup keys below.
_IMPACT_CATEGORY_COLUMNS = (
    'Exposure Category',
    'WRI Category',
    'Vulnerability Category',
    'Susceptibility Category',
)

# (Exposure, WRI, Vulnerability, Susceptibility) -> Impact Level.
# Any combination that is not listed maps to 'Unknown Impact'.
_IMPACT_LEVEL_RULES = {
    ('Very High', 'Very High', 'Very High', 'Very High'): 'Very High Impact',
    ('Very High', 'Very High', 'Very High', 'High'): 'Very High Impact',
    ('Very High', 'Very High', 'Very High', 'Medium'): 'High Impact',
    ('Very High', 'Very High', 'Very High', 'Low'): 'High Impact',
    ('Very High', 'Very High', 'High', 'Very High'): 'Very High Impact',
    ('Very High', 'High', 'Very High', 'Very High'): 'Very High Impact',
    ('High', 'Very High', 'Very High', 'Very High'): 'Very High Impact',
    ('Very High', 'High', 'High', 'Very High'): 'Very High Impact',
    ('High', 'Very High', 'High', 'Very High'): 'Very High Impact',
    ('High', 'High', 'High', 'Very High'): 'High Impact',
    ('High', 'High', 'High', 'High'): 'Medium Impact',
    ('High', 'High', 'High', 'Medium'): 'Low Impact',
    ('High', 'High', 'Medium', 'High'): 'Low Impact',
    ('High', 'High', 'Medium', 'Medium'): 'Low Impact',
    ('High', 'High', 'Medium', 'Low'): 'Very Low Impact',
}


def assign_impact_level(row) -> str:
    """Return the Impact Level label for a single record.

    Args:
        row: Any object supporting ``row[column_name]`` lookups (a pandas
            Series from ``DataFrame.apply(..., axis=1)`` or a plain dict)
            that provides the four category columns listed in
            ``_IMPACT_CATEGORY_COLUMNS``.

    Returns:
        One of 'Very High Impact', 'High Impact', 'Medium Impact',
        'Low Impact' or 'Very Low Impact' when the combination of the four
        categories matches a rule in ``_IMPACT_LEVEL_RULES``; otherwise
        'Unknown Impact' (this includes rows with missing/NaN categories).

    Raises:
        KeyError: If ``row`` lacks one of the four category columns.
    """
    # Read every column exactly once and resolve the rule with one lookup.
    key = tuple(row[column] for column in _IMPACT_CATEGORY_COLUMNS)
    return _IMPACT_LEVEL_RULES.get(key, 'Unknown Impact')

# Create a new column 'Impact Level' using the mapping criteria
data['Impact Level'] = data.apply(assign_impact_level, axis=1)

# Separate features and target variable ('Region' is excluded from the features).
# NOTE: 'Impact Level' is computed purely from the four category columns, and
# those columns stay in X as features. The accuracy printed at the end therefore
# mostly measures how well the forest re-learns that rule table.
X = data.drop(['Impact Level', 'Region'], axis=1)
y = data['Impact Level']

# Identify categorical and numerical columns
categorical_cols = ['Exposure Category', 'WRI Category', 'Vulnerability Category', 'Susceptibility Category']
numerical_cols = [col for col in X.columns if col not in categorical_cols]

# Hold out the test rows BEFORE fitting any preprocessing, so the encoder
# categories, scaler statistics and imputation means come from training data only.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# One-hot encode the categorical columns and standardise the numerical ones.
# OneHotEncoder consumes the string categories directly, so no LabelEncoder pass
# is needed (LabelEncoder is intended for target labels, not feature columns).
# handle_unknown='ignore' keeps transform() from failing if the held-out rows
# contain a category that never appears in the training rows.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols),
        ('num', StandardScaler(), numerical_cols),
    ],
    remainder='passthrough',
)

# StandardScaler leaves NaNs in place; fill them afterwards with the column mean.
imputer = SimpleImputer(strategy='mean')  # You can choose a different strategy if needed

# Fit the transformers on the training rows only, then re-use them for the test rows.
X_train = imputer.fit_transform(preprocessor.fit_transform(X_train_raw))
X_test = imputer.transform(preprocessor.transform(X_test_raw))

# Initialize and train a Random Forest Classifier (seeded so the reported
# accuracy is reproducible between runs).
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Evaluate the model
predictions = clf.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Model Accuracy:", accuracy)

Model Accuracy: 0.9947916666666666
