In [16]:
# Imports 
import pandas as pd
import numpy as np 
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [17]:
# Load the climate-event dataset and turn it into a numeric feature table.
# The path is relative to the notebook's working directory.
file_path = "DATA/global_climate_events_economic_impact_2020_2025.csv"
df = pd.read_csv(file_path)

# Drop columns that are not used as model features
columns_to_drop = ['latitude', 'longitude', 'international_aid_million_usd', 'aid_percentage']
df = df.drop(columns=columns_to_drop)

# `event_id` looks like a per-row identifier (TODO confirm against the data).
# One-hot encoding an identifier yields roughly one column per row that the
# model can only memorize, so it is removed rather than encoded.
identifier_columns = ['event_id']
df = df.drop(columns=identifier_columns)

# Handle missing values (drop rows with missing values for simplicity)
df = df.dropna()

# Replace the raw date with numeric calendar features. One-hot encoding the
# date would create one indicator per distinct day, which cannot generalize
# to dates unseen during training. A malformed date raises here instead of
# being silently discarded.
event_dates = pd.to_datetime(df['date'])
df = df.assign(
    date_year=event_dates.dt.year,
    date_month=event_dates.dt.month,
    date_dayofyear=event_dates.dt.dayofyear,
).drop(columns=['date'])

# One-hot encode the genuinely categorical columns only
categorical_columns = ['country', 'event_type']
df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)


In [None]:
# Separate the regression target (y) from the predictor columns (X)
target_column = 'economic_impact_million_usd'
y = df[target_column]
X = df.drop(columns=[target_column])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build a 100-tree random forest and fit it on the training portion
# (fit() returns the estimator itself, so the chained call keeps the fitted model)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the target for the held-out rows
y_pred = rf_model.predict(X_test)

# Score the predictions: coefficient of determination and root-mean-squared error
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"RMSE: {rmse}", f"R²: {r2}", sep="\n")