In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2

data = pd.read_csv('/content/heatwave_score.csv')

data = data.drop(columns=['State', 'lat', 'lon'])

X = data.drop(columns=['Heatwave Susceptiblity Score', 'Category'])
y = data['Heatwave Susceptiblity Score']
y_category = data['Category']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test, y_category_train, y_category_test = train_test_split(X_scaled, y, y_category, test_size=0.2, random_state=42)

model = Sequential()

model.add(Dense(128, input_dim=X_train.shape[1], activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.2))


model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.2))

model.add(Dense(64, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.2))

model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.2))

model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.2))

model.add(Dense(16, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.2))

model.add(Dense(1))

model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])

model.fit(X_train, y_train, epochs=150, batch_size=10, validation_data=(X_test, y_test))

y_pred = model.predict(X_test)

def classify_hss(hss):
    if 0.00 <= hss <= 0.30:
        return 'Low Susceptibility'
    elif 0.31 <= hss <= 0.50:
        return 'Moderate Susceptibility'
    elif 0.51 <= hss <= 0.70:
        return 'High Susceptibility'
    elif 0.71 <= hss <= 1.00:
        return 'Very High Susceptibility'
    return 'Unknown'

y_pred_categories = [classify_hss(score) for score in y_pred]

category_accuracy = accuracy_score(y_category_test, y_pred_categories)

print(f"Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred):.4f}")
print(f"Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred):.4f}")
print(f"Categorical Classification Accuracy: {category_accuracy:.4f}")

for i in range(5):
    print(f"Predicted HSS: {y_pred[i][0]:.4f}, Actual HSS: {y_test.iloc[i]:.4f}, Predicted Category: {y_pred_categories[i]}, Actual Category: {y_category_test.iloc[i]}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 3s/step - loss: 2.9487 - mae: 0.5876 - val_loss: 2.7900 - val_mae: 0.4810
Epoch 2/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - loss: 2.7849 - mae: 0.5018 - val_loss: 2.6881 - val_mae: 0.4298
Epoch 3/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 2.6852 - mae: 0.4495 - val_loss: 2.6083 - val_mae: 0.3988
Epoch 4/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - loss: 2.5810 - mae: 0.3927 - val_loss: 2.5302 - val_mae: 0.3659
Epoch 5/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - loss: 2.5044 - mae: 0.3604 - val_loss: 2.4566 - val_mae: 0.3353
Epoch 6/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - loss: 2.4341 - mae: 0.3366 - val_loss: 2.3842 - val_mae: 0.3040
Epoch 7/150
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - loss: 2.3666 - 

In [9]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score

rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_test)

y_pred_rf = np.clip(y_pred_rf, 0, 1)

y_pred_categories_rf = [classify_hss(score) for score in y_pred_rf]

category_accuracy_rf = accuracy_score(y_category_test, y_pred_categories_rf)

print(f"Random Forest - Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred_rf):.4f}")
print(f"Random Forest - Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred_rf):.4f}")
print(f"Random Forest - Categorical Classification Accuracy: {category_accuracy_rf:.4f}")

for i in range(5):
    print(f"Predicted HSS: {y_pred_rf[i]:.4f}, Actual HSS: {y_test.iloc[i]:.4f}, Predicted Category: {y_pred_categories_rf[i]}, Actual Category: {y_category_test.iloc[i]}")


Random Forest - Mean Absolute Error (MAE): 0.1108
Random Forest - Mean Squared Error (MSE): 0.0171
Random Forest - Categorical Classification Accuracy: 0.3333
Predicted HSS: 0.4339, Actual HSS: 0.2591, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.5069, Actual HSS: 0.7085, Predicted Category: Unknown, Actual Category: Very High Susceptibility
Predicted HSS: 0.4069, Actual HSS: 0.2500, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.4864, Actual HSS: 0.5179, Predicted Category: Moderate Susceptibility, Actual Category: High Susceptibility
Predicted HSS: 0.5219, Actual HSS: 0.5490, Predicted Category: High Susceptibility, Actual Category: High Susceptibility


In [10]:
from sklearn.ensemble import GradientBoostingRegressor

gb_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
gb_model.fit(X_train, y_train)

y_pred_gb = gb_model.predict(X_test)

y_pred_gb = np.clip(y_pred_gb, 0, 1)

y_pred_categories_gb = [classify_hss(score) for score in y_pred_gb]

category_accuracy_gb = accuracy_score(y_category_test, y_pred_categories_gb)

print(f"Gradient Boosting - Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred_gb):.4f}")
print(f"Gradient Boosting - Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred_gb):.4f}")
print(f"Gradient Boosting - Categorical Classification Accuracy: {category_accuracy_gb:.4f}")

for i in range(5):
    print(f"Predicted HSS: {y_pred_gb[i]:.4f}, Actual HSS: {y_test.iloc[i]:.4f}, Predicted Category: {y_pred_categories_gb[i]}, Actual Category: {y_category_test.iloc[i]}")


Gradient Boosting - Mean Absolute Error (MAE): 0.1255
Gradient Boosting - Mean Squared Error (MSE): 0.0200
Gradient Boosting - Categorical Classification Accuracy: 0.0000
Predicted HSS: 0.4396, Actual HSS: 0.2591, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.4996, Actual HSS: 0.7085, Predicted Category: Moderate Susceptibility, Actual Category: Very High Susceptibility
Predicted HSS: 0.4109, Actual HSS: 0.2500, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.4762, Actual HSS: 0.5179, Predicted Category: Moderate Susceptibility, Actual Category: High Susceptibility
Predicted HSS: 0.5089, Actual HSS: 0.5490, Predicted Category: Unknown, Actual Category: High Susceptibility


In [11]:
from sklearn.svm import SVR

svr_model = SVR(kernel='rbf', C=100, gamma='auto', epsilon=0.1)
svr_model.fit(X_train, y_train)

y_pred_svr = svr_model.predict(X_test)

y_pred_svr = np.clip(y_pred_svr, 0, 1)

y_pred_categories_svr = [classify_hss(score) for score in y_pred_svr]

category_accuracy_svr = accuracy_score(y_category_test, y_pred_categories_svr)

print(f"SVR - Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred_svr):.4f}")
print(f"SVR - Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred_svr):.4f}")
print(f"SVR - Categorical Classification Accuracy: {category_accuracy_svr:.4f}")

for i in range(5):
    print(f"Predicted HSS: {y_pred_svr[i]:.4f}, Actual HSS: {y_test.iloc[i]:.4f}, Predicted Category: {y_pred_categories_svr[i]}, Actual Category: {y_category_test.iloc[i]}")


SVR - Mean Absolute Error (MAE): 0.1234
SVR - Mean Squared Error (MSE): 0.0211
SVR - Categorical Classification Accuracy: 0.1667
Predicted HSS: 0.3996, Actual HSS: 0.2591, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.4418, Actual HSS: 0.7085, Predicted Category: Moderate Susceptibility, Actual Category: Very High Susceptibility
Predicted HSS: 0.4054, Actual HSS: 0.2500, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.4468, Actual HSS: 0.5179, Predicted Category: Moderate Susceptibility, Actual Category: High Susceptibility
Predicted HSS: 0.4728, Actual HSS: 0.5490, Predicted Category: Moderate Susceptibility, Actual Category: High Susceptibility


In [12]:
import xgboost as xgb

xgb_model = xgb.XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
xgb_model.fit(X_train, y_train)

y_pred_xgb = xgb_model.predict(X_test)

y_pred_xgb = np.clip(y_pred_xgb, 0, 1)

y_pred_categories_xgb = [classify_hss(score) for score in y_pred_xgb]

category_accuracy_xgb = accuracy_score(y_category_test, y_pred_categories_xgb)

print(f"XGBoost - Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred_xgb):.4f}")
print(f"XGBoost - Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred_xgb):.4f}")
print(f"XGBoost - Categorical Classification Accuracy: {category_accuracy_xgb:.4f}")

for i in range(5):
    print(f"Predicted HSS: {y_pred_xgb[i]:.4f}, Actual HSS: {y_test.iloc[i]:.4f}, Predicted Category: {y_pred_categories_xgb[i]}, Actual Category: {y_category_test.iloc[i]}")


XGBoost - Mean Absolute Error (MAE): 0.1044
XGBoost - Mean Squared Error (MSE): 0.0160
XGBoost - Categorical Classification Accuracy: 0.3333
Predicted HSS: 0.4576, Actual HSS: 0.2591, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.5364, Actual HSS: 0.7085, Predicted Category: High Susceptibility, Actual Category: Very High Susceptibility
Predicted HSS: 0.4019, Actual HSS: 0.2500, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.4727, Actual HSS: 0.5179, Predicted Category: Moderate Susceptibility, Actual Category: High Susceptibility
Predicted HSS: 0.5205, Actual HSS: 0.5490, Predicted Category: High Susceptibility, Actual Category: High Susceptibility


In [13]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score

lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

y_pred_lr = lr_model.predict(X_test)

y_pred_lr = np.clip(y_pred_lr, 0, 1)

y_pred_categories_lr = [classify_hss(score) for score in y_pred_lr]

category_accuracy_lr = accuracy_score(y_category_test, y_pred_categories_lr)

print(f"Linear Regression - Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred_lr):.4f}")
print(f"Linear Regression - Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred_lr):.4f}")
print(f"Linear Regression - Categorical Classification Accuracy: {category_accuracy_lr:.4f}")

for i in range(5):
    print(f"Predicted HSS: {y_pred_lr[i]:.4f}, Actual HSS: {y_test.iloc[i]:.4f}, Predicted Category: {y_pred_categories_lr[i]}, Actual Category: {y_category_test.iloc[i]}")


Linear Regression - Mean Absolute Error (MAE): 0.0000
Linear Regression - Mean Squared Error (MSE): 0.0000
Linear Regression - Categorical Classification Accuracy: 0.8333
Predicted HSS: 0.2591, Actual HSS: 0.2591, Predicted Category: Low Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.7085, Actual HSS: 0.7085, Predicted Category: Unknown, Actual Category: Very High Susceptibility
Predicted HSS: 0.2500, Actual HSS: 0.2500, Predicted Category: Low Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.5179, Actual HSS: 0.5179, Predicted Category: High Susceptibility, Actual Category: High Susceptibility
Predicted HSS: 0.5490, Actual HSS: 0.5490, Predicted Category: High Susceptibility, Actual Category: High Susceptibility


In [14]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score

degree = 3
poly = PolynomialFeatures(degree)

X_poly_train = poly.fit_transform(X_train)
X_poly_test = poly.transform(X_test)

poly_lr_model = LinearRegression()
poly_lr_model.fit(X_poly_train, y_train)

y_pred_poly = poly_lr_model.predict(X_poly_test)

y_pred_poly = np.clip(y_pred_poly, 0, 1)

y_pred_categories_poly = [classify_hss(score) for score in y_pred_poly]

category_accuracy_poly = accuracy_score(y_category_test, y_pred_categories_poly)

print(f"Polynomial Regression - Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred_poly):.4f}")
print(f"Polynomial Regression - Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred_poly):.4f}")
print(f"Polynomial Regression - Categorical Classification Accuracy: {category_accuracy_poly:.4f}")

for i in range(5):
    print(f"Predicted HSS: {y_pred_poly[i]:.4f}, Actual HSS: {y_test.iloc[i]:.4f}, Predicted Category: {y_pred_categories_poly[i]}, Actual Category: {y_category_test.iloc[i]}")


Polynomial Regression - Mean Absolute Error (MAE): 0.1121
Polynomial Regression - Mean Squared Error (MSE): 0.0215
Polynomial Regression - Categorical Classification Accuracy: 0.6667
Predicted HSS: 0.4049, Actual HSS: 0.2591, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 1.0000, Actual HSS: 0.7085, Predicted Category: Very High Susceptibility, Actual Category: Very High Susceptibility
Predicted HSS: 0.3733, Actual HSS: 0.2500, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.6028, Actual HSS: 0.5179, Predicted Category: High Susceptibility, Actual Category: High Susceptibility
Predicted HSS: 0.5286, Actual HSS: 0.5490, Predicted Category: High Susceptibility, Actual Category: High Susceptibility


In [15]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score

degree = 2
poly = PolynomialFeatures(degree)

X_poly_train = poly.fit_transform(X_train)
X_poly_test = poly.transform(X_test)

poly_lr_model = LinearRegression()
poly_lr_model.fit(X_poly_train, y_train)

y_pred_poly = poly_lr_model.predict(X_poly_test)

y_pred_poly = np.clip(y_pred_poly, 0, 1)

y_pred_categories_poly = [classify_hss(score) for score in y_pred_poly]

category_accuracy_poly = accuracy_score(y_category_test, y_pred_categories_poly)

print(f"Polynomial Regression - Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred_poly):.4f}")
print(f"Polynomial Regression - Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred_poly):.4f}")
print(f"Polynomial Regression - Categorical Classification Accuracy: {category_accuracy_poly:.4f}")

for i in range(5):
    print(f"Predicted HSS: {y_pred_poly[i]:.4f}, Actual HSS: {y_test.iloc[i]:.4f}, Predicted Category: {y_pred_categories_poly[i]}, Actual Category: {y_category_test.iloc[i]}")


Polynomial Regression - Mean Absolute Error (MAE): 0.0754
Polynomial Regression - Mean Squared Error (MSE): 0.0094
Polynomial Regression - Categorical Classification Accuracy: 0.3333
Predicted HSS: 0.3775, Actual HSS: 0.2591, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.5383, Actual HSS: 0.7085, Predicted Category: High Susceptibility, Actual Category: Very High Susceptibility
Predicted HSS: 0.3571, Actual HSS: 0.2500, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.4995, Actual HSS: 0.5179, Predicted Category: Moderate Susceptibility, Actual Category: High Susceptibility
Predicted HSS: 0.5487, Actual HSS: 0.5490, Predicted Category: High Susceptibility, Actual Category: High Susceptibility


In [16]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score

degree = 4
poly = PolynomialFeatures(degree)

X_poly_train = poly.fit_transform(X_train)
X_poly_test = poly.transform(X_test)

poly_lr_model = LinearRegression()
poly_lr_model.fit(X_poly_train, y_train)

y_pred_poly = poly_lr_model.predict(X_poly_test)

y_pred_poly = np.clip(y_pred_poly, 0, 1)

y_pred_categories_poly = [classify_hss(score) for score in y_pred_poly]

category_accuracy_poly = accuracy_score(y_category_test, y_pred_categories_poly)

print(f"Polynomial Regression - Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred_poly):.4f}")
print(f"Polynomial Regression - Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred_poly):.4f}")
print(f"Polynomial Regression - Categorical Classification Accuracy: {category_accuracy_poly:.4f}")

for i in range(5):
    print(f"Predicted HSS: {y_pred_poly[i]:.4f}, Actual HSS: {y_test.iloc[i]:.4f}, Predicted Category: {y_pred_categories_poly[i]}, Actual Category: {y_category_test.iloc[i]}")


Polynomial Regression - Mean Absolute Error (MAE): 0.0892
Polynomial Regression - Mean Squared Error (MSE): 0.0107
Polynomial Regression - Categorical Classification Accuracy: 0.5000
Predicted HSS: 0.4256, Actual HSS: 0.2591, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.6650, Actual HSS: 0.7085, Predicted Category: High Susceptibility, Actual Category: Very High Susceptibility
Predicted HSS: 0.4047, Actual HSS: 0.2500, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.5929, Actual HSS: 0.5179, Predicted Category: High Susceptibility, Actual Category: High Susceptibility
Predicted HSS: 0.5208, Actual HSS: 0.5490, Predicted Category: High Susceptibility, Actual Category: High Susceptibility


In [17]:
from sklearn.linear_model import Ridge

ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_train, y_train)

y_pred_ridge = ridge_model.predict(X_test)

y_pred_ridge = np.clip(y_pred_ridge, 0, 1)

y_pred_categories_ridge = [classify_hss(score) for score in y_pred_ridge]

category_accuracy_ridge = accuracy_score(y_category_test, y_pred_categories_ridge)

print(f"Ridge Regression - Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred_ridge):.4f}")
print(f"Ridge Regression - Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred_ridge):.4f}")
print(f"Ridge Regression - Categorical Classification Accuracy: {category_accuracy_ridge:.4f}")

for i in range(5):
    print(f"Predicted HSS: {y_pred_ridge[i]:.4f}, Actual HSS: {y_test.iloc[i]:.4f}, Predicted Category: {y_pred_categories_ridge[i]}, Actual Category: {y_category_test.iloc[i]}")


Ridge Regression - Mean Absolute Error (MAE): 0.0073
Ridge Regression - Mean Squared Error (MSE): 0.0001
Ridge Regression - Categorical Classification Accuracy: 0.8333
Predicted HSS: 0.2750, Actual HSS: 0.2591, Predicted Category: Low Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.7007, Actual HSS: 0.7085, Predicted Category: Unknown, Actual Category: Very High Susceptibility
Predicted HSS: 0.2642, Actual HSS: 0.2500, Predicted Category: Low Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.5216, Actual HSS: 0.5179, Predicted Category: High Susceptibility, Actual Category: High Susceptibility
Predicted HSS: 0.5469, Actual HSS: 0.5490, Predicted Category: High Susceptibility, Actual Category: High Susceptibility


In [18]:
from sklearn.linear_model import Lasso

lasso_model = Lasso(alpha=0.1)
lasso_model.fit(X_train, y_train)

y_pred_lasso = lasso_model.predict(X_test)

y_pred_lasso = np.clip(y_pred_lasso, 0, 1)

y_pred_categories_lasso = [classify_hss(score) for score in y_pred_lasso]

category_accuracy_lasso = accuracy_score(y_category_test, y_pred_categories_lasso)

print(f"Lasso Regression - Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred_lasso):.4f}")
print(f"Lasso Regression - Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred_lasso):.4f}")
print(f"Lasso Regression - Categorical Classification Accuracy: {category_accuracy_lasso:.4f}")

for i in range(5):
    print(f"Predicted HSS: {y_pred_lasso[i]:.4f}, Actual HSS: {y_test.iloc[i]:.4f}, Predicted Category: {y_pred_categories_lasso[i]}, Actual Category: {y_category_test.iloc[i]}")


Lasso Regression - Mean Absolute Error (MAE): 0.1439
Lasso Regression - Mean Squared Error (MSE): 0.0267
Lasso Regression - Categorical Classification Accuracy: 0.1667
Predicted HSS: 0.4533, Actual HSS: 0.2591, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.4533, Actual HSS: 0.7085, Predicted Category: Moderate Susceptibility, Actual Category: Very High Susceptibility
Predicted HSS: 0.4533, Actual HSS: 0.2500, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.4533, Actual HSS: 0.5179, Predicted Category: Moderate Susceptibility, Actual Category: High Susceptibility
Predicted HSS: 0.4533, Actual HSS: 0.5490, Predicted Category: Moderate Susceptibility, Actual Category: High Susceptibility


In [19]:
from sklearn.linear_model import ElasticNet

elastic_net_model = ElasticNet(alpha=1.0, l1_ratio=0.5)
elastic_net_model.fit(X_train, y_train)

y_pred_en = elastic_net_model.predict(X_test)

y_pred_en = np.clip(y_pred_en, 0, 1)

y_pred_categories_en = [classify_hss(score) for score in y_pred_en]

category_accuracy_en = accuracy_score(y_category_test, y_pred_categories_en)

print(f"Elastic Net Regression - Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred_en):.4f}")
print(f"Elastic Net Regression - Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred_en):.4f}")
print(f"Elastic Net Regression - Categorical Classification Accuracy: {category_accuracy_en:.4f}")

for i in range(5):
    print(f"Predicted HSS: {y_pred_en[i]:.4f}, Actual HSS: {y_test.iloc[i]:.4f}, Predicted Category: {y_pred_categories_en[i]}, Actual Category: {y_category_test.iloc[i]}")


Elastic Net Regression - Mean Absolute Error (MAE): 0.1439
Elastic Net Regression - Mean Squared Error (MSE): 0.0267
Elastic Net Regression - Categorical Classification Accuracy: 0.1667
Predicted HSS: 0.4533, Actual HSS: 0.2591, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.4533, Actual HSS: 0.7085, Predicted Category: Moderate Susceptibility, Actual Category: Very High Susceptibility
Predicted HSS: 0.4533, Actual HSS: 0.2500, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.4533, Actual HSS: 0.5179, Predicted Category: Moderate Susceptibility, Actual Category: High Susceptibility
Predicted HSS: 0.4533, Actual HSS: 0.5490, Predicted Category: Moderate Susceptibility, Actual Category: High Susceptibility


In [2]:
from sklearn.neighbors import KNeighborsRegressor

knn_model = KNeighborsRegressor(n_neighbors=5)
knn_model.fit(X_train, y_train)

y_pred_knn = knn_model.predict(X_test)

y_pred_knn = np.clip(y_pred_knn, 0, 1)

y_pred_categories_knn = [classify_hss(score) for score in y_pred_knn]

category_accuracy_knn = accuracy_score(y_category_test, y_pred_categories_knn)

print(f"KNN Regression - Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred_knn):.4f}")
print(f"KNN Regression - Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred_knn):.4f}")
print(f"KNN Regression - Categorical Classification Accuracy: {category_accuracy_knn:.4f}")

for i in range(5):
    print(f"Predicted HSS: {y_pred_knn[i]:.4f}, Actual HSS: {y_test.iloc[i]:.4f}, Predicted Category: {y_pred_categories_knn[i]}, Actual Category: {y_category_test.iloc[i]}")


KNN Regression - Mean Absolute Error (MAE): 0.0802
KNN Regression - Mean Squared Error (MSE): 0.0103
KNN Regression - Categorical Classification Accuracy: 0.3333
Predicted HSS: 0.3606, Actual HSS: 0.2591, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.5257, Actual HSS: 0.7085, Predicted Category: High Susceptibility, Actual Category: Very High Susceptibility
Predicted HSS: 0.3776, Actual HSS: 0.2500, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.4975, Actual HSS: 0.5179, Predicted Category: Moderate Susceptibility, Actual Category: High Susceptibility
Predicted HSS: 0.5164, Actual HSS: 0.5490, Predicted Category: High Susceptibility, Actual Category: High Susceptibility


In [3]:
from sklearn.tree import DecisionTreeRegressor

dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train, y_train)

y_pred_dt = dt_model.predict(X_test)

y_pred_dt = np.clip(y_pred_dt, 0, 1)

y_pred_categories_dt = [classify_hss(score) for score in y_pred_dt]

category_accuracy_dt = accuracy_score(y_category_test, y_pred_categories_dt)

print(f"Decision Tree Regression - Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred_dt):.4f}")
print(f"Decision Tree Regression - Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred_dt):.4f}")
print(f"Decision Tree Regression - Categorical Classification Accuracy: {category_accuracy_dt:.4f}")

for i in range(5):
    print(f"Predicted HSS: {y_pred_dt[i]:.4f}, Actual HSS: {y_test.iloc[i]:.4f}, Predicted Category: {y_pred_categories_dt[i]}, Actual Category: {y_category_test.iloc[i]}")


Decision Tree Regression - Mean Absolute Error (MAE): 0.0995
Decision Tree Regression - Mean Squared Error (MSE): 0.0173
Decision Tree Regression - Categorical Classification Accuracy: 0.3333
Predicted HSS: 0.4442, Actual HSS: 0.2591, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.4939, Actual HSS: 0.7085, Predicted Category: Moderate Susceptibility, Actual Category: Very High Susceptibility
Predicted HSS: 0.4000, Actual HSS: 0.2500, Predicted Category: Moderate Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.4939, Actual HSS: 0.5179, Predicted Category: Moderate Susceptibility, Actual Category: High Susceptibility
Predicted HSS: 0.5341, Actual HSS: 0.5490, Predicted Category: High Susceptibility, Actual Category: High Susceptibility


Ridge Hyperparameter Tuning

In [5]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score

param_grid = {'alpha': [0.01, 0.1, 1, 10, 100]}

ridge = Ridge()

grid_search = GridSearchCV(estimator=ridge, param_grid=param_grid, cv=5, scoring='neg_mean_absolute_error')

grid_search.fit(X_train, y_train)

best_alpha = grid_search.best_params_['alpha']
print(f"Best alpha found: {best_alpha}")

best_ridge_model = grid_search.best_estimator_

y_pred_best_ridge = best_ridge_model.predict(X_test)

y_pred_best_ridge = np.clip(y_pred_best_ridge, 0, 1)

y_pred_categories_best_ridge = [classify_hss(score) for score in y_pred_best_ridge]

category_accuracy_best_ridge = accuracy_score(y_category_test, y_pred_categories_best_ridge)

print(f"Best Ridge Regression - Mean Absolute Error (MAE): {mean_absolute_error(y_test, y_pred_best_ridge):.8f}")
print(f"Best Ridge Regression - Mean Squared Error (MSE): {mean_squared_error(y_test, y_pred_best_ridge):.8f}")
print(f"Best Ridge Regression - Categorical Classification Accuracy: {category_accuracy_best_ridge:.8f}")

for i in range(5):
    print(f"Predicted HSS: {y_pred_best_ridge[i]:.8f}, Actual HSS: {y_test.iloc[i]:.8f}, Predicted Category: {y_pred_categories_best_ridge[i]}, Actual Category: {y_category_test.iloc[i]}")


Best alpha found: 0.01
Best Ridge Regression - Mean Absolute Error (MAE): 0.00008371
Best Ridge Regression - Mean Squared Error (MSE): 0.00000001
Best Ridge Regression - Categorical Classification Accuracy: 0.83333333
Predicted HSS: 0.25926879, Actual HSS: 0.25909000, Predicted Category: Low Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.70842335, Actual HSS: 0.70851000, Predicted Category: Unknown, Actual Category: Very High Susceptibility
Predicted HSS: 0.25011362, Actual HSS: 0.24995500, Predicted Category: Low Susceptibility, Actual Category: Low Susceptibility
Predicted HSS: 0.51796562, Actual HSS: 0.51791500, Predicted Category: High Susceptibility, Actual Category: High Susceptibility
Predicted HSS: 0.54901663, Actual HSS: 0.54904000, Predicted Category: High Susceptibility, Actual Category: High Susceptibility


In [6]:
import joblib

joblib.dump(best_ridge_model, 'ML_model.pkl')

['ML_model.pkl']