In [1]:
pip install torch

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import joblib
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [3]:
# Workbook holding the raw observations; the path is relative to the notebook.
file_path = "Waterstuff take two.xlsx"
df = pd.read_excel(io=file_path)

# Preview the first five rows to confirm the columns were read as expected.
df.head(5)

Unnamed: 0,Country,Area,Population using Safely Managed Drinking Water Service (%),Year,Total Population,Estimated Water Stress (%)
0,Indonesia,Rural,0.230875,2018,120600987,0.297
1,Indonesia,Urban,0.340483,2018,149350859,0.297
2,Indonesia,Overall,0.291516,2018,269951846,0.297
3,Indonesia,Rural,0.233796,2019,119936201,0.297
4,Indonesia,Urban,0.341938,2019,152553180,0.297


In [4]:
# Predictors: two categorical descriptors followed by three numeric measurements.
X_full = df.loc[
    :,
    ['Country', 'Area', 'Year', 'Total Population', 'Estimated Water Stress (%)'],
]
# Regression target.
y_full = df.loc[:, 'Population using Safely Managed Drinking Water Service (%)']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition the same from run to run.
X_train_df, X_test_df, y_train, y_test = train_test_split(
    X_full,
    y_full,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Feature columns split by how they are preprocessed further down:
# numeric columns go through the scaler, categorical ones through the encoder.
numeric_cols = [
    'Year',
    'Total Population',
    'Estimated Water Stress (%)',
]
categorical_cols = [
    'Country',
    'Area',
]

In [6]:
# Fit the standardization statistics on the training rows only, then apply
# that same fitted transform to both the training and the test rows.
scaler = StandardScaler().fit(X_train_df[numeric_cols])
X_train_num = scaler.transform(X_train_df[numeric_cols])
X_test_num = scaler.transform(X_test_df[numeric_cols])

In [7]:
# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to `sparse_output`,
# and the old name was removed in a later release. Probe the constructor rather
# than comparing version strings: a string comparison is lexicographic, so
# '1.10.0' >= '1.2' is False and would wrongly pick the legacy keyword.
try:
    encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
except TypeError:
    # Older scikit-learn that only understands the legacy keyword name.
    encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')

In [8]:
# Learn the one-hot categories from the training rows and reuse them on the test rows.
X_train_cat = encoder.fit_transform(X_train_df[categorical_cols])
X_test_cat = encoder.transform(X_test_df[categorical_cols])

# Design matrices: scaled numeric columns first, one-hot columns after them.
X_train = np.concatenate((X_train_num, X_train_cat), axis=1)
X_test = np.concatenate((X_test_num, X_test_cat), axis=1)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
# Targets are shaped as a single column so they line up with the network's
# one-unit output layer.
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).reshape(-1, 1)

In [9]:
class WaterNet(nn.Module):
    """Fully connected regression network with ReLU hidden layers.

    The network is a stack of ``Linear -> ReLU`` pairs, one per entry in
    ``hidden_sizes``, followed by a final ``Linear`` layer with a single
    output unit and no activation.

    Args:
        input_size: Number of features in each input row.
        hidden_sizes: Width of each hidden layer, in order. The default
            reproduces the original fixed 64-32-16-8 architecture, so
            state dicts saved from that architecture keep the same keys.
    """

    def __init__(self, input_size, hidden_sizes=(64, 32, 16, 8)):
        super().__init__()

        layers = []
        in_features = input_size
        # Layers are created in forward order so that, for a given RNG seed,
        # weight initialisation matches a hand-written Sequential.
        for width in hidden_sizes:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        # Single linear output unit for the regression target.
        layers.append(nn.Linear(in_features, 1))

        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the prediction for ``x``; the last dimension of the result is 1."""
        return self.net(x)

In [10]:
# Seed PyTorch's random number generator before the layers are created so the
# initial weights are repeatable across kernel restarts, in the same spirit as
# the fixed random_state used for the train/test split.
torch.manual_seed(42)

# The input width is taken from the assembled design matrix.
model = WaterNet(input_size=X_train.shape[1])
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [11]:
epochs = 500

# Training mode only has to be enabled once; nothing inside the loop leaves it.
model.train()
for epoch in range(epochs):
    # One full-batch optimisation step per epoch: reset gradients, run the
    # forward pass, measure the loss, back-propagate, and update the weights.
    optimizer.zero_grad()
    output = model(X_train_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()

In [12]:
# Switch to evaluation mode before scoring the held-out rows.
model.eval()
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_true = y_test.to_numpy()

# No gradients are needed for inference; the column of predictions is
# collapsed to a 1-D NumPy array.
with torch.no_grad():
    y_pred = model(X_test_tensor).reshape(-1).numpy()

In [13]:
# Build a new frame from the held-out feature rows with the targets and
# predictions attached, then derive the per-row error terms from them.
results = X_test_df.assign(
    y_true=y_true,
    y_pred=y_pred,
    error=lambda frame: frame['y_true'] - frame['y_pred'],
    squared_error=lambda frame: frame['error'] ** 2,
    abs_error=lambda frame: frame['error'].abs(),
)

def compute_group_metrics(df, groupby_cols):
    """Summarise prediction error for each group of rows.

    Args:
        df: Frame containing ``squared_error`` and ``abs_error`` columns
            in addition to the grouping column(s).
        groupby_cols: Column name, or list of column names, to group by.

    Returns:
        A frame with one row per group holding the grouping column(s)
        followed by ``MSE``, ``RMSE`` and ``MAE``.
    """

    def _root_of_mean(values):
        # RMSE is the square root of the group's mean squared error.
        return np.sqrt(values.mean())

    # Output column name -> (source column, aggregation).
    aggregations = {
        'MSE': ('squared_error', 'mean'),
        'RMSE': ('squared_error', _root_of_mean),
        'MAE': ('abs_error', 'mean'),
    }
    per_group = df.groupby(groupby_cols).agg(**aggregations)
    return per_group.reset_index()

# Error summaries broken down by each grouping of interest.
country_metrics = compute_group_metrics(results, 'Country')
area_metrics = compute_group_metrics(results, 'Area')
year_metrics = compute_group_metrics(results, 'Year')

# Scores over the entire held-out set.
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

# Each report is emitted with one print call, one item per line.
print(
    "\n📌 Overall Model Evaluation:",
    f"  - MSE : {mse:.6f}",
    f"  - RMSE: {rmse:.6f}",
    f"  - MAE : {mae:.6f}",
    f"  - R²  : {r2:.4f}",
    sep="\n",
)

print("\n📌 Metrics by Country:", country_metrics, sep="\n")

print("\n📌 Metrics by Area:", area_metrics, sep="\n")

print("\n📌 Metrics by Year:", year_metrics, sep="\n")



📌 Overall Model Evaluation:
  - MSE : 0.000098
  - RMSE: 0.009910
  - MAE : 0.007120
  - R²  : 0.9986

📌 Metrics by Country:
       Country       MSE      RMSE       MAE
0     Cambodia  0.000006  0.002490  0.002461
1    Indonesia  0.000037  0.006117  0.004162
2         Laos  0.000195  0.013957  0.011040
3     Malaysia  0.000019  0.004409  0.004326
4      Myanmar  0.000233  0.015253  0.013013
5  Philippines  0.000014  0.003794  0.003156
6    Singapore  0.000271  0.016477  0.015292
7      Vietnam  0.000032  0.005648  0.005547

📌 Metrics by Area:
      Area       MSE      RMSE       MAE
0  Overall  0.000122  0.011067  0.007930
1    Rural  0.000159  0.012617  0.010925
2    Urban  0.000056  0.007495  0.005073

📌 Metrics by Year:
   Year       MSE      RMSE       MAE
0  2018  0.000195  0.013979  0.012028
1  2019  0.000008  0.002797  0.002459
2  2020  0.000102  0.010098  0.010098
3  2021  0.000006  0.002525  0.002154
4  2022  0.000162  0.012725  0.010273


In [None]:
# Persist the trained weights. Only the parameters are written, so the
# WaterNet class has to be instantiated again before they can be loaded.
torch.save(obj=model.state_dict(), f="water_model.pt")

# Persist the fitted preprocessing objects so new rows can be transformed
# the same way as the training data.
joblib.dump(value=scaler, filename="scaler.pkl")
joblib.dump(value=encoder, filename="encoder.pkl")