In [58]:
pip install streamlit



In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
import streamlit as st
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [60]:
file_path = "Waterstuff take two.xlsx"
df = pd.read_excel(file_path)
df.head()

Unnamed: 0,Country,Area,Population using Safely Managed Drinking Water Service (%),Year,Total Population,Estimated Water Stress (%)
0,Indonesia,Rural,0.230875,2018,120600987,0.297
1,Indonesia,Urban,0.340483,2018,149350859,0.297
2,Indonesia,Overall,0.291516,2018,269951846,0.297
3,Indonesia,Rural,0.233796,2019,119936201,0.297
4,Indonesia,Urban,0.341938,2019,152553180,0.297


In [61]:
X_full = df[['Country', 'Area', 'Year', 'Total Population', 'Estimated Water Stress (%)']]
y_full = df['Population using Safely Managed Drinking Water Service (%)']

X_train_df, X_test_df, y_train, y_test = train_test_split(
    X_full, y_full, test_size=0.2, random_state=42
)

In [62]:
scaler = StandardScaler()
numeric_cols = ['Year', 'Total Population', 'Estimated Water Stress (%)']
X_train_df[numeric_cols] = scaler.fit_transform(X_train_df[numeric_cols])
X_test_df[numeric_cols] = scaler.transform(X_test_df[numeric_cols])

X_train_tensor = torch.tensor(X_train_df[numeric_cols].values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values.reshape(-1, 1), dtype=torch.float32)

In [63]:
class WaterNet(nn.Module):
    def __init__(self):
        super(WaterNet, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(3, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1)
        )

    def forward(self, x):
        return self.net(x)

In [64]:
model = WaterNet()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [65]:
epochs = 500
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    output = model(X_train_tensor)
    loss = criterion(output, y_train_tensor)
    loss.backward()
    optimizer.step()

In [66]:
model.eval()
X_test_tensor = torch.tensor(X_test_df[numeric_cols].values, dtype=torch.float32)

with torch.no_grad():
    y_pred = model(X_test_tensor).numpy().flatten()
    y_true = y_test.values

In [67]:
results = X_test_df.copy()
results['y_true'] = y_true
results['y_pred'] = y_pred
results['error'] = results['y_true'] - results['y_pred']
results['squared_error'] = results['error']**2
results['abs_error'] = results['error'].abs()

In [68]:
def compute_group_metrics(df, groupby_cols):
    return df.groupby(groupby_cols).agg(
        MSE=('squared_error', 'mean'),
        RMSE=('squared_error', lambda x: np.sqrt(x.mean())),
        MAE=('abs_error', 'mean')
    ).reset_index()

country_metrics = compute_group_metrics(results, 'Country')
area_metrics = compute_group_metrics(results, 'Area')
year_metrics = compute_group_metrics(results, 'Year')

In [69]:
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true, y_pred)
r2 = r2_score(y_true, y_pred)

# 11. Print all metrics
print("\n📌 Overall Model Evaluation:")
print(f"  - MSE : {mse:.6f}")
print(f"  - RMSE: {rmse:.6f}")
print(f"  - MAE : {mae:.6f}")
print(f"  - R²  : {r2:.4f}")

print("\n📌 Metrics by Country:")
print(country_metrics)

print("\n📌 Metrics by Area:")
print(area_metrics)

print("\n📌 Metrics by Year:")
print(year_metrics)


📌 Overall Model Evaluation:
  - MSE : 0.014940
  - RMSE: 0.122230
  - MAE : 0.074517
  - R²  : 0.7939

📌 Metrics by Country:
       Country       MSE      RMSE       MAE
0     Cambodia  0.000001  0.001047  0.000970
1    Indonesia  0.002868  0.053550  0.036059
2         Laos  0.012511  0.111852  0.109689
3     Malaysia  0.000284  0.016847  0.013197
4      Myanmar  0.000883  0.029721  0.023247
5  Philippines  0.031727  0.178121  0.144405
6    Singapore  0.000007  0.002624  0.002504
7      Vietnam  0.067590  0.259981  0.190149

📌 Metrics by Area:
      Area       MSE      RMSE       MAE
0  Overall  0.001469  0.038334  0.023292
1    Rural  0.005243  0.072410  0.060589
2    Urban  0.029488  0.171720  0.121492

📌 Metrics by Year:
       Year       MSE      RMSE       MAE
0 -1.479695  0.005345  0.073108  0.053317
1 -0.751230  0.002853  0.053418  0.038995
2 -0.022765  0.008930  0.094500  0.094500
3  0.705701  0.012745  0.112892  0.061858
4  1.434166  0.044297  0.210468  0.150911


In [70]:
torch.save(model.state_dict(), "water_model.pt")

joblib.dump(scaler, "scaler.pkl")

['scaler.pkl']