In [None]:
# -------- Static country ranking (no login) --------
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split

CSV_PATH = "../FINALIZED_cities_data.csv"  # <- update if needed

# Load the city table, then normalise the two key column headers: when the
# canonical name is absent, the first alternate spelling that is present gets
# renamed to it. If no alternate is present either, the frame is left as is.
df = pd.read_csv(CSV_PATH)
for canonical, alternates in (
    ("City", ("city", "Place", "place")),
    ("Country", ("country",)),
):
    if canonical in df.columns:
        continue
    found = next((name for name in alternates if name in df.columns), None)
    if found is not None:
        df = df.rename(columns={found: canonical})

def z(s: pd.Series) -> pd.Series:
    """Return the z-score of *s* as a float series on the same index.

    Values are coerced to numeric (unparseable entries become NaN) and the
    NaNs are filled with the median before standardising with the sample
    standard deviation. A tiny epsilon is added to the divisor.

    An all-zero series is returned when there is nothing to standardise:
    no numeric values at all, or a standard deviation that is zero or
    not finite (e.g. a single observation).
    """
    values = pd.to_numeric(s, errors="coerce")
    if not values.notna().any():
        return pd.Series(0.0, index=values.index)

    values = values.fillna(values.median())
    spread = values.std()
    if not np.isfinite(spread) or spread == 0:
        return pd.Series(0.0, index=values.index)

    centred = values - values.mean()
    return centred / (spread + 1e-9)

# ==== Build a static target using ONLY the three public factors ====
# Each factor that exists in the data is z-scored and oriented so that a
# higher value means "better"; the target is the row-wise mean of the
# available factors, or 0.0 for every row when none of them is present.
#
# The sign flip is applied AFTER z() so that z()'s numeric coercion runs
# first: negating a raw object-dtype column (e.g. numbers read in as strings)
# would raise a TypeError. z(-x) and -z(x) give the same score.
parts = []
if "monthly_cost_usd" in df.columns:
    parts.append(-z(df["monthly_cost_usd"]))  # cheaper = better

temp_col = next(
    (c for c in ["weather_avg_temp_c", "climate_avg_temp_c"] if c in df.columns),
    None,
)
if temp_col:
    # Distance from a 22 C reference temperature; smaller distance is better.
    temp_dev = (pd.to_numeric(df[temp_col], errors="coerce") - 22.0).abs()
    parts.append(-z(temp_dev))  # closer to 22C = better

if "safety_score" in df.columns:
    parts.append(z(df["safety_score"]))  # safer = better

if parts:
    df["suitability_score"] = pd.concat(parts, axis=1).mean(axis=1).fillna(0.0)
else:
    df["suitability_score"] = 0.0

# ==== Train NN once (uses all features; target above) ====
# Numeric columns (excluding the target) are median-imputed and standardised;
# every other column is treated as categorical, imputed with its most
# frequent value and one-hot encoded (unseen categories are ignored).
num_feats = [
    col
    for col in df.select_dtypes(include=[np.number]).columns.tolist()
    if col != "suitability_score"
]
cat_feats = [
    col
    for col in df.columns
    if col != "suitability_score" and col not in num_feats
]

numeric_steps = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
])
categorical_steps = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore")),
])
pre = ColumnTransformer([
    ("num", numeric_steps, num_feats),
    ("cat", categorical_steps, cat_feats),
])

regressor = MLPRegressor(
    hidden_layer_sizes=(96, 48, 16),
    activation="relu",
    early_stopping=True,
    n_iter_no_change=20,
    random_state=42,
    max_iter=800,
)
model = Pipeline([("pre", pre), ("nn", regressor)])

X = df[num_feats + cat_feats]
y = df["suitability_score"]

# 80/20 split with a fixed seed; only the training part is used for fitting.
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42)
model.fit(X_tr, y_tr)

# ==== Predict per city → aggregate by country → scale 0–100 ====
# Score every row with the fitted model, then average the predictions per
# country (rows with a missing country form their own group).
df["_pred_static"] = model.predict(X)
per_country_mean = df.groupby("Country", dropna=False)["_pred_static"].mean()
country_scores = per_country_mean.reset_index(name="country_score")

# Min-max rescale to 0-100; when the scores have no spread, every country
# is assigned 100.0 instead of dividing by zero.
mn = country_scores["country_score"].min()
mx = country_scores["country_score"].max()
if mx > mn:
    country_scores["country_score_0_100"] = (
        (country_scores["country_score"] - mn) / (mx - mn) * 100
    )
else:
    country_scores["country_score_0_100"] = 100.0
country_scores["country_score_0_100"] = country_scores["country_score_0_100"].round(2)

# Best-scoring countries first, with a fresh 0..n-1 index.
country_scores = country_scores.sort_values("country_score_0_100", ascending=False)
country_scores = country_scores.reset_index(drop=True)

print(country_scores.head(10))
country_scores.to_csv("country_scores_static_0_100.csv", index=False)
print("Saved: country_scores_static_0_100.csv")