In [None]:
import pandas as pd

# 1) Read CSVs
landuse_df   = pd.read_csv("landuse_station_mapping.csv")
rain_df      = pd.read_csv("station_rainfall_scores.csv")
humidity_df  = pd.read_csv("station_humidity_scores.csv")

# 2) Select only the columns we need from the score table in each data frame
rain_scores     = rain_df[["station_id", "overall_rain_score"]]
humidity_scores = humidity_df[["station_id", "overall_humidity_score"]]

# 3) Merge scores into your landuse mapping
merged = (
    landuse_df
      .merge(rain_scores,
             on="station_id",
             how="left")   # left‑join keeps all landuse rows
      .merge(humidity_scores,
             on="station_id",
             how="left")   # another left‑join adds humidity
)

# 4) Inspect the result
merged.head()

# 5) (Optional) Fill missing scores with something
merged["overall_rain_score"].fillna(0, inplace=True)
merged["overall_humidity_score"].fillna(0, inplace=True)

# 6) Save out to a new file
merged.to_csv("landuse_with_scores.csv", index=False)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged["overall_rain_score"].fillna(0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged["overall_humidity_score"].fillna(0, inplace=True)


In [2]:
import pandas as pd

# 1) Load the enriched CSV
df = pd.read_csv("landuse_with_scores.csv")

# 2) Totals
total_rows       = len(df)                                 # total mapping entries (rows)
total_stations   = df["station_id"].nunique()              # distinct stations

# 3) Zero‐score counts (row‑level)
zero_rain_rows     = (df["overall_rain_score"] == 0).sum()
zero_humid_rows    = (df["overall_humidity_score"] == 0).sum()

# 4) Zero‐score counts (station‑level)
stations_zero_rain  = df.loc[df["overall_rain_score"] == 0, "station_id"].nunique()
stations_zero_humid = df.loc[df["overall_humidity_score"] == 0, "station_id"].nunique()

# 5) Quick descriptive stats on the score columns
score_stats = df[["overall_rain_score", "overall_humidity_score"]].describe()

# 6) Print out
print(f"Total rows in mapping file:          {total_rows}")
print(f"Total distinct stations mapped:      {total_stations}\n")

print(f"Rows with zero rainfall score:       {zero_rain_rows}")
print(f"Rows with zero humidity score:       {zero_humid_rows}\n")

print(f"Distinct stations with zero rain:    {stations_zero_rain}")
print(f"Distinct stations with zero humid.:  {stations_zero_humid}\n")

print("Score column summary statistics:")
print(score_stats)

Total rows in mapping file:          112833
Total distinct stations mapped:      63

Rows with zero rainfall score:       0
Rows with zero humidity score:       95679

Distinct stations with zero rain:    0
Distinct stations with zero humid.:  50

Score column summary statistics:
       overall_rain_score  overall_humidity_score
count       112833.000000           112833.000000
mean             0.733761                0.057668
std              0.124938                0.147941
min              0.462024                0.000000
25%              0.638362                0.000000
50%              0.707878                0.000000
75%              0.857187                0.000000
max              1.000000                1.000000
