In [2]:
import pandas as pd
import os

BASE_PATH = r"C:\Users\sagni\Downloads\Noise Mapper"

def load_and_prepare():
    # Load station_month.csv
    station_month_path = os.path.join(BASE_PATH, "station_month.csv")
    df = pd.read_csv(station_month_path)

    # Clean column names
    df.columns = df.columns.str.strip()

    # Ensure Year, Month, Day exist
    for col in ["Year", "Month", "Day"]:
        if col not in df.columns:
            raise ValueError(f"Missing column '{col}' in station_month.csv")

    # Convert to integers safely
    for col in ["Year", "Month", "Day"]:
        df[col] = pd.to_numeric(df[col], errors="coerce")  # Convert non-numeric to NaN
        df[col] = df[col].fillna(1).astype(int)  # Fill missing with 1 (safe default)

    # Create date column
    df["date"] = pd.to_datetime(
        dict(year=df["Year"], month=df["Month"], day=df["Day"]),
        errors="coerce"
    )

    # Drop invalid dates
    df = df.dropna(subset=["date"])

    return df

if __name__ == "__main__":
    data = load_and_prepare()
    print(data.head())
    print(data.dtypes)


     Station  Year  Month  Day  Night  DayLimit  NightLimit       date
1913   DEL03  2015      5    1    NaN        50          40 2015-05-01
1914   DEL03  2015      6    1    NaN        50          40 2015-06-01
1915   DEL03  2015      7    1    NaN        50          40 2015-07-01
1916   DEL03  2015      8   31   25.0        50          40 2015-08-31
Station               object
Year                   int32
Month                  int32
Day                    int32
Night                float64
DayLimit               int64
NightLimit             int64
date          datetime64[ns]
dtype: object
