In [7]:
import pandas as pd
import numpy as np


df = pd.read_csv(r"D:\MSDS CODE\DSLC_EXAM_S25M19030\output\data\cleaned_weather.csv", parse_dates=["DATE"])

#### DESCRIPTIVE ANALYTICS

In [10]:
df[[ "TMP_C",
        "DEW_C",
        "WIND_MS",
        "VIS_M",
        "PRECIP_MM"
    ]].describe()

Unnamed: 0,TMP_C,DEW_C,WIND_MS,VIS_M,PRECIP_MM
count,8722.0,8704.0,8754.0,8754.0,8754.0
mean,24.241034,18.336213,2.738257,11399.43146,2.85907
std,2.910246,1.450463,36.99194,54085.470202,37.437571
min,14.8,3.1,0.0,0.0,0.0
25%,22.0,17.8,0.0,6000.0,0.0
50%,24.5,18.5,1.0,9000.0,0.0
75%,26.5,19.2,2.1,10000.0,0.0
max,37.5,31.5,999.9,999999.0,999.9


##### Data skewness

In [12]:
dist_stats = pd.DataFrame({
    "skewness": [
        df["TMP_C"].skew(),
        df["PRECIP_MM"].skew()
    ],
    "kurtosis": [
        df["TMP_C"].kurtosis(),
        df["PRECIP_MM"].kurtosis()
    ]
}, index=["TMP_C", "PRECIP_MM"])

In [14]:
dist_stats

Unnamed: 0,skewness,kurtosis
TMP_C,-0.142917,-0.686513
PRECIP_MM,25.928986,687.191271


#### QUANTILE ANALYTICS

In [16]:
quantiles = df[["TMP_C", "PRECIP_MM"]].quantile(
    [0.90, 0.95, 0.99]
)

quantiles.index = ["P90", "P95", "P99"]
print(quantiles)

     TMP_C  PRECIP_MM
P90   27.8        2.0
P95   28.5       10.0
P99   30.2       32.0


##### EXTREME HEAT - ANALYSIS

In [24]:
#taking 95th percentile
heat_threshold = df["TMP_C"].quantile(0.95)
print("\nExtreme heat threshold (P95):", heat_threshold)


Extreme heat threshold (P95): 28.5


In [26]:
df["heat_extreme"] = (df["TMP_C"] > heat_threshold).astype(int)

print("\nHeat extreme counts:")
print(df["heat_extreme"].value_counts())


Heat extreme counts:
heat_extreme
0    8353
1     401
Name: count, dtype: int64


#### EXTREME RAIN - ANALYSIS

In [29]:
rain_threshold = df["PRECIP_MM"].quantile(0.95)
print("\nExtreme rain threshold (P95):", rain_threshold)

df["rain_extreme"] = (df["PRECIP_MM"] > rain_threshold).astype(int)

print("\nRain extreme counts:")
print(df["rain_extreme"].value_counts())


Extreme rain threshold (P95): 10.0

Rain extreme counts:
rain_extreme
0    8329
1     425
Name: count, dtype: int64


In [33]:
# derived column/feature
df["TEMP_DEW_SPREAD"] = df["TMP_C"] - df["DEW_C"]

#### CONDITIONAL MEANS — HEAT EXTREMES

In [35]:
df.groupby("heat_extreme")[[
    "DEW_C",
    "TEMP_DEW_SPREAD",
    "WIND_MS"
]].mean()


CONDITIONAL MEANS — HEAT EXTREMES



Unnamed: 0_level_0,DEW_C,TEMP_DEW_SPREAD,WIND_MS
heat_extreme,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,18.415862,5.574943,2.778726
1,16.687032,12.973566,1.895262


#### CONDITIONAL MEANS — RAIN EXTREMES

In [44]:
df.groupby("rain_extreme")[[
    "VIS_M",
    "WIND_MS",
    "PRECIP_MM"
]].mean()

Unnamed: 0_level_0,VIS_M,WIND_MS,PRECIP_MM
rain_extreme,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,11544.221875,2.609653,0.377656
1,8561.88,5.258588,51.488941
