## Interpretability & Insights

In [6]:
import pandas as pd
import numpy as np

from pytorch_tabnet.tab_model import TabNetRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
from sklearn.preprocessing import LabelEncoder


#### Data Loading

In [2]:
# Read the modeling table from CSV into `df`; the path is relative to the
# directory the notebook is executed from.
modeling_data_path = '../data/modelingData/modelingDataFrame.csv'
df = pd.read_csv(modeling_data_path)

### TabNet - Interpretability

In [4]:
# Candidate event-type column names; later cells keep only those that are
# actually present in the DataFrame.
event_names = [
    'Astronomical Low Tide',
    'Extreme Cold/Wind Chill',
    'Flood',
    'Winter Weather',
    'Wildfire',
    'Heavy Rain',
    'Cold/Wind Chill',
    'Dense Fog',
    'Frost/Freeze',
    'Strong Wind',
    'Lake-Effect Snow',
    'Funnel Cloud',
    'Flash Flood',
    'Heavy Snow',
    'Ice Storm',
    'Thunderstorm Wind',
    'Avalanche',
    'Excessive Heat',
    'Coastal Flood',
    'Storm Surge/Tide',
    'Sleet',
    'Debris Flow',
    'Winter Storm',
    'Tropical Storm',
    'Dust Storm',
    'Drought',
    'Blizzard',
    'Lightning',
    'Tornado',
    'Hail',
    'Rip Current',
    'Heat',
    'Freezing Fog',
    'High Surf',
    'High Wind',
]

In [7]:
# Keep only rows flagged as valid and leave out the years 2015-2017.
is_valid = df['ValidDataFlag'] == 1
is_excluded_year = df['Year'].isin([2015, 2016, 2017])
df = df[is_valid & ~is_excluded_year]

# Percentage of rows in which each event column is exactly zero, computed
# only for event names that exist as columns in the DataFrame.
row_count = len(df)
zero_percentages = {
    name: ((df[name] == 0).sum() / row_count) * 100
    for name in event_names
    if name in df.columns
}

# Same figures as a table, sparsest columns first.
zero_percentages_df = (
    pd.DataFrame.from_dict(zero_percentages, orient='index', columns=['%_zero'])
    .sort_values('%_zero', ascending=False)
)

# Events that are zero in 99.8% of rows or more are dropped from the features.
zero_share_cutoff = 99.8
selected_events_name = []
for name, share in zero_percentages.items():
    if share < zero_share_cutoff:
        selected_events_name.append(name)

target = 'CustomersOut'

numeric_features = ['Tmin', 'Tmax', 'Tavg', 'Ppt', 'Lat', 'Lng']
categorical_features = ['Season', 'Region', 'Division', 'Month', 'StateName', 'CountyName']

# Event features follow the DataFrame's own column order, not the order of
# `event_names`.
kept_events = set(selected_events_name)
event_features = [column for column in df.columns if column in kept_events]

feature_columns = numeric_features + categorical_features + event_features
X = df[feature_columns]
y = df[target]

# Integer-encode every categorical column on a copy so `X` stays untouched.
# Values are cast to str first, then encoded with a fresh LabelEncoder.
# NOTE(review): the encoders are fit here rather than loaded from disk;
# presumably this mirrors the training notebook - confirm the resulting codes
# match the ones the saved model was trained on.
X_encoded = X.copy()
for cat_column in categorical_features:
    as_text = X_encoded[cat_column].astype(str)
    X_encoded[cat_column] = LabelEncoder().fit_transform(as_text)

# Hold out 20% of the rows, with a fixed seed for a reproducible split.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

# TODO: swap in a better model later!

In [8]:
# Restore the saved TabNet regressor from its archive and predict on the
# held-out rows. The model is fed the raw NumPy values of the test frame.
model_archive_path = '../models/tabnet_model.zip'
model_loaded = TabNetRegressor()
model_loaded.load_model(model_archive_path)

test_matrix = X_test.values
preds = model_loaded.predict(test_matrix)




In [None]:
# Explain the held-out rows. Per the pytorch-tabnet documentation, `explain`
# returns:
#   explain_matrix - importance per sample and per input column, aggregated
#                    over all decision steps
#   masks          - the attention masks of the individual decision steps,
#                    keyed by step index
explain_matrix, masks = model_loaded.explain(X_test.values)

# Rank features by the aggregated matrix rather than by a single step's mask
# (e.g. masks[0]), which would ignore every feature the network only attends
# to in later decision steps. `masks` stays available for per-step inspection.
column_totals = np.sum(explain_matrix, axis=0)
grand_total = column_totals.sum()

# Rescale so the importances sum to 1; the 0.001 display cutoff below then
# reads as a share of total importance. An all-zero matrix is left as is to
# avoid dividing by zero.
feature_importance = column_totals / grand_total if grand_total > 0 else column_totals

feature_importance_df = pd.DataFrame({
    'feature': X_test.columns,
    'importance': feature_importance
}).sort_values('importance', ascending=False)

# Show only features with a non-negligible share of the importance.
display(feature_importance_df[feature_importance_df['importance'] > 0.001])