In [38]:
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

In [39]:
train_df = pd.read_csv('./gdz-elektrik-datathon-2024/train.csv')
test_df = pd.read_csv('./gdz-elektrik-datathon-2024/test.csv')
holidays_df = pd.read_csv('./gdz-elektrik-datathon-2024/holidays.csv')
weather_df = pd.read_csv('./gdz-elektrik-datathon-2024/weather.csv')

In [40]:
train_df['tarih'] = pd.to_datetime(train_df['tarih'])
train_df['ilce'] = train_df['ilce'].astype('category')
train_df["bildirimsiz_sum"] = train_df["bildirimsiz_sum"].astype(np.int8)
train_df["bildirimli_sum"] = train_df["bildirimli_sum"].astype(np.int8)

test_df['tarih'] = pd.to_datetime(test_df['tarih'])
test_df['ilce'] = test_df['ilce'].astype('category')
test_df["bildirimli_sum"] = test_df["bildirimli_sum"].astype(np.int8)

holidays_df["tarih"] = holidays_df['Yıl'].astype(str) + '-' + holidays_df['Ay'].astype(str) + '-' + holidays_df['Gün'].astype(str)
holidays_df["tarih"] = pd.to_datetime(holidays_df["tarih"])
holidays_df = holidays_df.drop(columns=['Yıl', 'Ay', 'Gün'])


weather_df["tarih"] = pd.to_datetime(weather_df["date"])
weather_df['ilce'] = weather_df['name'].astype('category')
weather_df = weather_df.drop(columns=['date','name'])
#Train
merged_train_df = pd.merge(train_df, holidays_df, on='tarih', how='left').reset_index()
merged_train_df['Bayram_Flag'] = merged_train_df['Tatil Adı'].fillna(0)
merged_train_df['Bayram_Flag'] = merged_train_df['Bayram_Flag'].astype('category')
merged_train_df = merged_train_df.drop(columns=['Tatil Adı'])

merged_train_df['is_Bayram'] = merged_train_df['Bayram_Flag'].apply(lambda x: 0 if x == 0 else 1)
merged_train_df['is_Bayram'] = merged_train_df['Bayram_Flag'].astype(bool)
merged_train_df['ilce']=merged_train_df['ilce'].astype('category')

#Test
merged_test_df = pd.merge(test_df, holidays_df, on='tarih', how='left').reset_index()
merged_test_df['Bayram_Flag'] = merged_test_df['Tatil Adı'].fillna(0)
merged_test_df['Bayram_Flag'] = merged_test_df['Bayram_Flag'].astype('category')
merged_test_df = merged_test_df.drop(columns=['Tatil Adı'])

merged_test_df['is_Bayram'] = merged_test_df['Bayram_Flag'].apply(lambda x: 0 if x == 0 else 1)
merged_test_df['is_Bayram'] = merged_test_df['Bayram_Flag'].astype(bool)
merged_test_df['ilce']=merged_test_df['ilce'].astype('category')
#weather op
daily_df = weather_df.groupby(['ilce', pd.Grouper(freq='D', key='tarih')])

daily_df = daily_df.agg({
    't_2m:C': ['max', 'min'],  # temperature
    'prob_precip_1h:p': ['sum', 'max' ,'mean',lambda x: x.mode()[0]],  # precipitation
    'wind_speed_10m:ms': ['max', 'mean','std',lambda x: x.mode()[0]],  # wind speed
    'wind_dir_10m:d': 'mean',  # wind direction
    'global_rad:W': 'sum',  # sunshine duration
    'effective_cloud_cover:p': ['mean','std'],  # cloud cover
    'relative_humidity_2m:p': ['max', 'min',lambda x: x.mode()[0]]  # humidity
})

daily_df.columns = ['_'.join(col).strip() for col in daily_df.columns.values]
daily_df = daily_df.reset_index()
daily_df = daily_df.rename(columns={col: col.replace('<lambda_0>', 'mode') for col in daily_df.columns})
daily_df['ilce'] = daily_df['ilce'].str.lower()
weather_df=daily_df

#merging all
merged_test_df = pd.merge(weather_df, merged_test_df, on=['tarih', 'ilce'], how='inner')
merged_train_df = pd.merge(weather_df, merged_train_df, on=['tarih', 'ilce'], how='inner')

merged_train_df['ilce']=merged_train_df['ilce'].astype('category')
merged_test_df['ilce']=merged_test_df['ilce'].astype('category')

covariance = merged_train_df.select_dtypes(include=['float64', 'int8','bool']).cov()['bildirimsiz_sum']
correlation = merged_train_df.select_dtypes(include=['float64', 'int8','bool']).corr()["bildirimsiz_sum"]

  merged_test_df['Bayram_Flag'] = merged_test_df['Tatil Adı'].fillna(0)
  daily_df = weather_df.groupby(['ilce', pd.Grouper(freq='D', key='tarih')])


In [51]:
merged_train_df.dtypes


ilce                                  category
tarih                           datetime64[ns]
t_2m:C_max                             float64
t_2m:C_min                             float64
prob_precip_1h:p_sum                   float64
prob_precip_1h:p_max                   float64
prob_precip_1h:p_mean                  float64
prob_precip_1h:p_mode                  float64
wind_speed_10m:ms_max                  float64
wind_speed_10m:ms_mean                 float64
wind_speed_10m:ms_std                  float64
wind_speed_10m:ms_mode                 float64
wind_dir_10m:d_mean                    float64
global_rad:W_sum                       float64
effective_cloud_cover:p_mean           float64
effective_cloud_cover:p_std            float64
relative_humidity_2m:p_max             float64
relative_humidity_2m:p_min             float64
relative_humidity_2m:p_mode            float64
index                                    int64
bildirimsiz_sum                           int8
bildirimli_su

In [53]:
features = ["t_2m:C_max", "t_2m:C_min", "prob_precip_1h:p_sum", "prob_precip_1h:p_max",
            "wind_speed_10m:ms_max", "wind_speed_10m:ms_mean", "wind_speed_10m:ms_std",
            "wind_dir_10m:d_mean", "global_rad:W_sum", "effective_cloud_cover:p_mean",
            "effective_cloud_cover:p_std", "relative_humidity_2m:p_max", "relative_humidity_2m:p_min",
             "is_Bayram","bildirimsiz_sum"]

X_train = merged_train_df[features]
y_train = merged_train_df["bildirimsiz_sum"]

X_test = merged_test_df[features]

# Ulaştığım en uygun parametreler 🠓🠓🠓🠓🠓
model = RandomForestClassifier(n_estimators=160, max_depth=1, min_samples_split=4, min_samples_leaf=4) 
model.fit(X_train, y_train)


y_pred = model.predict(X_test)

accuracy = accuracy_score(X_train, X_test)
print("Test verileri üzerinde doğruluk skoru:", accuracy)

KeyError: "['bildirimsiz_sum'] not in index"