In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import numpy as np
import keras_tuner as kt
from keras.models import Sequential # type: ignore
from keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout # type: ignore
from tensorflow.keras.optimizers import Adam, SGD, RMSprop # type: ignore
from tensorflow.keras.callbacks import EarlyStopping # type: ignore
from keras_tuner import Hyperband # type: ignore
from sklearn.metrics import mean_absolute_error
import tensorflow as tf

In [6]:
# train_df= pd.read_csv('../input/gdz-elektrik-datathon/train.csv')
# test_df= pd.read_csv('../input/gdz-elektrik-datathon/test.csv')
# holidays_df = pd.read_csv('../input/gdz-elektrik-datathon/holidays.csv')
# weather_df= pd.read_csv('../input/gdz-elektrik-datathon/weather.csv')
train_df= pd.read_csv('./train.csv')
test_df= pd.read_csv('./test.csv')
holidays_df = pd.read_csv('./holidays.csv')
weather_df= pd.read_csv('./weather.csv')

train_df['tarih'] = pd.to_datetime(train_df['tarih'])
train_df['ilce'] = train_df['ilce'].astype('category')
train_df["bildirimsiz_sum"] = train_df["bildirimsiz_sum"].astype(int)
train_df["bildirimli_sum"] = train_df["bildirimli_sum"].astype(int)

test_df['tarih'] = pd.to_datetime(test_df['tarih'])
test_df['ilce'] = test_df['ilce'].astype('category')
test_df["bildirimli_sum"] = test_df["bildirimli_sum"].astype(int)

holidays_df["tarih"] = holidays_df['Yıl'].astype(str) + '-' + holidays_df['Ay'].astype(str) + '-' + holidays_df['Gün'].astype(str)
holidays_df["tarih"] = pd.to_datetime(holidays_df["tarih"])
holidays_df = holidays_df.drop(columns=['Yıl', 'Ay', 'Gün'])


weather_df["tarih"] = pd.to_datetime(weather_df["date"])
weather_df['ilce'] = weather_df['name'].astype('category')
weather_df = weather_df.drop(columns=['date','name'])
#Train
merged_train_df = pd.merge(train_df, holidays_df, on='tarih', how='left').reset_index()
merged_train_df['Bayram_Flag'] = merged_train_df['Tatil Adı'].fillna(0)
merged_train_df['Bayram_Flag'] = merged_train_df['Bayram_Flag'].astype('category')
merged_train_df = merged_train_df.drop(columns=['Tatil Adı'])

merged_train_df['is_Bayram'] = merged_train_df['Bayram_Flag'].apply(lambda x: 0 if x == 0 else 1)
merged_train_df['is_Bayram'] = merged_train_df['Bayram_Flag'].astype(bool)
merged_train_df['ilce']=merged_train_df['ilce'].astype('category')

#Test
merged_test_df = pd.merge(test_df, holidays_df, on='tarih', how='left').reset_index()
merged_test_df['Bayram_Flag'] = merged_test_df['Tatil Adı'].fillna(0)
merged_test_df['Bayram_Flag'] = merged_test_df['Bayram_Flag'].astype('category')
merged_test_df = merged_test_df.drop(columns=['Tatil Adı'])

merged_test_df['is_Bayram'] = merged_test_df['Bayram_Flag'].apply(lambda x: 0 if x == 0 else 1)
merged_test_df['is_Bayram'] = merged_test_df['Bayram_Flag'].astype(bool)
merged_test_df['ilce']=merged_test_df['ilce'].astype('category')
#weather op
daily_df = weather_df.groupby(['ilce', pd.Grouper(freq='D', key='tarih')])

daily_df = daily_df.agg({
    't_2m:C': ['max', 'min'],  # temperature
    'prob_precip_1h:p': ['sum', 'max' ,'mean',lambda x: x.mode()[0]],  # precipitation
    'wind_speed_10m:ms': ['max', 'mean','std',lambda x: x.mode()[0]],  # wind speed
    'wind_dir_10m:d': 'mean',  # wind direction
    'global_rad:W': 'sum',  # sunshine duration
    'effective_cloud_cover:p': ['mean','std'],  # cloud cover
    'relative_humidity_2m:p': ['max', 'min',lambda x: x.mode()[0]]  # humidity
})

daily_df.columns = ['_'.join(col).strip() for col in daily_df.columns.values]
daily_df = daily_df.reset_index()
daily_df = daily_df.rename(columns={col: col.replace('<lambda_0>', 'mode') for col in daily_df.columns})
daily_df['ilce'] = daily_df['ilce'].str.lower()
weather_df=daily_df

#merging all
merged_test_df = pd.merge(weather_df, merged_test_df, on=['tarih', 'ilce'], how='inner')
merged_train_df = pd.merge(weather_df, merged_train_df, on=['tarih', 'ilce'], how='inner')

merged_test_df['ilce'] = merged_test_df['ilce'].astype('category')
merged_train_df['ilce'] = merged_train_df['ilce'].astype('category')



CNN

In [25]:
features = ['t_2m:C_max', 't_2m:C_min', 'prob_precip_1h:p_sum','prob_precip_1h:p_max', 'prob_precip_1h:p_mean',
       'prob_precip_1h:p_mode', 'wind_speed_10m:ms_max','wind_speed_10m:ms_mean', 'wind_speed_10m:ms_std','wind_speed_10m:ms_mode',
       'wind_dir_10m:d_mean', 'global_rad:W_sum','effective_cloud_cover:p_mean', 'effective_cloud_cover:p_std','relative_humidity_2m:p_max',
       'relative_humidity_2m:p_min','relative_humidity_2m:p_mode', 'bildirimsiz_sum',
       'bildirimli_sum','is_Bayram']

grouped = merged_train_df.groupby('ilce')

for district, group in grouped:
    predict_df = group[features]
    X, y = predict_df.drop('bildirimsiz_sum', axis=1), predict_df[['bildirimsiz_sum']]

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

    X_train = tf.constant(X_train)
    X_test = tf.constant(X_test)
    y_train = tf.constant(y_train)
    y_test = tf.constant(y_test)
    # Reshape data for CNN
    X_train = X_train.values.reshape(-1, X_train.shape[1], 1)
    X_test = X_test.values.reshape(-1, X_test.shape[1], 1)

    def build_model(hp):
        model = Sequential()
        model.add(Conv1D(
                filters=hp.Choice('conv_filters', values=[16, 32, 48, 64, 80, 96, 112, 128]),
                kernel_size=hp.Choice('conv_kernel_size', values=[2, 3, 4, 5]),
                strides=hp.Choice('conv_strides_x', values=[1, 2]),
                padding=hp.Choice('conv_padding', values=['valid', 'same']),
                activation=hp.Choice('activation', values=['relu', 'tanh', 'swish']),
                input_shape=(X_train.shape[1], 1)
        ))
        model.add(MaxPooling1D(
                pool_size=hp.Choice('pool_size_choice', values=[2, 3]),
                strides=hp.Choice('strides_choice', values=[2, 3])
        ))
        model.add(Flatten())
        model.add(Dense(
                units=hp.Choice('dense_units', values=[32, 64, 96, 128, 160, 192, 224, 256]),
                activation=hp.Choice('activation', values=['relu', 'tanh', 'swish'])
        ))
        model.add(Dropout(rate=hp.Choice('dropout_rate', values=[0.1, 0.2, 0.3, 0.4, 0.5])))
        model.add(Dense(1))

        model.compile(
                loss='mean_squared_error',
                optimizer=hp.Choice('optimizer', values=['adam', 'sgd', 'rmsprop', 'adagrad']),
        )

        return model


    tuner = kt.Hyperband(
    build_model,
    max_epochs=100,
    factor=3,
    directory='/tmp/keras-tuner-hyperband',
    objective='val_mean_absolute_error'
    )
    
    tuner.search(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test))

    # Get the best model
    best_model = tuner.get_best_models(num_models=1)[0]

    # Evaluate the best model
    preds = best_model.predict(X_test)
    mae = mean_absolute_error(y_test, preds, squared=False)
    print(f"MAE for {district}: {mae}")

#modeldeki hatayı çözemedim chatgpt saolsun o da çözemedi :D 


ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type float).