In [34]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
import numpy as np

In [19]:
# train_df= pd.read_csv('../input/gdz-elektrik-datathon/train.csv')
# test_df= pd.read_csv('../input/gdz-elektrik-datathon/test.csv')
# holidays_df = pd.read_csv('../input/gdz-elektrik-datathon/holidays.csv')
# weather_df= pd.read_csv('../input/gdz-elektrik-datathon/weather.csv')
train_df= pd.read_csv('./train.csv')
test_df= pd.read_csv('./test.csv')
holidays_df = pd.read_csv('./holidays.csv')
weather_df= pd.read_csv('./weather.csv')

train_df['tarih'] = pd.to_datetime(train_df['tarih'])
train_df['ilce'] = train_df['ilce'].astype('category')
train_df["bildirimsiz_sum"] = train_df["bildirimsiz_sum"].astype(int)
train_df["bildirimli_sum"] = train_df["bildirimli_sum"].astype(int)

test_df['tarih'] = pd.to_datetime(test_df['tarih'])
test_df['ilce'] = test_df['ilce'].astype('category')
test_df["bildirimli_sum"] = test_df["bildirimli_sum"].astype(int)

holidays_df["tarih"] = holidays_df['Yıl'].astype(str) + '-' + holidays_df['Ay'].astype(str) + '-' + holidays_df['Gün'].astype(str)
holidays_df["tarih"] = pd.to_datetime(holidays_df["tarih"])
holidays_df = holidays_df.drop(columns=['Yıl', 'Ay', 'Gün'])


weather_df["tarih"] = pd.to_datetime(weather_df["date"])
weather_df['ilce'] = weather_df['name'].astype('category')
weather_df = weather_df.drop(columns=['date','name'])
#Train
merged_train_df = pd.merge(train_df, holidays_df, on='tarih', how='left').reset_index()
merged_train_df['Bayram_Flag'] = merged_train_df['Tatil Adı'].fillna(0)
merged_train_df['Bayram_Flag'] = merged_train_df['Bayram_Flag'].astype('category')
merged_train_df = merged_train_df.drop(columns=['Tatil Adı'])

merged_train_df['is_Bayram'] = merged_train_df['Bayram_Flag'].apply(lambda x: 0 if x == 0 else 1)
merged_train_df['is_Bayram'] = merged_train_df['Bayram_Flag'].astype(bool)
merged_train_df['ilce']=merged_train_df['ilce'].astype('category')

#Test
merged_test_df = pd.merge(test_df, holidays_df, on='tarih', how='left').reset_index()
merged_test_df['Bayram_Flag'] = merged_test_df['Tatil Adı'].fillna(0)
merged_test_df['Bayram_Flag'] = merged_test_df['Bayram_Flag'].astype('category')
merged_test_df = merged_test_df.drop(columns=['Tatil Adı'])

merged_test_df['is_Bayram'] = merged_test_df['Bayram_Flag'].apply(lambda x: 0 if x == 0 else 1)
merged_test_df['is_Bayram'] = merged_test_df['Bayram_Flag'].astype(bool)
merged_test_df['ilce']=merged_test_df['ilce'].astype('category')
#weather op
daily_df = weather_df.groupby(['ilce', pd.Grouper(freq='D', key='tarih')])

daily_df = daily_df.agg({
    't_2m:C': ['max', 'min'],  # temperature
    'prob_precip_1h:p': ['sum', 'max' ,'mean',lambda x: x.mode()[0]],  # precipitation
    'wind_speed_10m:ms': ['max', 'mean','std',lambda x: x.mode()[0]],  # wind speed
    'wind_dir_10m:d': 'mean',  # wind direction
    'global_rad:W': 'sum',  # sunshine duration
    'effective_cloud_cover:p': ['mean','std'],  # cloud cover
    'relative_humidity_2m:p': ['max', 'min',lambda x: x.mode()[0]]  # humidity
})

daily_df.columns = ['_'.join(col).strip() for col in daily_df.columns.values]
daily_df = daily_df.reset_index()
daily_df = daily_df.rename(columns={col: col.replace('<lambda_0>', 'mode') for col in daily_df.columns})
daily_df['ilce'] = daily_df['ilce'].str.lower()
weather_df=daily_df

#merging all
merged_test_df = pd.merge(weather_df, merged_test_df, on=['tarih', 'ilce'], how='inner')
merged_train_df = pd.merge(weather_df, merged_train_df, on=['tarih', 'ilce'], how='inner')

merged_test_df['ilce'] = merged_test_df['ilce'].astype('category')
merged_train_df['ilce'] = merged_train_df['ilce'].astype('category')

merged_train_df = merged_train_df.drop(columns=['index'])
merged_test_df = merged_test_df.drop(columns=['index'])

merged_test_df=merged_test_df.sort_values(by=['tarih', 'ilce'])
merged_train_df=merged_train_df.sort_values(by=['tarih', 'ilce'])

Merged Train ve Merged Test csv dosyalarına ulaşmak için aşağıdaki yeri çalıştırın

In [20]:
# merged_train_df.to_excel("./merged_df/merged_train.xlsx", index=False)
# merged_test_df.to_excel("./merged_df/merged_test.xlsx", index=False)

In [21]:
merged_test_df.info()
normalized_df = merged_train_df[merged_train_df['bildirimsiz_sum'] <= 35]

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1363 entries, 0 to 1362
Data columns (total 22 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   ilce                          1363 non-null   category      
 1   tarih                         1363 non-null   datetime64[ns]
 2   t_2m:C_max                    1363 non-null   float64       
 3   t_2m:C_min                    1363 non-null   float64       
 4   prob_precip_1h:p_sum          1363 non-null   float64       
 5   prob_precip_1h:p_max          1363 non-null   float64       
 6   prob_precip_1h:p_mean         1363 non-null   float64       
 7   prob_precip_1h:p_mode         1363 non-null   float64       
 8   wind_speed_10m:ms_max         1363 non-null   float64       
 9   wind_speed_10m:ms_mean        1363 non-null   float64       
 10  wind_speed_10m:ms_std         1363 non-null   float64       
 11  wind_speed_10m:ms_mode        

CNN

In [22]:
independent_variables=['t_2m:C_max', 't_2m:C_min', 'prob_precip_1h:p_sum','wind_speed_10m:ms_max',
       'wind_speed_10m:ms_mean', 'wind_speed_10m:ms_std',
       'wind_speed_10m:ms_mode','global_rad:W_sum',
       'relative_humidity_2m:p_max', 'relative_humidity_2m:p_min',
       'relative_humidity_2m:p_mode','is_Bayram']
dependent_variables= ['bildirimsiz_sum' ]

scaler_standard = StandardScaler()
#normalizing the values
train_independent_variables_standardized = scaler_standard.fit_transform(normalized_df[independent_variables])
train_dependent_variables = normalized_df[dependent_variables]
test_independent_variables_standardized = scaler_standard.transform(merged_test_df[independent_variables])

In [23]:
#creating the model
model = Sequential()
model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(train_independent_variables_standardized.shape[1], 1)))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(optimizer='adam',loss='mae', metrics=['mae'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [24]:
#train the model
history = model.fit(train_independent_variables_standardized, train_dependent_variables,
                    epochs=15, batch_size=32, validation_split=0.2)

Epoch 1/15
[1m1201/1201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - loss: 3.6334 - mae: 3.6334 - val_loss: 3.5103 - val_mae: 3.5103
Epoch 2/15
[1m1201/1201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 3.4053 - mae: 3.4053 - val_loss: 3.5310 - val_mae: 3.5310
Epoch 3/15
[1m1201/1201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 3.4763 - mae: 3.4763 - val_loss: 3.4950 - val_mae: 3.4950
Epoch 4/15
[1m1201/1201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 5ms/step - loss: 3.4160 - mae: 3.4160 - val_loss: 3.5160 - val_mae: 3.5160
Epoch 5/15
[1m1201/1201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 3.4242 - mae: 3.4242 - val_loss: 3.5007 - val_mae: 3.5007
Epoch 6/15
[1m1201/1201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 3.4508 - mae: 3.4508 - val_loss: 3.4894 - val_mae: 3.4894
Epoch 7/15
[1m1201/1201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[

In [25]:
#testing the model 
predictions = model.predict(test_independent_variables_standardized)


[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


In [26]:
model1 = Sequential()
model1.add(Conv1D(filters=32,kernel_size=3, activation='relu', input_shape=(train_independent_variables_standardized.shape[1], 1)))
model1.add(MaxPooling1D(pool_size=2))
for _ in range(10):
    model1.add(Conv1D(filters=32,kernel_size=3, activation='relu', padding='same'))
model1.add(Flatten())
model1.add(Dense(128, activation='relu'))
model1.add(Dense(1, activation='linear'))
model1.compile(optimizer=Adam(learning_rate=0.1), loss='mae', metrics= ['mae'])
model1.summary()
model1.fit(train_independent_variables_standardized, train_dependent_variables,
                    epochs=25, batch_size=32, validation_split=0.2)


Epoch 1/25
[1m1201/1201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 14ms/step - loss: 12.9360 - mae: 12.9360 - val_loss: 3.6079 - val_mae: 3.6079
Epoch 2/25
[1m1201/1201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 13ms/step - loss: 22.3212 - mae: 22.3212 - val_loss: 3.6076 - val_mae: 3.6076
Epoch 3/25
[1m1201/1201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 13ms/step - loss: 3.4987 - mae: 3.4987 - val_loss: 3.6066 - val_mae: 3.6066
Epoch 4/25
[1m1201/1201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 12ms/step - loss: 3.5176 - mae: 3.5176 - val_loss: 3.6160 - val_mae: 3.6160
Epoch 5/25
[1m1201/1201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 12ms/step - loss: 3.5037 - mae: 3.5037 - val_loss: 3.6117 - val_mae: 3.6117
Epoch 6/25
[1m1201/1201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 13ms/step - loss: 3.4899 - mae: 3.4899 - val_loss: 3.6053 - val_mae: 3.6053
Epoch 7/25
[1m1201/1201[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x2bc00cb8690>

In [27]:
predictions1 = model1.predict(test_independent_variables_standardized)

[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step


In [28]:
model2 = Sequential()
model2.add(Conv1D(filters=256, kernel_size=9, activation='relu', input_shape=(train_independent_variables_standardized.shape[1], 1)))
model2.add(MaxPooling1D(pool_size=2))

model2.add(Conv1D(filters=256, kernel_size=9, activation='relu', padding='same'))
model2.add(MaxPooling1D(pool_size=2))

model2.add(Flatten())
model2.add(Dense(256, activation='relu'))
model2.add(Dense(1, activation='linear'))


model2.compile(optimizer=Adam(learning_rate=0.0001), loss='mse', metrics=['mae'])


model2.fit(train_independent_variables_standardized, train_dependent_variables, epochs=27, batch_size=32, validation_split=0.3)

Epoch 1/27
[1m1051/1051[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 28ms/step - loss: 30.5335 - mae: 3.9540 - val_loss: 24.0505 - val_mae: 3.5558
Epoch 2/27
[1m1051/1051[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 29ms/step - loss: 23.7100 - mae: 3.5933 - val_loss: 24.3019 - val_mae: 3.7487
Epoch 3/27
[1m1051/1051[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 29ms/step - loss: 23.7520 - mae: 3.6008 - val_loss: 23.8222 - val_mae: 3.5256
Epoch 4/27
[1m1051/1051[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 30ms/step - loss: 23.0102 - mae: 3.5585 - val_loss: 23.7702 - val_mae: 3.6039
Epoch 5/27
[1m1051/1051[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 29ms/step - loss: 23.3824 - mae: 3.5831 - val_loss: 23.8945 - val_mae: 3.4865
Epoch 6/27
[1m1051/1051[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 31ms/step - loss: 22.6071 - mae: 3.5274 - val_loss: 23.7231 - val_mae: 3.6133
Epoch 7/27
[1m1051/1051[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x2bc1aab22d0>

In [31]:
predictions2 = model2.predict(test_independent_variables_standardized)


[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step


array([[5.960788 ],
       [5.0398808],
       [4.3943334],
       ...,
       [3.9716434],
       [4.813523 ],
       [4.6849337]], dtype=float32)

In [41]:
predictions=np.round(predictions).astype(np.int8)


In [42]:
sample_submission= pd.read_csv('./sample_submission.csv')
submission = sample_submission.copy()
submission["bildirimsiz_sum"] = predictions
submission.to_csv("cnn_submission2.csv", index=False)