In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

In [2]:
# train_df= pd.read_csv('../input/gdz-elektrik-datathon/train.csv')
# test_df= pd.read_csv('../input/gdz-elektrik-datathon/test.csv')
# holidays_df = pd.read_csv('../input/gdz-elektrik-datathon/holidays.csv')
# weather_df= pd.read_csv('../input/gdz-elektrik-datathon/weather.csv')
train_df= pd.read_csv('./train.csv')
test_df= pd.read_csv('./test.csv')
holidays_df = pd.read_csv('./holidays.csv')
weather_df= pd.read_csv('./weather.csv')

train_df['tarih'] = pd.to_datetime(train_df['tarih'])
train_df['ilce'] = train_df['ilce'].astype('category')
train_df["bildirimsiz_sum"] = train_df["bildirimsiz_sum"].astype(int)
train_df["bildirimli_sum"] = train_df["bildirimli_sum"].astype(int)

test_df['tarih'] = pd.to_datetime(test_df['tarih'])
test_df['ilce'] = test_df['ilce'].astype('category')
test_df["bildirimli_sum"] = test_df["bildirimli_sum"].astype(int)

holidays_df["tarih"] = holidays_df['Yıl'].astype(str) + '-' + holidays_df['Ay'].astype(str) + '-' + holidays_df['Gün'].astype(str)
holidays_df["tarih"] = pd.to_datetime(holidays_df["tarih"])
holidays_df = holidays_df.drop(columns=['Yıl', 'Ay', 'Gün'])


weather_df["tarih"] = pd.to_datetime(weather_df["date"])
weather_df['ilce'] = weather_df['name'].astype('category')
weather_df = weather_df.drop(columns=['date','name'])
#Train
merged_train_df = pd.merge(train_df, holidays_df, on='tarih', how='left').reset_index()
merged_train_df['Bayram_Flag'] = merged_train_df['Tatil Adı'].fillna(0)
merged_train_df['Bayram_Flag'] = merged_train_df['Bayram_Flag'].astype('category')
merged_train_df = merged_train_df.drop(columns=['Tatil Adı'])

merged_train_df['is_Bayram'] = merged_train_df['Bayram_Flag'].apply(lambda x: 0 if x == 0 else 1)
merged_train_df['is_Bayram'] = merged_train_df['Bayram_Flag'].astype(bool)
merged_train_df['ilce']=merged_train_df['ilce'].astype('category')

#Test
merged_test_df = pd.merge(test_df, holidays_df, on='tarih', how='left').reset_index()
merged_test_df['Bayram_Flag'] = merged_test_df['Tatil Adı'].fillna(0)
merged_test_df['Bayram_Flag'] = merged_test_df['Bayram_Flag'].astype('category')
merged_test_df = merged_test_df.drop(columns=['Tatil Adı'])

merged_test_df['is_Bayram'] = merged_test_df['Bayram_Flag'].apply(lambda x: 0 if x == 0 else 1)
merged_test_df['is_Bayram'] = merged_test_df['Bayram_Flag'].astype(bool)
merged_test_df['ilce']=merged_test_df['ilce'].astype('category')
#weather op
daily_df = weather_df.groupby(['ilce', pd.Grouper(freq='D', key='tarih')])

daily_df = daily_df.agg({
    't_2m:C': ['max', 'min'],  # temperature
    'prob_precip_1h:p': ['sum', 'max' ,'mean',lambda x: x.mode()[0]],  # precipitation
    'wind_speed_10m:ms': ['max', 'mean','std',lambda x: x.mode()[0]],  # wind speed
    'wind_dir_10m:d': 'mean',  # wind direction
    'global_rad:W': 'sum',  # sunshine duration
    'effective_cloud_cover:p': ['mean','std'],  # cloud cover
    'relative_humidity_2m:p': ['max', 'min',lambda x: x.mode()[0]]  # humidity
})

daily_df.columns = ['_'.join(col).strip() for col in daily_df.columns.values]
daily_df = daily_df.reset_index()
daily_df = daily_df.rename(columns={col: col.replace('<lambda_0>', 'mode') for col in daily_df.columns})
daily_df['ilce'] = daily_df['ilce'].str.lower()
weather_df=daily_df

#merging all
merged_test_df = pd.merge(weather_df, merged_test_df, on=['tarih', 'ilce'], how='inner')
merged_train_df = pd.merge(weather_df, merged_train_df, on=['tarih', 'ilce'], how='inner')

merged_test_df['ilce'] = merged_test_df['ilce'].astype('category')
merged_train_df['ilce'] = merged_train_df['ilce'].astype('category')

merged_train_df = merged_train_df.drop(columns=['index'])
merged_test_df = merged_test_df.drop(columns=['index'])

Merged Train ve Merged Test csv dosyalarına ulaşmak için aşağıdaki yeri çalıştırın

In [3]:
# merged_train_df.to_excel("./merged_df/merged_train.xlsx", index=False)
# merged_test_df.to_excel("./merged_df/merged_test.xlsx", index=False)

In [14]:
merged_test_df.info()
normalized_df = merged_train_df[merged_train_df['bildirimsiz_sum'] <= 60]

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1363 entries, 0 to 1362
Data columns (total 22 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   ilce                          1363 non-null   category      
 1   tarih                         1363 non-null   datetime64[ns]
 2   t_2m:C_max                    1363 non-null   float64       
 3   t_2m:C_min                    1363 non-null   float64       
 4   prob_precip_1h:p_sum          1363 non-null   float64       
 5   prob_precip_1h:p_max          1363 non-null   float64       
 6   prob_precip_1h:p_mean         1363 non-null   float64       
 7   prob_precip_1h:p_mode         1363 non-null   float64       
 8   wind_speed_10m:ms_max         1363 non-null   float64       
 9   wind_speed_10m:ms_mean        1363 non-null   float64       
 10  wind_speed_10m:ms_std         1363 non-null   float64       
 11  wind_speed_10m:ms_mode        

CNN

In [5]:
independent_variables=['t_2m:C_max', 't_2m:C_min', 'prob_precip_1h:p_sum','wind_speed_10m:ms_max',
       'wind_speed_10m:ms_mean', 'wind_speed_10m:ms_std',
       'wind_speed_10m:ms_mode','global_rad:W_sum',
       'relative_humidity_2m:p_max', 'relative_humidity_2m:p_min',
       'relative_humidity_2m:p_mode','is_Bayram']
dependent_variables= ['bildirimsiz_sum' ]

In [15]:
scaler_standard = StandardScaler()
#normalizing the values
train_independent_variables_standardized = scaler_standard.fit_transform(normalized_df[independent_variables])
train_dependent_variables = normalized_df[dependent_variables]
test_independent_variables_standardized = scaler_standard.transform(merged_test_df[independent_variables])

In [16]:
#creating the model
model = Sequential()
model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(train_independent_variables_standardized.shape[1], 1)))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(optimizer='adam',loss='mse', metrics=['mae'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [18]:
#train the model
history = model.fit(train_independent_variables_standardized, train_dependent_variables,
                    epochs=15, batch_size=32, validation_split=0.2)

Epoch 1/15
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 25.5072 - val_loss: 24.6216
Epoch 2/15
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 26.1724 - val_loss: 24.8346
Epoch 3/15
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 5ms/step - loss: 25.4502 - val_loss: 24.5150
Epoch 4/15
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 25.7315 - val_loss: 24.2961
Epoch 5/15
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - loss: 25.8457 - val_loss: 24.5925
Epoch 6/15
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 26.4368 - val_loss: 24.4902
Epoch 7/15
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 25.5378 - val_loss: 24.4464
Epoch 8/15
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 25.6660 - val_loss: 24.5964
Epoch 9/

In [19]:
#testing the model 
predictions = model.predict(test_independent_variables_standardized)


[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


In [20]:
model1 = Sequential()
model1.add(Conv1D(filters=32,kernel_size=3, activation='relu', input_shape=(train_independent_variables_standardized.shape[1], 1)))
model1.add(MaxPooling1D(pool_size=2))
for _ in range(10):
    model1.add(Conv1D(filters=32,kernel_size=3, activation='relu', padding='same'))
model1.add(Flatten())
model1.add(Dense(128, activation='relu'))
model1.add(Dense(1, activation='linear'))
model1.compile(optimizer=Adam(learning_rate=0.1), loss='mae', metrics= ['mae'])
model1.summary()
model1.fit(train_independent_variables_standardized, train_dependent_variables,
                    epochs=25, batch_size=32, validation_split=0.2)


Epoch 1/25
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 14ms/step - loss: 1010405.0625 - val_loss: 26.5478
Epoch 2/25
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 13ms/step - loss: 29.5741 - val_loss: 26.5298
Epoch 3/25
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 20ms/step - loss: 29.2004 - val_loss: 29.4776
Epoch 4/25
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 17ms/step - loss: 29.5983 - val_loss: 26.6427
Epoch 5/25
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 16ms/step - loss: 28.5799 - val_loss: 26.6917
Epoch 6/25
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 16ms/step - loss: 28.4021 - val_loss: 26.8087
Epoch 7/25
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 13ms/step - loss: 29.2617 - val_loss: 26.9933
Epoch 8/25
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 12ms/step - loss: 29.7784 - val_l

<keras.src.callbacks.history.History at 0x256077253d0>

In [21]:
predictions = model1.predict(test_independent_variables_standardized)

[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


In [23]:
model2 = Sequential()
model2.add(Conv1D(filters=32, kernel_size=9, activation='relu', input_shape=(train_independent_variables_standardized.shape[1], 1)))
model2.add(MaxPooling1D(pool_size=2))

model2.add(Conv1D(filters=64, kernel_size=9, activation='relu', padding='same'))
model2.add(MaxPooling1D(pool_size=2))

model2.add(Conv1D(filters=128, kernel_size=9, activation='relu', padding='same'))
model2.add(MaxPooling1D(pool_size=2))

model2.add(Flatten())
model2.add(Dense(128, activation='relu'))
model2.add(Dense(1, activation='linear'))

model2.compile(optimizer=Adam(learning_rate=0.00001), loss='mse', metrics=['mae'])

model2.summary()

model2.fit(train_independent_variables_standardized, train_dependent_variables, epochs=50, batch_size=32, validation_split=0.2)

Epoch 1/50
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 9ms/step - loss: 55.7527 - mae: 5.2219 - val_loss: 28.3145 - val_mae: 3.5847
Epoch 2/50
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - loss: 29.3841 - mae: 3.7713 - val_loss: 27.8185 - val_mae: 3.5759
Epoch 3/50
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - loss: 28.1845 - mae: 3.7147 - val_loss: 27.4049 - val_mae: 3.5823
Epoch 4/50
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - loss: 28.2836 - mae: 3.7461 - val_loss: 27.1666 - val_mae: 3.5758
Epoch 5/50
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - loss: 28.6863 - mae: 3.7511 - val_loss: 27.0891 - val_mae: 3.5533
Epoch 6/50
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 10ms/step - loss: 28.1232 - mae: 3.7249 - val_loss: 26.8633 - val_mae: 3.5599
Epoch 7/50
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x25606b9bb50>

In [25]:
predictions = model2.predict(test_independent_variables_standardized)
predictions

[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step


array([[5.58697  ],
       [5.683231 ],
       [5.80965  ],
       ...,
       [4.2651916],
       [4.7880855],
       [5.913503 ]], dtype=float32)