In [27]:
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

In [28]:
# Load the four input tables from the working directory.
# (When running on Kaggle, the same files live under '../input/gdz-elektrik-datathon/'.)
train_df = pd.read_csv('./train.csv')
test_df = pd.read_csv('./test.csv')
holidays_df = pd.read_csv('./holidays.csv')
weather_df = pd.read_csv('./weather.csv')

# Train: parse the date, make the district categorical, cast both count columns to int.
train_df = train_df.assign(
    tarih=pd.to_datetime(train_df['tarih']),
    ilce=train_df['ilce'].astype('category'),
    bildirimsiz_sum=train_df["bildirimsiz_sum"].astype(int),
    bildirimli_sum=train_df["bildirimli_sum"].astype(int),
)

# Test: same typing; only `bildirimli_sum` is cast here (the target column is not touched).
test_df = test_df.assign(
    tarih=pd.to_datetime(test_df['tarih']),
    ilce=test_df['ilce'].astype('category'),
    bildirimli_sum=test_df["bildirimli_sum"].astype(int),
)

# Holidays: build a single datetime column from the year / month / day columns,
# then drop the three source columns.
year_txt, month_txt, day_txt = (holidays_df[part].astype(str) for part in ('Yıl', 'Ay', 'Gün'))
holidays_df["tarih"] = pd.to_datetime(year_txt + '-' + month_txt + '-' + day_txt)
holidays_df = holidays_df.drop(columns=['Yıl', 'Ay', 'Gün'])

# Weather: rename date/name to the keys used by the other tables (tarih/ilce).
weather_df = (
    weather_df
    .assign(
        tarih=pd.to_datetime(weather_df["date"]),
        ilce=weather_df['name'].astype('category'),
    )
    .drop(columns=['date', 'name'])
)
def _attach_holiday_flags(frame, holidays):
    """Left-join the holiday table onto ``frame`` and derive holiday features.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with at least the columns ``tarih`` and ``ilce``.
    holidays : pandas.DataFrame
        Holiday table with a ``tarih`` column and a ``Tatil Adı`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame with:
        * ``index``       - produced by ``reset_index()``; kept on purpose because
                            the later merge step drops it explicitly.
        * ``Bayram_Flag`` - categorical: the ``Tatil Adı`` value, or 0 when the
                            date has no matching holiday row.
        * ``is_Bayram``   - boolean: truthiness of ``Bayram_Flag`` (False for 0).
        * ``ilce``        - cast to categorical.
        The ``Tatil Adı`` column itself is dropped.
    """
    merged = pd.merge(frame, holidays, on='tarih', how='left').reset_index()
    merged['Bayram_Flag'] = merged['Tatil Adı'].fillna(0).astype('category')
    merged = merged.drop(columns=['Tatil Adı'])

    # The original code first computed is_Bayram with an element-wise lambda and
    # then immediately overwrote it with this cast; only the cast ever took effect.
    merged['is_Bayram'] = merged['Bayram_Flag'].astype(bool)
    merged['ilce'] = merged['ilce'].astype('category')
    return merged


#Train
merged_train_df = _attach_holiday_flags(train_df, holidays_df)

#Test
merged_test_df = _attach_holiday_flags(test_df, holidays_df)
#weather op
def mode(values):
    """Return the first mode of ``values``, or NaN when there is none.

    ``Series.mode()`` returns an empty Series when every value is missing, so
    indexing it with ``[0]`` raises. Returning NaN instead keeps the daily
    aggregation from failing on such a group.

    The function is deliberately named ``mode``: pandas uses the callable's name
    as the second level of the aggregated column index, which yields the
    ``<column>_mode`` names directly (the lambda version needed a rename from
    ``<lambda_0>``).
    """
    modes = values.mode()
    return modes.iloc[0] if len(modes) else float('nan')


# Collapse the weather rows to one row per (ilce, calendar day).
daily_groups = weather_df.groupby(['ilce', pd.Grouper(freq='D', key='tarih')])

daily_df = daily_groups.agg({
    't_2m:C': ['max', 'min'],  # temperature
    'prob_precip_1h:p': ['sum', 'max', 'mean', mode],  # precipitation
    'wind_speed_10m:ms': ['max', 'mean', 'std', mode],  # wind speed
    'wind_dir_10m:d': 'mean',  # wind direction
    'global_rad:W': 'sum',  # daily total of global radiation
    'effective_cloud_cover:p': ['mean', 'std'],  # cloud cover
    'relative_humidity_2m:p': ['max', 'min', mode]  # humidity
})

# Flatten the two-level (source column, statistic) header into "<column>_<statistic>".
daily_df.columns = ['_'.join(col).strip() for col in daily_df.columns.values]
daily_df = daily_df.reset_index()
# Lower-case the district names before they are used as a merge key.
daily_df['ilce'] = daily_df['ilce'].str.lower()
weather_df = daily_df

#merging all
# Inner-join the daily weather onto each table by (date, district); rows without
# a weather match are dropped. Then restore the categorical dtype of `ilce`,
# remove the helper `index` column and order the rows by date and district.
merge_keys = ['tarih', 'ilce']

merged_test_df = (
    pd.merge(weather_df, merged_test_df, on=merge_keys, how='inner')
    .astype({'ilce': 'category'})
    .drop(columns=['index'])
    .sort_values(by=merge_keys)
)

merged_train_df = (
    pd.merge(weather_df, merged_train_df, on=merge_keys, how='inner')
    .astype({'ilce': 'category'})
    .drop(columns=['index'])
    .sort_values(by=merge_keys)
)

Run the cell below (after uncommenting it) to export the merged train and merged test tables. Note that it writes Excel (.xlsx) files, not CSV.

In [29]:
# merged_train_df.to_excel("./merged_df/merged_train.xlsx", index=False)
# merged_test_df.to_excel("./merged_df/merged_test.xlsx", index=False)

In [30]:
merged_test_df.info()

# Keep only the training rows whose target `bildirimsiz_sum` is at most 60.
# NOTE(review): despite its name, `normalized_df` is a filtered frame, not a
# rescaled one; the threshold 60 is hard-coded here.
within_cap = merged_train_df['bildirimsiz_sum'].le(60)
normalized_df = merged_train_df[within_cap]

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1363 entries, 0 to 1362
Data columns (total 22 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   ilce                          1363 non-null   category      
 1   tarih                         1363 non-null   datetime64[ns]
 2   t_2m:C_max                    1363 non-null   float64       
 3   t_2m:C_min                    1363 non-null   float64       
 4   prob_precip_1h:p_sum          1363 non-null   float64       
 5   prob_precip_1h:p_max          1363 non-null   float64       
 6   prob_precip_1h:p_mean         1363 non-null   float64       
 7   prob_precip_1h:p_mode         1363 non-null   float64       
 8   wind_speed_10m:ms_max         1363 non-null   float64       
 9   wind_speed_10m:ms_mean        1363 non-null   float64       
 10  wind_speed_10m:ms_std         1363 non-null   float64       
 11  wind_speed_10m:ms_mode        

CNN

In [31]:
# Model inputs: daily weather aggregates plus the holiday indicator.
independent_variables = [
    't_2m:C_max',
    't_2m:C_min',
    'prob_precip_1h:p_sum',
    'wind_speed_10m:ms_max',
    'wind_speed_10m:ms_mean',
    'wind_speed_10m:ms_std',
    'wind_speed_10m:ms_mode',
    'global_rad:W_sum',
    'relative_humidity_2m:p_max',
    'relative_humidity_2m:p_min',
    'relative_humidity_2m:p_mode',
    'is_Bayram',
]
# Model target.
dependent_variables = ['bildirimsiz_sum']

# Standardize the features: the scaler is fitted on the training rows only and
# the same fitted scaler is then applied to the test rows.
scaler_standard = StandardScaler()
train_feature_frame = normalized_df[independent_variables]
test_feature_frame = merged_test_df[independent_variables]

train_independent_variables_standardized = scaler_standard.fit_transform(train_feature_frame)
train_dependent_variables = normalized_df[dependent_variables]
test_independent_variables_standardized = scaler_standard.transform(test_feature_frame)

In [32]:
# Baseline CNN: one Conv1D + max-pool stage over the feature axis, followed by a
# small dense head with a single linear output, trained on mean absolute error.
# NOTE(review): the declared input is (n_features, 1) while the standardized
# arrays are 2-D (samples, n_features); this presumably relies on Keras adding
# the trailing channel axis - confirm.
n_features = train_independent_variables_standardized.shape[1]

model = Sequential([
    Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(n_features, 1)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(1, activation='linear'),
])
model.compile(optimizer='adam', loss='mae', metrics=['mae'])
model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [33]:
# Fit the baseline model; `history` keeps the per-epoch metrics.
# Keras takes the validation split from the *last* 20% of the rows passed in,
# and the training frame was sorted by date earlier in the notebook.
history = model.fit(
    x=train_independent_variables_standardized,
    y=train_dependent_variables,
    batch_size=32,
    epochs=15,
    validation_split=0.2,
)

Epoch 1/15
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - loss: 29.1556 - mae: 3.7982 - val_loss: 28.9068 - val_mae: 3.9994
Epoch 2/15
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - loss: 27.0276 - mae: 3.6851 - val_loss: 28.3053 - val_mae: 3.8648
Epoch 3/15
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 27.3691 - mae: 3.7135 - val_loss: 28.0046 - val_mae: 3.8150
Epoch 4/15
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - loss: 26.3740 - mae: 3.6523 - val_loss: 27.6326 - val_mae: 3.7691
Epoch 5/15
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 5ms/step - loss: 26.9061 - mae: 3.6502 - val_loss: 27.9313 - val_mae: 3.7084
Epoch 6/15
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - loss: 27.2186 - mae: 3.6579 - val_loss: 27.6434 - val_mae: 3.7107
Epoch 7/15
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [34]:
# Score the standardized test features with the baseline model.
# `predictions` is reassigned by the later prediction cells.
predictions = model.predict(x=test_independent_variables_standardized)


[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


In [35]:
# Deeper variant: the first Conv1D + max-pool stage is followed by ten more
# same-padded Conv1D layers and a wider dense head.
# NOTE(review): learning_rate=0.1 is far above Adam's usual default; the logged
# validation MAE stays at about 3.69 from the first epoch on - consider lowering it.
n_features = train_independent_variables_standardized.shape[1]

model1 = Sequential(
    [
        Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(n_features, 1)),
        MaxPooling1D(pool_size=2),
    ]
    + [
        Conv1D(filters=32, kernel_size=3, activation='relu', padding='same')
        for _ in range(10)
    ]
    + [
        Flatten(),
        Dense(128, activation='relu'),
        Dense(1, activation='linear'),
    ]
)
model1.compile(optimizer=Adam(learning_rate=0.1), loss='mae', metrics=['mae'])
model1.summary()
model1.fit(
    x=train_independent_variables_standardized,
    y=train_dependent_variables,
    batch_size=32,
    epochs=25,
    validation_split=0.2,
)


Epoch 1/25
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 13ms/step - loss: 22.6062 - mae: 22.6062 - val_loss: 3.6931 - val_mae: 3.6931
Epoch 2/25
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 11ms/step - loss: 3.6342 - mae: 3.6342 - val_loss: 3.6935 - val_mae: 3.6935
Epoch 3/25
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 11ms/step - loss: 3.5705 - mae: 3.5705 - val_loss: 3.6987 - val_mae: 3.6987
Epoch 4/25
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 10ms/step - loss: 3.6067 - mae: 3.6067 - val_loss: 3.6964 - val_mae: 3.6964
Epoch 5/25
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 11ms/step - loss: 3.6335 - mae: 3.6335 - val_loss: 3.6885 - val_mae: 3.6885
Epoch 6/25
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 11ms/step - loss: 3.5835 - mae: 3.5835 - val_loss: 3.6877 - val_mae: 3.6877
Epoch 7/25
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

<keras.src.callbacks.history.History at 0x2a821948350>

In [36]:
# Score the standardized test features with the deeper model (replaces the earlier `predictions`).
predictions = model1.predict(x=test_independent_variables_standardized)

[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step


In [41]:
# Wide variant: two Conv1D(256, kernel 9) + max-pool stages and a 256-unit dense
# head, trained with a very small learning rate.
model2 = Sequential()
model2.add(Conv1D(filters=256, kernel_size=9, activation='relu', input_shape=(train_independent_variables_standardized.shape[1], 1)))
model2.add(MaxPooling1D(pool_size=2))

model2.add(Conv1D(filters=256, kernel_size=9, activation='relu', padding='same'))
model2.add(MaxPooling1D(pool_size=2))

model2.add(Flatten())
model2.add(Dense(256, activation='relu'))
model2.add(Dense(1, activation='linear'))

model2.compile(optimizer=Adam(learning_rate=0.00001), loss='mae', metrics=['mae'])

# epochs=10000 is only an upper bound. Without a stopping rule the run had to be
# interrupted by hand, which leaves the model in whatever state the last batch
# produced. Stop once validation loss has not improved for 20 epochs and roll
# back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

model2.fit(
    train_independent_variables_standardized,
    train_dependent_variables,
    epochs=10000,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
)

Epoch 1/10000
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 31ms/step - loss: 4.8915 - mae: 4.8915 - val_loss: 3.6401 - val_mae: 3.6401
Epoch 2/10000
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 31ms/step - loss: 3.6211 - mae: 3.6211 - val_loss: 3.6270 - val_mae: 3.6270
Epoch 3/10000
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 32ms/step - loss: 3.5789 - mae: 3.5789 - val_loss: 3.6169 - val_mae: 3.6169
Epoch 4/10000
[1m1204/1204[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 34ms/step - loss: 3.5742 - mae: 3.5742 - val_loss: 3.6076 - val_mae: 3.6076
Epoch 5/10000
[1m 444/1204[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m23s[0m 31ms/step - loss: 3.5157 - mae: 3.5157

KeyboardInterrupt: 

In [38]:
# Score the standardized test features with the wide model and display the result;
# these are the predictions written to the submission file below.
predictions = model2.predict(x=test_independent_variables_standardized)
predictions

[1m43/43[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step


array([[0.10946447],
       [6.7712708 ],
       [4.230462  ],
       ...,
       [4.273994  ],
       [5.014435  ],
       [5.4397564 ]], dtype=float32)

In [39]:
# Fill the sample submission with the latest `predictions` and write it out.
# NOTE(review): the assignment is positional. `predictions` follows the row order
# of merged_test_df (inner-joined with weather, then sorted by tarih and ilce) -
# confirm that sample_submission.csv has the same rows in the same order.
sample_submission = pd.read_csv('./sample_submission.csv')
submission = sample_submission.assign(bildirimsiz_sum=predictions)
submission.to_csv("cnn_submission.csv", index=False)