In [67]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.exceptions import ConvergenceWarning
from sklearn.tree import DecisionTreeRegressor
from xgboost import XGBRegressor
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, cross_val_score,GridSearchCV

warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore")


pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
pd.set_option('display.float_format', lambda x: '%.3f' % x)



In [68]:
holidays = pd.read_csv("/content/holidays.csv")
holidays.head()

Unnamed: 0,Yıl,Ay,Gün,Tatil Adı
0,2021,1,1,New Year's Day
1,2021,4,23,National Sovereignty and Children's Day
2,2021,5,1,Labour Day
3,2021,5,19,"Commemoration of Ataturk, Youth and Sports Day"
4,2021,7,15,Democracy and National Unity Day


In [69]:
# "Gün", "Ay" ve "Yıl" sütunlarını birleştirme
holidays['tarih'] = holidays['Yıl'].astype(str) + '-' + holidays['Ay'].astype(str) + '-' + holidays['Gün'].astype(str)


# Sonucu kontrol etme
print(holidays.head())

    Yıl  Ay  Gün                                       Tatil Adı      tarih
0  2021   1    1                                  New Year's Day   2021-1-1
1  2021   4   23         National Sovereignty and Children's Day  2021-4-23
2  2021   5    1                                      Labour Day   2021-5-1
3  2021   5   19  Commemoration of Ataturk, Youth and Sports Day  2021-5-19
4  2021   7   15                Democracy and National Unity Day  2021-7-15


In [70]:
holidays.drop(columns=["Yıl", "Ay", "Gün"], inplace=True)

# Sonucu kontrol etme
print(holidays.head())

                                        Tatil Adı      tarih
0                                  New Year's Day   2021-1-1
1         National Sovereignty and Children's Day  2021-4-23
2                                      Labour Day   2021-5-1
3  Commemoration of Ataturk, Youth and Sports Day  2021-5-19
4                Democracy and National Unity Day  2021-7-15


In [71]:
holidays['tarih'] = pd.to_datetime(holidays['tarih'])


In [72]:
weather= pd.read_csv("/content/weather.csv")
weather.head()

Unnamed: 0,date,lat,lon,t_2m:C,effective_cloud_cover:p,global_rad:W,relative_humidity_2m:p,wind_dir_10m:d,wind_speed_10m:ms,prob_precip_1h:p,t_apparent:C,name
0,2021-01-01 00:00:00,38.618,28.671,7.6,10.5,0.0,86.1,173.3,2.6,1.0,5.9,Manisa-Ahmetli
1,2021-01-01 01:00:00,38.618,28.671,7.4,6.8,0.0,88.0,174.5,2.7,1.0,5.7,Manisa-Ahmetli
2,2021-01-01 02:00:00,38.618,28.671,7.3,4.3,0.0,90.0,177.5,2.8,1.0,5.4,Manisa-Ahmetli
3,2021-01-01 03:00:00,38.618,28.671,7.2,8.9,0.0,90.8,175.3,2.7,1.0,5.3,Manisa-Ahmetli
4,2021-01-01 04:00:00,38.618,28.671,8.0,22.1,0.0,89.7,172.1,2.5,1.0,6.5,Manisa-Ahmetli


In [73]:
weather.shape

(1302840, 12)

In [74]:
weather[['tarih', 'saat']] = weather['date'].str.split(' ', expand=True)

# 'il_ilce' sütununu artık kullanmayacağımız için silelim
weather.drop(columns=['date'], inplace=True)


weather['tarih'] = pd.to_datetime(weather['tarih'])

print(weather.head())

     lat    lon  t_2m:C  effective_cloud_cover:p  global_rad:W  relative_humidity_2m:p  \
0 38.618 28.671   7.600                   10.500         0.000                  86.100   
1 38.618 28.671   7.400                    6.800         0.000                  88.000   
2 38.618 28.671   7.300                    4.300         0.000                  90.000   
3 38.618 28.671   7.200                    8.900         0.000                  90.800   
4 38.618 28.671   8.000                   22.100         0.000                  89.700   

   wind_dir_10m:d  wind_speed_10m:ms  prob_precip_1h:p  t_apparent:C            name      tarih  \
0         173.300              2.600             1.000         5.900  Manisa-Ahmetli 2021-01-01   
1         174.500              2.700             1.000         5.700  Manisa-Ahmetli 2021-01-01   
2         177.500              2.800             1.000         5.400  Manisa-Ahmetli 2021-01-01   
3         175.300              2.700             1.000         

In [75]:
weather.drop(columns=['saat'], inplace=True)


In [77]:
weather = weather.groupby(['tarih',"name"]).mean()
weather.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,t_2m:C,effective_cloud_cover:p,global_rad:W,relative_humidity_2m:p,wind_dir_10m:d,wind_speed_10m:ms,prob_precip_1h:p,t_apparent:C
tarih,name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2021-01-01,Izmir-Aliaga,38.8,26.971,13.275,55.046,73.021,85.992,167.204,4.267,1.0,13.929
2021-01-01,Izmir-Balcova,38.389,27.05,13.088,60.45,65.987,88.167,145.367,3.279,1.287,13.858
2021-01-01,Izmir-Bayindir,38.218,27.648,11.333,66.0,63.304,89.246,58.483,1.667,1.0,12.033
2021-01-01,Izmir-Bayrakli,38.461,27.188,12.742,58.254,66.446,89.212,141.758,2.646,1.217,13.592
2021-01-01,Izmir-Bergama,39.121,27.18,11.842,63.004,64.458,87.283,186.417,1.696,1.0,12.583


In [78]:
weather.reset_index(inplace=True)

print(weather.head())

       tarih            name    lat    lon  t_2m:C  effective_cloud_cover:p  global_rad:W  \
0 2021-01-01    Izmir-Aliaga 38.800 26.971  13.275                   55.046        73.021   
1 2021-01-01   Izmir-Balcova 38.389 27.050  13.088                   60.450        65.987   
2 2021-01-01  Izmir-Bayindir 38.218 27.648  11.333                   66.000        63.304   
3 2021-01-01  Izmir-Bayrakli 38.461 27.188  12.742                   58.254        66.446   
4 2021-01-01   Izmir-Bergama 39.121 27.180  11.842                   63.004        64.458   

   relative_humidity_2m:p  wind_dir_10m:d  wind_speed_10m:ms  prob_precip_1h:p  t_apparent:C  
0                  85.992         167.204              4.267             1.000        13.929  
1                  88.167         145.367              3.279             1.287        13.858  
2                  89.246          58.483              1.667             1.000        12.033  
3                  89.212         141.758              2.646 

In [79]:
weather['unique_id'] = weather['tarih'].astype(str) + '-' + weather['name']


In [81]:
weather.drop(columns=["name","tarih"	], inplace=True)


In [83]:
weather['unique_id'] = weather['unique_id'].str.lower()


In [84]:

weather.head()

Unnamed: 0,lat,lon,t_2m:C,effective_cloud_cover:p,global_rad:W,relative_humidity_2m:p,wind_dir_10m:d,wind_speed_10m:ms,prob_precip_1h:p,t_apparent:C,unique_id
0,38.8,26.971,13.275,55.046,73.021,85.992,167.204,4.267,1.0,13.929,2021-01-01-izmir-aliaga
1,38.389,27.05,13.088,60.45,65.987,88.167,145.367,3.279,1.287,13.858,2021-01-01-izmir-balcova
2,38.218,27.648,11.333,66.0,63.304,89.246,58.483,1.667,1.0,12.033,2021-01-01-izmir-bayindir
3,38.461,27.188,12.742,58.254,66.446,89.212,141.758,2.646,1.217,13.592,2021-01-01-izmir-bayrakli
4,39.121,27.18,11.842,63.004,64.458,87.283,186.417,1.696,1.0,12.583,2021-01-01-izmir-bergama


In [85]:
train = pd.read_csv("/content/train.csv")
train.head()

Unnamed: 0,tarih,ilce,bildirimsiz_sum,bildirimli_sum
0,2021-01-01,izmir-aliaga,5,0
1,2021-01-02,izmir-aliaga,13,0
2,2021-01-03,izmir-aliaga,4,0
3,2021-01-04,izmir-aliaga,9,0
4,2021-01-05,izmir-aliaga,2,0


In [86]:

def check_df(dataframe):
    print("##################### Shape #####################")
    print(dataframe.shape)
    print("##################### Types #####################")
    print(dataframe.dtypes)
    print("##################### Head #####################")
    print(dataframe.head(3))
    print("##################### Tail #####################")
    print(dataframe.tail(3))
    print("##################### NA #####################")
    print(dataframe.isnull().sum())
check_df(train)


##################### Shape #####################
(48148, 4)
##################### Types #####################
tarih              object
ilce               object
bildirimsiz_sum     int64
bildirimli_sum      int64
dtype: object
##################### Head #####################
        tarih          ilce  bildirimsiz_sum  bildirimli_sum
0  2021-01-01  izmir-aliaga                5               0
1  2021-01-02  izmir-aliaga               13               0
2  2021-01-03  izmir-aliaga                4               0
##################### Tail #####################
            tarih            ilce  bildirimsiz_sum  bildirimli_sum
48145  2024-01-25  manisa-ahmetli                2               0
48146  2024-01-26  manisa-ahmetli                2               0
48147  2024-01-29  manisa-ahmetli                0               1
##################### NA #####################
tarih              0
ilce               0
bildirimsiz_sum    0
bildirimli_sum     0
dtype: int64


In [23]:
test = pd.read_csv("/content/test.csv")
test.head()

Unnamed: 0,tarih,ilce,bildirimli_sum
0,2024-02-01,izmir-aliaga,0
1,2024-02-01,izmir-bayindir,1
2,2024-02-01,izmir-bayrakli,0
3,2024-02-01,izmir-bergama,1
4,2024-02-01,izmir-bornova,1


In [24]:

def check_df(dataframe):
    print("##################### Shape #####################")
    print(dataframe.shape)
    print("##################### Types #####################")
    print(dataframe.dtypes)
    print("##################### Head #####################")
    print(dataframe.head(3))
    print("##################### Tail #####################")
    print(dataframe.tail(3))
    print("##################### NA #####################")
    print(dataframe.isnull().sum())
check_df(test)

##################### Shape #####################
(1363, 3)
##################### Types #####################
tarih             object
ilce              object
bildirimli_sum     int64
dtype: object
##################### Head #####################
        tarih            ilce  bildirimli_sum
0  2024-02-01    izmir-aliaga               0
1  2024-02-01  izmir-bayindir               1
2  2024-02-01  izmir-bayrakli               0
##################### Tail #####################
           tarih            ilce  bildirimli_sum
1360  2024-02-29    izmir-selcuk               0
1361  2024-02-29     manisa-kula               0
1362  2024-02-29  manisa-ahmetli               0
##################### NA #####################
tarih             0
ilce              0
bildirimli_sum    0
dtype: int64


In [89]:
df = pd.merge(train, test, on=['tarih', 'ilce',"bildirimli_sum"], how='outer')

print(df)

            tarih             ilce  bildirimsiz_sum  bildirimli_sum
0      2021-01-01     izmir-aliaga            5.000               0
1      2021-01-02     izmir-aliaga           13.000               0
2      2021-01-03     izmir-aliaga            4.000               0
3      2021-01-04     izmir-aliaga            9.000               0
4      2021-01-05     izmir-aliaga            2.000               0
...           ...              ...              ...             ...
49506  2024-02-29     izmir-beydag              NaN               0
49507  2024-02-29  izmir-narlidere              NaN               0
49508  2024-02-29     izmir-selcuk              NaN               0
49509  2024-02-29      manisa-kula              NaN               0
49510  2024-02-29   manisa-ahmetli              NaN               0

[49511 rows x 4 columns]


In [90]:
df['unique_id'] = df['tarih'] + '-' + df['ilce']

# 'tarih' ve 'ilce' sütunlarını ve diğer sütunları seçme

print(df.head())

        tarih          ilce  bildirimsiz_sum  bildirimli_sum                unique_id
0  2021-01-01  izmir-aliaga            5.000               0  2021-01-01-izmir-aliaga
1  2021-01-02  izmir-aliaga           13.000               0  2021-01-02-izmir-aliaga
2  2021-01-03  izmir-aliaga            4.000               0  2021-01-03-izmir-aliaga
3  2021-01-04  izmir-aliaga            9.000               0  2021-01-04-izmir-aliaga
4  2021-01-05  izmir-aliaga            2.000               0  2021-01-05-izmir-aliaga


In [91]:
df[['yıl', 'ay',"gün"]] = df['tarih'].str.split('-', expand=True)
df.drop(columns=['gün'], inplace=True)

print(df)

            tarih             ilce  bildirimsiz_sum  bildirimli_sum                   unique_id  \
0      2021-01-01     izmir-aliaga            5.000               0     2021-01-01-izmir-aliaga   
1      2021-01-02     izmir-aliaga           13.000               0     2021-01-02-izmir-aliaga   
2      2021-01-03     izmir-aliaga            4.000               0     2021-01-03-izmir-aliaga   
3      2021-01-04     izmir-aliaga            9.000               0     2021-01-04-izmir-aliaga   
4      2021-01-05     izmir-aliaga            2.000               0     2021-01-05-izmir-aliaga   
...           ...              ...              ...             ...                         ...   
49506  2024-02-29     izmir-beydag              NaN               0     2024-02-29-izmir-beydag   
49507  2024-02-29  izmir-narlidere              NaN               0  2024-02-29-izmir-narlidere   
49508  2024-02-29     izmir-selcuk              NaN               0     2024-02-29-izmir-selcuk   
49509  202

In [92]:
df['ay'] = df['ay'].astype(int)
# 'mevsim' sütununu oluşturma ve varsayılan olarak 0 atama
df['mevsim'] = 0

# Koşullu atamaları yapma
df.loc[df['ay'] <= 3, 'mevsim'] = 'kış'
df.loc[(df['ay'] >= 3) & (df['ay'] < 6), 'mevsim'] = 'ilkbahar'
df.loc[(df['ay'] >= 6) & (df['ay'] < 9), 'mevsim'] = 'yaz'
df.loc[(df['ay'] >= 9) & (df['ay'] < 12), 'mevsim'] = 'sonbahar'
df.loc[df['ay'] == 12, 'mevsim'] = 'kış'

# DataFrame'i yazdırma
print(df.head())

        tarih          ilce  bildirimsiz_sum  bildirimli_sum                unique_id   yıl  ay  \
0  2021-01-01  izmir-aliaga            5.000               0  2021-01-01-izmir-aliaga  2021   1   
1  2021-01-02  izmir-aliaga           13.000               0  2021-01-02-izmir-aliaga  2021   1   
2  2021-01-03  izmir-aliaga            4.000               0  2021-01-03-izmir-aliaga  2021   1   
3  2021-01-04  izmir-aliaga            9.000               0  2021-01-04-izmir-aliaga  2021   1   
4  2021-01-05  izmir-aliaga            2.000               0  2021-01-05-izmir-aliaga  2021   1   

  mevsim  
0    kış  
1    kış  
2    kış  
3    kış  
4    kış  


In [93]:
df['ay'] = df['ay'].astype(object)

In [94]:
df['tarih'] = pd.to_datetime(df['tarih'])

In [95]:
df_merged = pd.merge(df, weather, on="unique_id", how='outer')

df_merged

Unnamed: 0,tarih,ilce,bildirimsiz_sum,bildirimli_sum,unique_id,yıl,ay,mevsim,lat,lon,t_2m:C,effective_cloud_cover:p,global_rad:W,relative_humidity_2m:p,wind_dir_10m:d,wind_speed_10m:ms,prob_precip_1h:p,t_apparent:C
0,2021-01-01,izmir-aliaga,5.000,0.000,2021-01-01-izmir-aliaga,2021,1,kış,38.800,26.971,13.275,55.046,73.021,85.992,167.204,4.267,1.000,13.929
1,2021-01-02,izmir-aliaga,13.000,0.000,2021-01-02-izmir-aliaga,2021,1,kış,38.800,26.971,12.871,36.125,92.858,82.138,106.650,2.892,1.000,13.646
2,2021-01-03,izmir-aliaga,4.000,0.000,2021-01-03-izmir-aliaga,2021,1,kış,38.800,26.971,12.633,64.871,39.267,79.121,122.521,3.438,8.429,12.846
3,2021-01-04,izmir-aliaga,9.000,0.000,2021-01-04-izmir-aliaga,2021,1,kış,38.800,26.971,13.733,47.783,78.946,70.312,123.408,4.608,1.275,13.775
4,2021-01-05,izmir-aliaga,2.000,0.000,2021-01-05-izmir-aliaga,2021,1,kış,38.800,26.971,14.375,30.379,95.717,75.221,195.721,3.992,10.721,14.900
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54280,NaT,,,,2024-01-31-manisa-ahmetli,,,,38.618,28.671,2.842,2.883,143.171,64.167,70.100,2.471,1.000,2.167
54281,NaT,,,,2024-01-31-manisa-demirci,,,,39.047,28.659,-0.379,1.121,143.575,74.821,23.571,2.962,1.000,-2.442
54282,NaT,,,,2024-01-31-manisa-golmarmara,,,,38.708,27.917,4.808,11.846,134.479,71.671,310.429,2.150,1.000,4.467
54283,NaT,,,,2024-01-31-manisa-kirkagac,,,,39.105,27.673,4.529,60.762,118.633,73.746,13.708,3.983,1.000,2.571


In [96]:
df_merged.isnull().sum()

Unnamed: 0,0
tarih,4774
ilce,4774
bildirimsiz_sum,6137
bildirimli_sum,4774
unique_id,0
yıl,4774
ay,4774
mevsim,4774
lat,0
lon,0


In [97]:
df=df_merged

In [98]:
df.head()

Unnamed: 0,tarih,ilce,bildirimsiz_sum,bildirimli_sum,unique_id,yıl,ay,mevsim,lat,lon,t_2m:C,effective_cloud_cover:p,global_rad:W,relative_humidity_2m:p,wind_dir_10m:d,wind_speed_10m:ms,prob_precip_1h:p,t_apparent:C
0,2021-01-01,izmir-aliaga,5.0,0.0,2021-01-01-izmir-aliaga,2021,1,kış,38.8,26.971,13.275,55.046,73.021,85.992,167.204,4.267,1.0,13.929
1,2021-01-02,izmir-aliaga,13.0,0.0,2021-01-02-izmir-aliaga,2021,1,kış,38.8,26.971,12.871,36.125,92.858,82.138,106.65,2.892,1.0,13.646
2,2021-01-03,izmir-aliaga,4.0,0.0,2021-01-03-izmir-aliaga,2021,1,kış,38.8,26.971,12.633,64.871,39.267,79.121,122.521,3.438,8.429,12.846
3,2021-01-04,izmir-aliaga,9.0,0.0,2021-01-04-izmir-aliaga,2021,1,kış,38.8,26.971,13.733,47.783,78.946,70.312,123.408,4.608,1.275,13.775
4,2021-01-05,izmir-aliaga,2.0,0.0,2021-01-05-izmir-aliaga,2021,1,kış,38.8,26.971,14.375,30.379,95.717,75.221,195.721,3.992,10.721,14.9


In [101]:
df.dropna(subset=["bildirimli_sum"], inplace=True)

In [102]:
df = pd.merge(holidays, df, on="tarih", how='outer')

print(df)

            Tatil Adı      tarih             ilce  bildirimsiz_sum  bildirimli_sum  \
0      New Year's Day 2021-01-01     izmir-aliaga            5.000           0.000   
1      New Year's Day 2021-01-01   izmir-bayindir            3.000           0.000   
2      New Year's Day 2021-01-01   izmir-bayrakli            7.000           0.000   
3      New Year's Day 2021-01-01    izmir-bergama            2.000           0.000   
4      New Year's Day 2021-01-01    izmir-bornova            9.000           0.000   
...               ...        ...              ...              ...             ...   
49519             NaN 2024-02-29     izmir-beydag              NaN           0.000   
49520             NaN 2024-02-29  izmir-narlidere              NaN           0.000   
49521             NaN 2024-02-29     izmir-selcuk              NaN           0.000   
49522             NaN 2024-02-29      manisa-kula              NaN           0.000   
49523             NaN 2024-02-29   manisa-ahmetli     

In [103]:
df['tarih'] = pd.to_datetime(df['tarih'])


In [105]:
df[['il', 'ilce_']] = df['ilce'].str.split('-', expand=True)


In [107]:
df.drop(columns=["tarih",	"ilce"	], inplace=True)


In [111]:
df.dropna(subset=["bildirimli_sum"], inplace=True)

In [112]:
# Rüzgar hızını "m/s" cinsinden "km/s" cinsine dönüştür
df['wind_speed_10m:km/s'] = df['wind_speed_10m:ms'] / 1000

# Hissedilen sıcaklık formülünü kullanarak hissedilen sıcaklık değerlerini hesapla
df['hissedilen_sıcaklık:C'] = 13.12 + 0.6215 * df['t_2m:C'] - 11.37 * (df['wind_speed_10m:km/s'] ** 0.16) + 0.3965 * df['t_2m:C'] * (df['wind_speed_10m:km/s'] ** 0.16)
df['hissedilen_sıcaklık:C'] = df['hissedilen_sıcaklık:C'].round(2)  # İstenirse virgülden sonra iki basamak olacak şekilde yuvarlayabiliriz


In [113]:
df['koordinat'] = df['lat'].astype(str) + "-" + df['lon'].astype(str)
df.drop(columns=['lat',"lon"], inplace=True)


In [114]:
df['koordinat'] = df['koordinat'].astype(object)


In [118]:
column = "Tatil Adı"

# Boş değerleri 0 ile doldurun
df[column].fillna(0, inplace=True)

In [121]:
df['sicaklik_farki'] = df['t_2m:C'] - df['hissedilen_sıcaklık:C']
df['yagisli_gun'] = (df['effective_cloud_cover:p'] > 0.5) | (df['prob_precip_1h:p'] > 0.3)
df['nem_durumu'] = pd.cut(df['relative_humidity_2m:p'], bins=[0, 30, 70, 100], labels=['düşük', 'orta', 'yüksek'], right=False)
df['gunesli_gun'] = df['global_rad:W'] > 200
df['sicaklik_durumu'] = pd.cut(df['t_2m:C'], bins=[-float('inf'), 10, 20, 25, float('inf')], labels=['çok soğuk', 'soğuk', 'ılık', 'çok sıcak'])
df['ruzgar_durumu'] = pd.cut(df['wind_speed_10m:ms'], bins=[-float('inf'), 10, 20, float('inf')], labels=['hafif rüzgar', 'orta şiddetli rüzgar', 'şiddetli rüzgar'])


In [122]:

def grab_col_names(dataframe, cat_th=10, car_th=47):


    # cat_cols, cat_but_car
    cat_cols = [col for col in dataframe.columns if dataframe[col].dtypes == "O"]
    num_but_cat = [col for col in dataframe.columns if dataframe[col].nunique() < cat_th and
                   dataframe[col].dtypes != "O"]
    cat_but_car = [col for col in dataframe.columns if dataframe[col].nunique() > car_th and
                   dataframe[col].dtypes == "O"]
    cat_cols = cat_cols + num_but_cat
    cat_cols = [col for col in cat_cols if col not in cat_but_car]

    # num_cols
    num_cols = [col for col in dataframe.columns if dataframe[col].dtypes != "O"]
    num_cols = [col for col in num_cols if col not in num_but_cat]

    print(f"Observations: {dataframe.shape[0]}")
    print(f"Variables: {dataframe.shape[1]}")
    print(f'cat_cols: {len(cat_cols)}')
    print(f'num_cols: {len(num_cols)}')
    print(f'cat_but_car: {len(cat_but_car)}')
    print(f'num_but_cat: {len(num_but_cat)}')
    return cat_cols, num_cols, cat_but_car

cat_cols, num_cols, cat_but_car = grab_col_names(df)

Observations: 49511
Variables: 26
cat_cols: 12
num_cols: 13
cat_but_car: 1
num_but_cat: 5


In [124]:
def label_encoder(dataframe, binary_col):
    labelencoder = LabelEncoder()
    dataframe[binary_col] = labelencoder.fit_transform(dataframe[binary_col])
    return dataframe

binary_cols = [col for col in df.columns if df[col].dtypes == "O" and len(df[col].unique()) == 2]

for col in binary_cols:
    label_encoder(df, col)

In [126]:

def label_encoder(dataframe, binary_col):
    labelencoder = LabelEncoder()
    dataframe[binary_col] = labelencoder.fit_transform(dataframe[binary_col])
    return dataframe

binary_cols = [col for col in df.columns if df[col].dtypes == "O" and len(df[col].unique()) == 2]

for col in binary_cols:
    label_encoder(df, col)



In [127]:


def one_hot_encoder(dataframe, cat_cols, drop_first=True):
    dataframe = pd.get_dummies(dataframe, columns=cat_cols, drop_first=drop_first)
    return dataframe

df = one_hot_encoder(df, cat_cols, drop_first=True)
df.head()

Unnamed: 0,bildirimsiz_sum,bildirimli_sum,unique_id,t_2m:C,effective_cloud_cover:p,global_rad:W,relative_humidity_2m:p,wind_dir_10m:d,wind_speed_10m:ms,prob_precip_1h:p,t_apparent:C,wind_speed_10m:km/s,hissedilen_sıcaklık:C,sicaklik_farki,"Tatil Adı_Commemoration of Ataturk, Youth and Sports Day",Tatil Adı_Democracy and National Unity Day,Tatil Adı_Labour Day,Tatil Adı_National Sovereignty and Children's Day,Tatil Adı_National Sovereignty and Children's Day; Ramadan Feast Holiday* (*estimated),Tatil Adı_New Year's Day,Tatil Adı_Ramadan Feast Holiday* (*estimated),Tatil Adı_Ramadan Feast* (*estimated),Tatil Adı_Republic Day,Tatil Adı_Sacrifice Feast Holiday* (*estimated),Tatil Adı_Sacrifice Feast* (*estimated),Tatil Adı_Victory Day,yıl_2022,yıl_2023,yıl_2024,ay_2,ay_3,ay_4,ay_5,ay_6,ay_7,ay_8,ay_9,ay_10,ay_11,ay_12,mevsim_kış,mevsim_sonbahar,mevsim_yaz,il_1,ilce__akhisar,ilce__alasehir,ilce__aliaga,ilce__balcova,ilce__bayindir,ilce__bayrakli,ilce__bergama,ilce__beydag,ilce__bornova,ilce__buca,ilce__cesme,ilce__cigli,ilce__demirci,ilce__dikili,ilce__foca,ilce__gaziemir,ilce__golmarmara,ilce__gordes,ilce__guzelbahce,ilce__karabaglar,ilce__karaburun,ilce__karsiyaka,ilce__kemalpasa,ilce__kinik,ilce__kiraz,ilce__kirkagac,ilce__konak,ilce__koprubasi,ilce__kula,ilce__menderes,ilce__menemen,ilce__narlidere,ilce__odemis,ilce__salihli,ilce__sarigol,ilce__saruhanli,ilce__seferihisar,ilce__sehzadeler,ilce__selcuk,ilce__selendi,ilce__soma,ilce__tire,ilce__torbali,ilce__turgutlu,ilce__urla,ilce__yunusemre,koordinat_38.0847-28.2106,koordinat_38.0895-27.731800000000003,koordinat_38.1558-27.3646,koordinat_38.1952-26.8344,koordinat_38.2178-27.6478,koordinat_38.2283-27.9748,koordinat_38.2302-28.2064,koordinat_38.2409-28.6949,koordinat_38.2517-27.1327,koordinat_38.3243-26.3032,koordinat_38.325-26.7668,koordinat_38.3253-27.1219,koordinat_38.3545-28.516499999999997,koordinat_38.3626-26.882500000000004,koordinat_38.3855-27.1747,koordinat_38.3891-27.05,koordinat_38.3967-26.997,koordinat_38.3968-27.1307,koordinat_38.4177-27.1283,koordinat_38.4275-27.4188,koordinat_38.4555-27.1199,koordinat_38.4612-27.188100000000002,koordinat_38.471-27.217699999999997,koordinat_38.4907-28.1401,koordinat_38.494-26.961699999999997,koordinat_38.5002-27.7084,koordinat_38.5466-28.644099999999998,koordinat_38.6104-27.0697,koordinat_38.614-27.429600000000004,koordinat_38.6166-27.4196,koordinat_38.6184-28.6712,koordinat_38.6383-26.5127,koordinat_38.6704-26.757899999999996,koordinat_38.7078-27.917,koordinat_38.7375-27.5729,koordinat_38.745-28.8693,koordinat_38.7503-28.4024,koordinat_38.7996-26.970700000000004,koordinat_38.9177-27.8366,koordinat_38.933-28.288699999999995,koordinat_39.0473-28.6585,koordinat_39.0749-26.8892,koordinat_39.0859-27.3818,koordinat_39.1054-27.6733,koordinat_39.1214-27.1799,koordinat_39.1881-27.61,nem_durumu_orta,nem_durumu_yüksek,gunesli_gun_True,sicaklik_durumu_soğuk,sicaklik_durumu_ılık,sicaklik_durumu_çok sıcak,ruzgar_durumu_orta şiddetli rüzgar,ruzgar_durumu_şiddetli rüzgar
0,5.0,0.0,2021-01-01-izmir-aliaga,13.275,55.046,73.021,85.992,167.204,4.267,1.0,13.929,0.004,18.82,-5.545,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False
1,3.0,0.0,2021-01-01-izmir-bayindir,11.333,66.0,63.304,89.246,58.483,1.667,1.0,12.033,0.002,17.69,-6.357,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False
2,7.0,0.0,2021-01-01-izmir-bayrakli,12.742,58.254,66.446,89.212,141.758,2.646,1.217,13.592,0.003,18.59,-5.848,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False
3,2.0,0.0,2021-01-01-izmir-bergama,11.842,63.004,64.458,87.283,186.417,1.696,1.0,12.583,0.002,18.07,-6.228,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,True,False,False,False,False
4,9.0,0.0,2021-01-01-izmir-bornova,11.971,60.442,66.925,90.571,145.812,2.396,1.208,12.804,0.002,18.04,-6.069,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,True,False,False,False,False


In [128]:
cat_cols, num_cols, cat_but_car = grab_col_names(df)

Observations: 49511
Variables: 144
cat_cols: 130
num_cols: 13
cat_but_car: 1
num_but_cat: 130


In [129]:
num_cols = [col for col in num_cols if col not in ["bildirimsiz_sum"]]

In [134]:


from sklearn.preprocessing import StandardScaler, MinMaxScaler
scaler = StandardScaler()
df[num_cols] = scaler.fit_transform(df[num_cols])

In [135]:
train_df = df[df['bildirimsiz_sum'].notnull()]
test_df = df[df['bildirimsiz_sum'].isnull()]

In [136]:
y = train_df['bildirimsiz_sum']
X = train_df.drop(["unique_id","bildirimsiz_sum"], axis=1)

In [137]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)


In [138]:
# XGBoost modelini oluşturma
xgboost_final = XGBRegressor(random_state=42)

# Modeli eğitim veri seti üzerinde eğitin
xgboost_final.fit(X_train, y_train, verbose=False)



# Eğitilmiş modeli test veri seti üzerinde değerlendirin
y_test_pred = xgboost_final.predict(X_test)

In [139]:
 from sklearn.metrics import mean_absolute_error

 mae = mean_absolute_error(y_test, y_test_pred)
 mae

2.7717800016084846

In [140]:
predictions = xgboost_final.predict(test_df.drop(["unique_id","bildirimsiz_sum"], axis=1))


# Tahminleri DataFrame'e dönüştürme
dictionary = {"unique_id": test_df["unique_id"], "bildirimsiz_sum": predictions}
dfSubmission = pd.DataFrame(dictionary)
# Sonuçları CSV dosyasına yazma
dfSubmission.to_csv("submission.csv", index=False)