In [3]:
import pandas as pd
import numpy as np

In [28]:
train = pd.read_csv("dataset/train.csv")
test = pd.read_csv("dataset/test.csv")
weather = pd.read_csv("dataset/weather.csv")
holiday = pd.read_csv("dataset/holidays.csv")

In [5]:
train.head()

Unnamed: 0,tarih,ilce,bildirimsiz_sum,bildirimli_sum
0,2021-01-01,manisa-akhisar,9,0
1,2021-01-02,manisa-akhisar,10,0
2,2021-01-03,manisa-akhisar,5,2
3,2021-01-04,manisa-akhisar,6,0
4,2021-01-05,manisa-akhisar,15,4


In [6]:
train.shape

(46944, 4)

In [7]:
weather.head()

Unnamed: 0,date,lat,lon,t_2m:C,effective_cloud_cover:p,global_rad:W,relative_humidity_2m:p,wind_dir_10m:d,wind_speed_10m:ms,prob_precip_1h:p,t_apparent:C,ilce
0,2021-01-01 00:00:00,38.7996,26.9707,13.8,40.2,0.0,80.0,168.3,6.0,1.0,13.4,izmir-aliaga
1,2021-01-01 01:00:00,38.7996,26.9707,13.5,56.8,0.0,83.0,173.3,5.9,1.0,13.1,izmir-aliaga
2,2021-01-01 02:00:00,38.7996,26.9707,13.3,71.6,0.0,83.9,176.7,5.2,1.0,13.1,izmir-aliaga
3,2021-01-01 03:00:00,38.7996,26.9707,12.9,69.8,0.0,88.3,183.1,5.6,1.0,12.8,izmir-aliaga
4,2021-01-01 04:00:00,38.7996,26.9707,12.6,56.3,0.0,90.0,166.9,6.4,1.0,12.4,izmir-aliaga


In [8]:
weather["date"] = pd.to_datetime(weather["date"])

In [9]:
time = weather["date"].dt.time

In [10]:
weather["date"] = weather["date"].dt.date

In [29]:
holiday["Yıl"] = holiday["Yıl"].astype(str)
holiday["Ay"] = holiday["Ay"].astype(str)
holiday["Gün"] = holiday["Gün"].astype(str)

In [31]:
holiday["tarih"] = holiday["Yıl"] + "-" + holiday["Ay"] + "-" + holiday["Gün"]

In [34]:
holiday = holiday.drop(["Yıl","Ay","Gün"],axis=1)

In [35]:
holiday

Unnamed: 0,Bayram_Flag,tarih
0,Yılbaşı,2021-1-1
1,Ulusal Egemenlik ve Çocuk Bayramı,2021-4-23
2,Emek ve Dayanışma Günü,2021-5-1
3,"Atatürk'ü Anma, Gençlik ve Spor Bayramı",2021-5-19
4,Demokrasi ve Millî Birlik Günü,2021-7-15
5,Zafer Bayramı,2021-8-30
6,Cumhuriyet Bayramı,2021-10-29
7,Ramazan Bayramı,2021-5-13
8,Ramazan Bayramı,2021-5-14
9,Ramazan Bayramı,2021-5-15


In [11]:
def create_date_interval(df,col):
    max_date = np.datetime64(df[col].unique().max())
    min_date = np.datetime64(df[col].unique().min())
    date_interval = np.arange(min_date,max_date+1,step=1)
    return date_interval

In [12]:
def create_extend_dataset(df,dates):
    df = df.copy()
    ilceler = train["ilce"].unique()
    ilceler.sort()
    extended_train = pd.concat(
    [pd.DataFrame({'tarih': dates, 'ilce': ilce}) for ilce in ilceler],
    ignore_index=True,
)
    df["tarih"] = pd.to_datetime(df["tarih"])
    df_grouped = df.groupby(['tarih', 'ilce']).sum().reset_index()
    extended_train = pd.merge(extended_train, df_grouped, on=['tarih', 'ilce'], how='left')
    extended_train.fillna(0,inplace=True)
    return extended_train

In [13]:
def add_weather_features(df,w_df,dates):
    filtered_weather = w_df[(w_df.date >= dates.min()) & (w_df.date <= dates.max())]
    mean_weather = filtered_weather.groupby(["ilce","date"]).mean()
    median_weather = filtered_weather.groupby(["ilce","date"]).median()
    mean_weather.reset_index(inplace=True)
    median_weather.reset_index(inplace=True)
    mean_weather.drop(["ilce","date"],axis=1,inplace=True)
    median_weather.drop(["ilce","date"],axis=1,inplace=True)
    mean_data = df.copy()
    median_data = df.copy()
    for col in mean_weather.columns:
        mean_data[col] = mean_weather[col]
        median_data[col] = median_weather[col]
    return mean_data,median_data

In [14]:
def add_holiday_feature(df,h_df,dates):
    filtered_weather = h_df[(h_df.dates >= dates.min()) & (h_df <= dates.max())]
    print(filtered_weather)

In [15]:
train_date_interval = create_date_interval(train,"tarih")
train_df = create_extend_dataset(train,train_date_interval)

In [16]:
weather_interval = create_date_interval(weather,"date")

In [17]:
mean_data_train,median_data_train = add_weather_features(train_df,weather,weather_interval)

In [19]:
add_holiday_feature(train,holiday,train_date_interval)

AttributeError: 'function' object has no attribute 'date'

In [15]:
test_interval = create_date_interval(test,"tarih")
test_df = create_extend_dataset(test,test_interval)

In [16]:
mean_data_test,median_data_test = add_weather_features(test_df,weather,test_interval)

In [20]:
mean_data_train.to_csv("mean_data_train.csv")
median_data_train.to_csv("median_data_train.csv")
mean_data_test.to_csv("mean_data_test.csv")
median_data_test.to_csv("median_data_test.csv")