# Questions to the data
1. Does the number of hauses in all houses groups remain stable or there were a construction side finished in the given time frame?
2. What was the weather in the region?
3. What was the price for gas/oil/elictricity?

In [61]:
import pandas as pd
import numpy as np
from datetime import datetime
from typing import List, Tuple, Any

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import warnings
from data.starting_kit.utils import create_submission
from sklego.preprocessing import RepeatingBasisFunction
warnings.filterwarnings("ignore")

In [62]:
data = pd.read_csv('../data/public_data/train.csv')
final_index = data['pseudo_id']

In [63]:
# drop index for feature preparation
data_ = data.drop(columns='pseudo_id')
# convert dates to pandas datetime
data_.columns = [datetime.strptime(c, "%Y-%m-%d %H:%M:%S") for c in data_.columns]
data_.head()

Unnamed: 0,2017-01-01 00:00:00,2017-01-01 00:30:00,2017-01-01 01:00:00,2017-01-01 01:30:00,2017-01-01 02:00:00,2017-01-01 02:30:00,2017-01-01 03:00:00,2017-01-01 03:30:00,2017-01-01 04:00:00,2017-01-01 04:30:00,...,2019-08-28 19:00:00,2019-08-28 19:30:00,2019-08-28 20:00:00,2019-08-28 20:30:00,2019-08-28 21:00:00,2019-08-28 21:30:00,2019-08-28 22:00:00,2019-08-28 22:30:00,2019-08-28 23:00:00,2019-08-28 23:30:00
0,45.023,39.985,36.5695,34.748,35.972,38.439,36.591,36.3155,32.6605,0.142,...,24.288,23.994,26.1995,25.027,23.0665,26.093,23.4295,25.4715,26.246,22.602
1,2.931,1.641,2.26,2.273,2.651,3.137,2.532,3.142,2.528,0.0,...,2.57,1.446,1.523,1.563,2.588,2.19,1.486,2.527,2.288,1.794
2,11.014,12.6525,10.824,13.7485,12.383,12.342,13.413,11.484,11.5105,0.0455,...,6.3565,5.766,5.4955,5.0885,6.814,7.492,5.7705,6.824,6.072,6.7205
3,55.813,49.04,49.095,41.133,45.66,48.477,50.539,45.737,42.68,0.0,...,32.646,30.439,30.247,31.266,34.339,33.076,33.108,33.726,30.009,34.84
4,26.925,28.118,25.6,28.091,26.53,23.858,26.556,27.714,23.174,0.0,...,13.398,13.28,13.734,13.606,14.7,16.29,15.124,15.365,14.36,13.935


# GROUP BY DAY

In [64]:
# Aggregate energy use values per day
data_ = data_.T.groupby(data_.T.index.date).sum()
data_.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
2017-01-01,1673.8655,143.162,586.9155,2094.588,1260.608,432.268,211.028,1191.652,1456.7335,503.095,...,23.534,313.948,196.943,36.989,153.074,19.533,80.263,175.512,40.8825,112.112
2017-01-02,1463.323,130.557,532.933,1845.933,1024.438,361.388,158.281,1022.2485,1319.965,356.898,...,32.657,190.435,153.46,42.53,104.633,20.703,69.607,140.667,31.2955,92.511
2017-01-03,1132.291,99.234,407.063,1475.75,725.995,279.199,125.445,765.0,991.8735,257.116,...,15.138,110.994,88.819,33.476,79.647,14.527,38.887,108.7145,29.8745,64.486
2017-01-04,1183.8085,91.932,325.472,1553.638,702.53,306.612,119.487,741.478,1011.5705,278.864,...,19.211,55.876,78.585,30.363,75.137,16.648,53.533,117.044,33.002,54.094
2017-01-05,1125.287,76.781,328.4645,1572.033,672.427,277.665,116.686,677.7,1038.2905,284.385,...,10.2,57.125,79.498,30.64,76.458,17.598,48.185,119.899,37.034,48.022


In [65]:
# features["hour"] = data.index.hour
data_.columns = [c+1 for c in range(len(data_.columns))]
data_.head()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,52,53,54,55,56,57,58,59,60,61
2017-01-01,1673.8655,143.162,586.9155,2094.588,1260.608,432.268,211.028,1191.652,1456.7335,503.095,...,23.534,313.948,196.943,36.989,153.074,19.533,80.263,175.512,40.8825,112.112
2017-01-02,1463.323,130.557,532.933,1845.933,1024.438,361.388,158.281,1022.2485,1319.965,356.898,...,32.657,190.435,153.46,42.53,104.633,20.703,69.607,140.667,31.2955,92.511
2017-01-03,1132.291,99.234,407.063,1475.75,725.995,279.199,125.445,765.0,991.8735,257.116,...,15.138,110.994,88.819,33.476,79.647,14.527,38.887,108.7145,29.8745,64.486
2017-01-04,1183.8085,91.932,325.472,1553.638,702.53,306.612,119.487,741.478,1011.5705,278.864,...,19.211,55.876,78.585,30.363,75.137,16.648,53.533,117.044,33.002,54.094
2017-01-05,1125.287,76.781,328.4645,1572.033,672.427,277.665,116.686,677.7,1038.2905,284.385,...,10.2,57.125,79.498,30.64,76.458,17.598,48.185,119.899,37.034,48.022


In [66]:
data_.tail()

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,52,53,54,55,56,57,58,59,60,61
2019-08-24,1868.8055,164.618,591.834,2320.42,1344.475,441.298,197.4,1254.031,1609.6795,376.426,...,8.485,304.653,163.251,35.316,142.662,34.878,99.062,175.1965,39.1895,119.239
2019-08-25,1349.2125,112.434,368.2365,1808.059,806.568,309.58,127.497,845.2855,1198.7305,268.204,...,6.636,132.507,105.201,27.233,94.105,26.529,63.83,116.643,38.679,74.454
2019-08-26,1255.452,95.373,250.1105,1712.203,707.746,286.692,140.51,710.827,1069.1295,272.98,...,9.405,69.509,87.798,31.184,92.002,17.889,62.093,124.4755,32.689,62.347
2019-08-27,1279.7365,103.733,210.3665,1652.541,704.239,313.001,125.637,728.527,1089.493,287.823,...,3.807,62.82,82.163,34.891,100.209,16.43,62.977,111.695,25.0275,59.164
2019-08-28,1021.8995,78.113,214.379,1326.983,573.143,235.565,95.4,616.3795,887.841,228.176,...,5.509,77.663,74.817,25.212,73.528,13.629,48.472,74.801,24.938,54.552


In [67]:
# check wether datetime in ascending order , it is important for time series
print(data_.index.is_monotonic)

True


In [68]:
# Set dates for development phase
new_date_range = pd.date_range(start="2019-04-01", end="2019-09-04", freq="D")
# Add test dates in the data frame
data_ = data_.reindex(new_date_range)
# using dummy values in test set , fill_value = 100
# df_ = df_.T

#data_.head()

In [69]:
data_

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,52,53,54,55,56,57,58,59,60,61
2019-04-01,2853.2180,186.783,543.0710,3395.435,1820.848,821.635,297.746,1779.3360,2713.5490,716.119,...,32.935,293.662,245.143,80.878,210.774,57.016,120.485,244.6710,76.2535,181.910
2019-04-02,3062.2365,237.937,736.2320,3606.995,1894.726,902.473,335.484,1922.6595,2793.0085,770.350,...,32.042,372.209,244.726,75.152,278.001,58.522,132.933,289.3000,96.1405,215.398
2019-04-03,3935.9800,298.059,941.8085,4488.140,2672.574,1108.149,391.764,2380.7210,3361.0830,929.588,...,47.996,567.363,313.667,90.844,351.721,85.646,165.802,358.2290,100.3645,277.137
2019-04-04,3427.4685,241.841,728.3470,4053.331,2088.122,918.774,385.364,2040.0370,2964.5215,784.625,...,43.350,416.331,290.620,74.842,319.316,73.028,143.664,312.6355,102.6860,203.728
2019-04-05,2712.7985,188.766,380.9225,3165.591,1569.395,733.520,281.320,1566.7325,2367.9860,654.183,...,28.232,185.906,206.245,67.961,184.108,52.768,110.620,237.6950,87.5670,134.729
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08-31,,,,,,,,,,,...,,,,,,,,,,
2019-09-01,,,,,,,,,,,...,,,,,,,,,,
2019-09-02,,,,,,,,,,,...,,,,,,,,,,
2019-09-03,,,,,,,,,,,...,,,,,,,,,,


In [70]:
# saving indexes for next steps
idx_test_date = data_.index[data_[1].isna()]
idx_test_date

DatetimeIndex(['2019-04-29', '2019-04-30', '2019-05-01', '2019-05-02',
               '2019-05-03', '2019-05-04', '2019-05-05', '2019-06-13',
               '2019-06-14', '2019-06-15', '2019-06-16', '2019-06-17',
               '2019-06-18', '2019-06-19', '2019-07-28', '2019-07-29',
               '2019-07-30', '2019-07-31', '2019-08-01', '2019-08-02',
               '2019-08-03', '2019-08-29', '2019-08-30', '2019-08-31',
               '2019-09-01', '2019-09-02', '2019-09-03', '2019-09-04'],
              dtype='datetime64[ns]', freq=None)

In [71]:
data_ = data_.fillna(method="ffill")

In [72]:
#plt.rcParams.update({'figure.figsize':(9,3), 'figure.dpi':160})
#data_[1].plot()
#plt.title('Energy use forecasts for houshold group' + " 1")
#plt.show()

In [73]:
data_\
    .isna()\
    .sum()\
    .to_frame()\
    .assign(perc = lambda row: 100 * row[0] / data_.shape[0])\
    .rename(columns={0: 'Number of missed data  points', 'perc': '% of missed data points'})

Unnamed: 0,Number of missed data points,% of missed data points
1,0,0.0
2,0,0.0
3,0,0.0
4,0,0.0
5,0,0.0
...,...,...
57,0,0.0
58,0,0.0
59,0,0.0
60,0,0.0


# Adding features

In [74]:
df = data_.copy(deep=True)
df["weekday"] = data_.index.weekday
df["dayofyear"] = data_.index.dayofyear
df["month"] = data_.index.month
df["season"] = (data_.index.month % 12 + 3) // 3
df["is_weekend"] = data_.index.weekday.isin([5, 6]).astype(np.int32)

In [75]:
df_month = pd.get_dummies(df["month"], drop_first=True, prefix="month")
df_season = pd.get_dummies(df["season"], drop_first=True, prefix="season")
df_weekday = pd.get_dummies(df["weekday"], drop_first=True, prefix="weekday")
df = pd.concat([df, df_month, df_season, df_weekday], axis=1)
df.drop(columns=['month','season','weekday'], inplace=True)

In [76]:
# rbf = RepeatingBasisFunction(n_periods=12, column="dayofyear", input_range=(1,365), remainder="drop")

In [77]:
# rbf.fit(df)
# X_3 = pd.DataFrame(index=df.index,  data=rbf.transform(df))

In [78]:
# X_3.columns = [str(c)+'_day_month' for c in range(len(X_3.columns))]

In [79]:
#df = pd.concat([df, X_3], axis=1)

In [80]:
#df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,2_day_month,3_day_month,4_day_month,5_day_month,6_day_month,7_day_month,8_day_month,9_day_month,10_day_month,11_day_month
2019-04-01,2853.2180,186.783,543.0710,3395.435,1820.848,821.635,297.746,1779.3360,2713.5490,716.119,...,3.925259e-01,9.989138e-01,3.440319e-01,0.016035,0.000101,8.635293e-08,9.976816e-12,3.441402e-16,1.929034e-11,1.463375e-07
2019-04-02,3062.2365,237.937,736.2320,3606.995,1894.726,902.473,335.484,1922.6595,2793.0085,770.350,...,3.678794e-01,1.000000e+00,3.678794e-01,0.018316,0.000123,1.125352e-07,1.388794e-11,2.319523e-16,1.388794e-11,1.125352e-07
2019-04-03,3935.9800,298.059,941.8085,4488.140,2672.574,1108.149,391.764,2380.7210,3361.0830,929.588,...,3.440319e-01,9.989138e-01,3.925259e-01,0.020875,0.000150,1.463375e-07,1.929034e-11,3.441402e-16,9.976816e-12,8.635293e-08
2019-04-04,3427.4685,241.841,728.3470,4053.331,2088.122,918.774,385.364,2040.0370,2964.5215,784.625,...,3.210317e-01,9.956621e-01,4.179142e-01,0.023740,0.000183,1.898798e-07,2.673609e-11,5.094813e-16,7.151579e-12,6.611833e-08
2019-04-05,2712.7985,188.766,380.9225,3165.591,1569.395,733.520,281.320,1566.7325,2367.9860,654.183,...,2.989187e-01,9.902663e-01,4.439785e-01,0.026939,0.000221,2.458430e-07,3.697532e-11,7.526222e-16,5.115262e-12,5.051527e-08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08-31,1021.8995,78.113,214.3790,1326.983,573.143,235.565,95.400,616.3795,887.8410,228.176,...,3.018061e-16,1.729329e-11,1.341028e-07,0.000141,0.019989,3.842250e-01,9.995171e-01,3.518891e-01,1.676612e-02,1.081111e-04
2019-09-01,1021.8995,78.113,214.3790,1326.983,573.143,235.565,95.400,616.3795,887.8410,228.176,...,2.646158e-16,1.244116e-11,1.030519e-07,0.000116,0.017526,3.598389e-01,9.998792e-01,3.760088e-01,1.913637e-02,1.318050e-04
2019-09-02,1021.8995,78.113,214.3790,1326.983,573.143,235.565,95.400,616.3795,887.8410,228.176,...,3.923177e-16,8.931002e-12,7.901876e-08,0.000095,0.015333,3.362689e-01,9.980697e-01,4.009093e-01,2.179428e-02,1.603429e-04
2019-09-03,1021.8995,78.113,214.3790,1326.983,573.143,235.565,95.400,616.3795,887.8410,228.176,...,5.803849e-16,6.397282e-12,6.045892e-08,0.000077,0.013385,3.135604e-01,9.941003e-01,4.265307e-01,2.476745e-02,1.946361e-04


In [81]:
def create_features(houshold_id,df=df) -> pd.DataFrame:
    df_new = df[[houshold_id, 'is_weekend',
                 #'month_2', 'month_3', 'month_4',
                 'month_5', 'month_6', 'month_7','month_8','month_9',
                 #'month_10','month_11','month_12',
                 #'season_2',
                 'season_3','season_4','weekday_1', 'weekday_2', 'weekday_3','weekday_4','weekday_5','weekday_6',
                 #'0_day_month','1_day_month','2_day_month','3_day_month','4_day_month','5_day_month','6_day_month','7_day_month','8_day_month','9_day_month','10_day_month','11_day_month'
                 ]]
    df_new['std'] = df_new[houshold_id].rolling(7).std().fillna(method="backfill")
    df_new['mean'] = df_new[houshold_id].rolling(7).mean().fillna(method="backfill")
    df_new['lag_1'] = df[houshold_id].shift(1).fillna(method="backfill")
    df_new['lag_2'] = df[houshold_id].shift(2).fillna(method="backfill")
    df_new['lag_3'] = df_new[houshold_id].shift(3).fillna(method="backfill")
    df_new['lag_4'] = df_new[houshold_id].shift(4).fillna(method="backfill")
    df_new['lag_5'] = df_new[houshold_id].shift(5).fillna(method="backfill")
    df_new['lag_6'] = df_new[houshold_id].shift(6).fillna(method="backfill")
    df_new['lag_7'] = df_new[houshold_id].shift(7).fillna(method="backfill")
    return df_new

In [82]:
create_features(1)

Unnamed: 0,1,is_weekend,month_5,month_6,month_7,month_8,month_9,season_3,season_4,weekday_1,...,weekday_6,std,mean,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7
2019-04-01,2853.2180,0,0,0,0,0,0,0,0,0,...,0,627.554028,2919.720214,2853.2180,2853.2180,2853.2180,2853.2180,2853.2180,2853.2180,2853.2180
2019-04-02,3062.2365,0,0,0,0,0,0,0,0,1,...,0,627.554028,2919.720214,2853.2180,2853.2180,2853.2180,2853.2180,2853.2180,2853.2180,2853.2180
2019-04-03,3935.9800,0,0,0,0,0,0,0,0,0,...,0,627.554028,2919.720214,3062.2365,2853.2180,2853.2180,2853.2180,2853.2180,2853.2180,2853.2180
2019-04-04,3427.4685,0,0,0,0,0,0,0,0,0,...,0,627.554028,2919.720214,3935.9800,3062.2365,2853.2180,2853.2180,2853.2180,2853.2180,2853.2180
2019-04-05,2712.7985,0,0,0,0,0,0,0,0,0,...,0,627.554028,2919.720214,3427.4685,3935.9800,3062.2365,2853.2180,2853.2180,2853.2180,2853.2180
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-08-31,1021.8995,1,0,0,0,1,0,1,0,0,...,0,148.552511,1138.857000,1021.8995,1021.8995,1021.8995,1279.7365,1255.4520,1349.2125,1868.8055
2019-09-01,1021.8995,1,0,0,0,0,1,0,1,0,...,1,120.091551,1092.098000,1021.8995,1021.8995,1021.8995,1021.8995,1279.7365,1255.4520,1349.2125
2019-09-02,1021.8995,0,0,0,0,0,1,0,1,0,...,0,97.453226,1058.733357,1021.8995,1021.8995,1021.8995,1021.8995,1021.8995,1279.7365,1255.4520
2019-09-03,1021.8995,0,0,0,0,0,1,0,1,1,...,0,0.000034,1021.899500,1021.8995,1021.8995,1021.8995,1021.8995,1021.8995,1021.8995,1279.7365


In [83]:
def get_weeks(idx)-> List[List[pd._libs.tslibs.timestamps.Timestamp]]:
    idx = list(idx)
    weeks_to_predict = []
    week = []
    for i in range(1,len(idx)+1,1):
        #print(idx[i-1])
        if i%7 == 0:
            week.append(idx[i-1])
            weeks_to_predict.append(week)
            #print(week)
            week = []
        elif i%7 > 0:
            week.append(idx[i-1])
    return weeks_to_predict

In [84]:
weeks_test_date = get_weeks(idx_test_date)

In [85]:
weeks_test_date[0][6]

Timestamp('2019-05-05 00:00:00')

In [86]:
def data_split_accumlated(result, weeks, n, houshold) -> Tuple[Any, Any]:
    print(weeks[n])
    result_splitted = result[result.index < weeks[n][0]]
    result_splitted_features = result_splitted.drop([houshold], axis=1)
    result_splitted_target = result_splitted[houshold]
    print("target: ", result_splitted_target)
    result_splitted_to_predict = result[(result.index >= weeks[n][0]) & (result.index <= weeks[n][6])]
    result_splitted_to_predict_features = result_splitted_to_predict.drop([houshold], axis=1)
    result_splitted_to_predict_target = result_splitted_to_predict[houshold]
    result_splitted_to_predict_target = result_splitted_to_predict_target.to_frame()

    return result_splitted_features, result_splitted_target, result_splitted_to_predict_features, result_splitted_to_predict_target

In [87]:
all_predictions = []
 
for houshold in data_.columns:
    houshold_predictions = []
    print("***** Houshold " + str(houshold) + " dataset created ****** ")
    for week in range(len(weeks_test_date)):
        #print("week "+  str(week) + " splitting started")
        result = create_features(houshold)
        features, target, features_predict, target_predict = data_split_accumlated(result, weeks_test_date, week, houshold)
        model_linear = LinearRegression()
        model_linear.fit(features, target)
        print("trained on " + str(week))
        prediction = model_linear.predict(features_predict)
        houshold_predictions.append(prediction)
        week_timestamps = weeks_test_date[week]
        target_predict['predict'] = prediction
        df.loc[week_timestamps[0]:week_timestamps[6],houshold] = target_predict['predict']
        print("=============================================")
        print(prediction)
        #print(df.loc[week_timestamps[0]:week_timestamps[6],houshold])
        print("=============================================")
    all_predictions.append(houshold_predictions)

***** Houshold 1 dataset created ****** 
[Timestamp('2019-04-29 00:00:00'), Timestamp('2019-04-30 00:00:00'), Timestamp('2019-05-01 00:00:00'), Timestamp('2019-05-02 00:00:00'), Timestamp('2019-05-03 00:00:00'), Timestamp('2019-05-04 00:00:00'), Timestamp('2019-05-05 00:00:00')]
target:  2019-04-01    2853.2180
2019-04-02    3062.2365
2019-04-03    3935.9800
2019-04-04    3427.4685
2019-04-05    2712.7985
2019-04-06    2354.3905
2019-04-07    2091.9495
2019-04-08    2758.3585
2019-04-09    3514.5575
2019-04-10    3665.8395
2019-04-11    3367.1005
2019-04-12    2958.3540
2019-04-13    2422.4110
2019-04-14    2350.1320
2019-04-15    2986.8305
2019-04-16    3797.2145
2019-04-17    2981.9895
2019-04-18    2350.5830
2019-04-19    2411.6165
2019-04-20    2394.4400
2019-04-21    2194.4385
2019-04-22    3099.6380
2019-04-23    3441.0740
2019-04-24    3602.2455
2019-04-25    2756.7650
2019-04-26    2432.2695
2019-04-27    2160.2425
2019-04-28    2321.5155
Freq: D, Name: 1, dtype: float64
traine

In [88]:
weeks_columns = [week for weeks in weeks_test_date for week in weeks]
weeks_columns

[Timestamp('2019-04-29 00:00:00'),
 Timestamp('2019-04-30 00:00:00'),
 Timestamp('2019-05-01 00:00:00'),
 Timestamp('2019-05-02 00:00:00'),
 Timestamp('2019-05-03 00:00:00'),
 Timestamp('2019-05-04 00:00:00'),
 Timestamp('2019-05-05 00:00:00'),
 Timestamp('2019-06-13 00:00:00'),
 Timestamp('2019-06-14 00:00:00'),
 Timestamp('2019-06-15 00:00:00'),
 Timestamp('2019-06-16 00:00:00'),
 Timestamp('2019-06-17 00:00:00'),
 Timestamp('2019-06-18 00:00:00'),
 Timestamp('2019-06-19 00:00:00'),
 Timestamp('2019-07-28 00:00:00'),
 Timestamp('2019-07-29 00:00:00'),
 Timestamp('2019-07-30 00:00:00'),
 Timestamp('2019-07-31 00:00:00'),
 Timestamp('2019-08-01 00:00:00'),
 Timestamp('2019-08-02 00:00:00'),
 Timestamp('2019-08-03 00:00:00'),
 Timestamp('2019-08-29 00:00:00'),
 Timestamp('2019-08-30 00:00:00'),
 Timestamp('2019-08-31 00:00:00'),
 Timestamp('2019-09-01 00:00:00'),
 Timestamp('2019-09-02 00:00:00'),
 Timestamp('2019-09-03 00:00:00'),
 Timestamp('2019-09-04 00:00:00')]

In [89]:
def flatten(hous):
    return [week for weeks in hous for week in weeks]

In [90]:
flatened_predictions = []
for hous in all_predictions:
    flatened_predictions.append(flatten(hous))

In [91]:
df_from_list = pd.DataFrame([i for i in flatened_predictions], columns= [weeks_columns],index = final_index)

In [92]:
df_from_list.reset_index(inplace = True)

In [93]:
df_from_list.to_csv("./sample_submission_daily_max_new_features.csv", index = False)

In [94]:
df_from_list

Unnamed: 0,pseudo_id,2019-04-29 00:00:00,2019-04-30 00:00:00,2019-05-01 00:00:00,2019-05-02 00:00:00,2019-05-03 00:00:00,2019-05-04 00:00:00,2019-05-05 00:00:00,2019-06-13 00:00:00,2019-06-14 00:00:00,...,2019-08-01 00:00:00,2019-08-02 00:00:00,2019-08-03 00:00:00,2019-08-29 00:00:00,2019-08-30 00:00:00,2019-08-31 00:00:00,2019-09-01 00:00:00,2019-09-02 00:00:00,2019-09-03 00:00:00,2019-09-04 00:00:00
0,0x16cb02173ebf3059efdc97fd1819f14a2,2749.336254,2749.922320,2162.133104,1797.825432,1660.787637,1308.582487,1101.589913,2214.411826,2238.190228,...,1840.039576,1875.229693,1862.803820,1097.438159,1122.326252,996.945750,810.547254,798.872640,719.647190,749.123997
1,0x1c9d08cd16fce04790ef900695861e786,189.624484,228.669543,221.221195,191.313054,187.380062,172.313515,162.564024,157.961636,177.008472,...,170.223509,178.711019,182.016828,87.552334,92.419448,84.873582,82.882239,78.704997,78.004771,78.531920
2,0x1612e4cbe3b1b85c3dbcaeaa504ee8424,443.382560,549.833282,198.343896,73.971310,69.993780,65.376960,-6.327350,412.653223,399.933942,...,437.380242,441.052510,435.658360,274.042221,299.580684,201.864706,114.358605,115.111579,113.176785,114.395176
3,0x20158d36236a640cf0524dba149459169,3407.038610,3553.220226,2895.947674,2224.488431,2100.290357,1587.088176,1338.598457,2779.852026,2901.879480,...,2346.031434,2377.266231,2354.984914,1413.090246,1457.186430,1347.588465,1331.311547,1336.829547,1243.634953,1271.781104
4,0xc305005dcb1ed6128d816954c5ab9e7e,1777.256253,1652.790428,1570.378493,1146.580609,1146.535930,852.682777,806.055000,1311.231363,1289.191357,...,1293.785107,1368.593946,1361.666960,683.657166,657.051276,501.019095,308.303110,302.620460,240.956386,260.196477
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,0x12342fbadc0ca9418f2d540bb3cb8364a,56.014758,50.561924,38.231982,29.329684,28.698667,24.795205,17.145807,34.354503,35.337982,...,22.849192,23.101131,22.881758,16.088290,16.661553,15.222376,14.386588,13.558478,12.227614,13.755472
57,0x16d1816bc5d185c47de080d4c6a64bc9a,119.265677,119.920012,114.523682,110.794677,103.692667,93.104120,74.208723,107.673003,109.232395,...,86.918489,86.943382,87.635241,52.666711,54.361762,51.446755,50.254248,49.614304,47.857891,49.132625
58,0x14f480f24c435af1b8574c1c6bab38a1c,235.991761,289.052452,114.896822,85.876787,45.691007,3.106126,-21.976736,171.759566,182.275705,...,225.887585,228.079196,226.538889,76.839577,83.407569,73.114843,72.051302,69.882525,63.612480,67.078942
59,0x1c7fc724d0a4f89ed1de8a0a4b302db22,56.371928,64.137472,49.023510,37.646826,40.065845,27.838957,19.422855,50.713224,53.709922,...,54.084869,56.389390,55.766317,24.525142,25.603388,24.593945,19.086438,17.784886,17.487447,17.857932


In [95]:
#daily = pd.read_csv("./sample_submission_daily_max.csv")
#hourly = pd.read_csv("./sample_submission_hourly_max.csv")

In [96]:
#create_submission(daily,hourly)