# libraries

In [None]:
!pip install fastai2

In [None]:
from fastai2.basics import *
from fastai2.tabular.all import *
from fastai2.callback.all import *

# data

In [None]:
import os
# Print the full path of every file found anywhere under /kaggle/input.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

In [None]:
# Directory that holds the competition CSV files.
path = Path('/kaggle/input') / 'ashrae-energy-prediction'

In [None]:
# Load the complete training table from train.csv.
# NOTE(review): the next cell reassigns `train` from the same file with every
# other line skipped, so this full read looks redundant -- confirm it is needed.
train = pd.read_csv(path/'train.csv')

In [None]:
# Re-read the training table, skipping every odd-numbered line of the file
# (line 0, the header, and the even-numbered lines are kept).
train = pd.read_csv(path / 'train.csv', skiprows=lambda line_no: line_no % 2 == 1)
# Companion tables that are merged into `train` further down.
bldg = pd.read_csv(path / 'building_metadata.csv')
weather_train = pd.read_csv(path / 'weather_train.csv')

In [None]:
# Row count of the subsampled training table.
len(train)

In [None]:
# Preview the first rows of the training table.
train.head()

In [None]:
# Parse the timestamp strings into datetimes (the .dt accessor is used later).
train["timestamp"] = pd.to_datetime(train["timestamp"])
# Keep only rows whose target is finite (NaN and +/-inf are removed) ...
train = train.loc[np.isfinite(train['meter_reading'])]
# ... then move the target onto a log(1 + x) scale.
train['meter_reading'] = np.log1p(train['meter_reading'])

In [None]:
# Row count after the non-finite targets were removed.
len(train)

In [None]:
# Remove rows for which all three conditions hold at once: building_id <= 104,
# meter == 0 and timestamp <= "2016-05-20". Every other row is kept.
# NOTE(review): presumably these are known-bad early readings -- confirm the rationale.
train = train.query('not (building_id <= 104 & meter == 0 & timestamp <= "2016-05-20")')

In [None]:
def preprocess(df):
    """Add calendar features derived from ``df["timestamp"]`` in place.

    Columns written:
        hour: hour of day (0-23).
        weekend: 1 for Saturday/Sunday, 0 otherwise.
        month: month of year (1-12).
        dayofweek: day index with Monday = 0 ... Sunday = 6.

    Args:
        df: DataFrame with a datetime-typed ``timestamp`` column. It is
            modified in place.

    Returns:
        None.
    """
    stamps = df["timestamp"].dt
    df["hour"] = stamps.hour
    # This column used to hold the raw weekday index, which made it an exact
    # duplicate of ``dayofweek``; store an actual weekend flag instead.
    df["weekend"] = (stamps.dayofweek >= 5).astype("int8")
    df["month"] = stamps.month
    df["dayofweek"] = stamps.dayofweek

In [None]:
# Add the calendar feature columns to the training table (modifies it in place).
preprocess(train)

In [None]:
# (rows, columns) after the calendar features were added.
train.shape

In [None]:
# Per-building summary statistics of `meter_reading`, computed over all rows
# currently in `train` and stored as float16.
df_group = train.groupby('building_id')['meter_reading']
building_mean, building_median, building_min, building_max, building_std = (
    getattr(df_group, stat_name)().astype(np.float16)
    for stat_name in ('mean', 'median', 'min', 'max', 'std')
)

# Broadcast each statistic back onto the rows through `building_id`.
for feature_name, per_building in (
    ('building_mean', building_mean),
    ('building_median', building_median),
    ('building_min', building_min),
    ('building_max', building_max),
    ('building_std', building_std),
):
    train[feature_name] = train['building_id'].map(per_building)
del feature_name, per_building

In [None]:
# Drop the intermediate groupby object and per-building series; their values
# now live in the building_* columns of `train`.
del df_group, building_mean, building_median, building_min, building_max, building_std

In [None]:
# (rows, columns) after the building_* columns were added.
train.shape

In [None]:
# Fill gaps in the weather measurements separately for each site, using
# interpolation that also extends into leading/trailing gaps
# (limit_direction='both').
# Only numeric measurement columns are touched: `site_id` is the grouping key
# and `timestamp` has not been parsed yet at this point. `transform` keeps the
# frame's original index and columns; `groupby.apply` on recent pandas versions
# would add `site_id` as an index level, which makes the later merge on
# `site_id` ambiguous.
weather_num_cols = weather_train.select_dtypes(include='number').columns.difference(
    ['site_id'], sort=False
)
weather_train[weather_num_cols] = (
    weather_train.groupby('site_id')[weather_num_cols]
    .transform(lambda col: col.interpolate(limit_direction='both'))
)

In [None]:
# Parse the weather timestamps into datetimes so they match the dtype of
# train["timestamp"] when the two tables are merged.
weather_train["timestamp"] = pd.to_datetime(weather_train["timestamp"])

In [None]:
# Attach building metadata to every reading; the left join keeps all rows of `train`.
train = train.merge(bldg, on='building_id', how='left')

In [None]:
# Attach the weather record for the same site and timestamp. This is an inner
# join (pandas' default), so readings without a matching weather row are dropped.
train = train.merge(weather_train, on=['site_id', 'timestamp'], how='inner')

In [None]:
# The auxiliary tables have been merged into `train`; drop the names.
del weather_train, bldg

In [None]:
# The raw timestamp is not used as a model input; remove the column.
train = train.drop(columns='timestamp')

In [None]:
# Columns treated as categorical inputs.
cat_vars = [
    "building_id",
    "primary_use",
    "hour",
    "weekend",
    "month",
    "meter",
    "dayofweek",
]
# Columns treated as continuous inputs.
cont_vars = [
    "square_feet",
    "year_built",
    "air_temperature",
    "cloud_coverage",
    "dew_temperature",
    "building_mean",
    "building_median",
    "building_min",
    "building_max",
    "building_std",
    "floor_count",
]
# Target column.
dep_var = 'meter_reading'

In [None]:
# List the columns available after the merges.
train.columns

In [None]:
# Run a garbage-collection pass to release memory held by objects deleted earlier.
import gc
gc.collect()

In [None]:
# Preprocessing steps that are handed to TabularPandas below.
procs = [Normalize, Categorify, FillMissing]
# Random train/validation split over the row positions of `train`.
# NOTE(review): RandomSplitter is created without a seed, so the split
# presumably differs between runs -- confirm whether reproducibility matters.
splits = RandomSplitter()(range_of(train))

In [None]:
# Inspect the container type returned by the splitter.
type(splits)

In [None]:
# Display the leading elements (up to five) of `splits`.
splits[:5]

In [None]:
# Wrap the DataFrame in a TabularPandas that applies `procs` to the listed
# categorical/continuous columns, with `dep_var` as a regression target and
# the train/validation index split from `splits`.
train = TabularPandas(
    train,
    procs,
    cat_vars,
    cont_vars,
    y_names=dep_var,
    splits=splits,
    block_y=RegressionBlock(),
)

In [None]:
# The split indices were passed to TabularPandas; drop the name.
del splits

In [None]:
# Build the DataLoaders from the processed table, using default settings.
dls = train.dataloaders()

In [None]:
# Persist the processed TabularPandas object to train.pkl in the working directory.
with open("train.pkl", mode="wb") as pkl_file:
    pickle.dump(train, pkl_file)

In [None]:
# Embedding sizes for the categorical columns, as computed by get_emb_sz.
emb_szs = get_emb_sz(train)

In [None]:
# Number of continuous input columns; echoed as the cell output.
cont_len = len(train.cont_names)
cont_len

# model

In [None]:
# Stand-alone TabularModel built from the embedding sizes and the number of
# continuous inputs, with one output and hidden layers of 200 and 100 units.
# NOTE(review): `net` is only displayed below and is not passed to
# tabular_learner in this notebook -- confirm it is needed.
net = TabularModel(emb_szs, cont_len, 1, [200,100])

In [None]:
# Display the model architecture.
net

In [None]:
# Regression learner with hidden layers of 200 and 100 units, one output and
# MSE loss. The metric is RMSE: `accuracy` is a classification metric and is
# not meaningful for a single continuous output. Because the target was
# log1p-transformed, RMSE here is an error on the log scale.
learn = tabular_learner(dls, [200,100], loss_func=MSELossFlat(), metrics=rmse, n_out=1)

In [None]:
# Train for 2 epochs.
learn.fit(2)

In [None]:
# Save the learner's state under the name 'train1'.
learn.save('train1')

# inference

In [None]:
# Load the test rows together with the building metadata (its earlier copy was
# deleted after the training merge) and the weather table for the test set.
test = pd.read_csv(path/'test.csv')
bldg = pd.read_csv(path/'building_metadata.csv')
weather_test = pd.read_csv(path/"weather_test.csv")

In [None]:
# Attach building metadata to every test row; the left join keeps all rows of `test`.
test = test.merge(bldg, on='building_id', how='left')

In [None]:
# Attach the weather record for the same site and timestamp. A left join is
# required here: the default inner join would drop every test row that has no
# weather record, and the submission needs a prediction for each test row.
# Missing weather values stay NaN after the join.
test = test.merge(weather_test, on=['site_id', 'timestamp'], how='left')

In [None]:
# Parse timestamps, then reuse the training feature builder so the test frame
# gets the same calendar columns as the training frame. The hand-written
# version here never created `dayofweek`, which is listed in `cat_vars`.
test["timestamp"] = pd.to_datetime(test["timestamp"])
preprocess(test)
# Extra day-of-month column; it is not part of cat_vars/cont_vars.
test["day"] = test["timestamp"].dt.day

In [None]:
# The raw timestamp is not a model input; remove the column.
test.drop('timestamp', axis=1, inplace=True)
# The test frame is not expected to carry the target column, so the
# unconditional log1p raised a KeyError. Transform it only when it is present.
if 'meter_reading' in test.columns:
    test['meter_reading'] = np.log1p(test['meter_reading'])

In [None]:
# Build the inference loader from the trained learner's DataLoaders so that the
# preprocessing state fitted on the training data (category codes, fill values,
# normalisation statistics) is applied to `test`. The previous code fitted
# fresh procs on the test set and referenced an undefined name (`to_test`).
# `test` stays a plain DataFrame.
# NOTE(review): `test` must contain every column named in `cat_vars` and
# `cont_vars`, including the building_* aggregates -- confirm they are added
# to the test frame before this cell runs.
test_dl = learn.dls.test_dl(test, bs=128)

In [None]:
# Predict on the test loader; the second return value (targets) is unused.
preds, _ = learn.get_preds(dl=test_dl)
# Undo the log1p transform applied to the training target. The model output
# has a trailing singleton dimension, so flatten it to 1-D for use as a
# DataFrame column, and clip at 0 because expm1 of a negative prediction
# would give a negative meter reading.
preds = np.clip(np.expm1(preds.numpy().ravel()), 0, None)

In [None]:
# Empty frame with the two columns written to the submission file.
submission = pd.DataFrame(columns=['row_id', 'meter_reading'])

In [None]:
# Preview the first rows of the test data.
test.head()

In [None]:
# Identify each prediction by the test row's own `row_id`. The previous code
# copied `building_id` here, which is not unique per row.
submission['row_id'] = test['row_id']

In [None]:
# Store the back-transformed predictions as the submitted meter readings.
submission['meter_reading'] = preds

In [None]:
# Preview the first rows of the submission.
submission.head()

In [None]:
# Write the submission to submission.csv without the index column.
submission.to_csv('submission.csv', index=False)