In [None]:
from datetime import datetime, date

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Constants
# Directory holding the competition input files (train.csv is read from here).
INPUT_DATA_PATH = "/kaggle/input/ashrae-energy-prediction"

# Presumably the first and last hourly timestamps covered by the training data
# and its sampling interval — TODO confirm against train.csv.
MIN_TRAIN_TIMESTAMP = pd.Timestamp("2016-01-01 00:00:00")
MAX_TRAIN_TIMESTAMP = pd.Timestamp("2016-12-31 23:00:00")
TRAIN_DATA_RESOLUTION = "1h"

## Load train data

In [None]:
# Read the training CSV. `header=0` combined with `names` discards the file's
# own header row and applies the column names listed below instead.
# `timestamp` is parsed into datetimes and `meter_reading` is forced to float.
train_df = pd.read_csv(
    f"{INPUT_DATA_PATH}/train.csv",
    header=0,
    names=["building_id", "meter_id", "timestamp", "meter_reading"],
    parse_dates=["timestamp"],
    dtype={"meter_reading": float}
)

In [None]:
def kbtu_to_kwh(df: pd.DataFrame) -> pd.DataFrame:
    """Rescale the meter-0 readings of building 0 from kBTU to kWh.

    The frame is modified in place and also returned, so the result can be
    re-assigned by the caller.

    Args:
        df: Frame with ``building_id``, ``meter_id`` and ``meter_reading``
            columns.

    Returns:
        The same ``df`` object, with the selected readings multiplied by
        0.2931 (the kBTU -> kWh factor).
    """
    # NOTE(review): only building_id 0 is converted; confirm whether other
    # buildings report in kBTU as well and should be included here.
    needs_conversion = df["building_id"].eq(0) & df["meter_id"].eq(0)
    df.loc[needs_conversion, "meter_reading"] *= 0.2931
    return df


def get_meter_id_dummies(df: pd.DataFrame, meter_ids=None) -> pd.DataFrame:
    """Append one-hot ``meter_id_<value>`` columns for the ``meter_id`` column.

    By default the generated columns are exactly the meter ids that occur in
    ``df``, so two frames containing different meter ids end up with different
    feature columns. Pass ``meter_ids`` to pin the column set explicitly.

    Args:
        df: Frame with a ``meter_id`` column. It is not modified.
        meter_ids: Optional iterable of meter ids that fixes which indicator
            columns are produced and in which order. Ids that never occur in
            ``df`` get an all-zero column; ids that occur in ``df`` but are
            not listed get no column. The ids must format the same way as the
            values of ``df["meter_id"]`` (e.g. ``0``, not ``0.0``). ``None``
            (the default) keeps the data-driven column set.

    Returns:
        A new frame made of ``df`` followed by the float indicator columns.
    """
    meter_dummies = pd.get_dummies(df["meter_id"], dtype=float, prefix="meter_id")
    if meter_ids is not None:
        # Align to the requested columns so the feature layout does not depend
        # on which meters happen to be present in this particular frame.
        expected_columns = [f"meter_id_{meter_id}" for meter_id in meter_ids]
        meter_dummies = meter_dummies.reindex(columns=expected_columns, fill_value=0.0)
    return pd.concat([df, meter_dummies], axis=1)


def cyclic_features(df: pd.DataFrame, feature: str, period: int) -> pd.DataFrame:
    """Add sine/cosine encodings of a periodic column.

    Writes ``<feature>_sin`` and ``<feature>_cos`` into ``df`` (in place) so
    that values one full ``period`` apart map to the same point on the unit
    circle.

    Args:
        df: Frame containing the numeric column ``feature``.
        feature: Name of the column to encode.
        period: Length of one full cycle, in the units of ``feature``.

    Returns:
        The same ``df`` object with the two new columns.
    """
    angle = 2 * np.pi * df[feature] / period
    df[f"{feature}_sin"] = np.sin(angle)
    df[f"{feature}_cos"] = np.cos(angle)
    return df


def temporal_features(df: pd.DataFrame) -> pd.DataFrame:
    """Derive calendar features from the ``timestamp`` column.

    Adds ``hour``, ``day_of_week`` and ``month`` together with their
    sine/cosine encodings (via ``cyclic_features``), plus an ``is_weekend``
    flag that is 1.0 when the weekday number is 5 or 6 and 0.0 otherwise.

    Args:
        df: Frame with a datetime ``timestamp`` column.

    Returns:
        The frame returned by the last ``cyclic_features`` call, with the
        ``is_weekend`` column added.
    """
    stamps = df["timestamp"].dt
    weekday = stamps.weekday

    # (column name, values, cycle length) for every periodic calendar feature.
    periodic_parts = (
        ("hour", stamps.hour, 24),
        ("day_of_week", weekday, 7),
        ("month", stamps.month, 12),
    )
    for name, values, period in periodic_parts:
        df[name] = values
        df = cyclic_features(df, name, period)

    df["is_weekend"] = (weekday >= 5).astype(float)
    return df


def target_transform(df: pd.DataFrame) -> pd.DataFrame:
    """Add the ``log_reading`` column, ``log(1 + meter_reading)``.

    The values are computed on a plain NumPy array and written back
    positionally; ``df`` is modified in place.

    Args:
        df: Frame with a numeric ``meter_reading`` column.

    Returns:
        The same ``df`` object with the new ``log_reading`` column.
    """
    readings = df["meter_reading"].to_numpy()
    df["log_reading"] = np.log1p(readings)
    return df

In [None]:
# Run the preprocessing steps in order: unit fix, meter one-hot columns,
# calendar features, log target.
# Not idempotent: re-running this cell on the already-processed frame rescales
# the readings again and appends a second set of meter_id_* columns, so reload
# train_df before re-running.
train_df = (
    train_df
    .pipe(kbtu_to_kwh)
    .pipe(get_meter_id_dummies)
    .pipe(temporal_features)
    .pipe(target_transform)
)

In [None]:
# Preview the first rows to check that the engineered feature columns exist.
train_df.head()

## Submission