# Load Dataset

In [None]:
import numpy as np
import pandas as pd # Read and analysis dataset
import matplotlib.pyplot as plt # Visualization

The dataset is stored in `../input/beijing-pm25-data-data-set/PRSA_data_2010.1.1-2014.12.31.csv`

In [None]:
# List the files available in the dataset directory (IPython shell escape)
!ls ../input/beijing-pm25-data-data-set

In [None]:
# Load the Beijing PM2.5 CSV into a DataFrame and preview its first rows

csv_path = '../input/beijing-pm25-data-data-set/PRSA_data_2010.1.1-2014.12.31.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Discard records with missing values
# (the test cell that follows expects df.shape == (41757, 13) afterwards)

########################################
#           YOUR CODE HERE             #
########################################

# HINT: use dropna()
# dropna() returns a new DataFrame unless inplace=True is passed, so its
# result has to be assigned back to `df` (or the call made in place).

df.head()

In [None]:
########################################
#                TEST                  #
########################################
# Sanity check for the cleaning step: `df` must hold exactly 41757 rows and
# 13 columns at this point.
assert df.shape == (41757, 13)

In [None]:
# Discard the attributes that are not used as model inputs (single in-place drop)

unused_columns = ['No', 'year', 'month', 'day', 'hour', 'cbwd']
df.drop(columns=unused_columns, inplace=True)

df.head()

In [None]:
# Column 0 of the remaining frame is the target (PM2.5); every column after it
# is a feature (6 in total).

data_y = df.iloc[:, 0].values
data_x = df.iloc[:, 1:].values

# Print both shapes, one per line
print(data_x.shape, data_y.shape, sep='\n')

# Pre-Processing

In [None]:
from sklearn.preprocessing import MinMaxScaler

########################################
#           YOUR CODE HERE             #
########################################

# Scale `data_x` into the range of [0, 1]
# (the result must be stored back in `data_x`; its shape stays (41757, 6))

########################################



########################################
#           YOUR CODE HERE             #
########################################

# Scale `data_y` into the range of [0, 1]
# (the test cell below expects `data_y` to end up with shape (41757, 1),
#  i.e. a 2-D column rather than the 1-D array taken from the DataFrame)

########################################

# Global minimum / maximum over all feature values; the test cell below
# expects these to be exactly 0.0 and 1.0.
print(np.min(data_x), np.max(data_x))

In [None]:
########################################
#                TEST                  #
########################################
# Both arrays must span exactly [0, 1] after scaling ...
assert np.min(data_x) == np.min(data_y) == 0.0
assert np.max(data_x) == np.max(data_y) == 1.0
# ... and keep one row per record: 6 feature columns, 1 target column.
assert data_x.shape == (41757, 6)
assert data_y.shape == (41757, 1)

In [None]:
# Plot the scaled target over the whole record.
# Draw directly on the Axes returned by `plt.subplots`; calling
# `fig.add_subplot()` as well would stack a second Axes on top of it and
# render two overlapping sets of ticks.
fig, ax = plt.subplots(figsize=(12, 4))

ax.plot(data_y)
ax.set_title('Scaled PM2.5 target')
ax.set_xlabel('Sample index')
ax.set_ylabel('PM2.5 (scaled)');

## Parameter Setting

In [None]:
import torch  # needed here for the CUDA availability check below

# Hyper-parameters shared by the data pipeline, the model and the training loop
lookback = 90                 # number of consecutive time steps in one input window
batch_size = 1024             # samples per mini-batch for both data loaders
input_dim = data_x.shape[1]   # one model input per feature column
hidden_dim = 256              # size of the recurrent hidden state
output_dim = 1                # a single regression target
n_layers = 3                  # number of stacked recurrent layers
learning_rate = 1e-3
epochs = 20
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing at the first `.to(device)`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Define lookback period and split inputs/labels
#
# Sample k pairs the `lookback` consecutive rows data_x[k : k + lookback]
# with the target of the row right after that window, data_y[k + lookback].

n_windows = len(data_x) - lookback
inputs = np.zeros((n_windows, lookback, data_x.shape[1]))
for k in range(n_windows):
    inputs[k] = data_x[k:k + lookback, :]

# Slice the targets in one go instead of assigning them element by element:
# once `data_y` is a column of shape (N, 1), storing its length-1 rows into
# scalar slots relies on an array-to-scalar conversion that NumPy deprecated
# in 1.25. The explicit copy keeps `labels` independent of `data_y`.
labels = np.array(data_y[lookback:len(data_x)], dtype=np.float64).reshape(-1, 1)

In [None]:
# Each sample is one segment (window) of the time-series; show both shapes,
# one per line.

print(inputs.shape, labels.shape, sep='\n')

# Define Your Dataset

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader

In [None]:
class TSDataset(Dataset):
    """Map-style dataset pairing time-series windows with their targets.

    `X` and `y` are kept as given (no copy); conversion to float32 tensors
    happens lazily, one sample at a time, in `__getitem__`.
    """

    def __init__(self, X, y):
        """Store the window array `X` and the matching target array `y`."""
        self.X, self.y = X, y

    def __len__(self):
        """Number of samples, taken from the length of `X`."""
        return len(self.X)

    def __getitem__(self, item):
        """Return sample `item` as a `(features, target)` pair of float32 tensors."""
        features = torch.from_numpy(self.X[item].astype(np.float32))
        target = torch.from_numpy(self.y[item].astype(np.float32))
        return features, target

In [None]:
# Training set: the first 70% of the windows, in their original order

train_fraction = 0.7
train_x = inputs[:int(len(inputs) * train_fraction)]
train_y = labels[:int(len(labels) * train_fraction)]

In [None]:
# Evaluation set: the remaining last 30% of the windows

holdout_start_x = int(len(inputs) * 0.7)
holdout_start_y = int(len(labels) * 0.7)
test_x = inputs[holdout_start_x:]
test_y = labels[holdout_start_y:]

In [None]:
# Shuffled mini-batches for training. `drop_last=True` discards the final
# partial batch so that every batch holds exactly `batch_size` samples.
train_dataset = TSDataset(train_x, train_y)
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

In [None]:
# Evaluation batches in chronological order (no shuffling). `drop_last=True`
# discards the final partial batch so that every batch holds exactly
# `batch_size` samples.
test_dataset = TSDataset(test_x, test_y)
test_loader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    drop_last=True,
)

# Define Your Network

In [None]:
import torch.nn as nn

In [None]:
class Net(nn.Module):
    """LSTM-based regressor scaffold for the PM2.5 forecasting exercise.

    The recurrent layer (`self.lstm`), the output computation in `forward`
    and the initial states in `init_hidden` are intentionally left as `...`
    for the student to fill in. The test cell that follows the class calls the
    network with a (128, 90, 6) tensor and expects an output of shape (128, 1)
    plus two state tensors of shape (3, 128, 256).
    """
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, device, drop_prob=0.2):
        """Store the configuration and create the layers.

        Args:
            input_dim: number of input features (the notebook passes the
                number of feature columns).
            hidden_dim: stored on the instance; also the input size of the
                final linear layer.
            output_dim: output size of the final linear layer.
            n_layers: stored on the instance for use by the exercise code.
            device: stored on the instance for use by the exercise code.
            drop_prob: accepted but not used by the provided scaffold code.
        """
        super().__init__()
        
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        self.device = device
        
        ########################################
        #           YOUR CODE HERE             #
        ########################################
        
        # Define the lstm layer
        # NOTE(review): presumably an nn.LSTM built from input_dim, hidden_dim
        # and n_layers that accepts (batch, seq, feature) input - confirm
        # against the intended solution.
        self.lstm = ...
        
        ########################################
        
        # Linear head mapping a hidden vector to the regression output
        self.fc = nn.Linear(hidden_dim, output_dim)
        # Created here but not applied anywhere in the provided scaffold code
        self.relu = nn.ReLU()
        
    def forward(self, x, h):
        """Run the LSTM on `x` starting from state `h`.

        Returns:
            `(out, h)` where `h` is the state returned by the LSTM and `out`
            is the prediction to be computed in the exercise section; the
            test cell expects `out` to have shape (batch, 1).
        """
        out, h = self.lstm(x, h)
        
        ########################################
        #           YOUR CODE HERE             #
        ########################################
        
        # Output
        # NOTE(review): presumably the LSTM output of the last time step is
        # passed through self.fc - confirm against the intended solution.
        out = ...
        
        ########################################
        
        return out, h
    
    def init_hidden(self, batch_size):
        """Return the initial `(h0, c0)` state pair for `batch_size` sequences.

        The test cell calls `init_hidden(128)` and expects the states returned
        by the network to have shape (3, 128, 256) - presumably
        (n_layers, batch_size, hidden_dim); confirm when filling this in.
        """
        ########################################
        #           YOUR CODE HERE             #
        ########################################
        
        # Initialize the hidden vector
        h0 = ...
        c0 = ...
        
        ########################################
        
        return h0, c0

## Quiz 1
**What are the effects of h0 and c0?**

**Answer:**

In [None]:
# Build the model from the hyper-parameters defined earlier, then move its
# parameters to `device`.
net = Net(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
    n_layers=n_layers,
    device=device,
)
net = net.to(device)

In [None]:
########################################
#                TEST                  #
########################################

# The recurrent layer must be exposed as `net.lstm`.
assert hasattr(net, 'lstm')
# Forward a random batch of 128 windows (90 time steps, 6 features each),
# starting from the initial state for 128 sequences.
out_, h_ = net(torch.randn(128, 90, 6).to(device), net.init_hidden(128))
# One prediction per window ...
assert out_.shape == torch.Size([128, 1])
# ... and both returned state tensors shaped (3, 128, 256).
assert h_[0].shape == h_[1].shape == torch.Size([3, 128, 256])

# Training

In [None]:
# Adam over all model parameters, with mean-squared error as the training objective
optimizer = torch.optim.Adam(params=net.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

In [None]:
from tqdm.notebook import tqdm

In [None]:
# Put the module into training mode before optimising
net.train()

print('Start training...')

for epoch in range(epochs):
    # Fresh initial state at the start of every epoch, sized for a full batch
    # (the loader uses drop_last=True, so every batch has `batch_size` samples)
    h0, c0 = net.init_hidden(batch_size)
    losses = []  # per-batch loss values, averaged once the epoch is done
    for x, label in tqdm(train_loader):
        # Clear the gradients accumulated by the previous step
        optimizer.zero_grad()
        
        ########################################
        #           YOUR CODE HERE             #
        ########################################
        # This section must leave a scalar tensor in `loss`; the evaluation
        # cell further down shows the call convention of `net`.
        # NOTE(review): if the state returned by `net` is fed into the next
        # batch, it presumably has to be detached from the previous graph
        # before `loss.backward()` - confirm against the intended solution.
        ...
        ...
        
        ... = net(...)
        loss = ...
        
        ########################################
        
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
    # Mean training loss over the batches of this epoch
    print('Loss: ', np.mean(losses))

# Evaluating

In [None]:
# Switch the module to evaluation mode before running the test batches
net.eval()

# One array per test batch is collected in each list
predictions, truths = [], []

# The state starts from `init_hidden` and is then carried from batch to batch
h0, c0 = net.init_hidden(batch_size)

# No gradients are needed for inference
with torch.no_grad():
    for x, label in tqdm(test_loader):
        x, label = x.to(device), label.to(device)
        out, (h0, c0) = net(x, (h0, c0))

        predictions.append(out.cpu().numpy())
        truths.append(label.cpu().numpy())

# Results

In [None]:
# Compare predictions with the ground truth for up to four randomly chosen
# test batches, one panel per batch.
plt.figure(figsize=(12,8))

# Never ask for more distinct batches than were actually evaluated
n_panels = min(4, len(predictions))
inds = np.random.choice(len(predictions), size=n_panels, replace=False)

for i in range(n_panels):
    pred, truth = predictions[inds[i]], truths[inds[i]]

    ax = plt.subplot(2,2,i+1)
    ax.plot(pred, label='prediction')
    ax.plot(truth, label='truth')
    # Mean absolute error: take |error| per sample first, then average.
    # Averaging the signed errors before taking the absolute value would let
    # over- and under-predictions cancel out and under-report the error.
    mae = np.abs(pred - truth).mean()
    ax.set_title('MAE: {}'.format(mae))
    ax.legend()