## Dataloader

In [4]:
import pandas as pd
import numpy as np
from torch.utils import data
import torch
from datetime import datetime

def check_weekday(date_string):
    """Return 1 when the given date is a Monday-Friday, otherwise 0.

    Args:
        date_string: Date text in day/month/year form, parsed with the
            format ``"%d/%m/%Y"``.

    Returns:
        ``1`` for Monday through Friday, ``0`` for Saturday and Sunday.

    Raises:
        ValueError: If ``date_string`` does not match the expected format.
    """
    parsed = datetime.strptime(date_string, "%d/%m/%Y")
    # weekday() yields 0 for Monday up to 6 for Sunday.
    falls_on_working_day = parsed.weekday() <= 4
    return 1 if falls_on_working_day else 0

class SeoulBikeDataset(data.Dataset):
    """Map-style dataset built from a bike-rental CSV file.

    Every item is a ``(features, label)`` pair of float32 tensors. The feature
    vector holds eight numeric readings (temperature, humidity, wind speed,
    visibility, dew point temperature, solar radiation, rainfall, snowfall)
    followed by one-hot encodings of hour (24 slots), season (4), holiday (2),
    functioning day (2) and the weekday flag (2). The label is the row's
    ``RentedBikeCount`` value.
    """

    def __init__(self, file_dir):
        """Read the CSV at ``file_dir`` and keep one Series per used column.

        Args:
            file_dir: Path to the CSV file.
        """
        super().__init__()

        with open(file_dir, 'r') as csv_file:
            frame = pd.read_csv(csv_file)

        self.data = frame
        self.rented_count = frame['RentedBikeCount']

        self.date = frame['Date']
        self.hour = frame['Hour']
        self.temperature = frame['Temperature']
        self.humidity = frame['Humidity']
        self.wind_speed = frame['Windspeed']
        self.visibility = frame['Visibility']
        self.dew_point_temperature = frame['DewPointTemperature']
        self.solar_radiation = frame['SolarRadiation']
        self.rainfall = frame['Rainfall']
        self.snowfall = frame['Snowfall']
        self.season = frame['Seasons']
        self.holiday = frame['Holiday']
        self.functioning_day = frame['FunctioningDay']
        # 1 for Monday-Friday, 0 for the weekend, derived from the date text.
        self.is_weekday = self.date.apply(check_weekday)

        # The categorical text columns are replaced by the numeric codes that
        # the project-local lookup tables assign to them.
        from utils import SEASON2NUM, HOLIDAY2NUM, FUNCTIONINGDAY2NUM
        self.season = self.season.map(SEASON2NUM)
        self.holiday = self.holiday.map(HOLIDAY2NUM)
        self.functioning_day = self.functioning_day.map(FUNCTIONINGDAY2NUM)

    def __len__(self):
        """Return the number of rows in the loaded table."""
        return len(self.data)

    def __getitem__(self, index):
        """Build the feature vector and label for the row labelled ``index``.

        Args:
            index: Row label used to look up each column Series.

        Returns:
            Tuple ``(features, label)``: a 1-D float32 tensor of the numeric
            readings followed by the one-hot groups, and a 0-D float32 tensor
            holding the rented bike count.
        """
        categorical_layout = (
            (24, self.hour),
            (4, self.season),
            (2, self.holiday),
            (2, self.functioning_day),
            (2, self.is_weekday),
        )
        one_hot_parts = []
        for width, column in categorical_layout:
            encoded = np.zeros(width)
            encoded[column[index]] = 1
            one_hot_parts.append(torch.tensor(encoded, dtype=torch.float32))

        numeric_columns = (
            self.temperature,
            self.humidity,
            self.wind_speed,
            self.visibility,
            self.dew_point_temperature,
            self.solar_radiation,
            self.rainfall,
            self.snowfall,
        )
        numeric_part = torch.tensor(
            [float(column[index]) for column in numeric_columns],
            dtype=torch.float32,
        )

        features = torch.cat([numeric_part, *one_hot_parts])
        label = torch.tensor(self.rented_count[index], dtype=torch.float32)

        return features, label

## Model

In [5]:
import torch
import torch.nn as nn
from torch.nn import Linear
import torch.nn.functional as F

class NeuralNetwork(nn.Module):
    """Fully connected network with batch normalisation and dropout.

    The forward pass is: input batch norm -> ``first_linear`` -> ReLU ->
    dropout -> ``nlayers - 1`` hidden stages (linear, batch norm, ReLU,
    dropout) -> ``last_linear`` -> sigmoid, producing one value per sample.

    Args:
        nfeat: Number of input features per sample.
        nhid: Width of every hidden layer.
        nlayers: Number of hidden ``nn.Linear`` modules to allocate. Only the
            first ``nlayers - 1`` of them are used by ``forward``; the final
            one is kept so the parameter layout stays unchanged.
        dropout: Dropout probability.
        alpha: Accepted for interface compatibility; not used by this class.
        training: Initial train/eval mode of the module and its submodules.
    """

    def __init__(self, nfeat, nhid, nlayers, dropout, alpha, training):
        super().__init__()
        self.node_feature_dim = nfeat
        self.hidden_dim = nhid
        self.n_layers = nlayers
        self.drop = dropout
        self.batch_size = 256

        self.first_linear = nn.Linear(self.node_feature_dim, self.hidden_dim)
        self.linears = nn.ModuleList(
            nn.Linear(self.hidden_dim, self.hidden_dim)
            for _ in range(self.n_layers)
        )
        self.last_linear = nn.Linear(self.hidden_dim, 1)

        self.norm_features = nn.BatchNorm1d(self.node_feature_dim)
        # A single BatchNorm1d instance is shared by every hidden stage.
        self.norm = nn.BatchNorm1d(self.hidden_dim)

        self._reset_parameters()

        # Go through nn.Module.train() rather than assigning self.training
        # directly, so the BatchNorm submodules follow the requested mode too.
        self.train(bool(training))

    def _reset_parameters(self):
        """Apply Xavier-uniform initialisation to every weight matrix.

        Parameters with one dimension or fewer (biases, batch-norm affine
        terms) keep their default initialisation.
        """
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, x):
        """Compute the network output for a batch.

        Args:
            x: Input batch; it is passed to ``BatchNorm1d(nfeat)`` first.

        Returns:
            Tensor with a trailing dimension of size 1 holding the sigmoid of
            the final linear layer.
        """
        x = self.norm_features(x)
        x = F.relu(self.first_linear(x))
        x = F.dropout(x, p=self.drop, training=self.training)

        # Hidden stages. The last entry of self.linears is intentionally not
        # applied, matching the original layer count for nlayers >= 1.
        for i in range(self.n_layers - 1):
            x = self.linears[i](x)
            x = self.norm(x)
            x = F.relu(x)
            x = F.dropout(x, p=self.drop, training=self.training)

        # Always project to a single output, including when nlayers == 0.
        x = self.last_linear(x)

        # NOTE(review): the sigmoid bounds the prediction to (0, 1); this only
        # suits targets scaled into that range - confirm the labels are
        # normalised before training.
        x = torch.sigmoid(x)
        return x
    

## utils

In [6]:
def debug_log(msg, file):
    """Append ``msg`` plus a trailing newline to the file at path ``file``.

    The file is opened in ``'a+'`` mode, so it is created when missing and
    existing content is preserved.

    Args:
        msg: Text of the log line, without the newline.
        file: Path of the log file.
    """
    with open(file, 'a+') as log_handle:
        log_handle.writelines([msg + '\n'])

## Training

In [None]:
import torch
import torch.nn.functional as F
import torch.optim as optim
import yaml
from torch.utils import data
from torch.utils.data import DataLoader
from sklearn.metrics import r2_score

def train(train_params, batch_size):
    """Run one training epoch over ``train_dataset`` and log averaged metrics.

    Uses the module-level ``model``, ``optimizer`` and ``train_dataset``.
    Loss, MAE, RMSE and R^2 are computed per batch and averaged over the
    number of batches.

    Args:
        train_params: Configuration mapping; ``train_params['log_file']`` is
            the path the summary line is appended to.
        batch_size: Number of samples per training batch.
    """
    model.train()
    # Follow whatever device the model lives on instead of assuming CUDA.
    device = next(model.parameters()).device

    train_loss = 0
    train_mae = 0
    train_rmse = 0
    accuracy_train = 0

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    for inputs, target in train_loader:
        inputs = inputs.to(device)
        target = target.to(device)

        optimizer.zero_grad()

        # The model emits (batch, 1) while the labels are (batch,). Without
        # this reshape F.mse_loss broadcasts the pair to (batch, batch) and
        # optimises a meaningless quantity.
        output = model(inputs).reshape(target.shape)

        loss = F.mse_loss(output, target)
        loss.backward()
        optimizer.step()

        detached_output = output.detach()
        train_loss += loss.item()
        train_mae += F.l1_loss(detached_output, target).item()
        # RMSE of this batch is the square root of the MSE already computed.
        train_rmse += torch.sqrt(loss.detach()).item()
        # use r2 to calculate accuracy
        accuracy_train += r2_score(target.cpu().numpy(), detached_output.cpu().numpy())

    n_batches = len(train_loader)
    train_loss /= n_batches
    train_mae /= n_batches
    train_rmse /= n_batches
    accuracy_train /= n_batches

    summary = 'Train set: Average loss: {:.4f}, MAE: {:.4f}, RMSE: {:.4f}'.format(
        train_loss, train_mae, train_rmse)
    print("Train accuracy:", accuracy_train)
    print(summary)
    debug_log(summary, train_params['log_file'])

def test(batch_size=1):
    """Evaluate ``model`` on ``test_dataset`` and log MSE, MAE and RMSE.

    Uses the module-level ``model``, ``test_dataset`` and ``train_params``.
    Predictions for the whole test set are gathered first and the metrics are
    computed once over all of them.

    Args:
        batch_size: Number of samples per evaluation batch. Defaults to 1,
            the value previously hard-coded here.
    """
    model.eval()
    device = next(model.parameters()).device

    # Metrics are computed over the full set, so sample order is irrelevant
    # and shuffling is unnecessary.
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Seed with empty tensors so an empty test set still concatenates.
    target_chunks = [torch.empty(0, device=device)]
    output_chunks = [torch.empty(0, device=device)]

    # No gradients are needed for evaluation.
    with torch.no_grad():
        for inputs, target in test_loader:
            inputs = inputs.to(device)
            target = target.to(device)
            # Flatten (batch, 1) predictions to (batch,) so they line up with
            # the labels; otherwise the losses below would broadcast to an
            # (N, N) comparison.
            output = model(inputs).reshape(target.shape)
            target_chunks.append(target)
            output_chunks.append(output)

    targets = torch.cat(target_chunks, 0)
    outputs = torch.cat(output_chunks, 0)

    test_loss = F.mse_loss(outputs, targets).item()
    test_mae = F.l1_loss(outputs, targets).item()
    test_rmse = test_loss ** 0.5
    accuracy_test = r2_score(targets.cpu().numpy(), outputs.cpu().numpy())

    summary = 'Test set: Average loss: {:.4f}, MAE: {:.4f}, RMSE: {:.4f}'.format(
        test_loss, test_mae, test_rmse)
    print("Test accuracy:", accuracy_test)
    print(summary)

    debug_log(summary, train_params['log_file'])


# Read the training configuration. The file is opened in a context manager so
# the handle is closed as soon as parsing finishes.
# NOTE(review): yaml.FullLoader can build more than plain data types; if this
# file could ever come from an untrusted source, switch to yaml.safe_load.
with open('trainparams.yaml', 'r') as params_file:
    train_params = yaml.load(params_file, Loader=yaml.FullLoader)

# load the dataset
dataset = SeoulBikeDataset(train_params['dataset_dir'])
# The training split covers both the train and validation ratios; everything
# left over becomes the test split.
train_size = int(len(dataset) *
    (train_params['train_ratio'] + train_params['val_ratio']))
test_size = len(dataset) - train_size
train_dataset, test_dataset = data.random_split(dataset, [train_size, test_size])

# load the model
model = NeuralNetwork(
    nfeat = dataset[0][0].shape[0],
    nhid = train_params['hidden_units'],
    nlayers = train_params['n_layers'],
    dropout = train_params['dropout'],
    alpha = train_params['alpha'],
    training = True
)

# Move the model to the GPU before building the optimizer, so the optimizer
# is created from the parameters in their final location.
model = model.cuda()
optimizer = optim.SGD(model.parameters(), lr=train_params['lr'], weight_decay=train_params['weight_decay'])

for epoch in range(train_params['epochs']):
    debug_log('Epoch {}'.format(epoch), train_params['log_file'])
    # train the model
    train(train_params, batch_size=train_params['batch_size'])
    # test the model
    test()