In [1]:
import numpy as np
import torch

In [4]:
## Load the hourly bike-sharing CSV as float32 and wrap it in a tensor
bikes_path = "../practice_data/bike-sharing-dataset/hour-fixed.csv"
bikes_numpy = np.loadtxt(
    bikes_path,
    delimiter=",",
    skiprows=1,  ## skip the first row (presumably the CSV header)
    dtype=np.float32,
    ## Column 1 is not numeric as stored: keep characters 8-9 of the field
    ## (the day of month, presumably from a YYYY-MM-DD date) as a float
    converters={1: lambda date_field: float(date_field[8:10])},
)
bikes = torch.from_numpy(bikes_numpy)
## Show the tensor together with its shape and stride
bikes, bikes.shape, bikes.stride()

(tensor([[1.0000e+00, 1.0000e+00, 1.0000e+00,  ..., 3.0000e+00, 1.3000e+01,
          1.6000e+01],
         [2.0000e+00, 1.0000e+00, 1.0000e+00,  ..., 8.0000e+00, 3.2000e+01,
          4.0000e+01],
         [3.0000e+00, 1.0000e+00, 1.0000e+00,  ..., 5.0000e+00, 2.7000e+01,
          3.2000e+01],
         ...,
         [1.7377e+04, 3.1000e+01, 1.0000e+00,  ..., 7.0000e+00, 8.3000e+01,
          9.0000e+01],
         [1.7378e+04, 3.1000e+01, 1.0000e+00,  ..., 1.3000e+01, 4.8000e+01,
          6.1000e+01],
         [1.7379e+04, 3.1000e+01, 1.0000e+00,  ..., 1.2000e+01, 3.7000e+01,
          4.9000e+01]]),
 torch.Size([17520, 17]),
 (17, 1))

For every hour, the dataset reports the following variables:

0) Index of record: instant
1) Day of month: day
2) Season: season (1: spring, 2: summer, 3: fall, 4: winter)
3) Year: yr (0: 2011, 1: 2012)
4) Month: mnth (1 to 12)
5) Hour: hr (0 to 23)
6) Holiday status: holiday
7) Day of the week: weekday
8) Working day status: workingday
9) Weather situation: weathersit (1: clear, 2: mist, 3: light rain/snow, 4: heavy rain/snow)
10) Temperature in °C: temp
11) Perceived temperature in °C: atemp
12) Humidity: hum
13) Wind speed: windspeed
14) Number of casual users: casual
15) Number of registered users: registered
16) Count of rental bikes: cnt

In [19]:
## Reshape the hourly rows into blocks of 24 consecutive rows,
## keeping every column: result is (n_blocks, 24, n_columns)
daily_bikes = bikes.view(-1, 24, bikes.size(1))
daily_bikes.shape, daily_bikes.stride()

(torch.Size([730, 24, 17]), (408, 17, 1))

In [20]:
## `bikes.view(-1, 24, n_columns)` is arranged as Day X Hour X Column,
## but we want it to be Day X Column X Hour.
## The transposed tensor is rebuilt from `bikes` (rather than transposing
## `daily_bikes` onto itself) so this cell is idempotent: re-running it
## cannot swap the two axes back again.
daily_bikes = bikes.view(-1, 24, bikes.shape[1]).transpose(1, 2)
daily_bikes.shape, daily_bikes.stride()

(torch.Size([730, 17, 24]), (408, 1, 17))

# Data Prep
## One-hot the weather condition
Here we're just practicing by doing this for the first day in the data

In [26]:
## First 24 hourly rows, cast to integers so they can be used as indices
first_day = bikes[:24].long()
## One row per hour of that day, one column per weather situation (4 of them)
weather_onehot = torch.zeros(first_day.size(0), 4)
## Weather situation codes (column 9) for the first day
first_day[:, 9]

tensor([1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 2, 2, 2, 2])

In [27]:
## Weather codes start at 1, so subtract 1 to get 0-based column indices,
## then write 1.0 into that column of each row (in place).
## `[:, 9:10]` keeps the column dimension, giving an index of shape (24, 1).
weather_onehot.scatter_(1, first_day[:, 9:10].long() - 1, 1.0)

tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.]])

In [28]:
## Append the one-hot weather columns to the first day's rows
## and show only the first resulting row
torch.cat([bikes[:24], weather_onehot], dim=1)[:1]

tensor([[ 1.0000,  1.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,  6.0000,
          0.0000,  1.0000,  0.2400,  0.2879,  0.8100,  0.0000,  3.0000, 13.0000,
         16.0000,  1.0000,  0.0000,  0.0000,  0.0000]])

## Onehot on the whole dataset

In [29]:
## Zero tensor matching daily_bikes on the first and last dimensions,
## with 4 entries (one per weather situation) on the middle dimension
daily_weather_onehot = torch.zeros(daily_bikes.size(0), 4, daily_bikes.size(2))

In [31]:
## Sanity-check the shape of the freshly allocated one-hot tensor
daily_weather_onehot.size()

torch.Size([730, 4, 24])

In [32]:
## Weather situation codes (entry 9 on dimension 1) across the whole dataset
daily_bikes[:, 9]

tensor([[1., 1., 1.,  ..., 2., 2., 2.],
        [2., 2., 2.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.],
        ...,
        [2., 2., 2.,  ..., 2., 2., 2.],
        [2., 2., 2.,  ..., 1., 1., 1.],
        [1., 1., 1.,  ..., 1., 1., 1.]])

In [33]:
## Scatter the weather state into the onehot tensor.
## This must be the in-place `scatter_`: the out-of-place `scatter` returns a
## new tensor, and because that result was discarded `daily_weather_onehot`
## stayed all zeros.
## Weather codes start at 1, so subtract 1 to get 0-based indices along dim 1.
daily_weather_onehot.scatter_(
    dim=1,
    index=daily_bikes[:, 9, :].unsqueeze(1).long() - 1,
    value=1.0
)
daily_weather_onehot.shape

torch.Size([730, 4, 24])

In [34]:
## Concat the one-hot weather channels onto dimension 1.
## Only the first `bikes.shape[1]` entries of `daily_bikes` along that
## dimension (the original columns) are kept on the left-hand side, so
## re-running this cell does not append the one-hot channels a second time.
daily_bikes = torch.cat(
    (daily_bikes[:, :bikes.shape[1], :], daily_weather_onehot),
    dim=1,
)
daily_bikes.shape

torch.Size([730, 21, 24])

## Rescaling Temps
Here are two ways to rescale the temps:

* Map range to \[0.0, 1.0\]
* Normalize around zero (if our data is normally distributed, then ~68\% of the values will lie in the interval \[-1, 1\])

In [35]:
## Rescale entry 10 on dimension 1 linearly so that its minimum maps to 0
## and its maximum maps to 1
temp = daily_bikes[:, 10, :]
temp_min = temp.min()
temp_max = temp.max()
new_temp = (temp - temp_min) / (temp_max - temp_min)

In [36]:
## Standardize entry 10 on dimension 1: subtract its mean and divide by its
## standard deviation
temp = daily_bikes[:, 10, :]
new_temp = (temp - temp.mean()) / temp.std()