In [3]:
# 2维数据转换为3维

import numpy as np
import torch


In [10]:
# Load the hourly bike-sharing CSV as float32, skipping the single header row.
# Column 1 holds a date string; only characters 8-9 of it (the day of the month)
# are kept, so that the column becomes numeric.
bikes_numpy = np.loadtxt(
    '../data/p1ch4/bike-sharing-dataset/hour-fixed.csv',
    delimiter=',',
    skiprows=1,
    dtype=np.float32,
    converters={1: lambda x: float(x[8:10])},
)
bikes = torch.from_numpy(bikes_numpy)
bikes.shape

torch.Size([17520, 17])

In [6]:
# Display the loaded tensor (the printed repr is abbreviated for a tensor of this size).
bikes

tensor([[1.0000e+00, 1.0000e+00, 1.0000e+00,  ..., 3.0000e+00, 1.3000e+01,
         1.6000e+01],
        [2.0000e+00, 1.0000e+00, 1.0000e+00,  ..., 8.0000e+00, 3.2000e+01,
         4.0000e+01],
        [3.0000e+00, 1.0000e+00, 1.0000e+00,  ..., 5.0000e+00, 2.7000e+01,
         3.2000e+01],
        ...,
        [1.7377e+04, 3.1000e+01, 1.0000e+00,  ..., 7.0000e+00, 8.3000e+01,
         9.0000e+01],
        [1.7378e+04, 3.1000e+01, 1.0000e+00,  ..., 1.3000e+01, 4.8000e+01,
         6.1000e+01],
        [1.7379e+04, 3.1000e+01, 1.0000e+00,  ..., 1.2000e+01, 3.7000e+01,
         4.9000e+01]])

数据共有17列：
0. Index of record: instant
1. Day of month: day
2. Season: season (1: spring, 2: summer, 3: fall, 4: winter)
3. Year: yr (0: 2011, 1: 2012)
4. Month: mnth (1 to 12)
5. Hour: hr (0 to 23)
6. Holiday status: holiday
7. Day of the week: weekday
8. Working day status: workingday
9. Weather situation: weathersit (1: clear, 2:mist, 3: light rain/snow, 4: heavy rain/snow)
10. Temperature in °C: temp
11. Perceived temperature in °C: atemp
12. Humidity: hum
13. Wind speed: windspeed
14. Number of casual users: casual
15. Number of registered users: registered
16. Count of rental bikes: cnt

In [11]:
# Goal: reshape the data to N x C x L,
# where C is the 17 feature channels and L is the 24 hours of a day.
# Start by inspecting the layout of the flat 2-D tensor.
bikes.size(), bikes.stride()

(torch.Size([17520, 17]), (17, 1))

In [12]:
# Split the hourly rows into (day, hour, feature); -1 lets view() infer the
# number of days from the 17520 rows.
daily_bikes = bikes.view(-1, 24, bikes.size(1))
# view() returns a new tensor, but the underlying storage is unchanged.
daily_bikes.size(), daily_bikes.stride()


(torch.Size([730, 24, 17]), (408, 17, 1))

In [13]:
# Swap the hour and feature axes so the layout becomes (day, feature, hour).
# Note: daily_bikes is rebound here, so running this cell a second time
# swaps the axes back again.
daily_bikes = torch.transpose(daily_bikes, 1, 2)
daily_bikes.size(), daily_bikes.stride()

(torch.Size([730, 17, 24]), (408, 1, 17))

In [14]:
# Take the weather situation and treat it as a categorical variable,
# using the first 24 rows (the first day) as a worked example.
first_day = bikes[:24].long()
# One row per hour, one column per weather level (4 levels).
weather_onehot = torch.zeros(first_day.size(0), 4)
# Column 9 of bikes is the weather situation.
first_day[:, 9]

tensor([1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 2, 2, 2, 2])

In [15]:
# Map each hour's weather level onto weather_onehot.
# Levels run 1-4 while column indices run 0-3, so subtract 1 to line them up.
weather_index = first_day[:, 9].unsqueeze(1).long() - 1
weather_onehot.scatter_(dim=1, index=weather_index, value=1.0)


tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 1., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.],
        [0., 1., 0., 0.]])

In [16]:
# Use cat to join the one-hot matrix onto the original data along the column
# axis, with the first day as the example. Only the first row is displayed.
torch.cat([bikes[:24], weather_onehot], dim=1)[:1]

tensor([[ 1.0000,  1.0000,  1.0000,  0.0000,  1.0000,  0.0000,  0.0000,  6.0000,
          0.0000,  1.0000,  0.2400,  0.2879,  0.8100,  0.0000,  3.0000, 13.0000,
         16.0000,  1.0000,  0.0000,  0.0000,  0.0000]])

In [19]:

# Repeat the one-hot encoding for the whole dataset.
# One block per day, shaped (days, 4 weather levels, hours).
daily_weather_onehot = torch.zeros(daily_bikes.shape[0], 4, daily_bikes.shape[2])

# Convert every weather value to one-hot. Channel 9 holds the weather level
# (1-4); subtracting 1 gives the channel index (0-3) to set.
# scatter_ (with the trailing underscore) writes into daily_weather_onehot in
# place. The out-of-place scatter() returns a new tensor instead, so calling it
# without keeping the result leaves daily_weather_onehot filled with zeros.
# Run this before channel 9 is rescaled to 0-1 further down; after that the
# values no longer convert to the indices 0-3.
daily_weather_onehot.scatter_(dim=1,
                              index=daily_bikes[:, 9, :].long().unsqueeze(1) - 1,
                              value=1.0)

daily_weather_onehot.shape


torch.Size([730, 4, 24])

In [20]:
# Append the 4 one-hot weather channels after the existing channels (dim 1).
# Caution: daily_bikes is rebound here, so every re-run of this cell adds
# another 4 channels.
daily_bikes = torch.cat([daily_bikes, daily_weather_onehot], dim=1)

In [21]:
# Alternatively, treat the weather level as a continuous variable by mapping
# the levels 1-4 linearly onto the range 0-1.
weather_level = daily_bikes[:, 9, :]
daily_bikes[:, 9, :] = (weather_level - 1.0) / 3.0

In [22]:
# Rescale the temperature channel (index 10) onto the range 0-1 using its
# minimum and maximum.
temp = daily_bikes[:, 10, :]
temp_min = temp.min()
temp_max = temp.max()
daily_bikes[:, 10, :] = (temp - temp_min) / (temp_max - temp_min)

In [23]:
# Alternatively, subtract the mean and divide by the standard deviation.
# temp is the slice of channel 10 taken in the min-max cell, so it must already
# be defined when this cell runs.
daily_bikes[:, 10, :] = (daily_bikes[:, 10, :] - temp.mean()) / temp.std()