In [1]:
from collections import OrderedDict
import numpy as np
import pandas as pd
import torch
import datetime
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
import visdom

In [2]:
def read_dust(path):
    """Load one measurement CSV into a DataFrame sorted by timestamp.

    The first CSV column becomes the index and must contain timestamps
    formatted as ``YYYY-MM-DD HH:MM:SS``. All other columns are returned
    unchanged.

    Args:
        path: Location of the UTF-8 encoded CSV file.

    Returns:
        pandas.DataFrame whose index holds the parsed timestamps in
        ascending order.

    Raises:
        ValueError: If an index entry does not match the timestamp format.
    """
    dust = pd.read_csv(path, engine='python', index_col=0, encoding='utf-8')
    # Parse the whole index in one vectorised call instead of one
    # datetime.strptime call per row.
    dust.index = pd.to_datetime(dust.index, format='%Y-%m-%d %H:%M:%S')
    return dust.sort_index()

In [3]:
class DustDataset():
    """One measurement CSV exposed as a sequence of 6x6 spatial grids.

    Every row of the CSV (one timestamp) is turned into a 6x6 array in which
    each district column is written to the grid cell given by
    ``_get_coordinates``. Cells that no district maps to stay at zero.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, fpath, mean=0, std=1, wind_direction=False):
        """Read ``fpath``, normalise the values and build the grid array.

        Args:
            fpath: Path of the CSV file, read through ``read_dust``.
            mean: Value subtracted from every reading.
            std: Value every reading is divided by after subtracting ``mean``.
            wind_direction: When true, the (already normalised) values are
                additionally divided by 360.
                NOTE(review): presumably converts degrees to a fraction of a
                full turn -- confirm against the data.
        """
        dust = read_dust(fpath)
        dust = (dust - mean) / std
        if wind_direction:
            dust = dust / 360
        # Number of timestamps (rows) in the file.
        self.size = dust.shape[0]
        # Array of shape (size, 6, 6); indexed as map[time, x, y].
        self.map = self._to_grid(dust, self._get_coordinates())

    @staticmethod
    def _to_grid(dust, coordinates):
        """Scatter the district columns of ``dust`` into a (rows, 6, 6) array."""
        grid = np.zeros((dust.shape[0], 6, 6))
        # One vectorised column assignment per district replaces the original
        # row-by-row loop of scalar pandas lookups.
        for guname, (x, y) in coordinates.items():
            grid[:, x, y] = dust[guname].values
        return grid

    def __getitem__(self, index):
        """Return the grid (or grids, for a slice) stored at ``index``."""
        return self.map[index]

    def __len__(self):
        """Return the number of timestamps in the dataset."""
        return self.size

    def _get_coordinates(self):
        """Return the mapping from district column name to (x, y) grid cell."""
        return {'은평구':(0, 1), '강북구': (0, 3), '도봉구': (0, 4),
                '서대문구': (1, 1), '종로구': (1, 2), '성북구': (1, 3), '노원구': (1, 4),
                '강서구': (2, 0), '마포구': (2, 1), '중구': (2, 2), '동대문구': (2, 3), '중랑구': (2, 4),
                '양천구': (3,0), '영등포구': (3, 1), '용산구': (3, 2), '성동구': (3, 3), '광진구': (3, 4), '강동구': (3, 5),
                '구로구': (4, 0), '동작구': (4, 1), '서초구': (4, 2), '강남구': (4, 3), '송파구': (4, 4),
                '금천구': (5, 0), '관악구': (5, 1)}

In [4]:
class OneHotEncoding():
    """Lookup between categorical factors and their one-hot vectors."""

    def __init__(self, factors):
        """Assign each factor the matching row of an identity matrix.

        Args:
            factors: Sequence of hashable category values; its order fixes
                which position is set in each one-hot vector.
        """
        identity = np.eye(len(factors))
        self.factors = factors
        self.lookup = dict(zip(factors, identity))

    def encode(self, factor):
        """Return the one-hot vector for ``factor`` (KeyError if unknown)."""
        return self.lookup[factor]

    def decode(self, index):
        """Return the factor stored at position ``index``."""
        return self.factors[index]

    def __call__(self, factor):
        """Shorthand for ``encode(factor)``."""
        return self.encode(factor)

In [5]:
class DustTimeDataset(Dataset):
    """Sliding-window samples built from ten gridded measurement series.

    Each sample pairs ``time_step`` consecutive 10-channel 6x6 grids (input)
    with the next three rows of raw, un-normalised values from ``PM25.csv``
    (target).
    """

    def __init__(self, dirpath, time_step):
        """Load every series from ``dirpath`` and cut it into windows.

        Args:
            dirpath: Directory holding the ten CSV files read below.
            time_step: Number of consecutive rows in each input window.

        Raises:
            ValueError: If the series do not all have the same number of rows.
        """
        # The numeric arguments are the mean / std passed to DustDataset for
        # normalisation (presumably precomputed on the training data -- TODO confirm).
        co = DustDataset(dirpath + '/CO.csv', 0.52937789296025439, 0.24188712719322902)
        no2 = DustDataset(dirpath + '/NO2.csv',0.031987616627473126, 0.016389330127089471)
        o3 = DustDataset(dirpath + '/O3.csv', 0.023747050797629739, 0.018816394358766475)
        so2 = DustDataset(dirpath + '/SO2.csv', 0.0051391399997366815, 0.001860628414223503)
        pm10 = DustDataset(dirpath + '/PM10.csv', 48.091213787881287, 37.271392635341527)
        pm25 = DustDataset(dirpath + '/PM25.csv', 25.568989121078925, 15.9262427894759099)

        temperature = DustDataset(dirpath + '/기온.csv', 12.687561455578845, 10.657978816868261)
        wind_speed = DustDataset(dirpath + '/풍속.csv', 1.6514838451122911, 1.1074905606923966)
        wind_direction = DustDataset(dirpath + '/풍향.csv', wind_direction=True)
        precipitation = DustDataset(dirpath + '/강수량.csv')

        # Targets are the raw readings of PM25.csv (the original code kept
        # them in a variable misleadingly named PM10_y).
        target = read_dust(dirpath + '/PM25.csv').values

        # The channel order is part of the model input contract; it matches
        # the original zip(...) order (wind direction before wind speed).
        channels = [co, no2, o3, so2, pm10, pm25,
                    temperature, wind_direction, wind_speed, precipitation]

        lengths = {len(channel) for channel in channels}
        lengths.add(len(target))
        if len(lengths) != 1:
            raise ValueError(
                'all series in %r must have the same number of rows, got %s'
                % (dirpath, sorted(lengths)))

        # Stack to (rows, 10, 6, 6) in numpy first; building a tensor from
        # nested Python lists of arrays is far slower.
        features = np.stack([channel.map for channel in channels], axis=1).astype(np.float32)
        X, y = self._make_windows(features, target.astype(np.float32), time_step)
        self.X = torch.from_numpy(X)
        self.y = torch.from_numpy(y)

    @staticmethod
    def _make_windows(features, targets, time_step, horizon=3):
        """Cut aligned series into (input window, following target rows) pairs.

        Sample ``i`` uses ``features[i:i + time_step]`` as input and
        ``targets[i + time_step:i + time_step + horizon]`` as target. Every
        start index whose target fits inside the series is used, including
        the last one (the original loop stopped one window early).
        """
        n_samples = len(features) - time_step - horizon + 1
        if n_samples <= 0:
            # Series too short for even one sample: return empty, well-shaped arrays.
            empty_x = np.empty((0, time_step) + features.shape[1:], dtype=features.dtype)
            empty_y = np.empty((0, horizon) + targets.shape[1:], dtype=targets.dtype)
            return empty_x, empty_y
        X = np.stack([features[i: i + time_step] for i in range(n_samples)])
        y = np.stack([targets[i + time_step: i + time_step + horizon]
                      for i in range(n_samples)])
        return X, y

    def __getitem__(self, index):
        """Return the ``(input, target)`` tensors at ``index`` (int or slice)."""
        return self.X[index], self.y[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)
        

In [47]:
class CRNN(nn.Module):
    """Convolutional encoder plus LSTM encoder/decoder for multi-step forecasts.

    Each time step of the input is a 10-channel 6x6 grid. Two parallel
    convolution branches extract 32 feature maps of size 5x5 each; their
    concatenation (64 * 5 * 5 = 1600 values) is projected to 256 features.
    A two-layer LSTM encodes the resulting sequence, and a second two-layer
    LSTM is unrolled ``horizon`` times, feeding its own output back in, to
    produce one ``n_outputs``-wide prediction per future step.

    Submodule names and order are unchanged, so existing state dicts created
    with the default arguments still load.
    """

    def __init__(self, dropout_p, horizon=3, n_outputs=25):
        """Build the network.

        Args:
            dropout_p: Dropout probability used in every dropout layer and
                between the stacked LSTM layers.
            horizon: Number of future steps to predict (must be >= 1).
            n_outputs: Number of values predicted per step.
        """
        super(CRNN, self).__init__()
        self.horizon = horizon
        self.n_outputs = n_outputs
        # Branch 1: 1x1 convolution, then 2x2 average pooling (stride 1): 6x6 -> 5x5.
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=32, kernel_size=1),
            nn.AvgPool2d(kernel_size=2, stride=1),
            nn.BatchNorm2d(32),
            nn.Dropout2d(dropout_p),
            nn.ReLU(),
        )
        # Branch 2: 2x2 convolution: 6x6 -> 5x5.
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=32, kernel_size=2),
            nn.BatchNorm2d(32),
            nn.Dropout2d(dropout_p),
            nn.ReLU(),
        )
        # 1600 = (32 + 32) channels * 5 * 5 spatial positions.
        self.conv_fc = nn.Sequential(
            nn.Linear(1600, 256),
            nn.Dropout(dropout_p),
            nn.BatchNorm1d(256)
        )
        self.encoder = nn.LSTM(input_size=256, hidden_size=512, num_layers=2, batch_first=True, dropout=dropout_p)
        self.decoder = nn.LSTM(input_size=512, hidden_size=512, num_layers=2, batch_first=True, dropout=dropout_p)
        self.decoder_fc =  nn.Sequential(
            nn.BatchNorm1d(512),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.Dropout(dropout_p),
            nn.Tanh(),
            nn.Dropout(dropout_p),
            nn.Linear(256, n_outputs)
        )

    def forward(self, x):
        """Predict ``horizon`` future steps.

        Args:
            x: Tensor viewable as ``(batch, time_step, 10, 6, 6)``.

        Returns:
            Tensor of shape ``(batch, horizon, n_outputs)``.
        """
        batch_size = x.size(0)
        time_step = x.size(1)

        # Fold time into the batch axis so the 2-D convolutions see one grid
        # per row; .contiguous() keeps .view valid for non-contiguous input.
        frames = x.contiguous().view(batch_size * time_step, 10, 6, 6)
        features = torch.cat([self.conv1(frames), self.conv2(frames)], dim=1)
        features = features.view(batch_size * time_step, -1)
        features = self.conv_fc(features)
        # Restore the (batch, time, feature) layout expected by the LSTM.
        sequence = features.view(batch_size, time_step, -1)
        en_outputs, en_hidden = self.encoder(sequence)

        # The decoder starts from the encoder's last output and final state,
        # then consumes its own previous output at every step.
        dec_input = en_outputs[:, -1:, :]
        dec_hidden = en_hidden
        steps = []
        for _ in range(self.horizon):
            dec_input, dec_hidden = self.decoder(dec_input, dec_hidden)
            steps.append(self.decoder_fc(dec_input[:, 0, :]))

        return torch.stack(steps, dim=1)


In [39]:
def train(model, loss_func, optimizer, x_val, y_val):
    """Perform one optimisation step on a single mini-batch.

    Puts ``model`` into training mode, clears stale gradients, evaluates
    ``loss_func(model(x_val), y_val)``, back-propagates and applies one
    ``optimizer`` update. Nothing is returned.

    Args:
        model: Module to train; updated in place.
        loss_func: Callable taking ``(prediction, target)`` and returning a
            scalar loss.
        optimizer: Optimizer bound to the model's parameters.
        x_val: Input batch tensor.
        y_val: Target batch tensor.
    """
    model.train()
    optimizer.zero_grad()
    inputs = Variable(x_val, requires_grad=False)
    targets = Variable(y_val, requires_grad=False)
    loss = loss_func(model(inputs), targets)
    loss.backward()
    optimizer.step()

    
def predict(model, x_val):
    """Run ``model`` on ``x_val`` in evaluation mode and return its output.

    Side effect: the model is left in evaluation mode afterwards.

    NOTE(review): gradient tracking is not disabled here, so the returned
    output still carries autograd history; consider disabling it for large
    inputs once the target torch version is settled.
    """
    model.eval()
    return model(Variable(x_val, requires_grad=False))


def cal_loss(model, x_val, y_val):
    """Return the mean absolute error of ``model`` on ``(x_val, y_val)``.

    The prediction is obtained through ``predict``, so the model is left in
    evaluation mode. The loss is returned as a numpy value, not a tensor.
    """
    prediction = predict(model, x_val)
    target = Variable(y_val, requires_grad=False)
    mae = nn.L1Loss()(prediction, target)

    return mae.data.numpy()

In [8]:
# Build the sliding-window datasets (6 time steps per input window) for both splits.
trainset, testset = (
    DustTimeDataset(dirpath, 6) for dirpath in ('cleaning/train', 'cleaning/test')
)
# Full-split tensors, used to evaluate the loss on a whole split at once.
test_x, test_y = testset[:]
train_x, train_y = trainset[:]
# Shuffled mini-batches of 64 samples for optimisation.
trainloader = DataLoader(trainset, shuffle=True, batch_size=64)

In [51]:
# Fresh, randomly initialised network with 30% dropout.
model = CRNN(dropout_p=0.3)

In [52]:
# Mean absolute error is the training objective.
loss_func = nn.L1Loss()
# Adam over every model parameter.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# visdom client used for live loss curves (created with default connection settings).
vis = visdom.Visdom()

In [53]:
# Train for up to 1000 epochs; after each epoch, report the loss on the full
# train and test tensors and append both values to a visdom line chart.
plot = None  # visdom window handle, created the first time a point is plotted
plot_opts = {'title': 'PM25 Loss', 'legend': ['Train', 'Test'], 'showlegend': True}

for epoch in range(1000):
    model.train()
    # The original wrapped the loader in enumerate(..., 19); the index was
    # never used, so iterate over the batches directly.
    for x_val, y_val in trainloader:
        train(model, loss_func, optimizer, x_val, y_val)
    train_loss = cal_loss(model, train_x, train_y)
    test_loss = cal_loss(model, test_x, test_y)
    print(epoch,'-->' , train_loss, '----', test_loss)

    # One row, two columns: (train, test) at this epoch.
    losses = np.column_stack([train_loss, test_loss])
    epochs = np.column_stack([np.array([epoch]), np.array([epoch])])
    if plot is None:
        # Create the window on the first epoch so that epoch 0 is charted too
        # (previously the window was only created at epoch 1).
        plot = vis.line(Y=losses, X=epochs, opts=plot_opts)
    else:
        vis.line(Y=losses, X=epochs, win=plot, update='append', opts=plot_opts)

0 --> [ 6.42845535] ---- [ 5.67367268]
1 --> [ 6.71884155] ---- [ 5.67863607]
2 --> [ 6.4099617] ---- [ 5.35420942]
3 --> [ 6.06690168] ---- [ 5.38351345]
4 --> [ 6.31955624] ---- [ 5.79871941]
5 --> [ 5.95702696] ---- [ 5.1631794]
6 --> [ 5.71670055] ---- [ 4.96498346]
7 --> [ 5.90452766] ---- [ 4.99609661]
8 --> [ 5.54376888] ---- [ 4.84924698]
9 --> [ 5.79538393] ---- [ 4.9923377]
10 --> [ 5.66962528] ---- [ 5.07766819]
11 --> [ 5.70103741] ---- [ 5.19158602]
12 --> [ 5.89184856] ---- [ 5.10682154]
13 --> [ 5.69320059] ---- [ 4.92745018]
14 --> [ 5.40298223] ---- [ 4.8810401]
15 --> [ 5.73870039] ---- [ 5.0484004]
16 --> [ 5.97947741] ---- [ 5.1197629]
17 --> [ 5.68575668] ---- [ 4.90151262]
18 --> [ 5.63725233] ---- [ 5.16423082]
19 --> [ 5.68477869] ---- [ 4.99548244]
20 --> [ 5.70133638] ---- [ 5.08656836]
21 --> [ 5.18540716] ---- [ 4.71195412]
22 --> [ 5.39765024] ---- [ 4.92133474]
23 --> [ 5.28618097] ---- [ 4.93843031]
24 --> [ 5.15306234] ---- [ 4.80236864]
25 --> [ 5.44324

KeyboardInterrupt: 

In [54]:
# Switch the network to evaluation mode before running ad-hoc predictions below.
model.eval()

CRNN(
  (conv1): Sequential(
    (0): Conv2d(10, 32, kernel_size=(1, 1), stride=(1, 1))
    (1): AvgPool2d(kernel_size=2, stride=1, padding=0, ceil_mode=False, count_include_pad=True)
    (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
    (3): Dropout2d(p=0.3)
    (4): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(10, 32, kernel_size=(2, 2), stride=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
    (2): Dropout2d(p=0.3)
    (3): ReLU()
  )
  (conv_fc): Sequential(
    (0): Linear(in_features=1600, out_features=256, bias=True)
    (1): Dropout(p=0.3)
    (2): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True)
  )
  (encoder): LSTM(256, 512, num_layers=2, batch_first=True, dropout=0.3)
  (decoder): LSTM(512, 512, num_layers=2, batch_first=True, dropout=0.3)
  (decoder_fc): Sequential(
    (0): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True)
    (1): Linear(in_features=512, out_features=256, bias=True)
    (2): BatchNorm1d(256, eps=1e-05,

In [None]:
# Persist the trained weights (state dict only, not the module object).
# NOTE(review): the file is named 'PM10.pth' although the training targets are
# read from PM25.csv and the loss plot is titled 'PM25 Loss' -- confirm the
# intended name before relying on this checkpoint.
torch.save(model.state_dict(), 'PM10.pth')

In [55]:
# Six consecutive test samples (indices 494-499) for manual inspection.
X1, y1 = testset[494:500]

In [None]:
# Display the selected input windows.
X1

In [56]:
# A single test sample, kept as a batch of one via slicing.
X, y = testset[500:501]

In [57]:
# Forecast for the single sample; the model was put in evaluation mode above.
model(Variable(X))

Variable containing:
(0 ,.,.) = 

Columns 0 to 7 
   25.4287  24.8850  25.9930  26.6306  29.7057  27.1062  26.6059  25.7182
  21.1275  21.8374  22.5278  24.7755  25.3776  22.8495  23.4946  21.1049
  18.2484  19.9194  20.4425  23.7843  22.8845  20.6252  21.5660  19.8476

Columns 8 to 15 
   27.8664  25.5589  25.4126  24.7435  28.1241  24.5069  21.5266  24.0851
  25.1824  22.7495  21.9669  21.4053  24.9207  22.1168  18.2256  21.1182
  22.6855  20.0330  19.0478  19.5813  22.6365  19.2255  16.7404  18.9305

Columns 16 to 23 
   25.5820  22.7282  24.6752  27.6817  26.5141  24.1286  27.1976  25.7305
  22.5074  20.3119  22.0734  24.0266  22.9113  22.1684  22.3638  23.0272
  19.9850  19.2328  20.2393  21.0882  20.4650  20.1311  19.4243  20.7416

Columns 24 to 24 
   28.3497
  24.6638
  20.7866
[torch.FloatTensor of size 1x3x25]

In [58]:
# Ground-truth targets for the same sample, for comparison with the forecast.
y


(0 ,.,.) = 

Columns 0 to 7 
   29.0000  25.0000  25.0000  24.0000  29.0000  27.0000   8.0000  23.0000
  27.0000  22.0000  26.0000  27.0000  25.0000  23.0000  11.0000  23.0000
  17.0000  21.0000  25.0000  23.0000  31.0000  24.0000  10.0000  23.0000

Columns 8 to 15 
   29.0000  32.0000  24.0000  25.0000  26.0000  29.0000  24.0000  23.0000
  29.0000  32.0000  21.0000  24.0000  26.0000  27.0000  17.0000  22.0000
  28.0000  26.0000  23.0000  23.0000  25.0000  28.0000  22.0000  24.0000

Columns 16 to 23 
   14.8000  29.0000  26.0000  22.0000  25.0000  24.0000  26.0000  23.0000
  14.7000  23.0000  25.0000  19.0000  26.0000  23.0000  24.0000  23.0000
  14.6000  22.0000  25.0000  20.0000  23.0000  23.0000  23.0000  21.0000

Columns 24 to 24 
   32.0000
  34.0000
  36.0000
[torch.FloatTensor of size 1x3x25]