In [1]:
import os
import time

import numpy as np
import torch
from torch import nn
from torch.nn.utils.rnn import *
from torch.utils.data import Dataset, DataLoader

In [2]:
# Zero-based positions of the two columns extracted from each CSV line.
no2 = 6
uvpm = 14

uvpm_col = []
no2_col = []
with open('airquality17-20.csv', 'r') as csvfile:
    # Stream the file instead of materialising every line with readlines().
    # Every non-blank line is collected, including the first one (presumably
    # the header row, which is discarded downstream).
    for line in csvfile:
        if not line.strip():
            # A blank line (e.g. at end of file) has no columns to index.
            continue
        array = line.split(',')
        # Use the named column index rather than repeating the literal 14.
        new_str = array[uvpm].rstrip("\n")
        new_no2 = array[no2]
        if new_str == '':
            # Missing values in this column are recorded as zero.
            new_str = '0'
        uvpm_col.append(new_str)
        no2_col.append(new_no2)

In [3]:
# Convert the raw string columns to numbers, skipping the first entry of each
# list (the first line of the CSV). Slicing instead of `del` leaves the source
# lists untouched, so re-running this cell gives the same result rather than
# dropping one more data row on every execution.
int_uvpm_col = [int(item) for item in uvpm_col[1:]]
float_no2_col = [float(item) for item in no2_col[1:]]

In [4]:
in_data = 120    # number of consecutive readings in each input window
out_data = 10    # number of readings that follow each window and form its label
dev_size = 300   # number of most recent windows held out for validation

# Slide a window over the NO2 series: each sample is `in_data` readings and
# its label is the `out_data` readings immediately after them.
feat = []
label = []
for i in range(len(float_no2_col) - in_data - out_data + 1):
    feat.append(float_no2_col[i:i + in_data])
    label.append(float_no2_col[i + in_data:i + in_data + out_data])

total_len = len(feat)

# Chronological split. Slice ends are exclusive, so no "-1" adjustment is
# needed: the previous bounds skipped one window between the two sets and
# dropped the last window, leaving only 299 validation samples.
split_idx = max(total_len - dev_size, 0)
train_feature = feat[:split_idx]
train_label = label[:split_idx]
dev_feature = feat[split_idx:]
dev_label = label[split_idx:]

# Most recent `in_data` readings, including the very last observation.
test_feature = float_no2_col[-in_data:]

In [5]:
class No2Dataset(Dataset):
    """Map-style dataset pairing each input window with its label window.

    Args:
        feature: Indexable collection of input windows (sequences of numbers).
        label: Indexable collection of label windows, aligned with ``feature``.
    """

    def __init__(self, feature, label):
        self.labels = label
        self.feature = feature

    def __len__(self):
        """Return the number of samples (one per feature window)."""
        return len(self.feature)

    def __getitem__(self, index):
        """Return ``(x, y)`` for ``index`` as float32 tensors.

        Tensors are returned (rather than the stored Python lists) so that the
        default DataLoader collate function stacks samples into
        ``(batch, window)`` tensors; plain lists of floats would be collated
        position by position into a list of tensors that has no ``.to()``.
        """
        x = torch.as_tensor(self.feature[index], dtype=torch.float32)
        y = torch.as_tensor(self.labels[index], dtype=torch.float32)
        return x, y

In [15]:
class lstm_model(nn.Module):
    """Two-layer bidirectional LSTM followed by two linear layers.

    Args:
        in_day: Size of each input vector (the LSTM ``input_size``).
        out_day: Size of the vector produced for each input.
        hidden_size: LSTM hidden size per direction; the linear layers work on
            ``hidden_size * 2`` features because the LSTM is bidirectional.
    """

    def __init__(self, in_day, out_day, hidden_size):
        super(lstm_model, self).__init__()
        self.lstm = nn.LSTM(input_size=in_day, hidden_size=hidden_size,
                            num_layers=2, dropout=0.2, bidirectional=True)
        self.linear = nn.Linear(hidden_size * 2, hidden_size * 2)
        self.dropout = nn.Dropout(0.2)
        self.output = nn.Linear(hidden_size * 2, out_day)

    def forward(self, x):
        """Run the network on ``x``.

        Args:
            x: Either a ``(batch, in_day)`` tensor or a
               ``(seq_len, batch, in_day)`` tensor.

        Returns:
            A tensor with the same leading dimensions as ``x`` and a last
            dimension of size ``out_day``.
        """
        # A 2-D tensor would be read by nn.LSTM as ONE unbatched sequence,
        # letting the samples of a batch influence each other. Present it as a
        # batch of length-1 sequences so every sample is processed on its own.
        is_flat_batch = x.dim() == 2
        if is_flat_batch:
            x = x.unsqueeze(0)
        out = self.lstm(x)[0]
        out = self.linear(out)
        out = self.dropout(out)
        out = self.output(out)
        if is_flat_batch:
            out = out.squeeze(0)
        return out


In [7]:
def train(model, train_loader, dev_loader, num_epoch):
    """Train ``model`` for ``num_epoch`` epochs, validating after each epoch.

    Uses the notebook-level globals ``device``, ``criterion``, ``optimizer``
    and ``scheduler``, and calls ``test_predict`` for validation.

    Args:
        model: Module to optimise; it is put into training mode.
        train_loader: Iterable yielding ``(feature, labels)`` tensor batches.
        dev_loader: Iterable of validation batches passed to ``test_predict``.
        num_epoch: Number of passes over ``train_loader``.

    Returns:
        ``(loss_val, acc_val)``: per-epoch lists of the validation loss and
        validation accuracy reported by ``test_predict``.
    """
    model.train()
    loss_val = []
    acc_val = []
    for epoch in range(num_epoch):
        print("Starting Epoch ", epoch+1)
        avg_loss = 0.0
        before = time.time()
        for batch_num, (feature, labels) in enumerate(train_loader):
            feature, labels = feature.to(device), labels.to(device)
            out = model(feature)
            loss = criterion(out, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            avg_loss += loss.item()

            # Report the running mean every 50 batches, then reset it.
            if batch_num % 50 == 49:
                print("Epoch: {}\tBatch: {}\tAvg-Loss: {:.4f}"
                      .format(epoch+1, batch_num+1, avg_loss/50))
                avg_loss = 0.0

        after = time.time()
        total = after - before
        # Wall-clock duration of the epoch formatted as HH:MM:SS.
        print(time.strftime("%H:%M:%S", time.gmtime(total)))
        val_loss, val_acc = test_predict(model, dev_loader)
        loss_val.append(val_loss)
        acc_val.append(val_acc)
        print('Val Loss: {:.4f}\tVal Accuracy: {:.4f}'.format(val_loss, val_acc))
        # The learning-rate schedule advances once per epoch.
        scheduler.step()
    return loss_val, acc_val
        
def test_predict(model, data_loader):
    with torch.no_grad():
        model.eval()
        avg_loss = []
        accuracy = []
        print("Starting Dev Evaluation")
        for batch_num, (feature, labels) in enumerate(data_loader):
            feature, labels = feature.to(device), labels.to(device)
            out = model(feature)
            loss = criterion
            correct = 0
            avg_loss/append(loss.item())
            for i in range(len(out)):
                if out[i] == labels[i]:
                    correct +=1
            correct = correct/len(out)
            accuracy.append(correct)
        model.train()
        return np.mean(avg_loss), np.mean(accuracy)

In [10]:
def real_predict(model, feature):
    """Run ``model`` on ``feature`` in eval mode, without tracking gradients.

    Args:
        model: Module used for inference; it is left in eval mode.
        feature: Tensor, or anything ``torch.as_tensor`` accepts (e.g. a list
            of floats). A 1-D input is treated as a single sample: a batch
            dimension is added for the forward pass and removed afterwards.

    Returns:
        The model output as a tensor that does not require grad.
    """
    model.eval()
    # Match the dtype/device of the model's parameters when it has any.
    param = next(model.parameters(), None)
    if param is None:
        feature = torch.as_tensor(feature, dtype=torch.float32)
    else:
        feature = torch.as_tensor(feature, dtype=param.dtype, device=param.device)

    single_sample = feature.dim() == 1
    if single_sample:
        feature = feature.unsqueeze(0)
    with torch.no_grad():
        out = model(feature)
    return out.squeeze(0) if single_sample else out

def train_predict(model, dataloader):
    """Collect the model's predictions and the labels for every batch.

    Uses the notebook-level global ``device``.

    Args:
        model: Module used for inference; it is left in eval mode.
        dataloader: Iterable yielding ``(feature, labels)`` tensor batches.

    Returns:
        ``(train_out, train_label)``: two lists with one tensor per batch,
        the model outputs and the matching labels, in iteration order.
    """
    model.eval()
    train_out = []
    train_label = []
    # Inference only: without no_grad every stored output would keep its
    # autograd graph alive for the whole pass.
    with torch.no_grad():
        for feature, labels in dataloader:
            feature, labels = feature.to(device), labels.to(device)
            out = model(feature)
            train_out.append(out)
            train_label.append(labels)

    return train_out, train_label

In [9]:
# One batch size shared by both loaders.
batch_size = 32

# Training batches are reshuffled every epoch.
train_dataset = No2Dataset(train_feature, train_label)
train_dataloader = DataLoader(dataset=train_dataset,
                              batch_size=batch_size,
                              shuffle=True)

# Validation only averages metrics over batches, so shuffling adds nothing;
# a fixed order keeps evaluation deterministic.
dev_dataset = No2Dataset(dev_feature, dev_label)
dev_dataloader = DataLoader(dataset=dev_dataset,
                            batch_size=batch_size,
                            shuffle=False)

In [16]:
# Hyper-parameters and run configuration.
hidden_size = 256
num_epoch = 30
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed before building the model so weight initialisation is reproducible.
torch.manual_seed(11785)

# Move the model to its device before the optimizer is created from its
# parameters.
model = lstm_model(in_data, out_data, hidden_size).to(device)

# The labels are windows of continuous NO2 readings, i.e. a regression target,
# so a squared-error loss is used; cross-entropy is a classification loss.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=5e-5)
# Multiply the learning rate by 0.9 every 4 scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=4, gamma=0.9)

In [None]:
# Train for the configured number of epochs (`num_epoch`) instead of repeating
# the literal; `loss_acc` receives the per-epoch validation accuracy list.
loss_val, loss_acc = train(model, train_dataloader, dev_dataloader, num_epoch)

# Collect per-batch predictions and labels over the training data.
train_out, train_labels = train_predict(model, train_dataloader)