In [47]:
from glob import glob
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from sklearn import preprocessing

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [5]:
class LSTM(nn.Module):
  """LSTM encoder followed by a two-layer fully connected head.

  The final hidden state of the top LSTM layer goes through
  ReLU -> Linear(hidden_size, 128) -> ReLU -> Linear(128, num_classes).

  Args:
    num_classes: Length of the output vector produced for each sequence.
    num_layers: Number of stacked LSTM layers.
    input_size: Number of features per time step.
    hidden_size: Width of the LSTM hidden state.
    device: Kept as an attribute for backward compatibility. The forward
      pass no longer relies on it; it follows the device of its input.
  """

  def __init__(self, num_classes, num_layers, input_size, hidden_size, device):
    super().__init__()
    # Attribute names are kept exactly as before (including `num_layer`) so
    # that models pickled with the previous definition still load and run.
    self.num_classes = num_classes
    self.num_layer = num_layers
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.device = device

    self.lstm = nn.LSTM(
      input_size=input_size,
      hidden_size=hidden_size,
      num_layers=num_layers,
      batch_first=True,
      # nn.LSTM applies dropout only between stacked layers; with a single
      # layer a non-zero value has no effect and just triggers a warning.
      dropout=0.2 if num_layers > 1 else 0.0)
    self.fc_1 = nn.Linear(hidden_size, 128)
    self.fc_2 = nn.Linear(128, num_classes)
    self.relu = nn.ReLU(inplace=True)

  def forward(self, x):
    """Map a batch of sequences to `(batch, num_classes)` outputs.

    Args:
      x: Tensor laid out as (batch, seq_len, input_size), as required by
        `batch_first=True`.

    Returns:
      Tensor of shape (batch, num_classes).
    """
    # Allocate the initial states on the input's device/dtype rather than on
    # `self.device`: the latter is frozen at construction time (and pickled),
    # so a model trained on GPU and loaded on CPU would otherwise crash here.
    h_0 = torch.zeros(
      self.num_layer, x.size(0), self.hidden_size,
      device=x.device, dtype=x.dtype)
    c_0 = torch.zeros_like(h_0)

    output, (hn, cn) = self.lstm(x, (h_0, c_0))
    # hn is (num_layers, batch, hidden). Take the top layer only; flattening
    # with view(-1, hidden) would fold the layer axis into the batch axis and
    # return num_layers * batch rows whenever num_layers > 1.
    out = self.relu(hn[-1])
    out = self.fc_1(out)
    out = self.relu(out)
    out = self.fc_2(out)
    return out

In [56]:
class Predictor:
  """Builds model inputs from the test CSV files and runs the saved LSTM.

  Usage: construct, call `setup()` to read `./dataset/TEST_INPUT/*` and the
  matching `./dataset/TEST_AWS/<name>.csv` files and to load
  `./lstm_v1.pth`, then call `predict()` to obtain the filled answer frame.

  Attributes:
    near_aws: Maps each PM station name to three (AWS station, weight) pairs.
    device: CUDA device when available, otherwise CPU.
    df_list: Per-station feature frames, populated by `setup()`.
    model: Model object loaded by `setup()`.
  """

  # Weather columns blended from the AWS files, in the order they are added.
  WEATHER_COLUMNS = ("기온(°C)", "풍향(deg)", "풍속(m/s)", "강수량(mm)", "습도(%)")

  # Sliding-window layout used by predict(); the values are unchanged from
  # the previously inlined literals 64, 24*5 and 24*2.
  # NOTE(review): presumably rows are hourly, i.e. a window starts every 5
  # days and its first 2 days are fed to the model -- confirm against the data.
  NUM_WINDOWS = 64
  WINDOW_STRIDE = 24 * 5
  INPUT_LENGTH = 24 * 2

  def __init__(self):
    # Weights derived from the inverse of predefined distances.
    self.near_aws = {
      "아름동": [('세종고운', 0.5959398623781422), ('세종금남', 0.20935575535139192), ('세종연서', 0.19470438227046594)],
      "신흥동":  [('세종연서', 0.623771911782061), ('세종고운', 0.23437985962111915), ('세종전의', 0.14184822859681984)],
      "노은동": [('계룡', 0.37215720734940244), ('세종금남', 0.31442215528240247), ('오월드', 0.313420637368195)],
      "문창동": [('오월드', 0.43792606175001325), ('세천', 0.36840926563842336), ('장동', 0.19366467261156328)],
      "읍내동": [('장동', 0.4600092574298802), ('세천', 0.29439745524012173), ('오월드', 0.2455932873299979)],
      "정림동": [('오월드', 0.6611945451443386), ('계룡', 0.17462776824421944), ('세천', 0.16417768661144194)],
      "공주": [('공주', 0.595096232672347), ('정안', 0.21560028832884157), ('세종금남', 0.18930347899881153)],
      "논산": [('논산', 0.8106894830885594), ('계룡', 0.10161496038210217), ('양화', 0.0876955565293385)],
      "대천2동": [('대천항', 0.5364383656912503), ('청양', 0.24341489240638425), ('춘장대', 0.22014674190236547)],
      "독곶리": [('대산', 0.805639758960593), ('안도', 0.10337714448943511), ('당진', 0.090983096549972)],
      "동문동": [('태안', 0.42383826452678564), ('당진', 0.3223343628472369), ('홍북', 0.2538273726259775)],
      "모종동": [('아산', 0.3665049233697947), ('성거', 0.33423205479638257), ('예산', 0.29926302183382286)],
      "신방동": [('성거', 0.4233387535096642), ('세종전의', 0.37873043302897264), ('아산', 0.19793081346136326)],
      "예산군": [('예산', 0.5326095761484639), ('유구', 0.24550618666774907), ('아산', 0.221884237183787)],
      "이원면": [('태안', 0.38998768925871385), ('안도', 0.3455141387600907), ('대산', 0.2644981719811954)],
      "홍성읍": [('홍북', 0.7537093709927019), ('홍성죽도', 0.12481964368426245), ('예산', 0.1214709853230357)],
      "성성동": [('성거', 0.6985976183654422), ('세종전의', 0.17642014446434248), ('아산', 0.12498223717021528)]
    }
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  def setup(self):
    """Read every test input file, attach blended weather features, load the model.

    For each file in `./dataset/TEST_INPUT/` the station name is taken from
    the file name, the three AWS files listed in `near_aws` are combined as a
    weighted sum per weather column, and wind direction/speed are replaced by
    the components `Wx`/`Wy`. The resulting frames are stored in `df_list`.

    Raises:
      KeyError: If a file name does not correspond to a key of `near_aws`.
    """
    self.df_list = []
    for path in sorted(glob("./dataset/TEST_INPUT/*")):
      pm_df = pd.read_csv(path).drop(columns=["연도", "일시", "측정소"])

      # Path.stem is separator-agnostic. Splitting on "\\" only worked with
      # Windows-style paths and produced a bogus key (-> KeyError) elsewhere.
      station = Path(path).stem
      if station not in self.near_aws:
        raise KeyError(f"no AWS weights defined for station {station!r} (from {path!r})")

      # Weighted sum of the neighbouring AWS stations, column by column.
      blended = {col: np.zeros(len(pm_df)) for col in self.WEATHER_COLUMNS}
      for name, weight in self.near_aws[station]:
        aws_df = pd.read_csv(f"./dataset/TEST_AWS/{name}.csv")
        for col in self.WEATHER_COLUMNS:
          blended[col] += np.array(aws_df[col]) * weight
      for col in self.WEATHER_COLUMNS:
        pm_df[col] = blended[col]

      # NOTE(review): the factor 359 suggests the direction column is stored
      # scaled to [0, 1] and is being mapped back to degrees -- confirm.
      wind_dir_rad = (pm_df["풍향(deg)"] * 359).values * np.pi / 180
      wind_speed = pm_df["풍속(m/s)"].values
      pm_df["Wx"] = wind_speed * np.cos(wind_dir_rad)
      pm_df["Wy"] = wind_speed * np.sin(wind_dir_rad)
      pm_df = pm_df.drop(columns=["풍향(deg)", "풍속(m/s)"])

      self.df_list.append(pm_df)

    # Load the model.
    # NOTE(review): torch.load unpickles arbitrary objects -- only point this
    # at a trusted file. Recent PyTorch releases default to weights_only=True,
    # which rejects fully pickled modules; pass weights_only=False there if
    # the checkpoint is trusted.
    self.model = torch.load("./lstm_v1.pth", map_location=self.device)

  def predict(self):
    """Run the model over every window of every station frame.

    Returns:
      The frame read from `./dataset/answer_sample.csv` with its "PM2.5"
      column replaced by the concatenated, flattened model outputs (stations
      in `df_list` order, windows in ascending order).
    """
    df_result = pd.read_csv("./dataset/answer_sample.csv")

    # Inference only: switch off dropout and skip autograd bookkeeping.
    self.model.eval()
    chunks = []
    with torch.no_grad():
      for df in self.df_list:
        for i in range(self.NUM_WINDOWS):
          start = i * self.WINDOW_STRIDE
          window = df.iloc[start:start + self.INPUT_LENGTH].values
          X = torch.tensor(window).unsqueeze(0).float().to(self.device)
          chunks.append(self.model(X).detach().cpu().numpy().ravel())

    # One concatenate instead of repeated np.append (which recopies the whole
    # array each time); float64 matches what np.append used to produce.
    outputs = np.concatenate(chunks).astype(np.float64) if chunks else np.array([])
    df_result["PM2.5"] = outputs
    return df_result

In [57]:
# Build the predictor, then run setup(), which reads the test CSV files and
# loads the saved model from ./lstm_v1.pth.
predictor = Predictor()
predictor.setup()

In [58]:
# Run inference; predict() returns the answer-sample frame with its "PM2.5"
# column filled with the model outputs.
output = predictor.predict()

In [60]:
# Write the predictions to lstm_v1.csv without the DataFrame index column.
output.to_csv("lstm_v1.csv", index=False)