In [3]:
import torch
import torch.nn as nn
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## 데이터 다운로드

In [4]:
!wget 'https://bit.ly/3dD5MU9'

import zipfile
with zipfile.ZipFile('3dD5MU9', 'r') as existing_zip:
    existing_zip.extractall('data')

--2021-04-28 13:01:52--  https://bit.ly/3dD5MU9
Resolving bit.ly (bit.ly)... 67.199.248.10, 67.199.248.11
Connecting to bit.ly (bit.ly)|67.199.248.10|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://drive.google.com/uc?export=download&id=1kkF00wW8v0npJ8S2nA7--eMTH3gOL03z [following]
--2021-04-28 13:01:53--  https://drive.google.com/uc?export=download&id=1kkF00wW8v0npJ8S2nA7--eMTH3gOL03z
Resolving drive.google.com (drive.google.com)... 108.177.97.102, 108.177.97.113, 108.177.97.100, ...
Connecting to drive.google.com (drive.google.com)|108.177.97.102|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-14-94-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/3pjcnj71732mln28khujm7mleb821f3q/1619614875000/00192245294648390361/*/1kkF00wW8v0npJ8S2nA7--eMTH3gOL03z?e=download [following]
--2021-04-28 13:01:56--  https://doc-14-94-docs.googleusercontent.com/docs/securesc/

### 예측해야 할 데이터
---
- dangjin_floating : 당진수상태양광 발전량(KW)
- dangjin_warehouse : 당진자재창고태양광 발전량(KW)
- dangjin : 당진태양광 발전량(KW)
- ulsan : 울산태양광 발전량(KW)

In [5]:
# Generation per power plant, and power-plant information.
energy = pd.read_csv('data/energy.csv')
site_info = pd.read_csv('data/site_info.csv')

# Dangjin area: local forecast for the plants, and weather observations
# recorded near them.
dangjin_fcst_data = pd.read_csv('data/dangjin_fcst_data.csv')
dangjin_obs_data = pd.read_csv('data/dangjin_obs_data.csv')

# Ulsan area: local forecast for the plant, and nearby weather observations.
ulsan_fcst_data = pd.read_csv('data/ulsan_fcst_data.csv')
ulsan_obs_data = pd.read_csv('data/ulsan_obs_data.csv')

# Sample submission file shipped with the data.
sample_submission = pd.read_csv('data/sample_submission.csv')

### 결측치 처리

In [6]:
# Impute missing values with each column's mean.
# numeric_only=True restricts the mean to numeric columns; without it pandas 2.x
# raises TypeError as soon as the frame holds a text column (e.g. `time` in
# `energy`). fillna() is used without inplace=True so the cell simply rebinds
# each name and can be re-run safely.
dangjin_obs_data = dangjin_obs_data.fillna(dangjin_obs_data.mean(numeric_only=True))
ulsan_obs_data = ulsan_obs_data.fillna(ulsan_obs_data.mean(numeric_only=True))
energy = energy.fillna(energy.mean(numeric_only=True))

### 울산

In [7]:
# Use the `time` column as the index.
# The guard keeps the cell re-runnable: once `time` is the index it is no longer
# a column, and a second set_index('time') would raise KeyError.
if 'time' in energy.columns:
    energy = energy.set_index('time')

energy.head()

Unnamed: 0_level_0,dangjin_floating,dangjin_warehouse,dangjin,ulsan
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-03-01 1:00:00,0.0,0.0,0,0
2018-03-01 2:00:00,0.0,0.0,0,0
2018-03-01 3:00:00,0.0,0.0,0,0
2018-03-01 4:00:00,0.0,0.0,0,0
2018-03-01 5:00:00,0.0,0.0,0,0


## 모델링

In [8]:
import torch
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [9]:
# hyper parameters
# NOTE(review): none of these names are read by the cells visible in this
# notebook (training below uses `sequence_length`, `epochs` and a literal lr).
seq_length, data_dim, hidden_dim, output_dim = 7, 5, 30, 1
learning_rate, iterations = 0.01, 501

### ulsan

In [10]:
# Display the last rows of the generation table.
energy.tail()

Unnamed: 0_level_0,dangjin_floating,dangjin_warehouse,dangjin,ulsan
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-01-31 20:00:00,0.0,0.0,0,0
2021-01-31 21:00:00,0.0,0.0,0,0
2021-01-31 22:00:00,0.0,0.0,0,0
2021-01-31 23:00:00,0.0,0.0,0,0
2021-01-31 24:00:00,0.0,0.0,0,0


In [120]:
# Pull each plant's generation column out of `energy` as a float NumPy array.
ulsan = energy['ulsan'].values.astype(float)
dangjin_floating = energy['dangjin_floating'].values.astype(float)
dangjin_warehouse = energy['dangjin_warehouse'].values.astype(float)
dangjin = energy['dangjin'].values.astype(float)

# Disabled hold-out split. Kept as real comments: as a bare triple-quoted
# string it was evaluated as the cell's last expression and echoed as output.
# valid_data_size = 700
# train_data = ulsan[:-valid_data_size]
# valid_data = ulsan[-valid_data_size:]

'valid_data_size = 700\ntrain_data = ulsan[:-valid_data_size]\nvalid_data = ulsan[-valid_data_size:]'

In [15]:
from sklearn.preprocessing import MinMaxScaler

# `train_data` is not assigned by any executed cell (the only split appears in
# disabled code), so this cell raised NameError on a fresh kernel. Rebuild the
# same split here: everything except the last `valid_data_size` Ulsan samples.
valid_data_size = 700
train_data = energy['ulsan'].values.astype(float)[:-valid_data_size]

# Scale to [0, 1]; fit_transform expects a 2-D (n_samples, 1) array.
# NOTE(review): `train_data_norm` is not consumed by any later visible cell.
scaler = MinMaxScaler()
train_data_norm = scaler.fit_transform(train_data.reshape(-1, 1))

In [121]:
sequence_length = 96


def make_batch(input_data, sl):
    """Cut a sequence into sliding windows and their one-step-ahead targets.

    Args:
        input_data: Any sliceable sequence supporting ``len()``.
        sl: Window length.

    Returns:
        ``(train_x, train_y)`` — two lists with ``len(input_data) - sl`` entries
        (empty when that is not positive). ``train_x[k]`` is
        ``input_data[k:k + sl]`` and ``train_y[k]`` is the length-1 slice that
        immediately follows that window.
    """
    n_windows = len(input_data) - sl
    train_x = [input_data[start:start + sl] for start in range(n_windows)]
    train_y = [input_data[start + sl:start + sl + 1] for start in range(n_windows)]
    return train_x, train_y

In [102]:
# Use the first CUDA device when torch reports one; otherwise fall back to CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [103]:
class RNN(nn.Module):
    """Stacked LSTM followed by a chain of linear layers.

    ``forward`` takes a batch-first tensor whose last dimension has size
    ``input_vector`` (1) and returns one value per sequence, computed from the
    LSTM output at the final time step.
    """

    def __init__(self):
        super().__init__()
        self.input_vector = 1
        self.sequence_length = 100  # stored only; not read anywhere in this class
        self.output_vector = 100
        self.num_layers = 4

        self.lstm = nn.LSTM(
            input_size=self.input_vector,
            hidden_size=self.output_vector,
            num_layers=self.num_layers,
            batch_first=True,
        )

        # Linear head: 100 -> 50 -> 30 -> 10 -> 1, with no activation in between.
        widths = (self.output_vector, 50, 30, 10, 1)
        self.linear = nn.Sequential(
            *(nn.Linear(n_in, n_out) for n_in, n_out in zip(widths[:-1], widths[1:]))
        )

    def forward(self, x):
        # The (hidden, cell) state returned by the LSTM is not used.
        lstm_out, _state = self.lstm(x)
        last_step = lstm_out[:, -1, :]
        return self.linear(last_step)


In [107]:
# Train an LSTM on the Ulsan series, then roll it forward 24*28 hours.
#
# The network is bound to `ulsan_model` (not `ulsan`) so the NumPy series of
# that name is not shadowed and the cell can be re-run. Training mode and the
# forecast both use this network; the earlier `model` name is defined nowhere.
train_x, train_y = make_batch(ulsan.reshape(-1, 1), sequence_length)

# Stack the windows into one ndarray first: building a tensor directly from a
# Python list of arrays is very slow. Move to the device once, outside the loop.
tensor_x = torch.tensor(np.asarray(train_x), dtype=torch.float32).to(device)
tensor_y = torch.tensor(np.asarray(train_y), dtype=torch.float32).to(device)

ulsan_model = RNN().to(device)

optimizer = torch.optim.Adam(ulsan_model.parameters(), lr=0.001)
criterion = nn.MSELoss()
epochs = 600

ulsan_model.train()
for i in range(epochs):
    # Full-batch step: every window goes through the network at once.
    output = ulsan_model(tensor_x)
    loss = criterion(output, tensor_y.view(-1, 1))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if i % 25 == 0:
        print('Epoch {}, Loss {:.5f}'.format(i, loss.item()))

# Autoregressive forecast. The window starts as the last 24*28 observed values;
# each step drops the oldest value and appends the new prediction, so after
# `forecast_steps` iterations the window holds only predictions.
# NOTE(review): the network is trained on windows of `sequence_length` (96)
# but is fed a 672-step window here — confirm this is intended.
forecast_steps = 24 * 28
history = energy['ulsan'].values[-forecast_steps:].astype(np.float32)
x_input = torch.from_numpy(history).reshape(1, forecast_steps, 1).to(device)

ulsan_model.eval()
with torch.no_grad():  # inference only: no autograd graph needed
    for _ in range(forecast_steps):
        predict = ulsan_model(x_input)  # shape (1, 1)
        x_input = torch.cat((x_input[:, 1:], predict.reshape(1, 1, 1)), dim=1)

ulsan_pred = x_input.reshape(-1).cpu().numpy()

Epoch 0, Loss 439.74878
Epoch 25, Loss 568.05463
Epoch 50, Loss 494.38327
Epoch 75, Loss 462.51663
Epoch 100, Loss 451.09293
Epoch 125, Loss 445.11185
Epoch 150, Loss 441.48483
Epoch 175, Loss 438.47583


In [122]:
# Train an LSTM on the Dangjin series, then roll it forward 24*28 hours.
#
# The network is bound to `dangjin_model` (not `dangjin`) so the NumPy series
# of that name is not shadowed and the cell can be re-run. Training mode and the
# forecast both use this network; the earlier `model` name is defined nowhere.
train_x, train_y = make_batch(dangjin.reshape(-1, 1), sequence_length)

# Stack the windows into one ndarray first: building a tensor directly from a
# Python list of arrays is very slow. Move to the device once, outside the loop.
tensor_x = torch.tensor(np.asarray(train_x), dtype=torch.float32).to(device)
tensor_y = torch.tensor(np.asarray(train_y), dtype=torch.float32).to(device)

dangjin_model = RNN().to(device)

optimizer = torch.optim.Adam(dangjin_model.parameters(), lr=0.001)
criterion = nn.MSELoss()
epochs = 600

dangjin_model.train()
for i in range(epochs):
    # Full-batch step: every window goes through the network at once.
    output = dangjin_model(tensor_x)
    loss = criterion(output, tensor_y.view(-1, 1))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if i % 25 == 0:
        print('Epoch {}, Loss {:.5f}'.format(i, loss.item()))

# Autoregressive forecast. The window starts as the last 24*28 observed values;
# each step drops the oldest value and appends the new prediction, so after
# `forecast_steps` iterations the window holds only predictions.
# NOTE(review): the network is trained on windows of `sequence_length` (96)
# but is fed a 672-step window here — confirm this is intended.
forecast_steps = 24 * 28
history = energy['dangjin'].values[-forecast_steps:].astype(np.float32)
x_input = torch.from_numpy(history).reshape(1, forecast_steps, 1).to(device)

dangjin_model.eval()
with torch.no_grad():  # inference only: no autograd graph needed
    for _ in range(forecast_steps):
        predict = dangjin_model(x_input)  # shape (1, 1)
        x_input = torch.cat((x_input[:, 1:], predict.reshape(1, 1, 1)), dim=1)

dangjin_pred = x_input.reshape(-1).cpu().numpy()

Epoch 0, Loss 68113.13281
Epoch 25, Loss 65633.44531
Epoch 50, Loss 51141.35156
Epoch 75, Loss 48565.36328
Epoch 100, Loss 48563.94922
Epoch 125, Loss 48558.73438
Epoch 150, Loss 48557.18750
Epoch 175, Loss 48557.11328
Epoch 200, Loss 48556.87109
Epoch 225, Loss 48556.27734
Epoch 250, Loss 48551.46484
Epoch 275, Loss 40932.50000
Epoch 300, Loss 3479.84302
Epoch 325, Loss 2774.48047
Epoch 350, Loss 2482.87866
Epoch 375, Loss 2419.37378
Epoch 400, Loss 2288.97998
Epoch 425, Loss 2249.44531
Epoch 450, Loss 2217.58594
Epoch 475, Loss 2196.78784
Epoch 500, Loss 2203.05688
Epoch 525, Loss 2152.76172
Epoch 550, Loss 2175.37866
Epoch 575, Loss 2110.50024


In [125]:
# Train an LSTM on the Dangjin floating series, then roll it forward 24*28 hours.
#
# The network is bound to `dangjin_floating_model` (not `dangjin_floating`) so
# the NumPy series of that name is not shadowed and the cell can be re-run.
# Training mode and the forecast both use this network; the earlier `model`
# name is defined nowhere.
train_x, train_y = make_batch(dangjin_floating.reshape(-1, 1), sequence_length)

# Stack the windows into one ndarray first: building a tensor directly from a
# Python list of arrays is very slow. Move to the device once, outside the loop.
tensor_x = torch.tensor(np.asarray(train_x), dtype=torch.float32).to(device)
tensor_y = torch.tensor(np.asarray(train_y), dtype=torch.float32).to(device)

dangjin_floating_model = RNN().to(device)

optimizer = torch.optim.Adam(dangjin_floating_model.parameters(), lr=0.001)
criterion = nn.MSELoss()
epochs = 600

dangjin_floating_model.train()
for i in range(epochs):
    # Full-batch step: every window goes through the network at once.
    output = dangjin_floating_model(tensor_x)
    loss = criterion(output, tensor_y.view(-1, 1))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if i % 25 == 0:
        print('Epoch {}, Loss {:.5f}'.format(i, loss.item()))

# Autoregressive forecast. The window starts as the last 24*28 observed values;
# each step drops the oldest value and appends the new prediction, so after
# `forecast_steps` iterations the window holds only predictions.
# NOTE(review): the network is trained on windows of `sequence_length` (96)
# but is fed a 672-step window here — confirm this is intended.
forecast_steps = 24 * 28
history = energy['dangjin_floating'].values[-forecast_steps:].astype(np.float32)
x_input = torch.from_numpy(history).reshape(1, forecast_steps, 1).to(device)

dangjin_floating_model.eval()
with torch.no_grad():  # inference only: no autograd graph needed
    for _ in range(forecast_steps):
        predict = dangjin_floating_model(x_input)  # shape (1, 1)
        x_input = torch.cat((x_input[:, 1:], predict.reshape(1, 1, 1)), dim=1)

dangjin_floating_pred = x_input.reshape(-1).cpu().numpy()

In [126]:
# Train an LSTM on the Dangjin warehouse series, then roll it forward 24*28 hours.
#
# The network is bound to `dangjin_warehouse_model` (not `dangjin_warehouse`)
# so the NumPy series of that name is not shadowed and the cell can be re-run.
# Training mode and the forecast both use this network; the earlier `model`
# name is defined nowhere.
train_x, train_y = make_batch(dangjin_warehouse.reshape(-1, 1), sequence_length)

# Stack the windows into one ndarray first: building a tensor directly from a
# Python list of arrays is very slow. Move to the device once, outside the loop.
tensor_x = torch.tensor(np.asarray(train_x), dtype=torch.float32).to(device)
tensor_y = torch.tensor(np.asarray(train_y), dtype=torch.float32).to(device)

dangjin_warehouse_model = RNN().to(device)

optimizer = torch.optim.Adam(dangjin_warehouse_model.parameters(), lr=0.001)
criterion = nn.MSELoss()
epochs = 600

dangjin_warehouse_model.train()
for i in range(epochs):
    # Full-batch step: every window goes through the network at once.
    output = dangjin_warehouse_model(tensor_x)
    loss = criterion(output, tensor_y.view(-1, 1))

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if i % 25 == 0:
        print('Epoch {}, Loss {:.5f}'.format(i, loss.item()))

# Autoregressive forecast. The window starts as the last 24*28 observed values;
# each step drops the oldest value and appends the new prediction, so after
# `forecast_steps` iterations the window holds only predictions.
# NOTE(review): the network is trained on windows of `sequence_length` (96)
# but is fed a 672-step window here — confirm this is intended.
forecast_steps = 24 * 28
history = energy['dangjin_warehouse'].values[-forecast_steps:].astype(np.float32)
x_input = torch.from_numpy(history).reshape(1, forecast_steps, 1).to(device)

dangjin_warehouse_model.eval()
with torch.no_grad():  # inference only: no autograd graph needed
    for _ in range(forecast_steps):
        predict = dangjin_warehouse_model(x_input)  # shape (1, 1)
        x_input = torch.cat((x_input[:, 1:], predict.reshape(1, 1, 1)), dim=1)

dangjin_warehouse_pred = x_input.reshape(-1).cpu().numpy()

In [None]:
# Stray cell cleared: it evaluated only the undefined name `b`, which raises
# NameError and stops a "Restart Kernel -> Run All".

In [112]:
# Load the sample submission file; its rows are overwritten with forecasts below.
submission = pd.read_csv('data/sample_submission.csv')

In [127]:
# Write each forecast into the first 24*28 rows of the submission frame.
# Columns are addressed by position (1..4), in the order listed here.
# NOTE(review): this assumes the CSV's column order matches — verify against
# the header of sample_submission.csv.
horizon = 24*28
plant_forecasts = (
    dangjin_floating_pred,
    dangjin_warehouse_pred,
    dangjin_pred,
    ulsan_pred,
)
for column_position, forecast in enumerate(plant_forecasts, start=1):
    submission.iloc[:horizon, column_position] = forecast

In [129]:
# Save the filled frame as submission.csv, without writing the row index.
submission.to_csv('submission.csv',index=False)