In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive; the project
# folder that the notebook later changes into lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Extra packages installed into the runtime.
# NOTE(review): neither flaml nor catboost is imported in the cells visible
# here, and neither install is version-pinned — confirm they are still needed.
!pip install flaml
!pip install catboost

In [None]:
!pip uninstall tensorflow

In [None]:
# Install the pinned TensorFlow 2.9 release; the version check further down
# prints 2.9.0 for this environment.
!pip install tensorflow==2.9

In [2]:
import random as rn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import glob

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from tqdm.auto import tqdm

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import ModelCheckpoint

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM

import tensorflow as tf

import warnings
warnings.filterwarnings(action='ignore') 

In [3]:
# Work from the project folder so the relative ./train_input, ./train_target,
# ./test_input and ./test_target paths used below resolve.
%cd '/content/drive/MyDrive/Bok_choy_growth/'

/content/drive/MyDrive/Bok_choy_growth


In [4]:
# Show which TensorFlow version the kernel actually imported.
print(tf.__version__)

2.9.0


In [5]:
# reproducibility
def set_seed(seed_num):
  """Seed the random number generators used by this notebook.

  Args:
    seed_num: Integer seed applied to TensorFlow, NumPy and Python's `random`.
  """
  # The Keras layers draw their initial weights from TensorFlow's global
  # generator, so it must be seeded too; seeding only NumPy and `random`
  # leaves training runs non-repeatable.
  tf.random.set_seed(seed_num)
  np.random.seed(seed_num)
  rn.seed(seed_num)
  # PYTHONHASHSEED is read when an interpreter starts, so this assignment only
  # reaches processes launched after this point, not the running kernel.
  os.environ['PYTHONHASHSEED']=str(seed_num)

seed_num = 42
set_seed(seed_num)

In [6]:
from keras import backend as K

# Limit TensorFlow to a single thread for both intra-op and inter-op work,
# wrap that config in a TF1-compat session on the default graph, and hand the
# session to the Keras backend.
session_conf = tf.compat.v1.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1,
)
sess = tf.compat.v1.Session(
    graph=tf.compat.v1.get_default_graph(),
    config=session_conf,
)
K.set_session(sess)

In [7]:
# Gather the per-case CSV paths. Both lists are sorted so that input and
# target files can be paired by position.
all_input_list, all_target_list = (
    sorted(glob.glob(pattern))
    for pattern in ('./train_input/*.csv', './train_target/*.csv')
)

In [8]:
# Split by case: the first 50 files are used for training and every file
# after that is held out for validation.
n_train_cases = 50

train_input_list, val_input_list = (
    all_input_list[:n_train_cases],
    all_input_list[n_train_cases:],
)
train_target_list, val_target_list = (
    all_target_list[:n_train_cases],
    all_target_list[n_train_cases:],
)

In [9]:
# Print the first five paths of each list to check that inputs and targets
# line up case by case.
for paths in (train_input_list, train_target_list):
    print(paths[:5])

['./train_input/CASE_01.csv', './train_input/CASE_02.csv', './train_input/CASE_03.csv', './train_input/CASE_04.csv', './train_input/CASE_05.csv']
['./train_target/CASE_01.csv', './train_target/CASE_02.csv', './train_target/CASE_03.csv', './train_target/CASE_04.csv', './train_target/CASE_05.csv']


In [10]:
# Load the first training input file to inspect its layout.
train_input_sample = pd.read_csv(train_input_list[0])

In [11]:
# Load the matching first training target file.
train_target_sample = pd.read_csv(train_target_list[0])

In [12]:
# Preview the first rows of the sample input frame.
train_input_sample.head()

Unnamed: 0,시간,내부온도관측치,내부습도관측치,CO2관측치,EC관측치,외부온도관측치,외부습도관측치,펌프상태,펌프작동남은시간,최근분무량,...,카메라상태,냉방온도,난방온도,기준온도,난방부하,냉방부하,총추정광량,백색광추정광량,적색광추정광량,청색광추정광량
0,2021-02-17 00:00:00,24.799999,40.5,361.0,0.059069,20.299999,33.200001,0.0,0.0,0.0,...,4.5,0.0,0.0,363.554,16.548,37.596,363.554,309.41,16.548,37.596
1,2021-02-17 00:01:00,20.5,44.099998,355.0,0.910134,19.700001,10.7,0.0,0.0,0.0,...,0.0,20.0,18.0,19.0,0.0,2.5,0.0,0.0,0.0,0.0
2,2021-02-17 00:02:00,20.6,44.299999,360.0,0.910134,19.799999,10.6,0.0,0.0,0.0,...,0.0,20.0,18.0,19.0,0.0,3.000002,0.0,0.0,0.0,0.0
3,2021-02-17 00:03:00,20.6,44.5,359.0,0.908579,19.799999,10.4,0.0,0.0,0.0,...,0.0,20.0,18.0,19.0,0.0,3.000002,0.0,0.0,0.0,0.0
4,2021-02-17 00:04:00,20.6,44.5,357.0,0.910134,19.799999,10.3,0.0,0.0,0.0,...,0.0,20.0,18.0,19.0,0.0,3.000002,0.0,0.0,0.0,0.0


In [13]:
# Preview the first rows of the sample target frame.
train_target_sample.head()

Unnamed: 0,시간,rate
0,2021-02-18 00:00:00,0.5
1,2021-02-19 00:00:00,0.66667
2,2021-02-20 00:00:00,0.6
3,2021-02-21 00:00:00,-0.125
4,2021-02-22 00:00:00,1.42857


In [14]:
# Build the training set: one sample per target row, where each sample is a
# consecutive 1440-row window (one day of minute-level rows) of the paired
# input file, and the label is that target row's "rate".
train_data = []
train_label = []
for train_input_path, train_target_path in tqdm(
        zip(train_input_list, train_target_list),
        # zip() has no length, so tell tqdm how many files to expect.
        total=min(len(train_input_list), len(train_target_list))):
    train_input_df = pd.read_csv(train_input_path)
    train_target_df = pd.read_csv(train_target_path)
    # The timestamp column is not a numeric feature; missing readings become 0.
    train_input_df = train_input_df.drop(columns=['시간'])
    train_input_df = train_input_df.fillna(0)
    train_input_length = len(train_input_df) // 1440
    train_target_length = len(train_target_df)

    # Every target row needs a full window; a short input file would otherwise
    # yield truncated windows and a ragged array when the samples are stacked.
    if train_input_length < train_target_length:
        raise ValueError(
            f"{train_input_path}: only {train_input_length} full day(s) of input "
            f"for {train_target_length} target row(s)"
        )

    for idx in range(train_target_length):
        time_series = train_input_df.iloc[1440*idx:1440*(idx+1)].values
        train_data.append(time_series)

    train_label.extend(train_target_df["rate"])

0it [00:00, ?it/s]

In [15]:
# Build the validation set: one sample per target row, where each sample is a
# consecutive 1440-row window (one day of minute-level rows) of the paired
# input file, and the label is that target row's "rate".
val_data = []
val_label = []
for val_input_path, val_target_path in tqdm(
        zip(val_input_list, val_target_list),
        # zip() has no length, so tell tqdm how many files to expect.
        total=min(len(val_input_list), len(val_target_list))):
    val_input_df = pd.read_csv(val_input_path)
    val_target_df = pd.read_csv(val_target_path)
    # The timestamp column is not a numeric feature; missing readings become 0.
    val_input_df = val_input_df.drop(columns=['시간'])
    val_input_df = val_input_df.fillna(0)
    val_input_length = len(val_input_df) // 1440
    val_target_length = len(val_target_df)

    # Every target row needs a full window; a short input file would otherwise
    # yield truncated windows and a ragged array when the samples are stacked.
    if val_input_length < val_target_length:
        raise ValueError(
            f"{val_input_path}: only {val_input_length} full day(s) of input "
            f"for {val_target_length} target row(s)"
        )

    for idx in range(val_target_length):
        time_series = val_input_df.iloc[1440*idx:1440*(idx+1)].values
        val_data.append(time_series)

    val_label.extend(val_target_df["rate"])

0it [00:00, ?it/s]

In [16]:
# Stack the per-day windows into single arrays.
train_data, val_data = np.array(train_data), np.array(val_data)

# Labels are reshaped to (number of samples, 1, 1).
train_label = np.reshape(train_label, (len(train_label), 1, 1))
val_label = np.reshape(val_label, (len(val_label), 1, 1))

In [17]:
# Report the array shapes as a sanity check before the model is built.
for caption, array in (
    ("Shape of train input :", train_data),
    ("Shape of train target :", train_label),
    ("Shape of val input :", val_data),
    ("Shape of val target :", val_label),
):
    print(caption, np.shape(array))

Shape of train input : (1607, 1440, 37)
Shape of train target : (1607, 1, 1)
Shape of val input : (206, 1440, 37)
Shape of val target : (206, 1, 1)


- Shape of input : (# days, # time steps per day (1440 minutes), # columns)
- Shape of target : (# days, 1, 1)

In [18]:
# Three stacked 50-unit LSTM layers feeding a single-unit Dense output.
# The first two LSTMs return the whole sequence so the following LSTM still
# receives a sequence; the last one returns only its final output.
n_timesteps, n_features = train_data.shape[1], train_data.shape[2]

model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(n_timesteps, n_features)),
    LSTM(50, return_sequences=True),
    LSTM(50),
    # Only the Dense layer is given an explicitly seeded initializer.
    Dense(1, kernel_initializer=tf.keras.initializers.glorot_uniform(seed=seed_num)),
])
model.compile(optimizer='adam', loss='mean_squared_error')

In [19]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 1440, 50)          17600     
                                                                 
 lstm_1 (LSTM)               (None, 1440, 50)          20200     
                                                                 
 lstm_2 (LSTM)               (None, 50)                20200     
                                                                 
 dense (Dense)               (None, 1)                 51        
                                                                 
Total params: 58,051
Trainable params: 58,051
Non-trainable params: 0
_________________________________________________________________


In [20]:
# Compile with an explicit Adam optimizer and prepare a checkpoint callback
# that keeps only the model with the lowest validation loss seen so far.
adam = Adam(learning_rate=0.001)
chk = ModelCheckpoint(
    'lstm.pkl',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
model.compile(optimizer=adam, loss='mse')

In [None]:
%%time
hist = model.fit(train_data, train_label, validation_data=(val_data, val_label), callbacks=[chk], epochs=30, batch_size = 64, verbose = 1, shuffle = False)

In [23]:
# Reload the model from the path the ModelCheckpoint callback writes to; with
# save_best_only=True that is the model with the lowest validation loss.
loaded_model = load_model("lstm.pkl")

In [24]:
# Run the reloaded model over the training and validation arrays.
# (The spelling of `train_predicton` is kept as-is.)
train_predicton, val_prediction = (
    loaded_model.predict(train_data),
    loaded_model.predict(val_data),
)

for caption, preds in (
    ("Train pred shape :", train_predicton),
    ("Val pred shape :", val_prediction),
):
    print(caption, preds.shape)

Train pred shape : (1607, 1)
Val pred shape : (206, 1)


In [25]:
# Gather the test CSV paths. Both lists are sorted so that input and target
# files can be paired by position.
test_input_list, test_target_list = (
    sorted(glob.glob(pattern))
    for pattern in ('./test_input/*.csv', './test_target/*.csv')
)

In [41]:
# Build the test set: one sample per row of each test target file, where each
# sample is a consecutive 1440-row window (one day of minute-level rows) of the
# paired input file. The target file is read only for its row count.
test_data = []
for test_input_path, test_target_path in tqdm(
        zip(test_input_list, test_target_list),
        # zip() has no length, so tell tqdm how many files to expect.
        total=min(len(test_input_list), len(test_target_list))):
    test_input_df = pd.read_csv(test_input_path)
    test_target_df = pd.read_csv(test_target_path)

    # The timestamp column is not a numeric feature; missing readings become 0.
    test_input_df = test_input_df.drop(columns=['시간'])
    test_input_df = test_input_df.fillna(0)

    test_input_length = len(test_input_df) // 1440
    test_target_length = len(test_target_df)

    # Every target row needs a full window; a short input file would otherwise
    # yield truncated windows and a ragged array when the samples are stacked.
    if test_input_length < test_target_length:
        raise ValueError(
            f"{test_input_path}: only {test_input_length} full day(s) of input "
            f"for {test_target_length} target row(s)"
        )

    for idx in range(test_target_length):
        time_series = test_input_df.iloc[1440*idx:1440*(idx+1)].values
        test_data.append(time_series)

0it [00:00, ?it/s]

In [42]:
# Stack the test windows into one array and report its shape.
test_data = np.array(test_data)
print("Shape of test input :", test_data.shape)

Shape of test input : (195, 1440, 37)


In [43]:
# Predict on the test windows with the reloaded checkpoint model.
test_prediction = loaded_model.predict(x=test_data)
print("Test pred shape :", np.shape(test_prediction))

Test pred shape : (195, 1)


In [44]:
# Write the predictions back into the test target CSVs (overwriting them in
# place). Predictions are stored file after file in the order of
# test_target_list, so each file takes the next len(file) values.

# Flatten to 1-D so each chunk is assigned to the 'rate' column as a plain vector.
flat_prediction = np.asarray(test_prediction).reshape(-1)

# Read every file first and check the totals before anything is overwritten,
# so a mismatch cannot leave the files half-updated.
submit_frames = [pd.read_csv(test_target_path) for test_target_path in test_target_list]
expected_rows = sum(len(frame) for frame in submit_frames)
if expected_rows != len(flat_prediction):
    raise ValueError(
        f"{len(flat_prediction)} prediction(s) for {expected_rows} target row(s)"
    )

offset = 0
for test_target_path, submit_df in zip(test_target_list, submit_frames):
    n_rows = submit_df.shape[0]
    submit_df['rate'] = flat_prediction[offset:offset + n_rows]
    print(offset, offset + n_rows)
    submit_df.to_csv(test_target_path, index=False)

    offset += n_rows

0 29
29 64
64 90
90 122
122 159
159 195


In [45]:
import zipfile

# Bundle the filled-in target CSVs into submission.zip in the project folder,
# storing each file under its bare file name.
# Paths are passed explicitly instead of calling os.chdir, so the working
# directory is left untouched and the cell can be re-run; the `with` block
# closes the archive even if a write fails.
with zipfile.ZipFile("./submission.zip", 'w') as submission:
    for path in test_target_list:
        submission.write(path, arcname=os.path.basename(path))