In [1]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from tqdm import tqdm
import tensorflow as tf
import keras

In [4]:
path = 'data/'

In [5]:
energy = pd.read_csv(path+'energy.csv')

In [6]:
energy['dangjin_floating'] = energy['dangjin_floating'].fillna(0)
energy['dangjin_warehouse'] = energy['dangjin_warehouse'].fillna(0)

In [7]:
def convert_time(x):
    Ymd, HMS = x.split(' ')
    H, M, S = HMS.split(':')
    H = str(int(H)-1)
    HMS = ':'.join([H, M, S])
    return ' '.join([Ymd, HMS])

In [8]:
energy['time'] = energy['time'].apply(lambda x:convert_time(x))

In [9]:
energy['time'] = pd.to_datetime(energy['time'])

In [10]:
energy = energy.astype({'dangjin': 'float32'})
energy = energy.astype({'ulsan': 'float32'})

In [11]:
energy = energy.set_index('time')

In [12]:
energy.dtypes

dangjin_floating     float64
dangjin_warehouse    float64
dangjin              float32
ulsan                float32
dtype: object

In [13]:
X_train = energy[:'2020-12-31 23:00:00']
X_valid = energy['2020-01-01 00:00:00':'2021-01-31 23:00:00']
X_train.shape, X_valid.shape

((24888, 4), (9528, 4))

In [14]:
def my_split_window(series, window):
    '''
    the series is split in (len(series)-window)-blocks of window size, 
    y is the next value that comes after the block, 
    every block starts with the next value in the series.
    The last block ends with the last-but-one value in the series.
    '''
    X = []
    y = []
    n_steps = len(series) - window
    for step in range(n_steps):
        X.append(series[step:window+step])
        y.append(series[step + window])
    X = np.array(X)
    y = np.array(y)
    return X, y

In [15]:
train_power_series1 = energy.dangjin_floating.values
window = 672
X, y = my_split_window(train_power_series1, window)

In [None]:
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))
# define model
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv1D(filters=32, kernel_size=4, activation='relu', 
                                 input_shape=(window, n_features)))
model.add(tf.keras.layers.MaxPooling1D(pool_size=2))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(50, activation='relu'))
model.add(tf.keras.layers.Dense(1))
model.compile(optimizer='adam', loss='mae') 
# fit model
history = model.fit(X, y, epochs=600, verbose=1)

# graph of the loss shows convergence
import matplotlib.pyplot as plt
plt.plot(history.history['loss'])
plt.title('loss')
plt.xlabel('epochs')
plt.show()

Epoch 1/600
Epoch 2/600
Epoch 3/600
Epoch 4/600
Epoch 5/600
Epoch 6/600
Epoch 7/600
Epoch 8/600
Epoch 9/600
Epoch 10/600

In [None]:
# predicting next year
x_input = np.array(X_train.dangjin_floating[-672:]) #  next value based on data of last year
x_input = x_input.reshape((1, window, n_features)) # the model expects three dimensions as input (samples, window, features)

for i in range(672):
    y_hat = model.predict(x_input, verbose=1)
    new_x = y_hat.reshape((1,1,1))
    x_input = np.concatenate((x_input[:, -671:], new_x), axis=1)

In [None]:
y_predicted = x_input.reshape((x_input.shape[1]))
plt.plot(y_predicted, label='predicted_power')

y_true = X_valid.dangjin_floating.values
plt.plot(y_true, label='true_power')
plt.legend()
plt.show()

In [None]:
train_power_series2 = energy.dangjin_warehouse.values
window = 672
X, y = my_split_window(train_power_series2, window)

In [None]:
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))
# define model
model1 = tf.keras.Sequential()
model1.add(tf.keras.layers.Conv1D(filters=32, kernel_size=4, activation='relu', 
                                 input_shape=(window, n_features)))
model1.add(tf.keras.layers.MaxPooling1D(pool_size=2))
model1.add(tf.keras.layers.Flatten())
model1.add(tf.keras.layers.Dense(50, activation='relu'))
model1.add(tf.keras.layers.Dense(1))
model1.compile(optimizer='adam', loss='mae') 
# fit model
history1 = model1.fit(X, y, epochs=600, verbose=1)

In [None]:
# predicting next year
x_input1 = np.array(X_train.dangjin_warehouse[-672:]) #  next value based on data of last year
x_input1 = x_input1.reshape((1, window, n_features)) # the model expects three dimensions as input (samples, window, features)

for i in range(672):
    y_hat1 = model1.predict(x_input1, verbose=1)
    new_x1 = y_hat1.reshape((1,1,1))
    x_input1 = np.concatenate((x_input1[:, -671:], new_x1), axis=1)

In [None]:
y_predicted1 = x_input1.reshape((x_input1.shape[1]))
plt.plot(y_predicted1, label='predicted_power')

y_true1 = X_valid.dangjin_warehouse.values
plt.plot(y_true1, label='true_power')
plt.legend()
plt.show()

In [None]:
train_power_series3 = energy.dangjin.values
window = 672
X, y = my_split_window(train_power_series3, window)

In [None]:
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))
# define model
model2 = tf.keras.Sequential()
model2.add(tf.keras.layers.Conv1D(filters=32, kernel_size=4, activation='relu', 
                                 input_shape=(window, n_features)))
model2.add(tf.keras.layers.MaxPooling1D(pool_size=2))
model2.add(tf.keras.layers.Flatten())
model2.add(tf.keras.layers.Dense(50, activation='relu'))
model2.add(tf.keras.layers.Dense(1))
model2.compile(optimizer='adam', loss='mae') 
# fit model
history2 = model2.fit(X, y, epochs=600, verbose=1)

In [None]:
# predicting next year
x_input2 = np.array(X_train.dangjin[-672:]) #  next value based on data of last year
x_input2 = x_input2.reshape((1, window, n_features)) # the model expects three dimensions as input (samples, window, features)

for i in range(672):
    y_hat2 = model2.predict(x_input2, verbose=1)
    new_x2 = y_hat2.reshape((1,1,1))
    x_input2 = np.concatenate((x_input2[:, -671:], new_x2), axis=1)

In [None]:
y_predicted2 = x_input2.reshape((x_input2.shape[1]))
plt.plot(y_predicted2, label='predicted_power')

y_true2 = X_valid.dangjin.values
plt.plot(y_true2, label='true_power')
plt.legend()
plt.show()

In [None]:
train_power_series4 = energy.ulsan.values
window = 672
X, y = my_split_window(train_power_series4, window)

In [None]:
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))
# define model
model3 = tf.keras.Sequential()
model3.add(tf.keras.layers.Conv1D(filters=32, kernel_size=4, activation='relu', 
                                 input_shape=(window, n_features)))
model3.add(tf.keras.layers.MaxPooling1D(pool_size=2))
model3.add(tf.keras.layers.Flatten())
model3.add(tf.keras.layers.Dense(50, activation='relu'))
model3.add(tf.keras.layers.Dense(1))
model3.compile(optimizer='adam', loss='mae') 
# fit model
history3 = model3.fit(X, y, epochs=600, verbose=1)

In [None]:
# predicting next year
x_input3 = np.array(X_train.ulsan[-672:]) #  next value based on data of last year
x_input3 = x_input3.reshape((1, window, n_features)) # the model expects three dimensions as input (samples, window, features)

for i in range(672):
    y_hat3 = model3.predict(x_input3, verbose=1)
    new_x3 = y_hat3.reshape((1,1,1))
    x_input3 = np.concatenate((x_input3[:, -671:], new_x3), axis=1)

In [None]:
y_predicted3 = x_input3.reshape((x_input3.shape[1]))
plt.plot(y_predicted3, label='predicted_power')

y_true3 = X_valid.ulsan.values
plt.plot(y_true3, label='true_power')
plt.legend()
plt.show()

In [None]:
submission = pd.read_csv(path+'sample_submission.csv')

In [None]:
submission.iloc[:24*28, 1] = y_predicted
submission.iloc[:24*28, 2] = y_predicted1
submission.iloc[:24*28, 3] = y_predicted2
submission.iloc[:24*28, 4] = y_predicted3

In [None]:
submission

In [None]:
submission.to_csv(path+'submission_CNN.csv', index=False)