# In [None]:
# -*- coding: utf-8 -*-

'''
#################### CONNECT TO THE FOLDER ####################
'''
def connect_to_the_folder(method):
    """Change the working directory to the project folder.

    Args:
        method: ``'google'`` mounts Google Drive through ``google.colab`` and
            switches to the project folder on the drive; ``'local'`` switches
            to the project folder on the local ``c:`` disk.

    Raises:
        ValueError: If ``method`` is neither ``'google'`` nor ``'local'``.
    """
    import os
    if method == 'google':
        # Imported lazily so that the 'local' branch does not need google.colab.
        from google.colab import drive
        drive.mount('/content/drive/', force_remount=True)
        os.chdir('/content/drive/My Drive/Data_Technology/DT_Learning/III_Big_Data/BDSE21_Team1/Final_Project/')
    elif method == 'local':
        # Every backslash is escaped explicitly: the previous literal relied on
        # unrecognised escape sequences, which Python now warns about.
        os.chdir('c:\\Shared\\BDSE_Team1\\Final_Project\\')
    else:
        # Failing here is clearer than letting later relative-path reads fail
        # because the working directory was never changed.
        raise ValueError(
            f"Unknown method {method!r}; expected 'google' or 'local'."
        )

'''
#################### GET FIRST N POPULAR CODE ####################
'''
def get_first_n_popular_code(n):
    """Return the ``n`` most frequent values of the ``code`` column.

    Reads ``Dataset_A/News_Anue_Arrange/News_Anue_Arrange_<year>.csv`` for
    every year from 2016 through the current year (paths are relative to the
    working directory) and counts how often each code occurs.

    Args:
        n: Number of rows to keep from the top of the ranking.

    Returns:
        A DataFrame with columns ``code`` and ``size``, sorted by ``size`` in
        descending order (ties keep ascending ``code`` order), with a fresh
        0-based index.

    Raises:
        FileNotFoundError: If one of the yearly CSV files is missing.
    """
    import datetime

    import pandas as pd

    end_year = datetime.datetime.now().year

    codes = []
    # A plain range yields the same years as the former year-end date_range,
    # without depending on the deprecated 'Y' frequency alias.
    for year in range(2016, end_year + 1):
        filename = f'Dataset_A/News_Anue_Arrange/News_Anue_Arrange_{year}.csv'
        df_read = pd.read_csv(filename, index_col=0, dtype=str)
        # Missing codes are dropped; converting them with str() would count a
        # bogus 'nan' code.
        codes.extend(df_read['code'].dropna().astype(str))

    if not codes:
        # Nothing to rank: return an empty frame with the usual columns.
        return pd.DataFrame({
            'code': pd.Series(dtype=str),
            'size': pd.Series(dtype='int64'),
        })

    counts = pd.DataFrame({'code': codes})
    counts = counts.groupby('code', as_index=False).size()
    # mergesort is stable, so the ranking is deterministic for equal counts.
    counts = counts.sort_values(by='size', ascending=False, kind='mergesort')
    counts = counts.reset_index(drop=True)
    return counts[:n]

'''
#################### VALIDATING MODEL - RNN ####################
'''
def get_validating_data_by_code(code, y_label, day):
    """Load the validation slice of one code's training CSV.

    Reads ``Dataset_A/Training_Data_Stock/<code>_training.csv`` (first column
    as index), keeps rows whose index lies strictly between ``'2016-06-30'``
    and ``'2021-07-01'``, takes the last 10% of those rows, drops rows that
    contain any missing value, and returns the model columns only.

    Args:
        code: Identifier used to build the CSV file name.
        y_label: Label name used in the lag and target column names.
        day: Horizon used in the target column name.

    Returns:
        A DataFrame with ``open``, ``high``, ``low``, ``close``, the six
        ``<k>days_before_<y_label>`` columns (k = 1..6) and, as the last
        column, ``y_label_<y_label>_<day>days_after``.
    """
    import pandas as pd

    filename = f'Dataset_A/Training_Data_Stock/{str(code)}_training.csv'
    print(filename)
    frame = pd.read_csv(filename, index_col=0)

    # The index is compared against date strings, as in the CSV.
    in_window = (frame.index > '2016-06-30') & (frame.index < '2021-07-01')
    frame = frame[in_window]

    # The hold-out is cut first; incomplete rows are removed afterwards.
    holdout_start = int(len(frame) * 0.9)
    frame = frame.iloc[holdout_start:].dropna()

    lag_columns = [f'{lag}days_before_{y_label}' for lag in range(1, 7)]
    target_column = f'y_label_{y_label}_{day}days_after'
    wanted = ['open', 'high', 'low', 'close'] + lag_columns + [target_column]
    return frame[wanted]

def validating_model_by_code(model, model_type, code, y_label, day):
    """Plot a model's predictions against the real labels for one code.

    Loads the validation slice with ``get_validating_data_by_code``, min-max
    scales it, feeds every column except the last one to ``model.predict``,
    maps the predictions back through the same scaler, and saves and shows a
    real-vs-predicted line chart.

    Args:
        model: Object exposing ``predict``; it receives an array of shape
            ``(rows, columns - 1, 1)``.
        model_type: Sub-folder name used in the output image path.
        code: Identifier of the data file to validate on.
        y_label: Label name; selects the target column and the output folder.
        day: Horizon; part of the target column name and the image name.

    Returns:
        None. The chart is written to
        ``Model/Model_Validation_<y_label>/<model_type>/<y_label>_<code>_<day>.png``.
    """
    import math
    import os

    import matplotlib.pyplot as plt
    import numpy as np
    from sklearn.preprocessing import MinMaxScaler

    df = get_validating_data_by_code(code, y_label, day)
    if df.empty:
        # The scaler cannot be fitted on zero samples; skip this code so a
        # batch run over many codes can continue.
        print(f'{code}: no validation rows, skipped')
        return
    test_y = df[f'y_label_{y_label}_{day}days_after'].to_list()

    # A single scaler is fitted over every cell of the frame (all columns
    # share one min/max), so the values are flattened into one column first.
    column_count = len(df.columns)
    scaler = MinMaxScaler()
    flat = df.to_numpy().astype(float).reshape(-1, 1)
    scaled = scaler.fit_transform(flat)

    # Back to one entry per row; the trailing axis of length 1 is kept, so
    # the model input has shape (rows, columns - 1, 1).
    reshaped_data = scaled.reshape(-1, column_count, 1).astype('float64')
    test_x = reshaped_data[:, :-1]

    # Predict and undo the scaling so predictions are comparable to test_y.
    predict = model.predict(test_x)
    predict = scaler.inverse_transform(np.reshape(predict, (-1, 1)))

    filename = f'Model/Model_Validation_{y_label}/{model_type}/{y_label}_{code}_{day}.png'
    # savefig does not create missing folders.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    try:
        plt.plot(test_y, 'r-', label='real')
        plt.plot(predict, 'b-', label='pred')
        plt.legend(['realdata', 'predict'], loc='best')
        # Upper limit: the larger of both maxima, rounded up to a multiple of 10.
        plt.ylim(0, math.ceil(max(np.max(predict), np.max(test_y)) / 10) * 10)
        plt.savefig(filename)
        plt.show()
    finally:
        # Always release the figure so a failure cannot leak lines into the
        # chart of the next code.
        plt.close()

    print(filename, '--- complete ---')

def validating_model(n, model_type, y_label, days, epochs):
    """Validate saved models on the ``n`` most frequent codes.

    For every label/day combination the model file
    ``Model/Model_Save/<model_type>/<prefix>_<label>_<day>_<epochs>.h5`` is
    loaded once and then validated on each code with
    ``validating_model_by_code``.

    Args:
        n: Number of codes taken from ``get_first_n_popular_code``.
        model_type: ``'rnn_5'`` (file prefix ``rnn``) or ``'rnn_5_nlp'``
            (file prefix ``rnn_nlp``).
        y_label: A label name or an iterable of label names.
        days: Iterable of horizons.
        epochs: Epoch count that is part of the saved model's file name.

    Raises:
        ValueError: If ``model_type`` is not one of the supported values.
    """
    # Resolve the file-name prefix first so an unsupported model type fails
    # immediately instead of hitting an unbound ``filename`` later.
    if model_type == 'rnn_5':
        prefix = 'rnn'
    elif model_type == 'rnn_5_nlp':
        prefix = 'rnn_nlp'
    else:
        raise ValueError(
            f"Unknown model_type {model_type!r}; "
            "expected 'rnn_5' or 'rnn_5_nlp'."
        )

    # Iterate the argument rather than a module-level ``y_labels`` global,
    # which only exists when the file is run as a script. A bare string is
    # treated as a single label.
    y_labels = [y_label] if isinstance(y_label, str) else list(y_label)

    from keras.models import load_model

    codes = get_first_n_popular_code(n)['code']
    for label in y_labels:
        for day in days:
            filename = f'{prefix}_{label}_{day}_{epochs}'
            model = load_model(f'Model/Model_Save/{model_type}/{filename}.h5')
            for code in codes:
                validating_model_by_code(model, model_type, code, label, day)

if __name__ == '__main__':
    # Switch to the project folder: 'google' or 'local'.
    connect_to_the_folder('google')

    model_type = ['rnn_5', 'rnn_5_nlp']

    # Reduced sweep; the alternative noted here previously was
    # ['close', 'increase'] for the labels and [1, 2, 3, 4, 5] for the days.
    y_labels = ['close']
    days = [1]

    n = 50
    split = 0.8  # not referenced by the validation calls visible here
    epochs = 1

    validating_model(n, model_type[1], y_labels, days, epochs)