# Convert training data

In [2]:
## Common imports

import numpy as np
import pandas as pd
import datetime as dt
import calendar as cal
import os
import glob

# Seed NumPy's global random number generator so that any random draws in this
# notebook are reproducible across runs.
np.random.seed(42)

# Plotting setup: render matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Default font sizes for axis labels and for x/y tick labels on every figure.
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Ignore useless warnings (see SciPy issue #5998): suppress warnings raised from
# scipy modules whose message starts with "internal gelsd".
import warnings
warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")

# Rich display helper, used to show several DataFrames from a single cell.
from IPython.display import display
# Show up to 50 columns before pandas truncates wide frames.
pd.options.display.max_columns = 50
# Also publish a Table Schema representation of DataFrames for frontends that
# support it.
pd.options.display.html.table_schema = True

## Convert the historical prices to the desired format

In [14]:
# Glob pattern (not a directory) matching every price CSV to convert.
FILE_FOLDER = "../dataset/prices/*.csv"

# Multipliers for the magnitude suffix at the end of a raw volume string,
# e.g. "45.22K" -> 45220.0.
# NOTE(review): 'B' (billions) was added defensively; confirm the source uses it.
UOM = {'K': 1000, 'M': 1000000, 'B': 1000000000}

def convert_to_numeric(val):
    """Convert a volume value such as ``"45.22K"`` or ``"1.05M"`` to a float.

    Parameters
    ----------
    val : str, number or None
        Raw volume. Strings may end with one of the suffixes in ``UOM``;
        strings without a suffix are parsed as plain numbers. Values that are
        already numeric (including NaN) are returned as floats, which keeps
        the conversion safe to apply to data that was converted previously.

    Returns
    -------
    float
        The volume in units, or NaN when the value is missing (``None``,
        an empty string or the ``"-"`` placeholder).

    Raises
    ------
    ValueError
        If the numeric part of a string cannot be parsed as a float.
    """
    if val is None:
        return float('nan')
    if not isinstance(val, str):
        # Already numeric (e.g. a float read back from a converted file, or NaN).
        return float(val)

    text = val.strip()
    # NOTE(review): "-" is assumed to be the source's marker for "no volume" -
    # confirm against the raw files.
    if text in ('', '-'):
        return float('nan')

    suffix = text[-1]
    if suffix in UOM:
        return float(text[:-1]) * UOM[suffix]
    # No magnitude suffix: the string is a plain number.
    return float(text)

# Normalise every price CSV matched by FILE_FOLDER and write it back in place:
#   * 'price' is renamed to 'close' and 'change_percent' is dropped,
#   * 'date' is parsed to datetime and rows are sorted oldest-first,
#   * 'volume' strings such as "45.22K" become plain floats.
#
# The files are overwritten, so the cell must be safe to re-run on its own
# output. Each step below is therefore a no-op on an already converted file.
# Files are visited in sorted order so the displayed previews are stable.
for f in sorted(glob.glob(FILE_FOLDER)):
    df = pd.read_csv(f)
    # rename() silently skips missing labels; drop() needs errors='ignore'
    # to do the same once 'change_percent' has already been removed.
    df = df.rename(columns={'price': 'close'})
    df = df.drop(columns=['change_percent'], errors='ignore')
    df['date'] = pd.to_datetime(df['date'])
    # Only parse the suffix notation while the column is still text; a
    # converted file is read back with a numeric dtype and is left untouched.
    if not pd.api.types.is_numeric_dtype(df['volume']):
        df['volume'] = df['volume'].apply(convert_to_numeric)
    df = df.sort_values(by=['date'], ascending=True)
    display(df.head(10))
    df.to_csv(f, index=False)


Unnamed: 0,date,close,open,high,low,volume
924,2015-01-02,3.51,3.49,3.52,3.49,45220.0
923,2015-01-05,3.48,3.53,3.53,3.47,704600.0
922,2015-01-06,3.46,3.5,3.5,3.46,1050000.0
921,2015-01-07,3.45,3.46,3.48,3.45,957890.0
920,2015-01-08,3.45,3.45,3.51,3.43,1070000.0
919,2015-01-09,3.45,3.47,3.48,3.45,729780.0
918,2015-01-12,3.43,3.46,3.47,3.43,701470.0
917,2015-01-13,3.43,3.43,3.46,3.43,897360.0
916,2015-01-14,3.45,3.44,3.46,3.44,602770.0
915,2015-01-15,3.45,3.45,3.48,3.44,726550.0


Unnamed: 0,date,close,open,high,low,volume
906,2015-01-02,0.65,0.64,0.65,0.64,27000.0
905,2015-01-05,0.655,0.66,0.66,0.655,165000.0
904,2015-01-06,0.665,0.655,0.665,0.64,279800.0
903,2015-01-07,0.65,0.655,0.655,0.65,81300.0
902,2015-01-08,0.66,0.65,0.66,0.65,24000.0
901,2015-01-09,0.665,0.66,0.665,0.645,227300.0
900,2015-01-12,0.665,0.645,0.675,0.645,627900.0
899,2015-01-13,0.645,0.645,0.645,0.645,20000.0
898,2015-01-14,0.65,0.65,0.66,0.65,90600.0
897,2015-01-15,0.655,0.65,0.655,0.65,158400.0


Unnamed: 0,date,close,open,high,low,volume
846,2015-01-02,1.32,1.32,1.33,1.32,24100.0
845,2015-01-05,1.33,1.33,1.33,1.33,5000.0
844,2015-01-06,1.3,1.31,1.31,1.3,16300.0
843,2015-01-07,1.3,1.3,1.3,1.3,20000.0
842,2015-01-08,1.32,1.32,1.34,1.32,29400.0
841,2015-01-09,1.32,1.32,1.32,1.32,20000.0
840,2015-01-12,1.33,1.31,1.33,1.3,14000.0
839,2015-01-13,1.33,1.33,1.33,1.33,31000.0
838,2015-01-14,1.36,1.34,1.36,1.34,68200.0
837,2015-01-15,1.36,1.37,1.37,1.35,45600.0


Unnamed: 0,date,close,open,high,low,volume
856,2015-01-02,0.141,0.141,0.141,0.141,200700.0
855,2015-01-05,0.141,0.141,0.145,0.141,235100.0
854,2015-01-06,0.136,0.141,0.141,0.136,375100.0
853,2015-01-07,0.141,0.141,0.141,0.141,1000.0
852,2015-01-08,0.136,0.136,0.145,0.136,119400.0
851,2015-01-09,0.141,0.141,0.145,0.141,229200.0
850,2015-01-12,0.141,0.141,0.141,0.141,137900.0
849,2015-01-13,0.141,0.141,0.141,0.136,447200.0
848,2015-01-14,0.145,0.136,0.145,0.136,949100.0
847,2015-01-15,0.15,0.145,0.155,0.145,1650000.0


Unnamed: 0,date,close,open,high,low,volume
924,2015-01-02,0.56,0.545,0.56,0.54,577500.0
923,2015-01-05,0.57,0.565,0.57,0.55,1980000.0
922,2015-01-06,0.575,0.565,0.575,0.55,1200000.0
921,2015-01-07,0.59,0.57,0.595,0.56,3750000.0
920,2015-01-08,0.615,0.595,0.635,0.595,7580000.0
919,2015-01-09,0.6,0.625,0.63,0.595,4760000.0
918,2015-01-12,0.64,0.61,0.645,0.605,6390000.0
917,2015-01-13,0.66,0.645,0.665,0.645,6260000.0
916,2015-01-14,0.675,0.665,0.68,0.66,5540000.0
915,2015-01-15,0.685,0.68,0.695,0.675,8000000.0


Unnamed: 0,date,close,open,high,low,volume
903,2015-01-05,0.764,0.788,0.788,0.764,482250.0
902,2015-01-06,0.768,0.768,0.776,0.768,75250.0
901,2015-01-07,0.776,0.768,0.776,0.768,300250.0
900,2015-01-08,0.768,0.768,0.768,0.764,322500.0
899,2015-01-09,0.784,0.768,0.784,0.768,14000.0
898,2015-01-12,0.768,0.772,0.772,0.768,145750.0
897,2015-01-14,0.764,0.764,0.764,0.764,37500.0
896,2015-01-15,0.764,0.764,0.764,0.764,50000.0
895,2015-01-16,0.764,0.764,0.764,0.764,32500.0
894,2015-01-19,0.8,0.776,0.808,0.776,1970000.0


Unnamed: 0,date,close,open,high,low,volume
918,2015-01-02,2.323,2.244,2.333,2.244,147400.0
917,2015-01-05,2.323,2.303,2.333,2.303,56400.0
916,2015-01-06,2.372,2.323,2.372,2.323,264900.0
915,2015-01-07,2.372,2.372,2.382,2.342,140400.0
914,2015-01-08,2.372,2.382,2.392,2.372,39000.0
913,2015-01-09,2.372,2.382,2.382,2.372,49800.0
912,2015-01-12,2.412,2.372,2.422,2.372,180100.0
911,2015-01-13,2.432,2.412,2.432,2.412,53000.0
910,2015-01-14,2.451,2.422,2.451,2.422,113400.0
909,2015-01-15,2.461,2.451,2.461,2.382,110300.0


Unnamed: 0,date,close,open,high,low,volume
924,2015-01-02,1.54,1.52,1.54,1.52,491700.0
923,2015-01-05,1.52,1.53,1.54,1.52,1770000.0
922,2015-01-06,1.54,1.5,1.54,1.5,1680000.0
921,2015-01-07,1.5,1.53,1.53,1.5,3610000.0
920,2015-01-08,1.52,1.5,1.52,1.49,2170000.0
919,2015-01-09,1.52,1.52,1.52,1.5,1560000.0
918,2015-01-12,1.51,1.53,1.53,1.5,2320000.0
917,2015-01-13,1.5,1.51,1.53,1.5,2910000.0
916,2015-01-14,1.51,1.5,1.52,1.5,3750000.0
915,2015-01-15,1.52,1.51,1.54,1.5,4970000.0


Unnamed: 0,date,close,open,high,low,volume
868,2015-01-02,2.05,2.09,2.09,2.05,15000.0
867,2015-01-05,1.95,2.02,2.04,1.95,25000.0
866,2015-01-06,1.95,1.95,1.95,1.95,3000.0
865,2015-01-07,1.92,1.95,1.95,1.92,21500.0
864,2015-01-08,1.95,1.95,1.95,1.95,4000.0
863,2015-01-09,1.95,1.95,1.96,1.95,18000.0
862,2015-01-12,2.0,1.95,2.0,1.95,25000.0
861,2015-01-13,2.0,1.99,2.0,1.99,29500.0
860,2015-01-14,2.0,2.0,2.0,1.97,20600.0
859,2015-01-15,2.03,2.0,2.05,2.0,38200.0


Unnamed: 0,date,close,open,high,low,volume
924,2015-01-02,2.228,2.228,2.236,2.228,1060000.0
923,2015-01-05,2.244,2.228,2.244,2.211,908950.0
922,2015-01-06,2.228,2.195,2.236,2.195,1150000.0
921,2015-01-07,2.236,2.211,2.236,2.211,478220.0
920,2015-01-08,2.236,2.236,2.236,2.228,551720.0
919,2015-01-09,2.228,2.236,2.236,2.228,866950.0
918,2015-01-12,2.236,2.228,2.236,2.228,371230.0
917,2015-01-13,2.228,2.211,2.236,2.211,634080.0
916,2015-01-14,2.236,2.228,2.236,2.228,121450.0
915,2015-01-15,2.228,2.228,2.236,2.228,61020.0


Unnamed: 0,date,close,open,high,low,volume
923,2015-01-02,1.43,1.42,1.44,1.41,1530000.0
922,2015-01-05,1.41,1.43,1.43,1.41,705700.0
921,2015-01-06,1.41,1.4,1.43,1.39,2190000.0
920,2015-01-07,1.43,1.41,1.43,1.4,1730000.0
919,2015-01-08,1.42,1.43,1.43,1.42,540500.0
918,2015-01-09,1.43,1.42,1.44,1.42,1010000.0
917,2015-01-12,1.43,1.43,1.43,1.42,2300000.0
916,2015-01-13,1.43,1.43,1.44,1.43,1070000.0
915,2015-01-14,1.44,1.43,1.44,1.42,1390000.0
914,2015-01-15,1.43,1.43,1.44,1.43,4640000.0


Unnamed: 0,date,close,open,high,low,volume
657,2015-01-02,0.59,0.57,0.59,0.57,16000.0
656,2015-01-05,0.54,0.57,0.57,0.53,147900.0
655,2015-01-06,0.545,0.55,0.55,0.54,68100.0
654,2015-01-07,0.55,0.55,0.55,0.55,24000.0
653,2015-01-08,0.55,0.56,0.56,0.55,16000.0
652,2015-01-13,0.6,0.59,0.6,0.58,77500.0
651,2015-01-14,0.6,0.6,0.6,0.6,35000.0
650,2015-01-15,0.605,0.595,0.605,0.585,132900.0
649,2015-01-16,0.6,0.62,0.62,0.6,4100.0
648,2015-01-19,0.605,0.62,0.62,0.605,56000.0


Unnamed: 0,date,close,open,high,low,volume
909,2015-01-02,1.6,1.62,1.62,1.6,418700.0
908,2015-01-05,1.53,1.6,1.6,1.52,736100.0
907,2015-01-06,1.5,1.53,1.53,1.47,946000.0
906,2015-01-07,1.56,1.5,1.56,1.48,716800.0
905,2015-01-08,1.67,1.56,1.67,1.56,1580000.0
904,2015-01-09,1.73,1.7,1.75,1.69,1890000.0
903,2015-01-12,1.7,1.72,1.72,1.7,757200.0
902,2015-01-13,1.68,1.7,1.7,1.68,572700.0
901,2015-01-14,1.6,1.68,1.68,1.59,1080000.0
900,2015-01-15,1.58,1.62,1.64,1.58,1680000.0


Unnamed: 0,date,close,open,high,low,volume
923,2015-01-02,1.76,1.76,1.77,1.76,750600.0
922,2015-01-05,1.74,1.72,1.78,1.72,883900.0
921,2015-01-06,1.71,1.74,1.76,1.68,493500.0
920,2015-01-07,1.68,1.7,1.71,1.68,1140000.0
919,2015-01-08,1.67,1.7,1.7,1.65,1050000.0
918,2015-01-09,1.71,1.69,1.71,1.66,1040000.0
917,2015-01-12,1.71,1.69,1.71,1.67,1220000.0
916,2015-01-13,1.7,1.71,1.72,1.7,1670000.0
915,2015-01-14,1.7,1.7,1.71,1.69,777300.0
914,2015-01-15,1.7,1.71,1.71,1.68,3720000.0


Unnamed: 0,date,close,open,high,low,volume
883,2015-01-02,3.86,3.86,3.86,3.85,17200.0
882,2015-01-05,3.87,3.87,3.87,3.87,3300.0
881,2015-01-06,3.86,3.86,3.86,3.85,39800.0
880,2015-01-07,3.86,3.84,3.86,3.84,7500.0
879,2015-01-08,3.87,3.87,3.87,3.87,2100.0
878,2015-01-09,3.92,3.89,3.92,3.89,2300.0
877,2015-01-12,3.89,3.9,3.9,3.89,2600.0
876,2015-01-13,3.8,3.88,3.88,3.8,82000.0
875,2015-01-14,3.82,3.82,3.82,3.82,800.0
874,2015-01-15,3.87,3.9,3.9,3.87,6000.0


Unnamed: 0,date,close,open,high,low,volume
924,2015-01-02,2.78,2.75,2.78,2.74,242500.0
923,2015-01-05,2.71,2.79,2.82,2.71,790100.0
922,2015-01-06,2.77,2.71,2.78,2.7,498900.0
921,2015-01-07,2.71,2.73,2.73,2.71,203200.0
920,2015-01-08,2.71,2.73,2.73,2.7,438600.0
919,2015-01-09,2.71,2.73,2.76,2.71,1250000.0
918,2015-01-12,2.7,2.72,2.72,2.7,1160000.0
917,2015-01-13,2.74,2.72,2.74,2.72,175100.0
916,2015-01-14,2.74,2.72,2.74,2.71,377500.0
915,2015-01-15,2.72,2.71,2.73,2.71,321700.0


Unnamed: 0,date,close,open,high,low,volume
910,2015-01-02,1.483,1.483,1.483,1.483,22000.0
909,2015-01-05,1.463,1.483,1.483,1.463,145200.0
908,2015-01-06,1.443,1.443,1.443,1.433,455500.0
907,2015-01-07,1.443,1.463,1.463,1.433,218000.0
906,2015-01-08,1.414,1.443,1.443,1.414,706400.0
905,2015-01-09,1.394,1.423,1.423,1.384,372800.0
904,2015-01-12,1.374,1.394,1.394,1.354,554800.0
903,2015-01-13,1.394,1.374,1.404,1.374,209200.0
902,2015-01-14,1.433,1.404,1.443,1.394,284500.0
901,2015-01-15,1.404,1.453,1.463,1.404,226300.0


Unnamed: 0,date,close,open,high,low,volume
924,2015-01-02,1.51,1.45,1.51,1.43,837100.0
923,2015-01-05,1.47,1.5,1.5,1.47,78500.0
922,2015-01-06,1.47,1.47,1.5,1.46,328100.0
921,2015-01-07,1.46,1.47,1.47,1.46,192800.0
920,2015-01-08,1.47,1.46,1.47,1.45,163900.0
919,2015-01-09,1.47,1.47,1.47,1.46,127500.0
918,2015-01-12,1.46,1.46,1.47,1.45,301200.0
917,2015-01-13,1.46,1.45,1.48,1.45,153600.0
916,2015-01-14,1.45,1.46,1.46,1.45,147800.0
915,2015-01-15,1.44,1.46,1.46,1.44,366300.0


Unnamed: 0,date,close,open,high,low,volume
923,2015-01-02,1.0,1.0,1.0,0.99,289800.0
922,2015-01-05,0.99,0.995,0.995,0.99,242000.0
921,2015-01-06,0.995,0.99,0.995,0.98,302500.0
920,2015-01-07,0.99,0.99,0.995,0.985,263800.0
919,2015-01-08,0.995,0.99,1.0,0.99,436500.0
918,2015-01-09,0.995,0.995,1.0,0.995,984700.0
917,2015-01-12,0.995,0.995,0.995,0.99,360400.0
916,2015-01-13,0.995,1.0,1.0,0.995,372200.0
915,2015-01-14,1.0,0.995,1.0,0.995,132900.0
914,2015-01-15,1.01,0.995,1.01,0.995,470900.0


Unnamed: 0,date,close,open,high,low,volume
901,2015-01-02,0.17,0.17,0.17,0.165,1190000.0
900,2015-01-05,0.165,0.17,0.17,0.165,770000.0
899,2015-01-06,0.16,0.165,0.165,0.16,934400.0
898,2015-01-07,0.175,0.16,0.18,0.16,4020000.0
897,2015-01-08,0.18,0.175,0.18,0.175,974900.0
896,2015-01-09,0.175,0.18,0.18,0.175,330600.0
895,2015-01-12,0.175,0.175,0.175,0.17,511200.0
894,2015-01-13,0.175,0.175,0.175,0.175,357000.0
893,2015-01-14,0.175,0.175,0.175,0.175,820000.0
892,2015-01-15,0.185,0.175,0.185,0.175,1870000.0
