# Convert training data

In [48]:
## Common imports
# Shared setup for the notebook: library imports, RNG seeding, and
# matplotlib / pandas display defaults.

import numpy as np
import pandas as pd
import datetime as dt
import calendar as cal
import os
import glob

# Seed NumPy's global random generator to make this notebook's output stable
# across runs.
np.random.seed(42)

# To plot pretty figures: render them inline in the notebook and set the
# axis-label and tick-label font sizes.
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Ignore useless warnings (see SciPy issue #5998): only warnings coming from
# scipy modules whose message starts with "internal gelsd" are silenced.
import warnings
warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")

# Notebook display helpers: show up to 50 DataFrame columns, and turn on
# pandas' Table Schema output option for displayed frames.
from IPython.display import display
pd.options.display.max_columns = 50
pd.options.display.html.table_schema = True

## Convert the historical prices to the desired format

In [55]:
# Despite the name, this is a glob pattern (not a folder path): it is passed
# to glob.glob() below to select every CSV file under ../dataset/prices/.
# The path is relative to the notebook's working directory.
FILE_FOLDER = "../dataset/prices/*.csv"

# Multipliers for the magnitude suffix of an abbreviated volume string
# (thousands, millions, billions).
UOM = {'K': 1000, 'M': 1000000, 'B': 1000000000}

def convert_to_numeric(val):
    """Convert an abbreviated volume such as ``'943.3K'`` or ``'1.01M'`` to a float.

    Parameters
    ----------
    val : str, number or None
        A number optionally followed by one of the suffixes in ``UOM``
        (case-insensitive). Surrounding whitespace and thousands separators
        (``,``) are ignored. Non-string input (e.g. the NaN pandas produces
        for a blank CSV cell, or an already-numeric value) is passed through
        ``float()`` unchanged.

    Returns
    -------
    float
        The value scaled by its suffix multiplier. ``None``, an empty string
        and the ``'-'`` placeholder all yield NaN.

    Raises
    ------
    ValueError
        If the numeric part of the string cannot be parsed as a float.
    """
    if val is None:
        return float('nan')
    if not isinstance(val, str):
        # Already numeric (including NaN): slicing it like a string would fail.
        return float(val)

    text = val.strip().replace(',', '')
    if text in ('', '-'):
        # No value recorded for this row.
        return float('nan')

    suffix = text[-1].upper()
    if suffix in UOM:
        return float(text[:-1]) * UOM[suffix]
    # No magnitude suffix: the string is a plain number.
    return float(text)

# Load every price CSV, normalise it, and preview the first rows.
# glob.glob() returns paths in arbitrary order, so the matches are sorted to
# keep the sequence of displayed tables identical from run to run.
# NOTE(review): the previews show an 'Unnamed: 0' column, which looks like an
# index saved into the CSVs; consider read_csv(..., index_col=0) - confirm.
for f in sorted(glob.glob(FILE_FOLDER)):
    df = (
        pd.read_csv(f)
        .rename(columns={'price': 'close'})
        .drop(columns=['change_percent'])
        .assign(
            # Parse the date strings into datetime64 values.
            date=lambda frame: pd.to_datetime(frame['date']),
            # Expand abbreviated volumes (e.g. '943.3K') into plain floats.
            volume=lambda frame: frame['volume'].apply(convert_to_numeric),
        )
    )
    display(df.head(10))


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,2.15,2.12,2.18,2.12,943300.0
1,2018-10-11,2.12,2.15,2.15,2.1,983100.0
2,2018-10-10,2.18,2.23,2.24,2.17,1010000.0
3,2018-10-09,2.26,2.24,2.26,2.22,337300.0
4,2018-10-08,2.24,2.23,2.26,2.2,638100.0
5,2018-10-05,2.23,2.27,2.28,2.23,970800.0
6,2018-10-04,2.27,2.27,2.28,2.26,969800.0
7,2018-10-03,2.26,2.31,2.31,2.25,796200.0
8,2018-10-02,2.34,2.34,2.36,2.33,518400.0
9,2018-10-01,2.34,2.36,2.36,2.33,488600.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-11,0.495,0.49,0.495,0.485,66000.0
1,2018-10-10,0.495,0.495,0.495,0.495,54000.0
2,2018-10-09,0.5,0.495,0.5,0.495,14000.0
3,2018-10-08,0.5,0.505,0.505,0.5,256700.0
4,2018-10-05,0.505,0.505,0.505,0.505,50000.0
5,2018-10-04,0.505,0.505,0.505,0.505,18000.0
6,2018-10-03,0.51,0.54,0.54,0.495,1880000.0
7,2018-10-02,0.54,0.55,0.55,0.54,137400.0
8,2018-10-01,0.55,0.575,0.575,0.55,79100.0
9,2018-09-28,0.575,0.575,0.575,0.575,32000.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,1.03,1.03,1.03,1.03,100.0
1,2018-10-11,1.03,1.03,1.03,1.0,58500.0
2,2018-10-10,1.04,1.03,1.04,1.03,24300.0
3,2018-10-09,1.04,1.05,1.05,1.02,18500.0
4,2018-10-08,1.04,1.03,1.05,1.02,29200.0
5,2018-10-05,1.04,1.03,1.05,1.03,11300.0
6,2018-09-28,1.05,1.03,1.05,1.03,33000.0
7,2018-09-27,1.03,1.0,1.03,1.0,148600.0
8,2018-09-26,1.03,1.03,1.03,1.03,10000.0
9,2018-09-24,1.03,1.03,1.03,1.02,27000.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,0.875,0.855,0.9,0.855,1020000.0
1,2018-10-11,0.855,0.86,0.88,0.82,1430000.0
2,2018-10-10,0.89,0.915,0.92,0.88,1850000.0
3,2018-10-09,0.915,0.905,0.94,0.905,2680000.0
4,2018-10-08,0.91,0.92,0.92,0.88,1910000.0
5,2018-10-05,0.925,0.95,0.95,0.92,1540000.0
6,2018-10-04,0.96,0.975,0.99,0.935,5240000.0
7,2018-10-03,0.965,0.93,0.985,0.93,13890000.0
8,2018-10-02,0.92,0.91,0.93,0.885,4580000.0
9,2018-10-01,0.91,0.94,0.94,0.91,1170000.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,0.25,0.245,0.255,0.245,790600.0
1,2018-10-11,0.245,0.25,0.25,0.24,1540000.0
2,2018-10-10,0.255,0.26,0.265,0.255,1540000.0
3,2018-10-09,0.265,0.265,0.265,0.26,191000.0
4,2018-10-08,0.265,0.265,0.27,0.265,795300.0
5,2018-10-05,0.265,0.265,0.265,0.26,1160000.0
6,2018-10-04,0.265,0.27,0.27,0.265,372400.0
7,2018-10-03,0.27,0.275,0.275,0.27,406100.0
8,2018-10-02,0.275,0.27,0.28,0.27,657200.0
9,2018-10-01,0.27,0.275,0.275,0.27,359800.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,1.38,1.39,1.39,1.36,518200.0
1,2018-10-11,1.39,1.36,1.39,1.29,1440000.0
2,2018-10-10,1.39,1.41,1.42,1.37,654800.0
3,2018-10-09,1.41,1.41,1.43,1.39,688300.0
4,2018-10-08,1.4,1.43,1.46,1.39,639800.0
5,2018-10-05,1.43,1.45,1.46,1.43,614000.0
6,2018-10-04,1.47,1.47,1.48,1.44,576600.0
7,2018-10-03,1.47,1.43,1.47,1.42,982200.0
8,2018-10-02,1.43,1.43,1.44,1.4,1220000.0
9,2018-10-01,1.41,1.42,1.43,1.4,1750000.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,0.745,0.76,0.77,0.74,468100.0
1,2018-10-11,0.76,0.75,0.78,0.75,739800.0
2,2018-10-10,0.77,0.79,0.79,0.76,538400.0
3,2018-10-09,0.79,0.78,0.8,0.78,196600.0
4,2018-10-08,0.785,0.78,0.795,0.77,382000.0
5,2018-10-05,0.78,0.79,0.79,0.775,314500.0
6,2018-10-04,0.79,0.8,0.8,0.79,579800.0
7,2018-10-03,0.795,0.805,0.81,0.795,382100.0
8,2018-10-02,0.805,0.83,0.83,0.8,312400.0
9,2018-10-01,0.82,0.805,0.83,0.8,671000.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,1.68,1.68,1.69,1.68,327700.0
1,2018-10-11,1.69,1.69,1.7,1.67,828100.0
2,2018-10-10,1.7,1.69,1.7,1.68,371700.0
3,2018-10-09,1.69,1.68,1.69,1.68,159700.0
4,2018-10-08,1.68,1.68,1.69,1.67,151200.0
5,2018-10-05,1.67,1.69,1.7,1.67,776800.0
6,2018-10-04,1.7,1.7,1.7,1.69,250100.0
7,2018-10-03,1.7,1.7,1.7,1.68,400400.0
8,2018-10-02,1.69,1.69,1.7,1.68,1040000.0
9,2018-10-01,1.7,1.7,1.7,1.69,353100.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,4.14,3.97,4.19,3.97,101000.0
1,2018-10-11,4.03,4.01,4.05,3.93,117600.0
2,2018-10-10,4.18,4.18,4.28,4.11,137200.0
3,2018-10-09,4.18,4.03,4.18,4.03,158100.0
4,2018-10-08,4.04,4.07,4.07,4.03,58500.0
5,2018-10-05,4.07,4.11,4.11,4.05,113200.0
6,2018-10-04,4.15,4.18,4.2,4.13,47400.0
7,2018-10-03,4.16,4.16,4.16,4.12,33400.0
8,2018-10-02,4.16,4.15,4.22,4.14,137500.0
9,2018-10-01,4.12,4.02,4.14,4.01,166400.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,2.01,2.0,2.03,2.0,177900.0
1,2018-10-11,2.01,2.01,2.02,1.99,567400.0
2,2018-10-10,2.04,2.04,2.04,2.0,411500.0
3,2018-10-09,2.04,2.05,2.05,2.04,89000.0
4,2018-10-08,2.04,2.04,2.04,2.03,110800.0
5,2018-10-05,2.03,2.05,2.07,2.03,680300.0
6,2018-10-04,2.06,2.11,2.11,2.05,179900.0
7,2018-10-03,2.11,2.11,2.12,2.08,1090000.0
8,2018-10-02,2.11,2.09,2.11,2.07,593900.0
9,2018-10-01,2.1,2.08,2.1,2.04,681200.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,1.14,1.13,1.14,1.12,52400.0
1,2018-10-11,1.12,1.12,1.13,1.11,141500.0
2,2018-10-10,1.12,1.16,1.16,1.12,326600.0
3,2018-10-09,1.16,1.17,1.17,1.16,65300.0
4,2018-10-08,1.16,1.16,1.17,1.16,202100.0
5,2018-10-05,1.16,1.17,1.17,1.16,139800.0
6,2018-10-04,1.16,1.16,1.17,1.16,128200.0
7,2018-10-03,1.16,1.16,1.17,1.15,55600.0
8,2018-10-02,1.15,1.14,1.15,1.14,44900.0
9,2018-10-01,1.14,1.14,1.15,1.14,60900.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,0.54,0.565,0.565,0.54,5000.0
1,2018-10-11,0.545,0.545,0.545,0.545,30600.0
2,2018-10-10,0.55,0.55,0.55,0.55,12600.0
3,2018-10-04,0.57,0.57,0.57,0.57,30000.0
4,2018-09-25,0.58,0.58,0.58,0.58,4000.0
5,2018-09-21,0.57,0.57,0.57,0.57,10000.0
6,2018-09-18,0.63,0.63,0.63,0.63,5000.0
7,2018-09-14,0.56,0.565,0.565,0.56,11000.0
8,2018-08-28,0.58,0.58,0.58,0.57,19000.0
9,2018-08-21,0.58,0.58,0.58,0.58,10000.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-09-21,0.89,0.89,0.905,0.885,64700.0
1,2018-09-20,0.885,0.87,0.895,0.865,847700.0
2,2018-09-19,0.885,0.89,0.895,0.865,628200.0
3,2018-09-18,0.845,0.85,0.86,0.845,382000.0
4,2018-09-14,0.85,0.845,0.855,0.845,223800.0
5,2018-09-13,0.85,0.845,0.855,0.845,260800.0
6,2018-09-12,0.85,0.875,0.9,0.84,198900.0
7,2018-09-07,0.88,0.88,0.89,0.875,133700.0
8,2018-09-06,0.89,0.895,0.9,0.885,98400.0
9,2018-09-05,0.895,0.925,0.93,0.895,196900.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,0.43,0.425,0.435,0.41,771300.0
1,2018-10-11,0.425,0.435,0.435,0.41,884100.0
2,2018-10-10,0.45,0.47,0.475,0.44,1220000.0
3,2018-10-09,0.46,0.45,0.465,0.45,697600.0
4,2018-10-08,0.455,0.455,0.46,0.45,355000.0
5,2018-10-05,0.465,0.46,0.465,0.455,680200.0
6,2018-10-04,0.465,0.47,0.475,0.46,703800.0
7,2018-10-03,0.475,0.48,0.485,0.47,1290000.0
8,2018-10-02,0.48,0.5,0.5,0.48,664700.0
9,2018-10-01,0.49,0.495,0.515,0.49,2260000.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,4.72,4.68,4.8,4.68,5600.0
1,2018-10-11,4.65,4.6,4.65,4.53,6800.0
2,2018-10-10,4.7,4.8,4.86,4.65,44000.0
3,2018-10-09,4.71,4.7,4.71,4.7,2600.0
4,2018-10-08,4.75,4.83,4.83,4.75,13900.0
5,2018-10-05,4.85,4.84,4.85,4.83,14000.0
6,2018-10-04,4.88,4.86,4.88,4.86,4000.0
7,2018-10-03,4.91,5.0,5.0,4.91,11400.0
8,2018-10-02,4.92,4.98,5.0,4.9,13200.0
9,2018-09-28,4.84,4.72,4.84,4.72,4900.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,1.82,1.79,1.84,1.78,513300.0
1,2018-10-11,1.79,1.8,1.81,1.78,863200.0
2,2018-10-10,1.83,1.86,1.86,1.83,710800.0
3,2018-10-09,1.86,1.87,1.89,1.86,503800.0
4,2018-10-08,1.86,1.89,1.89,1.86,502000.0
5,2018-10-05,1.88,1.89,1.89,1.88,234600.0
6,2018-10-04,1.9,1.9,1.91,1.89,258400.0
7,2018-10-03,1.89,1.92,1.92,1.87,611100.0
8,2018-10-02,1.9,1.92,1.92,1.9,215000.0
9,2018-10-01,1.91,1.93,1.94,1.91,417600.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,1.53,1.52,1.54,1.52,94000.0
1,2018-10-11,1.52,1.52,1.52,1.49,1220000.0
2,2018-10-10,1.53,1.57,1.59,1.52,374600.0
3,2018-10-09,1.56,1.58,1.58,1.55,68700.0
4,2018-10-08,1.58,1.58,1.58,1.58,18000.0
5,2018-10-05,1.59,1.57,1.59,1.56,103500.0
6,2018-10-04,1.57,1.59,1.59,1.56,392200.0
7,2018-10-03,1.59,1.62,1.62,1.57,153400.0
8,2018-10-02,1.61,1.56,1.62,1.56,303700.0
9,2018-10-01,1.56,1.56,1.6,1.56,107400.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,1.09,1.09,1.1,1.05,364400.0
1,2018-10-11,1.09,1.1,1.11,1.05,456300.0
2,2018-10-10,1.13,1.13,1.14,1.09,715500.0
3,2018-10-09,1.13,1.13,1.14,1.13,324100.0
4,2018-10-08,1.13,1.14,1.14,1.13,34000.0
5,2018-10-05,1.13,1.13,1.14,1.13,690500.0
6,2018-10-04,1.13,1.14,1.14,1.13,543200.0
7,2018-10-03,1.13,1.14,1.14,1.13,176300.0
8,2018-10-02,1.14,1.13,1.14,1.13,143900.0
9,2018-10-01,1.13,1.13,1.14,1.13,524300.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-10-12,1.19,1.18,1.19,1.18,203300.0
1,2018-10-11,1.18,1.18,1.19,1.15,1510000.0
2,2018-10-10,1.2,1.22,1.22,1.18,1680000.0
3,2018-10-09,1.21,1.23,1.24,1.21,424900.0
4,2018-10-08,1.23,1.24,1.24,1.22,644700.0
5,2018-10-05,1.24,1.22,1.25,1.22,701700.0
6,2018-10-04,1.23,1.22,1.23,1.22,131500.0
7,2018-10-03,1.22,1.23,1.23,1.21,1150000.0
8,2018-10-02,1.23,1.24,1.24,1.22,226400.0
9,2018-10-01,1.24,1.23,1.24,1.23,69200.0


Unnamed: 0,date,close,open,high,low,volume
0,2018-09-07,0.045,0.05,0.055,0.045,223120000.0
1,2018-09-06,0.05,0.045,0.05,0.045,67010000.0
2,2018-09-05,0.05,0.05,0.05,0.045,14380000.0
3,2018-09-04,0.05,0.055,0.055,0.045,39950000.0
4,2018-09-03,0.05,0.05,0.055,0.045,104680000.0
5,2018-08-30,0.06,0.065,0.065,0.06,20900000.0
6,2018-08-29,0.065,0.065,0.065,0.06,26510000.0
7,2018-08-28,0.065,0.065,0.07,0.06,17040000.0
8,2018-08-27,0.065,0.065,0.07,0.06,64110000.0
9,2018-08-24,0.065,0.065,0.07,0.065,4420000.0
