# Convert training data

In [60]:
## Common imports

import numpy as np
import pandas as pd
import datetime as dt
import calendar as cal
import os
import glob

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

# Ignore useless warnings (see SciPy issue #5998)
import warnings
warnings.filterwarnings(action="ignore", module="scipy", message="^internal gelsd")

from IPython.display import display
pd.options.display.max_columns = 50
pd.options.display.html.table_schema = True

## Convert the historical prices to the desired format

In [61]:
# Glob pattern matching the raw historical price CSV files
FILE_FOLDER = "../dataset/prices/*.csv"

# Multipliers for the magnitude suffixes that may end a volume string
UOM = {'K': 1000, 'M': 1000000, 'B': 1000000000}

def convert_to_numeric(val):
    """Convert an abbreviated quantity such as ``"1.5K"`` or ``"2M"`` to a float.

    Parameters
    ----------
    val : str, int, float or None
        Value to convert. Strings may end with a magnitude suffix listed in
        ``UOM`` (matched case-insensitively) and may carry surrounding
        whitespace.

    Returns
    -------
    float
        The numeric part multiplied by the suffix multiplier. Strings without
        a known suffix are parsed as plain numbers. ``None``, an empty string
        and the ``"-"`` placeholder yield NaN; numeric input (including NaN)
        is passed through as a float.

    Raises
    ------
    ValueError
        If the text cannot be parsed as a number.
    """
    if val is None:
        return float('nan')
    if not isinstance(val, str):
        # Already numeric (e.g. the NaN pandas uses for a missing cell).
        return float(val)

    text = val.strip()
    if text in ('', '-'):
        # Placeholders for "no data" would otherwise crash the conversion.
        return float('nan')

    suffix = text[-1].upper()
    if suffix in UOM:
        return float(text[:-1]) * UOM[suffix]
    # No recognised suffix: treat the whole string as a plain number.
    return float(text)

# Converted files go to their own folder rather than over the raw inputs, so
# this cell can be re-run without re-processing already-converted data.
# NOTE(review): the output location is an assumption — adjust if another
# destination is expected downstream.
OUTPUT_FOLDER = "../dataset/prices_converted"
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# sorted() keeps the processing order deterministic across runs
for f in sorted(glob.glob(FILE_FOLDER)):
    df = (
        pd.read_csv(f)
        .rename(columns={'price': 'close'})
        .drop(columns=['change_percent'])
    )
    df['date'] = pd.to_datetime(df['date'])
    df['volume'] = df['volume'].apply(convert_to_numeric)
    # to_csv() without a path only returns the CSV text, which was being
    # discarded; write the result to disk, without the positional index.
    df.to_csv(os.path.join(OUTPUT_FOLDER, os.path.basename(f)), index=False)
