In [49]:
# Imports
import pandas as pd
from sklearn import preprocessing

In [35]:
# Load the raw Bitcoin price history from CSV. Only Bitcoin is handled for now;
# the remaining coins can be processed later in a loop.
bitcoin_df = pd.read_csv(filepath_or_buffer='Data/coin_bitcoin.csv')
bitcoin_df.head(n=5)

Unnamed: 0,SNo,Name,Symbol,Date,High,Low,Open,Close,Volume,Marketcap
0,1,Bitcoin,BTC,2013-04-29 23:59:59,147.488007,134.0,134.444,144.539993,0.0,1603769000.0
1,2,Bitcoin,BTC,2013-04-30 23:59:59,146.929993,134.050003,144.0,139.0,0.0,1542813000.0
2,3,Bitcoin,BTC,2013-05-01 23:59:59,139.889999,107.720001,139.0,116.989998,0.0,1298955000.0
3,4,Bitcoin,BTC,2013-05-02 23:59:59,125.599998,92.281898,116.379997,105.209999,0.0,1168517000.0
4,5,Bitcoin,BTC,2013-05-03 23:59:59,108.127998,79.099998,106.25,97.75,0.0,1085995000.0


In [46]:
# Pre-processing: build a tidy frame from the raw Bitcoin data without
# modifying `bitcoin_df` itself.

# Mapping from the original column labels to lower-case / snake_case labels.
new_names = {
    'Symbol': 'symbol',
    'Date': 'date',
    'High': 'high',
    'Low': 'low',
    'Open': 'open',
    'Close': 'close',
    'Marketcap': 'market_cap',
    'Volume': 'volume'
}

adjusted_df = (
    bitcoin_df
    # The coin name is redundant once we have the symbol; SNo is not needed either.
    .drop(columns=['Name', 'SNo'])
    # Parse the timestamp strings and keep only the calendar date (time is dropped).
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']).dt.date)
    .rename(columns=new_names)
    # Relative change between open and close, stored as a fraction of the
    # opening price (e.g. 0.075 means +7.5 %), not multiplied by 100.
    .assign(
        percent_change_open_close=lambda frame: (frame['close'] - frame['open']) / frame['open']
    )
)

In [47]:
# Preview the first rows of the cleaned frame to check the renamed columns
# and the new open-to-close change column.
adjusted_df.head(n=5)

Unnamed: 0,symbol,date,high,low,open,close,volume,market_cap,percent_change_open_close
0,BTC,2013-04-29,147.488007,134.0,134.444,144.539993,0.0,1603769000.0,0.075094
1,BTC,2013-04-30,146.929993,134.050003,144.0,139.0,0.0,1542813000.0,-0.034722
2,BTC,2013-05-01,139.889999,107.720001,139.0,116.989998,0.0,1298955000.0,-0.158345
3,BTC,2013-05-02,125.599998,92.281898,116.379997,105.209999,0.0,1168517000.0,-0.095979
4,BTC,2013-05-03,108.127998,79.099998,106.25,97.75,0.0,1085995000.0,-0.08


In [69]:
# Min-max scale the price, volume and market-cap columns so that general trends
# can be compared with a market index without differing magnitudes getting in the way.
# The scaling is applied to a copy, so `adjusted_df` keeps its original values.
scaled_columns = ['high', 'low', 'open', 'close', 'volume', 'market_cap']

min_max_scaler = preprocessing.MinMaxScaler()
normalized_df = adjusted_df.copy()
normalized_df[scaled_columns] = min_max_scaler.fit_transform(normalized_df[scaled_columns])

In [70]:
# The price, volume and market-cap columns are now scaled into the 0-1 range;
# the open-to-close change column is left unscaled.
normalized_df.head(n=5)

Unnamed: 0,symbol,date,high,low,open,close,volume,market_cap,percent_change_open_close
0,BTC,2013-04-29,0.001252,0.001231,0.001147,0.001324,0.0,0.00077,0.075094
1,BTC,2013-04-30,0.001242,0.001232,0.001314,0.001228,0.0,0.000713,-0.034722
2,BTC,2013-05-01,0.001121,0.000759,0.001227,0.000845,0.0,0.000486,-0.158345
3,BTC,2013-05-02,0.000876,0.000481,0.000833,0.00064,0.0,0.000364,-0.095979
4,BTC,2013-05-03,0.000576,0.000244,0.000657,0.00051,0.0,0.000287,-0.08
