In [1]:
# Dependencies and Setup
import pandas as pd
import csv
import os

# Files to Load
crypto_path = os.path.join(os.getcwd(), "Resources", "crypto_data", "consolidated_coin_data.csv")
stock_path = os.path.join(os.getcwd(), "Resources", "stock_data", "indexProcessed.csv")

In [2]:
# Read Files and Store Into Pandas Data Frame
crypto_df = pd.read_csv(crypto_path)
stock_df = pd.read_csv(stock_path)

In [3]:
crypto_df.head()

Unnamed: 0,Currency,Date,Open,High,Low,Close,Volume,Market Cap
0,tezos,"Dec 04, 2019",1.29,1.32,1.25,1.25,46048752,824588509
1,tezos,"Dec 03, 2019",1.24,1.32,1.21,1.29,41462224,853213342
2,tezos,"Dec 02, 2019",1.25,1.26,1.2,1.24,27574097,817872179
3,tezos,"Dec 01, 2019",1.33,1.34,1.25,1.25,24127567,828296390
4,tezos,"Nov 30, 2019",1.31,1.37,1.31,1.33,28706667,879181680


In [4]:
stock_df.head()

Unnamed: 0,Index,Date,Open,High,Low,Close,Adj Close,Volume,CloseUSD
0,HSI,1986-12-31,2568.300049,2568.300049,2568.300049,2568.300049,2568.300049,0.0,333.879006
1,HSI,1987-01-02,2540.100098,2540.100098,2540.100098,2540.100098,2540.100098,0.0,330.213013
2,HSI,1987-01-05,2552.399902,2552.399902,2552.399902,2552.399902,2552.399902,0.0,331.811987
3,HSI,1987-01-06,2583.899902,2583.899902,2583.899902,2583.899902,2583.899902,0.0,335.906987
4,HSI,1987-01-07,2607.100098,2607.100098,2607.100098,2607.100098,2607.100098,0.0,338.923013


In [5]:
stock_df["Index"].unique()

array(['HSI', 'NYA', 'IXIC', '000001.SS', 'N225', 'N100', '399001.SZ',
       'GSPTSE', 'NSEI', 'GDAXI', 'SSMI', 'TWII', 'J203.JO'], dtype=object)

In [6]:
crypto_df["Currency"].unique()

array(['tezos', 'binance-coin', 'eos', 'bitcoin', 'tether', 'xrp',
       'bitcoin-cash', 'stellar', 'litecoin', 'ethereum', 'cardano',
       'bitcoin-sv'], dtype=object)

In [7]:
## Need to get dates in the same format on both dataframes
## Will use built-in datetime functions

import datetime
for index, row in crypto_df.iterrows():
    row["Date"] = row["Date"].replace(',', '')
    new_date = datetime.datetime.strptime(row["Date"], '%b %d %Y')
    row["Date"] = new_date
    
    
## Also need to convert date from string to datetime.date object for comparison purposes    
stock_df["Date"] = pd.to_datetime(stock_df["Date"])

In [8]:
## Make sure it looks the way we want it to

crypto_df.head()

Unnamed: 0,Currency,Date,Open,High,Low,Close,Volume,Market Cap
0,tezos,2019-12-04 00:00:00,1.29,1.32,1.25,1.25,46048752,824588509
1,tezos,2019-12-03 00:00:00,1.24,1.32,1.21,1.29,41462224,853213342
2,tezos,2019-12-02 00:00:00,1.25,1.26,1.2,1.24,27574097,817872179
3,tezos,2019-12-01 00:00:00,1.33,1.34,1.25,1.25,24127567,828296390
4,tezos,2019-11-30 00:00:00,1.31,1.37,1.31,1.33,28706667,879181680


In [9]:
stock_df.head()

Unnamed: 0,Index,Date,Open,High,Low,Close,Adj Close,Volume,CloseUSD
0,HSI,1986-12-31,2568.300049,2568.300049,2568.300049,2568.300049,2568.300049,0.0,333.879006
1,HSI,1987-01-02,2540.100098,2540.100098,2540.100098,2540.100098,2540.100098,0.0,330.213013
2,HSI,1987-01-05,2552.399902,2552.399902,2552.399902,2552.399902,2552.399902,0.0,331.811987
3,HSI,1987-01-06,2583.899902,2583.899902,2583.899902,2583.899902,2583.899902,0.0,335.906987
4,HSI,1987-01-07,2607.100098,2607.100098,2607.100098,2607.100098,2607.100098,0.0,338.923013


In [10]:
## Now let's find the earliest date in our crypto df as it has the shortest timeframe

earliest_date = min(crypto_df["Date"])
print(earliest_date)

2013-04-28 00:00:00


In [13]:
## Now create a filter to get the stock data >= that earliest date from crypto_df

updated_stock_df = stock_df[(stock_df["Date"] >= earliest_date)]
updated_stock_df.head()

Unnamed: 0,Index,Date,Open,High,Low,Close,Adj Close,Volume,CloseUSD
6500,HSI,2013-04-29,22567.63086,22647.58984,22488.65039,22580.76953,22580.76953,1351366000.0,2935.500039
6501,HSI,2013-04-30,22769.58984,22862.68945,22669.53906,22737.00977,22737.00977,1599209000.0,2955.81127
6502,HSI,2013-05-02,22692.33008,22706.07031,22552.31055,22668.30078,22668.30078,1434954000.0,2946.879101
6503,HSI,2013-05-03,22811.83984,22886.16992,22678.66992,22689.96094,22689.96094,1280292000.0,2949.694922
6504,HSI,2013-05-06,22967.7793,22980.56055,22864.91016,22915.08984,22915.08984,1327646000.0,2978.961679


In [15]:
## Now lets convert "currency" header to index for concatonation purposes
crypto_df = crypto_df.rename(columns={"Currency" : "Index" })
crypto_df.head()

Unnamed: 0,Index,Date,Open,High,Low,Close,Volume,Market Cap
0,tezos,2019-12-04 00:00:00,1.29,1.32,1.25,1.25,46048752,824588509
1,tezos,2019-12-03 00:00:00,1.24,1.32,1.21,1.29,41462224,853213342
2,tezos,2019-12-02 00:00:00,1.25,1.26,1.2,1.24,27574097,817872179
3,tezos,2019-12-01 00:00:00,1.33,1.34,1.25,1.25,24127567,828296390
4,tezos,2019-11-30 00:00:00,1.31,1.37,1.31,1.33,28706667,879181680


In [16]:
## Now we need to add a qualitative identifier stock index is different than currency index
crypto_df['ID'] = 'c'
updated_stock_df['ID'] = 's'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  updated_stock_df['ID'] = 's'


In [18]:
crypto_df.head()

Unnamed: 0,Index,Date,Open,High,Low,Close,Volume,Market Cap,ID
0,tezos,2019-12-04 00:00:00,1.29,1.32,1.25,1.25,46048752,824588509,c
1,tezos,2019-12-03 00:00:00,1.24,1.32,1.21,1.29,41462224,853213342,c
2,tezos,2019-12-02 00:00:00,1.25,1.26,1.2,1.24,27574097,817872179,c
3,tezos,2019-12-01 00:00:00,1.33,1.34,1.25,1.25,24127567,828296390,c
4,tezos,2019-11-30 00:00:00,1.31,1.37,1.31,1.33,28706667,879181680,c


In [19]:
updated_stock_df.head()

Unnamed: 0,Index,Date,Open,High,Low,Close,Adj Close,Volume,CloseUSD,ID
6500,HSI,2013-04-29,22567.63086,22647.58984,22488.65039,22580.76953,22580.76953,1351366000.0,2935.500039,s
6501,HSI,2013-04-30,22769.58984,22862.68945,22669.53906,22737.00977,22737.00977,1599209000.0,2955.81127,s
6502,HSI,2013-05-02,22692.33008,22706.07031,22552.31055,22668.30078,22668.30078,1434954000.0,2946.879101,s
6503,HSI,2013-05-03,22811.83984,22886.16992,22678.66992,22689.96094,22689.96094,1280292000.0,2949.694922,s
6504,HSI,2013-05-06,22967.7793,22980.56055,22864.91016,22915.08984,22915.08984,1327646000.0,2978.961679,s


In [21]:
## Now we can join the two Df's (not merge, but concatonate
## We're doing this method as its like comparing apples to oranges where our Index are those fruits.
## Can't merge on any column data as the results wouldn't make nuch sense)
final_df = updated_stock_df.append(crypto_df, sort=False).reset_index().drop(columns = {"inde"})
final_df.head()

Unnamed: 0,index,Index,Date,Open,High,Low,Close,Adj Close,Volume,CloseUSD,ID,Market Cap
0,6500,HSI,2013-04-29 00:00:00,22567.6,22647.6,22488.7,22580.8,22580.76953,1351370000.0,2935.500039,s,
1,6501,HSI,2013-04-30 00:00:00,22769.6,22862.7,22669.5,22737.0,22737.00977,1599210000.0,2955.81127,s,
2,6502,HSI,2013-05-02 00:00:00,22692.3,22706.1,22552.3,22668.3,22668.30078,1434950000.0,2946.879101,s,
3,6503,HSI,2013-05-03 00:00:00,22811.8,22886.2,22678.7,22690.0,22689.96094,1280290000.0,2949.694922,s,
4,6504,HSI,2013-05-06 00:00:00,22967.8,22980.6,22864.9,22915.1,22915.08984,1327650000.0,2978.961679,s,
