In [1]:
import os
import pandas as pd
from sqlalchemy import create_engine 

In [2]:
# Load current directory
crypto_path = os.path.join(os.getcwd(), 'Resources', 'crypto_data', 'cryptoData.csv')

In [3]:
# Read csv
crypto_df = pd.read_csv(crypto_path)

In [4]:
# Display df (there are zero values for the crypro data too)
crypto_df

Unnamed: 0,Currency,Date,Open,High,Low,Close,Volume,Market Cap
0,tezos,"Dec 04, 2019",1.29,1.32,1.25,1.25,46048752,824588509
1,tezos,"Dec 03, 2019",1.24,1.32,1.21,1.29,41462224,853213342
2,tezos,"Dec 02, 2019",1.25,1.26,1.20,1.24,27574097,817872179
3,tezos,"Dec 01, 2019",1.33,1.34,1.25,1.25,24127567,828296390
4,tezos,"Nov 30, 2019",1.31,1.37,1.31,1.33,28706667,879181680
...,...,...,...,...,...,...,...,...
28939,bitcoin-sv,"May 02, 2013",3.78,4.04,3.01,3.37,0,58287979
28940,bitcoin-sv,"May 01, 2013",4.29,4.36,3.52,3.80,0,65604596
28941,bitcoin-sv,"Apr 30, 2013",4.40,4.57,4.17,4.30,0,74020918
28942,bitcoin-sv,"Apr 29, 2013",4.37,4.57,4.23,4.38,0,75388964


In [5]:
# Harmonizing dates
import datetime 
for index, row in crypto_df.iterrows():
    row["Date"] = row["Date"].replace(',', '')
    new_date = datetime.datetime.strptime(row["Date"], '%b %d %Y')
    row["Date"] = new_date

In [6]:
crypto_df

Unnamed: 0,Currency,Date,Open,High,Low,Close,Volume,Market Cap
0,tezos,2019-12-04 00:00:00,1.29,1.32,1.25,1.25,46048752,824588509
1,tezos,2019-12-03 00:00:00,1.24,1.32,1.21,1.29,41462224,853213342
2,tezos,2019-12-02 00:00:00,1.25,1.26,1.20,1.24,27574097,817872179
3,tezos,2019-12-01 00:00:00,1.33,1.34,1.25,1.25,24127567,828296390
4,tezos,2019-11-30 00:00:00,1.31,1.37,1.31,1.33,28706667,879181680
...,...,...,...,...,...,...,...,...
28939,bitcoin-sv,2013-05-02 00:00:00,3.78,4.04,3.01,3.37,0,58287979
28940,bitcoin-sv,2013-05-01 00:00:00,4.29,4.36,3.52,3.80,0,65604596
28941,bitcoin-sv,2013-04-30 00:00:00,4.40,4.57,4.17,4.30,0,74020918
28942,bitcoin-sv,2013-04-29 00:00:00,4.37,4.57,4.23,4.38,0,75388964


In [7]:
crypto_df['Date'] = pd.to_datetime(crypto_df['Date']).dt.date
crypto_df

Unnamed: 0,Currency,Date,Open,High,Low,Close,Volume,Market Cap
0,tezos,2019-12-04,1.29,1.32,1.25,1.25,46048752,824588509
1,tezos,2019-12-03,1.24,1.32,1.21,1.29,41462224,853213342
2,tezos,2019-12-02,1.25,1.26,1.20,1.24,27574097,817872179
3,tezos,2019-12-01,1.33,1.34,1.25,1.25,24127567,828296390
4,tezos,2019-11-30,1.31,1.37,1.31,1.33,28706667,879181680
...,...,...,...,...,...,...,...,...
28939,bitcoin-sv,2013-05-02,3.78,4.04,3.01,3.37,0,58287979
28940,bitcoin-sv,2013-05-01,4.29,4.36,3.52,3.80,0,65604596
28941,bitcoin-sv,2013-04-30,4.40,4.57,4.17,4.30,0,74020918
28942,bitcoin-sv,2013-04-29,4.37,4.57,4.23,4.38,0,75388964


In [12]:
crypto_df.describe()

Unnamed: 0,Currency,Date,Open,High,Low,Close,Volume,Market Cap
count,28944,28944,28944.0,28944.0,28944.0,28944.0,28944,28944
unique,12,2412,12307.0,12057.0,12803.0,12294.0,16349,16058
top,cardano,2015-02-26,1.0,1.0,1.0,1.0,0,451600
freq,2412,14,1725.0,1511.0,1367.0,1729.0,2916,394


In [14]:
## Now lets convert "currency" header to index for concatonation purposes
crypto_df = crypto_df.rename(columns={"Currency" : "Index" })
crypto_df

Unnamed: 0,Index,Date,Open,High,Low,Close,Volume,Market Cap
0,tezos,2019-12-04,1.29,1.32,1.25,1.25,46048752,824588509
1,tezos,2019-12-03,1.24,1.32,1.21,1.29,41462224,853213342
2,tezos,2019-12-02,1.25,1.26,1.20,1.24,27574097,817872179
3,tezos,2019-12-01,1.33,1.34,1.25,1.25,24127567,828296390
4,tezos,2019-11-30,1.31,1.37,1.31,1.33,28706667,879181680
...,...,...,...,...,...,...,...,...
28939,bitcoin-sv,2013-05-02,3.78,4.04,3.01,3.37,0,58287979
28940,bitcoin-sv,2013-05-01,4.29,4.36,3.52,3.80,0,65604596
28941,bitcoin-sv,2013-04-30,4.40,4.57,4.17,4.30,0,74020918
28942,bitcoin-sv,2013-04-29,4.37,4.57,4.23,4.38,0,75388964


In [33]:
## Now let's find the earliest date in our crypto df as it has the shortest timeframe
first_date = min(crypto_df["Date"])
print(first_date)
last_date = max(crypto_df["Date"])
print(last_date)

2013-04-28
2019-12-04


In [43]:
crypto_df.value_counts()

Index         Date        Open      High      Low       Close     Volume         Market Cap   
xrp           2019-12-04  0.219824  0.221770  0.212603  0.216348  1,427,312,577  9,364,745,688    1
cardano       2013-05-15  2.82      3.04      2.64      2.94      0              51,891,652       1
bitcoin-sv    2019-11-26  104.53    109.66    104.20    107.79    528,497,445    1,947,609,127    1
              2019-11-27  107.39    109.68    100.94    107.86    503,979,901    1,948,925,218    1
              2019-11-28  107.97    110.47    105.58    106.34    395,048,212    1,921,403,990    1
                                                                                                 ..
stellar       2013-05-02  3.78      4.04      3.01      3.37      0              58,287,979       1
              2013-05-03  3.39      3.45      2.40      3.04      0              52,694,847       1
              2013-05-04  3.03      3.64      2.90      3.48      0              60,290,868       1
     

In [16]:
# Load current directory
stock_path = os.path.join(os.getcwd(), 'Resources', 'stock_data', 'stockData.csv')

In [17]:
# Read csv
stock_df = pd.read_csv(stock_path)

In [18]:
# Display df
stock_df.head(10)

Unnamed: 0,Index,Date,Open,High,Low,Close,Adj Close,Volume,CloseUSD
0,HSI,1986-12-31,2568.300049,2568.300049,2568.300049,2568.300049,2568.300049,0.0,333.879006
1,HSI,1987-01-02,2540.100098,2540.100098,2540.100098,2540.100098,2540.100098,0.0,330.213013
2,HSI,1987-01-05,2552.399902,2552.399902,2552.399902,2552.399902,2552.399902,0.0,331.811987
3,HSI,1987-01-06,2583.899902,2583.899902,2583.899902,2583.899902,2583.899902,0.0,335.906987
4,HSI,1987-01-07,2607.100098,2607.100098,2607.100098,2607.100098,2607.100098,0.0,338.923013
5,HSI,1987-01-08,2603.300049,2603.300049,2603.300049,2603.300049,2603.300049,0.0,338.429006
6,HSI,1987-01-09,2561.699951,2561.699951,2561.699951,2561.699951,2561.699951,0.0,333.020994
7,HSI,1987-01-12,2614.899902,2614.899902,2614.899902,2614.899902,2614.899902,0.0,339.936987
8,HSI,1987-01-13,2590.800049,2590.800049,2590.800049,2590.800049,2590.800049,0.0,336.804006
9,HSI,1987-01-14,2578.199951,2578.199951,2578.199951,2578.199951,2578.199951,0.0,335.165994


In [20]:
# Harmonizing dates
stock_df["Date"] = pd.to_datetime(stock_df["Date"])

In [22]:
stock_df['Date'] = pd.to_datetime(stock_df['Date']).dt.date

In [23]:
stock_df

Unnamed: 0,Index,Date,Open,High,Low,Close,Adj Close,Volume,CloseUSD
0,HSI,1986-12-31,2568.300049,2568.300049,2568.300049,2568.300049,2568.300049,0.0,333.879006
1,HSI,1987-01-02,2540.100098,2540.100098,2540.100098,2540.100098,2540.100098,0.0,330.213013
2,HSI,1987-01-05,2552.399902,2552.399902,2552.399902,2552.399902,2552.399902,0.0,331.811987
3,HSI,1987-01-06,2583.899902,2583.899902,2583.899902,2583.899902,2583.899902,0.0,335.906987
4,HSI,1987-01-07,2607.100098,2607.100098,2607.100098,2607.100098,2607.100098,0.0,338.923013
...,...,...,...,...,...,...,...,...,...
104219,J203.JO,2021-05-25,66054.921880,66812.453130,66022.976560,66076.679690,66076.679690,0.0,4625.367578
104220,J203.JO,2021-05-26,66076.679690,66446.367190,66030.351560,66108.226560,66108.226560,0.0,4627.575859
104221,J203.JO,2021-05-27,66108.226560,66940.250000,66102.546880,66940.250000,66940.250000,0.0,4685.817500
104222,J203.JO,2021-05-28,66940.250000,67726.562500,66794.609380,67554.859380,67554.859380,0.0,4728.840157


In [37]:
# dropping date prior to 2013
stock_df = stock_df.loc[stock_df['Date'] >= first_date]
stock_df

Unnamed: 0,Index,Date,Open,High,Low,Close,Adj Close,Volume,CloseUSD
6500,HSI,2013-04-29,22567.63086,22647.58984,22488.65039,22580.76953,22580.76953,1.351366e+09,2935.500039
6501,HSI,2013-04-30,22769.58984,22862.68945,22669.53906,22737.00977,22737.00977,1.599209e+09,2955.811270
6502,HSI,2013-05-02,22692.33008,22706.07031,22552.31055,22668.30078,22668.30078,1.434954e+09,2946.879101
6503,HSI,2013-05-03,22811.83984,22886.16992,22678.66992,22689.96094,22689.96094,1.280292e+09,2949.694922
6504,HSI,2013-05-06,22967.77930,22980.56055,22864.91016,22915.08984,22915.08984,1.327646e+09,2978.961679
...,...,...,...,...,...,...,...,...,...
104219,J203.JO,2021-05-25,66054.92188,66812.45313,66022.97656,66076.67969,66076.67969,0.000000e+00,4625.367578
104220,J203.JO,2021-05-26,66076.67969,66446.36719,66030.35156,66108.22656,66108.22656,0.000000e+00,4627.575859
104221,J203.JO,2021-05-27,66108.22656,66940.25000,66102.54688,66940.25000,66940.25000,0.000000e+00,4685.817500
104222,J203.JO,2021-05-28,66940.25000,67726.56250,66794.60938,67554.85938,67554.85938,0.000000e+00,4728.840157


In [39]:
stock_df['Date'].unique()

array([datetime.date(2013, 4, 29), datetime.date(2013, 4, 30),
       datetime.date(2013, 5, 2), ..., datetime.date(2014, 12, 25),
       datetime.date(2018, 2, 19), datetime.date(2017, 1, 2)],
      dtype=object)

In [38]:
stock_df = stock_df.loc[stock_df['Date'] <= last_date]
stock_df

Unnamed: 0,Index,Date,Open,High,Low,Close,Adj Close,Volume,CloseUSD
6500,HSI,2013-04-29,22567.63086,22647.58984,22488.65039,22580.76953,22580.76953,1.351366e+09,2935.500039
6501,HSI,2013-04-30,22769.58984,22862.68945,22669.53906,22737.00977,22737.00977,1.599209e+09,2955.811270
6502,HSI,2013-05-02,22692.33008,22706.07031,22552.31055,22668.30078,22668.30078,1.434954e+09,2946.879101
6503,HSI,2013-05-03,22811.83984,22886.16992,22678.66992,22689.96094,22689.96094,1.280292e+09,2949.694922
6504,HSI,2013-05-06,22967.77930,22980.56055,22864.91016,22915.08984,22915.08984,1.327646e+09,2978.961679
...,...,...,...,...,...,...,...,...,...
103849,J203.JO,2019-11-28,56173.85938,56173.85938,55702.44922,55702.44922,55702.44922,0.000000e+00,3899.171445
103850,J203.JO,2019-11-29,55702.44922,55702.44922,55057.26953,55349.01172,55349.01172,0.000000e+00,3874.430820
103851,J203.JO,2019-12-02,55349.01172,55504.92188,54740.60938,54814.07031,54814.07031,0.000000e+00,3836.984922
103852,J203.JO,2019-12-03,54814.07031,54814.07031,54413.83984,54485.41016,54485.41016,0.000000e+00,3813.978711


In [40]:
stock_df['Date'].unique()

array([datetime.date(2013, 4, 29), datetime.date(2013, 4, 30),
       datetime.date(2013, 5, 2), ..., datetime.date(2014, 12, 25),
       datetime.date(2018, 2, 19), datetime.date(2017, 1, 2)],
      dtype=object)

In [41]:
# Summary statistics
stock_df.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,CloseUSD
count,21353.0,21353.0,21353.0,21353.0,21353.0,21353.0,21353.0
mean,13835.38703,13910.029933,13754.877268,13835.010248,13835.002862,2122406000.0,4907.541605
std,12835.533616,12909.135022,12761.243712,12836.14802,12836.150133,5622127000.0,4929.012038
min,685.630005,687.909973,674.650024,678.049988,678.049988,0.0,52.85
25%,7724.149902,7773.850098,7670.600098,7730.169922,7730.083496,276200.0,445.961133
50%,10267.5,10331.75,10200.62012,10265.2002,10265.08594,78175000.0,3134.637949
75%,15281.0,15345.09961,15208.79981,15277.2002,15277.2002,1705820000.0,9967.821679
max,61684.76953,61776.67969,61422.96094,61684.76953,61684.76953,85888810000.0,16542.711524


In [42]:
# find values
stock_df.value_counts()

Index      Date        Open          High          Low           Close         Adj Close     Volume        CloseUSD   
TWII       2019-12-04  11511.820310  11513.830080  11457.429690  11510.469730  11510.469730  0.000000e+00  460.418789     1
HSI        2015-08-21  22343.250000  22492.789060  22185.849610  22409.619140  22409.619140  2.490614e+09  2913.250488    1
           2015-08-25  21119.529300  21871.400390  20865.259770  21404.960940  21404.960940  3.588835e+09  2782.644922    1
           2015-08-26  21434.250000  21686.449220  21060.089840  21080.390630  21080.390630  2.956077e+09  2740.450782    1
           2015-08-27  21758.619140  21963.429690  21407.589840  21838.539060  21838.539060  3.192968e+09  2839.010078    1
                                                                                                                         ..
N225       2017-08-08  20062.650390  20076.800780  19970.570310  19996.009770  19996.009770  8.040000e+07  199.960098     1
           20

In [45]:
# replace zeros with NaN
import numpy as np
stock_df.replace(0, np.nan, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().replace(


In [46]:
stock_df

Unnamed: 0,Index,Date,Open,High,Low,Close,Adj Close,Volume,CloseUSD
6500,HSI,2013-04-29,22567.63086,22647.58984,22488.65039,22580.76953,22580.76953,1.351366e+09,2935.500039
6501,HSI,2013-04-30,22769.58984,22862.68945,22669.53906,22737.00977,22737.00977,1.599209e+09,2955.811270
6502,HSI,2013-05-02,22692.33008,22706.07031,22552.31055,22668.30078,22668.30078,1.434954e+09,2946.879101
6503,HSI,2013-05-03,22811.83984,22886.16992,22678.66992,22689.96094,22689.96094,1.280292e+09,2949.694922
6504,HSI,2013-05-06,22967.77930,22980.56055,22864.91016,22915.08984,22915.08984,1.327646e+09,2978.961679
...,...,...,...,...,...,...,...,...,...
103849,J203.JO,2019-11-28,56173.85938,56173.85938,55702.44922,55702.44922,55702.44922,,3899.171445
103850,J203.JO,2019-11-29,55702.44922,55702.44922,55057.26953,55349.01172,55349.01172,,3874.430820
103851,J203.JO,2019-12-02,55349.01172,55504.92188,54740.60938,54814.07031,54814.07031,,3836.984922
103852,J203.JO,2019-12-03,54814.07031,54814.07031,54413.83984,54485.41016,54485.41016,,3813.978711
