In [1]:
# Dependencies and Setup
import csv
import os
from urllib.parse import quote

import pandas as pd
from sqlalchemy import inspect

# Input CSV locations, resolved against the current working directory
crypto_path, stock_path = (
    os.path.join(os.getcwd(), "Resources", folder, filename)
    for folder, filename in (
        ("crypto_data", "cryptoData.csv"),
        ("stock_data", "stockData.csv"),
    )
)

In [2]:
# Load each CSV into its own DataFrame (default parsing options)
crypto_df = pd.read_csv(filepath_or_buffer=crypto_path)
stock_df = pd.read_csv(filepath_or_buffer=stock_path)

In [3]:
## Preview the first rows of the crypto data as loaded from the CSV
crypto_df.head()

Unnamed: 0,Currency,Date,Open,High,Low,Close,Volume,Market Cap
0,tezos,"Dec 04, 2019",1.29,1.32,1.25,1.25,46048752,824588509
1,tezos,"Dec 03, 2019",1.24,1.32,1.21,1.29,41462224,853213342
2,tezos,"Dec 02, 2019",1.25,1.26,1.2,1.24,27574097,817872179
3,tezos,"Dec 01, 2019",1.33,1.34,1.25,1.25,24127567,828296390
4,tezos,"Nov 30, 2019",1.31,1.37,1.31,1.33,28706667,879181680


In [4]:
## Preview the first rows of the stock index data as loaded from the CSV
stock_df.head()

Unnamed: 0,Index,Date,Open,High,Low,Close,Adj Close,Volume,CloseUSD
0,HSI,1986-12-31,2568.300049,2568.300049,2568.300049,2568.300049,2568.300049,0.0,333.879006
1,HSI,1987-01-02,2540.100098,2540.100098,2540.100098,2540.100098,2540.100098,0.0,330.213013
2,HSI,1987-01-05,2552.399902,2552.399902,2552.399902,2552.399902,2552.399902,0.0,331.811987
3,HSI,1987-01-06,2583.899902,2583.899902,2583.899902,2583.899902,2583.899902,0.0,335.906987
4,HSI,1987-01-07,2607.100098,2607.100098,2607.100098,2607.100098,2607.100098,0.0,338.923013


In [5]:
## List the distinct values of the stock "Index" column
stock_df["Index"].unique()

array(['HSI', 'NYA', 'IXIC', '000001.SS', 'N225', 'N100', '399001.SZ',
       'GSPTSE', 'NSEI', 'GDAXI', 'SSMI', 'TWII', 'J203.JO'], dtype=object)

In [6]:
## List the distinct values of the crypto "Currency" column
crypto_df["Currency"].unique()

array(['tezos', 'binance-coin', 'eos', 'bitcoin', 'tether', 'xrp',
       'bitcoin-cash', 'stellar', 'litecoin', 'ethereum', 'cardano',
       'bitcoin-sv'], dtype=object)

In [7]:
## Need to get dates in the same format on both dataframes.
## Both conversions are vectorized with pd.to_datetime. Assigning into the `row`
## yielded by iterrows() is not a reliable way to do this: pandas documents that
## the row may be a copy, in which case the writes never reach crypto_df.

import datetime  # not needed by this cell any more; kept in case other cells rely on the name

## Crypto dates are strings such as "Dec 04, 2019"
crypto_df["Date"] = pd.to_datetime(crypto_df["Date"], format="%b %d, %Y")

## Stock dates are strings such as "1986-12-31"; parse them as well so the two
## "Date" columns can be compared directly
stock_df["Date"] = pd.to_datetime(stock_df["Date"])

In [8]:
## Sanity check: confirm the crypto "Date" column now shows parsed dates
## rather than the original "Dec 04, 2019"-style strings

crypto_df.head()

Unnamed: 0,Currency,Date,Open,High,Low,Close,Volume,Market Cap
0,tezos,2019-12-04 00:00:00,1.29,1.32,1.25,1.25,46048752,824588509
1,tezos,2019-12-03 00:00:00,1.24,1.32,1.21,1.29,41462224,853213342
2,tezos,2019-12-02 00:00:00,1.25,1.26,1.2,1.24,27574097,817872179
3,tezos,2019-12-01 00:00:00,1.33,1.34,1.25,1.25,24127567,828296390
4,tezos,2019-11-30 00:00:00,1.31,1.37,1.31,1.33,28706667,879181680


In [None]:
## Sanity check: look at the stock frame again after its "Date" column was converted
stock_df.head()

Unnamed: 0,Index,Date,Open,High,Low,Close,Adj Close,Volume,CloseUSD
0,HSI,1986-12-31,2568.300049,2568.300049,2568.300049,2568.300049,2568.300049,0.0,333.879006
1,HSI,1987-01-02,2540.100098,2540.100098,2540.100098,2540.100098,2540.100098,0.0,330.213013
2,HSI,1987-01-05,2552.399902,2552.399902,2552.399902,2552.399902,2552.399902,0.0,331.811987
3,HSI,1987-01-06,2583.899902,2583.899902,2583.899902,2583.899902,2583.899902,0.0,335.906987
4,HSI,1987-01-07,2607.100098,2607.100098,2607.100098,2607.100098,2607.100098,0.0,338.923013


In [None]:
## The crypto data covers the shorter timeframe, so its earliest and latest
## dates define the window shared by both datasets.
## Series.min()/.max() are vectorized and skip missing dates, and pd.to_datetime
## guarantees real timestamps are compared rather than raw date strings.

crypto_dates = pd.to_datetime(crypto_df["Date"])
earliest_date = crypto_dates.min()
print(earliest_date)
latest_date = crypto_dates.max()
print(latest_date)

In [None]:
## Keep only the stock rows whose date falls inside the crypto window
## (earliest_date and latest_date are both included).
## .copy() makes the result an independent frame, so adding columns to it later
## neither raises SettingWithCopyWarning nor touches stock_df.

in_crypto_window = (stock_df["Date"] >= earliest_date) & (stock_df["Date"] <= latest_date)
updated_stock_df = stock_df[in_crypto_window].copy()
updated_stock_df.head()

In [None]:
## Rename the "Currency" column to "Index" so it lines up with the stock
## frame's "Index" column when the two frames are concatenated
crypto_df = crypto_df.rename({"Currency": "Index"}, axis="columns")
crypto_df.head()

In [None]:
## Tag every row with its source so stock indices and currencies remain
## distinguishable once the frames are combined: 'c' = crypto, 's' = stock.
## assign() returns a new frame rather than writing into the existing one, which
## avoids SettingWithCopyWarning when updated_stock_df is a slice of stock_df.
crypto_df = crypto_df.assign(ID='c')
updated_stock_df = updated_stock_df.assign(ID='s')

In [None]:
## Confirm the crypto frame now carries the "ID" column
crypto_df.head()

In [None]:
## Confirm the filtered stock frame now carries the "ID" column
updated_stock_df.head()

In [None]:
## Now we can combine the two frames by stacking them (concatenate, not merge).
## A merge would be comparing apples to oranges: a stock index and a currency
## share no key column on which joining rows would produce meaningful results.
## pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
## ignore_index=True renumbers the rows 0..n-1 in the combined frame.
final_df = pd.concat([updated_stock_df, crypto_df], sort=False, ignore_index=True)
final_df.head()

In [None]:
## Drop the columns this cell does not keep in the combined table.
## The labels are passed via `columns=` because the positional axis argument
## (drop(label, 1)) is no longer accepted as of pandas 2.0.
final_df = final_df.drop(columns=['Adj Close', 'Market Cap', 'CloseUSD'])
final_df.head()

In [None]:
## Keep only the calendar date; the time-of-day part is redundant here

date_only = pd.to_datetime(final_df["Date"]).dt.date
final_df["Date"] = date_only

In [None]:
## Display the combined stock + crypto frame
final_df

In [None]:
## Build a lookup table with one row per "Index" value and its "ID" tag,
## to be stored as its own table.
## Selecting "ID" before aggregating avoids computing first() for every column,
## and reset_index() yields the two named columns directly, so the result is
## well-formed even when final_df has no rows.

first_id_per_index = final_df.groupby("Index")["ID"].first()
index_dict = first_id_per_index.to_dict()  # same mapping as before, kept for any cell that uses it
index_df = first_id_per_index.reset_index()

In [None]:
## Display the Index -> ID lookup table
index_df

In [None]:
## Now let's drop ID from our df's now that we have that information stored in another table.
## `columns=` is used because the positional axis argument (drop('ID', 1)) is no
## longer accepted as of pandas 2.0.
final_df = final_df.drop(columns='ID')
crypto_df = crypto_df.drop(columns='ID')
updated_stock_df = updated_stock_df.drop(columns='ID')

In [None]:
## Now we can begin the creation of the SQL database and table

In [None]:
from sqlalchemy import create_engine
from config import password
import psycopg2

In [None]:
## Connect to the default "postgres" database and create 'stock_crypo_db' if it
## does not exist yet. If it already exists, or the connection fails, a message
## is printed instead. Then move on to the next cell.

database_name = 'stock_crypo_db'

conn = None
try:
    conn = psycopg2.connect(
       database="postgres", user='postgres', password=password, host='127.0.0.1', port= '5432'
    )
except psycopg2.Error as err:
    # Report why the connection failed instead of hiding it behind a bare except
    print('Database not connected.')
    print(err)

if conn is not None:
    try:
        # PostgreSQL refuses CREATE DATABASE inside a transaction block,
        # so every statement has to be committed on its own
        conn.autocommit = True

        with conn.cursor() as cursor:
            cursor.execute("SELECT datname FROM pg_database;")
            list_database = cursor.fetchall()

            # fetchall() returns one 1-tuple per database name
            if (database_name,) in list_database:
                print(f"'{database_name}' Database already exists")
            else:
                # database_name is the fixed constant defined above, not user input
                sql = f'CREATE database {database_name}'
                cursor.execute(sql)
                print("Database created successfully........")
    finally:
        # Always release the connection, even if one of the queries fails;
        # skipped entirely when the connection was never opened
        conn.close()

In [None]:
## Connect to local database
## The password is percent-encoded so that characters such as '@', ':', '/' or
## '%' cannot be mistaken for URL delimiters when SQLAlchemy parses the URL.
rds_connection_string = f"postgres:{quote(password, safe='')}@localhost:5432/stock_crypo_db"
engine = create_engine(f'postgresql://{rds_connection_string}')

In [None]:
## Check to see if there are any tables already in the database
## (Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed in 2.0;
## the inspector API is available in both.)
inspect(engine).get_table_names()

In [None]:
## Store the Index -> ID lookup as table 'index_id' (replacing any existing
## table of that name), then read a few rows back as a check
index_df.to_sql('index_id', engine, if_exists='replace', index=False)
pd.read_sql_query('select * from index_id', engine).head()

In [None]:
## Store the crypto frame as table 'crypto_data' (replacing any existing table
## of that name), then read a few rows back as a check
crypto_df.to_sql('crypto_data', engine, if_exists='replace', index=False)
pd.read_sql_query('select * from crypto_data', engine).head()

In [None]:
## Store the date-filtered stock frame as table 'stock_data' (replacing any
## existing table of that name), then read a few rows back as a check
updated_stock_df.to_sql('stock_data', engine, if_exists='replace', index=False)
pd.read_sql_query('select * from stock_data', engine).head()

In [None]:
## Store the combined stock + crypto frame as table 'all_data' (replacing any
## existing table of that name), then read a few rows back as a check
final_df.to_sql('all_data', engine, if_exists='replace', index=False)
pd.read_sql_query('select * from all_data', engine).head()