In [1]:
#import dependencies
import pandas as pd
from sqlalchemy import create_engine
from datetime import datetime
import numpy as np
import psycopg2 as pg
import pandas.io.sql as psql
import matplotlib as plt

In [2]:
#Load Amazon file
AMZN_file = "Asset_Classes/AMZN_Historical.csv"
AMZN_df = pd.read_csv(AMZN_file)
AMZN_df.head(10)

Unnamed: 0,date,open,high,low,close,volume
0,12/30/2019,1874.0,1884.0,1840.619995,1846.890015,3674700
1,12/31/2019,1842.0,1853.26001,1832.22998,1847.839966,2506500
2,1/2/2020,1875.0,1898.01001,1864.150024,1898.01001,4029000
3,1/3/2020,1864.5,1886.199951,1864.5,1874.969971,3764400
4,1/6/2020,1860.0,1903.689941,1860.0,1902.880005,4061800
5,1/7/2020,1904.5,1913.890015,1892.040039,1906.859985,4044900
6,1/8/2020,1898.040039,1911.0,1886.439941,1891.969971,3508000
7,1/9/2020,1909.890015,1917.819946,1895.800049,1901.050049,3167300
8,1/10/2020,1905.369995,1906.939941,1880.0,1883.160034,2853700
9,1/13/2020,1891.310059,1898.0,1880.800049,1891.300049,2780800


In [3]:
#Get only the columns we need
AMZN_organized = AMZN_df[["date","close"]]
AMZN_organized.head()

Unnamed: 0,date,close
0,12/30/2019,1846.890015
1,12/31/2019,1847.839966
2,1/2/2020,1898.01001
3,1/3/2020,1874.969971
4,1/6/2020,1902.880005


# rename columns

In [27]:
AMZN_organized_df = AMZN_organized.rename(columns={"close": "amzn_close"}, inplace=True)


In [4]:
# Connect to database
connection_string = "postgres:postgres@localhost:5432/Asset_Classes"
engine = create_engine(f'postgresql://{connection_string}')

In [5]:
# Make sure connecting the the database worked by getting the table names
engine.table_names()

OperationalError: (psycopg2.OperationalError) FATAL:  database "Asset_Classes" does not exist

(Background on this error at: http://sqlalche.me/e/e3q8)

In [51]:
#Put our organized Pandas table into the SQL table
AMZN_organized.to_sql(name='amzn', con=engine, if_exists='append', index=False)

In [53]:
#make sure it executed correctly
engine.execute("SELECT * from amzn;").fetchall()

[(datetime.date(2019, 12, 30), 1846.890015),
 (datetime.date(2019, 12, 31), 1847.839966),
 (datetime.date(2020, 1, 2), 1898.0100100000002),
 (datetime.date(2020, 1, 3), 1874.969971),
 (datetime.date(2020, 1, 6), 1902.880005),
 (datetime.date(2020, 1, 7), 1906.859985),
 (datetime.date(2020, 1, 8), 1891.969971),
 (datetime.date(2020, 1, 9), 1901.050049),
 (datetime.date(2020, 1, 10), 1883.160034),
 (datetime.date(2020, 1, 13), 1891.300049),
 (datetime.date(2020, 1, 14), 1869.439941),
 (datetime.date(2020, 1, 15), 1862.02002),
 (datetime.date(2020, 1, 16), 1877.939941),
 (datetime.date(2020, 1, 17), 1864.719971),
 (datetime.date(2020, 1, 21), 1892.0),
 (datetime.date(2020, 1, 22), 1887.459961),
 (datetime.date(2020, 1, 23), 1884.5799559999998),
 (datetime.date(2020, 1, 24), 1861.640015),
 (datetime.date(2020, 1, 27), 1828.339966),
 (datetime.date(2020, 1, 28), 1853.25),
 (datetime.date(2020, 1, 29), 1858.0),
 (datetime.date(2020, 1, 30), 1870.680054),
 (datetime.date(2020, 1, 31), 2008.71

In [8]:
#Load Gold file
GLD_file = "Asset_Classes/GLD_Historical.csv"
GLD_df = pd.read_csv(GLD_file)
GLD_df.head()

Unnamed: 0,date,open,high,low,close,volume
0,12/30/2019,142.559998,142.800003,142.460007,142.630005,4810600
1,12/31/2019,143.309998,143.600006,142.800003,142.899994,5313500
2,1/2/2020,143.860001,144.210007,143.399994,143.949997,7733800
3,1/3/2020,145.75,146.320007,145.399994,145.860001,12272800
4,1/6/2020,148.440002,148.479996,146.949997,147.389999,14403300


In [9]:
#Get only the columns we need
GLD_organized = GLD_df[["date","close"]]
GLD_organized.head()

Unnamed: 0,date,close
0,12/30/2019,142.630005
1,12/31/2019,142.899994
2,1/2/2020,143.949997
3,1/3/2020,145.860001
4,1/6/2020,147.389999


# rename column

In [26]:
GLD_organized_df = GLD_organized.rename(columns={'close': 'gld_close'}, inplace=True)

# Tran joined amzn with gld

In [10]:
first_join = pd.merge(AMZN_organized, GLD_organized, on= 'date')

In [56]:
#Put our organized Pandas table into the SQL table
GLD_organized.to_sql(name='gld', con=engine, if_exists='append', index=False)

In [64]:
engine.execute("SELECT * from gld;")

<sqlalchemy.engine.result.ResultProxy at 0x26b67f4fd48>

In [29]:
#Load Netflix file
NFLX_file = "Asset_Classes/NFLX_Historical.csv"
NFLX_df = pd.read_csv(NFLX_file)
NFLX_df.head()

Unnamed: 0,date,open,high,low,close,volume
0,12/30/2019,329.079987,329.190002,322.859985,323.309998,4311500
1,12/31/2019,322.0,324.920013,321.089996,323.570007,3713300
2,1/2/2020,326.100006,329.980011,324.779999,329.809998,4485800
3,1/3/2020,326.779999,329.859985,325.529999,325.899994,3806900
4,1/6/2020,323.119995,336.359985,321.200012,335.829987,5663100


In [30]:
#Get only the columns we need
NFLX_organized = NFLX_df[["date","close"]]
NFLX_organized.head()

Unnamed: 0,date,close
0,12/30/2019,323.309998
1,12/31/2019,323.570007
2,1/2/2020,329.809998
3,1/3/2020,325.899994
4,1/6/2020,335.829987


## rename column

In [31]:
NFLX_organized_df = NFLX_organized.rename(columns={'close': 'nflx_close'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


# Tran joined NFLX

In [None]:
second_join = pd.merge(first_join, NFLX_organized_df, on= 'date')

In [61]:
#Put our organized Pandas table into the SQL table
NFLX_organized.to_sql(name='nflx', con=engine, if_exists='append', index=False)

In [63]:
engine.execute("SELECT * from nflx;")

<sqlalchemy.engine.result.ResultProxy at 0x26b67f4f4c8>

In [68]:
#Load SHY file
SHY_file = "Asset_Classes/SHY_Historical.csv"
SHY_df = pd.read_csv(SHY_file)
SHY_df.head()

Unnamed: 0,date,open,high,low,close,volume
0,12/30/2019,84.599998,84.660004,84.599998,84.639999,1630700
1,12/31/2019,84.639999,84.669998,84.610001,84.629997,3025600
2,1/2/2020,84.669998,84.709999,84.639999,84.669998,2665200
3,1/3/2020,84.720001,84.760002,84.690002,84.739998,2039900
4,1/6/2020,84.730003,84.739998,84.68,84.709999,1132100


In [70]:
#Get only the columns we need
SHY_organized = SHY_df[["date","close"]]
SHY_organized.head()

Unnamed: 0,date,close
0,12/30/2019,84.639999
1,12/31/2019,84.629997
2,1/2/2020,84.669998
3,1/3/2020,84.739998
4,1/6/2020,84.709999


# ## rename column

In [None]:
SHY_organized_df = SHY_organized.rename(columns={'close': 'shy_close'}, inplace=True)

In [None]:
third_join = pd.merge(second_join, SHY_organized_df, on= 'date')

In [71]:
#Put our organized Pandas table into the SQL table
SHY_organized.to_sql(name='shy', con=engine, if_exists='append', index=False)

In [72]:
engine.execute("SELECT * from nflx;")

<sqlalchemy.engine.result.ResultProxy at 0x26b67f67088>

In [80]:
#Load S&P 500 file
SP_file = "Asset_Classes/SP_Historical.csv"
SP_df = pd.read_csv(SP_file)
SP_df.head()

Unnamed: 0,date,open,high,low,close,volume
0,12/30/2019,3238.25,3244.25,3217.25,3223.5,984389
1,12/31/2019,3223.25,3236.25,3213.0,3231.0,1124598
2,1/2/2020,3237.0,3261.75,3234.25,3259.0,1157924
3,1/3/2020,3261.0,3263.5,3206.75,3235.5,1416241
4,1/6/2020,3220.25,3249.5,3208.75,3243.5,1755057


In [81]:
#Get only the columns we need
SP_organized = SP_df[["date","close"]]
SP_organized.head()

Unnamed: 0,date,close
0,12/30/2019,3223.5
1,12/31/2019,3231.0
2,1/2/2020,3259.0
3,1/3/2020,3235.5
4,1/6/2020,3243.5


# rename column

In [None]:
SP_organized_df = SP_organized.rename(columns={'close': 'sp_close'}, inplace=True)
fourth_join = pd.merge(third_join, SP_organized_df, on= 'date')

In [82]:
#Put our organized Pandas table into the SQL table
SP_organized.to_sql(name='sp', con=engine, if_exists='append', index=False)

In [83]:
#Load USO file
USO_file = "Asset_Classes/SP_Historical.csv"
USO_df = pd.read_csv(USO_file)
USO_df.head()

Unnamed: 0,date,open,high,low,close,volume
0,12/30/2019,3238.25,3244.25,3217.25,3223.5,984389
1,12/31/2019,3223.25,3236.25,3213.0,3231.0,1124598
2,1/2/2020,3237.0,3261.75,3234.25,3259.0,1157924
3,1/3/2020,3261.0,3263.5,3206.75,3235.5,1416241
4,1/6/2020,3220.25,3249.5,3208.75,3243.5,1755057


In [84]:
#Get only the columns we need
USO_organized = USO_df[["date","close"]]
USO_organized.head()

Unnamed: 0,date,close
0,12/30/2019,3223.5
1,12/31/2019,3231.0
2,1/2/2020,3259.0
3,1/3/2020,3235.5
4,1/6/2020,3243.5


# rename column

In [None]:
USO_organized_df = USO_organized.rename(columns={'close': 'uso_close'}, inplace=True)
fifth_join = pd.merge(fourth_join, USO_organized_df, on= 'date')

In [85]:
#Put our organized Pandas table into the SQL table
USO_organized.to_sql(name='uso', con=engine, if_exists='append', index=False)

In [86]:
#Load ZOOM file
ZM_file = "Asset_Classes/ZM_Historical.csv"
ZM_df = pd.read_csv(ZM_file)
ZM_df.head()

Unnamed: 0,date,open,high,low,close,volume
0,12/30/2019,66.559998,67.18,65.519997,66.790001,1162700
1,12/31/2019,66.379997,68.18,66.309998,68.040001,1351400
2,1/2/2020,68.800003,69.235001,68.019997,68.720001,1315500
3,1/3/2020,67.620003,68.68,67.099998,67.279999,1127900
4,1/6/2020,66.629997,70.495003,65.811996,70.32,3151600


In [87]:
#Get only the columns we need
ZM_organized = USO_df[["date","close"]]
ZM_organized.head()

Unnamed: 0,date,close
0,12/30/2019,3223.5
1,12/31/2019,3231.0
2,1/2/2020,3259.0
3,1/3/2020,3235.5
4,1/6/2020,3243.5


In [None]:
ZM_organized_df = ZM_organized.rename(columns={'close': 'zm_close'}, inplace=True)
final_join = pd.merge(fifth_join, ZM_organized_df, on= 'date')

In [88]:
#Put our organized Pandas table into the SQL table
ZM_organized.to_sql(name='zm', con=engine, if_exists='append', index=False)