# Fetch dynamic oil price data

## Requirements & configuration

In [20]:
# # Install required packages
# ! pip install yfinance --upgrade --no-cache-dir
# ! pip install psycopg2
# ! pip install sqlalchemy

In [171]:
# # Import required packages
# import os
# import json
import pandas as pd
import yfinance as yf # https://pypi.org/project/yfinance/
import psycopg2
from sqlalchemy import create_engine
from datetime import date, timedelta

In [188]:
# DB configuration
config = {
    'host': 'datalake.cknmu1bvrxjg.us-east-1.rds.amazonaws.com',
    'port': '5432',
    'user': 'muser',
    'password': 'datalake',
    'dbname': 'datalake',
}

# Configure cnx_string for sqlalchemy
cnx_str = f'postgresql://{config["user"]}:{config["password"]}@{config["host"]}/{config["dbname"]}'

In [102]:
# Set ticker for required instrument "Brent Crude Oil"
brent = yf.Ticker("BZ=F")

## Setup DB Connection

In [199]:
# Establish connection to database 'lakehouse'
try: 
    conn = psycopg2.connect(
        dbname=config['dbname'],
        user=config['user'],
        host=config['host'],
        password=config['password'],
        port=config['port']
    )

except psycopg2.Error as e: 
    print("Error: Could not make the connection to the postgres database")
    print(e)

# Create cursor
try: 
    cursor = conn.cursor()
except psycopg2.Error as e: 
    print("Error: Could not get the cursor to the database")
    print(e)

# Set auto commit feature
conn.set_session(autocommit=True)

# Create engine
engine = create_engine(cnx_str)

## Get latest date of table oilprice

In [221]:
# Get latest date of table oilprice
sql = '''
    SELECT date
    FROM oilprice
    ORDER BY date DESC
    LIMIT 1;
    '''
cursor.execute(sql)
last_date = str(cursor.fetchone()[0]+timedelta(days=1))
last_date

'2022-04-01'

## Fetch oil price data dynamically

In [225]:
# Get oil price data dynamically
data = yf.download("BZ=F", start=last_date, end=str(date.today()-timedelta(days=1)))
data

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-03-31,108.709999,109.360001,107.370003,107.910004,107.910004,31


## Transform data

In [132]:
# Remove column 'Close'
data.drop(['Close'], inplace=True, axis=1)
data.head()

Unnamed: 0_level_0,Open,High,Low,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-03-28,120.309998,120.309998,109.059998,112.480003,14026


In [133]:
# Put the date index as column and convert it to date format
data = data.reset_index(level=0)
data['Date'] = pd.to_datetime(data['Date']).dt.date
data.head()

Unnamed: 0,Date,Open,High,Low,Adj Close,Volume
0,2022-03-28,120.309998,120.309998,109.059998,112.480003,14026


In [134]:
# Rename columns
data.rename(columns = {'Date':'date',
                       'Open':'open',
                       'High':'high',
                       'Low':'low',
                       'Adj Close':'close',
                       'Volume':'volume'},
            inplace = True)
data.head()

Unnamed: 0,date,open,high,low,close,volume
0,2022-03-28,120.309998,120.309998,109.059998,112.480003,14026


## Insert values into table

In [135]:
# Insert values into table using sqlalchemy
data.to_sql('oilprice', engine, if_exists='append', index=False)

## Check values

In [136]:
# Check the most recently inputted values
sql = '''
    SELECT *
    FROM oilprice
    ORDER BY date DESC
    LIMIT 1;
    '''
test = pd.read_sql(sql, engine)
test

Unnamed: 0,date,open,high,low,close,volume
0,2022-03-28,120.309998,120.309998,109.059998,112.480003,14026


## Close the connection

In [137]:
cursor.close()
conn.close()