### Data Engineering - Raw Data

Ingestion type: Full Load

Scheduled Run: Daily on Briefer Cloud

Source: Yahoo Finance, IPEA

In [1]:
import pandas as pd
import yfinance as yf
import ipeadatapy as ip
import os
from datetime import datetime
from io import StringIO

In [None]:
# Extraction window shared by every download below:
# a fixed lower bound and an upper bound taken at the moment this cell runs.

start_date = datetime(2008, 1, 1)
end_date = datetime.now()

In [None]:
# Extract Data from Yahoo Finance

def extract_yf_data(ticker: str, start_date, end_date) -> pd.DataFrame:
    """Download the history of a single Yahoo Finance ticker.

    Args:
        ticker: Yahoo Finance symbol, e.g. ``'PBR'`` or ``'BZ=F'``.
        start_date: Lower bound of the window, forwarded as ``start``.
        end_date: Upper bound of the window, forwarded as ``end``.
            NOTE(review): yfinance documents ``end`` as exclusive - confirm
            that dropping the final day is acceptable for the daily run.

    Returns:
        Whatever ``yf.download`` returns for the requested window.
    """
    return yf.download(ticker, start=start_date, end=end_date)

In [1]:
# PBR Target location: bronze/raw/yfinance/pbr/pbr.csv
# BRENT Target location: bronze/raw/yfinance/brent/brent.csv
# WTI Target location: bronze/raw/yfinance/wti/wti.csv
# USD Target location: bronze/raw/yfinance/usd/usd.csv
# PRODUCTION Target location: bronze/raw/ipea/production/production.csv

In [None]:
# Petrobras (ticker PBR)
pbr = extract_yf_data(ticker='PBR', start_date=start_date, end_date=end_date)

# Brent crude oil (ticker BZ=F)
brent = extract_yf_data(ticker='BZ=F', start_date=start_date, end_date=end_date)

# WTI crude oil (ticker CL=F)
wti = extract_yf_data(ticker='CL=F', start_date=start_date, end_date=end_date)

# USD/BRL exchange rate (ticker USDBRL=X)
usd = extract_yf_data(ticker='USDBRL=X', start_date=start_date, end_date=end_date)

In [None]:
# Extract Data from IPEA

def extract_ipea_data(cod: str, start_date) -> pd.DataFrame:
    """Fetch an IPEA time series starting from the year of ``start_date``.

    Args:
        cod: IPEA series code, e.g. ``'ANP12_PDPET12'``.
        start_date: Object exposing a ``year`` attribute; only the year is
            used to bound the query.

    Returns:
        Whatever ``ip.timeseries`` returns for the requested series.
    """
    # The filter is "year greater than", so step back one year; presumably the
    # bound is strict and this keeps start_date's own year in the result.
    year_before_start = start_date.year - 1
    return ip.timeseries(cod, yearGreaterThan=year_before_start)

In [None]:
# Monthly oil production - mean quantity, thousand barrels per month

production = extract_ipea_data(cod='ANP12_PDPET12', start_date=start_date)

In [3]:
# ECOPETROL Target location: bronze/raw/yfinance/ecopetrol/ecopetrol.csv
# BP p.l.c. Target location: bronze/raw/yfinance/bp/bp.csv
# EQUINOR Target location: bronze/raw/yfinance/equinor/equinor.csv
# YPF Target location: bronze/raw/yfinance/ypf/ypf.csv

In [None]:
# Stock data for peer companies in the same sector

# Ecopetrol (Colombia), ticker EC
ecopetrol = extract_yf_data(ticker='EC', start_date=start_date, end_date=end_date)

# BP p.l.c. (England), ticker BP
bp = extract_yf_data(ticker='BP', start_date=start_date, end_date=end_date)

# Equinor (Norway), ticker EQNR
equinor = extract_yf_data(ticker='EQNR', start_date=start_date, end_date=end_date)

# YPF (Argentina), ticker YPF
ypf = extract_yf_data(ticker='YPF', start_date=start_date, end_date=end_date)