# Installing Libraries

In [None]:
#!pip install pandas_datareader

In [None]:
#!pip install yfinance

In [None]:
#!pip install yfinance --upgrade --no-cache-dir

In [None]:
#!pip install html5lib

# Importing Libraries

In [1]:
import datetime
import glob
import os
import re
from datetime import timedelta
from time import sleep
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import regex as re  # NOTE: rebinds `re` to the third-party regex module (shadows the stdlib import above)
import requests
import sqlalchemy as db
import yfinance as yf
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from selenium.common.exceptions import WebDriverException
from unidecode import unidecode

In [2]:
# Directory where the project's data files are read from and written to
path = r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\04. GitHub\stocks_project\data/'
# The historical dataset is too big to upload to GitHub, so it is kept in a separate directory
stock_path = r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\05. Dados\stock_project_datasets/'

# Gathering Data - Brazilian Companies

### List of Brazilian companies and composition of the indexes - Ibovespa, IBrX100, IBrX50, IBrA

## Selenium

Used to get the list of companies in each Brazilian index

### Configuring WebDriver

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains

### Configuring the download directory

In [None]:
# Chrome options object passed to webdriver.Chrome(...) when a driver is created
chromeOptions = webdriver.ChromeOptions()
# Directory where Chrome must save the downloaded files
download_path = r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\04. GitHub\stocks_project\data'
# Chrome preference that sets the default download directory
prefs = {"download.default_directory" : download_path}
chromeOptions.add_experimental_option("prefs",prefs)

In [None]:
#Runs the webdriver
driver = webdriver.Chrome(options=chromeOptions)

### Download Index File (.csv)

In [None]:
def get_index_stocks(
    index,
    wait=6,
    download_dir=r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\04. GitHub\stocks_project\data',
):
    '''
        Downloads the composition file (.csv) of a B3 index and returns the name of the downloaded file.

        Uses the module-level Selenium `driver`, which must already be running.

        Args:
            index (str): index code used in the B3 URL (e.g. 'ibov', 'ibxx'); it is upper-cased.
            wait (int): seconds used as the WebDriver implicit wait (element lookup timeout)
                and as the pause that gives the browser time to finish the download.
            download_dir (str): directory where the browser saves the downloaded file.

        Returns:
            str: name of the most recently modified .csv file in download_dir. The name is
                relative to download_dir, which becomes the current working directory.

        Raises:
            FileNotFoundError: if download_dir contains no .csv file after waiting.
    '''

    # implicitly_wait only configures the element lookup timeout of the session
    # (it does not pause execution), so it is set once
    driver.implicitly_wait(wait)

    # Chrome WebDriver opens the index website
    url = f'https://sistemaswebb3-listados.b3.com.br/indexPage/day/{index.upper()}?language=pt-br'
    driver.get(url)

    # Select the "Setor de Atuação" segment and start the download
    driver.find_element(By.ID, 'segment').send_keys("Setor de Atuação")
    driver.find_element(By.LINK_TEXT, 'Download').click()

    # Set the directory (kept because the returned file name is relative to it)
    os.chdir(download_dir)
    # Give the browser time to finish writing the file
    sleep(wait)

    # Get the .csv files from the selected directory
    files = glob.glob('*csv')
    if not files:
        raise FileNotFoundError(
            f'No .csv file found in {download_dir} after downloading index {index.upper()}'
        )

    # Returns the name of the most recently modified file
    return max(files, key=os.path.getmtime)

### Create the Index DataFrame

In [None]:
def create_df(file):
    '''
        This function receives the name of an index file downloaded from B3 (.csv)
        and returns its content as a DataFrame with normalized column names.

        Column names are lower-cased, accents are removed, spaces become '_' and
        the characters '.', '(' and ')' are dropped.
    '''

    # Creating DataFrame
    index_df = pd.read_csv(file,
                           encoding='ISO-8859-1',
                           header=1,                   # Uses line 1 as header
                           sep=';',
                           decimal=',',
                           thousands='.',
                           skipfooter=2,               # Removes last 2 lines
                           engine='python',            # skipfooter is only supported by the python engine
                           index_col=False)            # Does not make first column as index

    # Normalizing columns (raw string: '\.' is an invalid escape sequence in a normal string literal)
    index_df.columns = [
        re.sub(r'[\.()]', '', unidecode(column.lower()).replace(' ', '_'))
        for column in index_df.columns
    ]

    return index_df

## Creating Index's DataFrames

In [None]:
# Directory where the project's data files are read from and written to
path = r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\04. GitHub\stocks_project\data/'
# The historical dataset is too big to upload to GitHub, so it is kept in a separate directory
stock_path = r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\05. Dados\stock_project_datasets/'

In [None]:
# Defining DataFrames' columns names
col_names = [
    'sector',
    'ticker',
    'name',
    'type',
    'amount',
    'percentage',
    'percentage_acum'
]

### Ibovespa

In [None]:
ibov = create_df(get_index_stocks('ibov'))
len(ibov)

In [None]:
ibov.columns = col_names

In [None]:
# Save index file
ibov.to_csv(f'{path}IBOV.csv',
                    encoding='ISO-8859-1',
                    sep=';',
                    decimal='.',
                    index=False)

In [100]:
ibov = pd.read_csv(f'{path}IBOV.csv',
                    encoding='ISO-8859-1',
                    sep=';',
                    decimal='.')

In [101]:
ibov.shape

(92, 7)

### IBrX100

In [None]:
ibrx = create_df(get_index_stocks('ibxx'))
len(ibrx)

In [None]:
ibrx.columns = col_names

In [None]:
# Save index file
ibrx.to_csv(f'{path}IBRX100.csv',
                    encoding='ISO-8859-1',
                    sep=';',
                    decimal='.',
                    index=False)

### IBrX50

In [None]:
ibrx50 = create_df(get_index_stocks('ibxl'))
len(ibrx50)

In [None]:
ibrx50.columns = col_names

In [None]:
# Save index file
ibrx50.to_csv(f'{path}IBRX50.csv',
                    encoding='ISO-8859-1',
                    sep=';',
                    decimal='.',
                    index=False)

### IBrA

In [None]:
ibra = create_df(get_index_stocks('ibra'))
len(ibra)

In [None]:
ibra.columns = col_names

In [None]:
# Save index file
ibra.to_csv(f'{path}IBRA.csv',
                    encoding='ISO-8859-1',
                    sep=';',
                    decimal='.',
                    index=False)

## Checking companies that differ in the two indexes - Obsolete

In [None]:
# The index DataFrames were renamed with col_names, so the ticker column is 'ticker' (not 'codigo')
emp_ibrx = ibrx['ticker']
set_ibrx = set(emp_ibrx)
len(set_ibrx)

In [None]:
# The index DataFrames were renamed with col_names, so the ticker column is 'ticker' (not 'codigo')
emp_ibov = ibov['ticker']
set_ibov = set(emp_ibov)
len(set_ibov)

In [None]:
# The index DataFrames were renamed with col_names, so the ticker column is 'ticker' (not 'codigo')
emp_ibra = ibra['ticker']
set_ibra = set(emp_ibra)
len(set_ibra)

In [None]:
print(len(set_ibra.difference(set_ibrx)))
print(set_ibra.difference(set_ibrx))

In [None]:
print(len(set_ibov.difference(set_ibrx)))
print(set_ibov.difference(set_ibrx))

In [None]:
print(len(set_ibrx.difference(set_ibov)))
print(set_ibrx.difference(set_ibov))

# Gathering Data - Brazilian Companies Historical Dataset - Obsolete (All data gathered from Yfinance)

### Create dataset concatenating historical datasets downloaded from B3 website with data from the companies listed in IBRA Index

## Create list with main companies ticker

In [None]:
## Using companies in IBRA Index
codigo = list(set_ibra)

## Unzip files

In [None]:
import zipfile
path = r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\04. GitHub\data_visualization\data/'

# Extract the yearly COTAHIST archives (1986 to 1999) into the directory they are stored in
for i in range(1986, 2000):
    with zipfile.ZipFile(f'{path}COTAHIST_A{i}.zip', 'r') as archive:
        archive.extractall(path)



## Rename files

In [None]:
os.listdir()

In [None]:
# Rename the yearly files (2000 to 2021) in the current working directory: '.TXT' -> '.txt'
for i in range(2000, 2022):
    os.rename(f'COTAHIST_A{i}.TXT', f'COTAHIST_A{i}.txt')

## Setting configurations to read B3 historical files

In [None]:
# Show up to 500 columns when a DataFrame is displayed
pd.set_option('display.max_columns', 500)

In [None]:
path = r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\05. Dados\B3\txt/'

year = 2022

widths = [2,8,2,12,3,12,10,3,4,13,13,13,13,13,13,13,5,18,18,13,1,8,7,13,12,3]

col_names = [
"tipo_registro",
"data_pregao",
"cod_bdi",
"cod_negociacao",
"tipo_mercado",
"nome_empresa",
"especificacao_papel",
"prazo_dias_merc_termo",
"moeda_referencia",
"preco_abertura",
"preco_maximo",
"preco_minimo",
"preco_medio",
"preco_ultimo_negocio",
"preco_melhor_oferta_compra",
"preco_melhor_oferta_venda",
"numero_negocios",
"quantidade_papeis_negociados",
"volume_total_negociado",
"preco_exercicio",
"ìndicador_correcao_precos",
"data_vencimento" ,
"fator_cotacao",
"preco_exercicio_pontos",
"codigo_isin",
"num_distribuicao_papel"]

decimal_config=[
"preco_abertura",
"preco_maximo",
"preco_minimo",
"preco_medio",
"preco_ultimo_negocio",
"preco_melhor_oferta_compra",
"preco_melhor_oferta_venda",
"volume_total_negociado",
"preco_exercicio",
"preco_exercicio_pontos"
]


remains = [
"data_pregao",
"cod_negociacao",
"tipo_mercado",
"nome_empresa",
"preco_abertura",
"preco_maximo",
"preco_minimo",
"preco_medio",
"preco_ultimo_negocio",
]

In [None]:
dataset = pd.DataFrame()

In [None]:
'''     Concatenate DataFrames
year = 2022
while year > 1985:
    df = pd.read_fwf(f'{path}COTAHIST_A{year}.TXT',
                                    encoding='ISO-8859-1',
                                    header=0,
                                    widths=widths,
                                    skipfooter=1,
                                    engine='python',
                                    parse_dates=[1],
                                    infer_datetime_format=True,
                                    index_col=False)
    year -= 1

    #Definindo nomes das colunas
    df.columns = col_names

    #Corrigindo casas decimais
    for col in decimal_config:
        df[col]=df[col]/100

    #Selecionando colunas
    df = df[remains]
    
    #Mascara de empresas desejadas
    mask_empresas = df.cod_negociacao.isin(codigo)

    df = df[mask_empresas]

    dataset = pd.concat([dataset, df], ignore_index=True)
'''

## Export DF - Historical Series

In [None]:
dataset.to_csv(f'{path}dataset_IBRA.csv',
                    encoding='ISO-8859-1',
                    sep=';',
                    decimal='.',
                    index=False)

In [None]:
pd.read_csv(f'{path}dataset_IBRA.csv',
                encoding='ISO-8859-1',
                sep=';',
                decimal='.',
                index_col=False
                )

# Gathering Data From Yahoo Finance

### Get historical series of Brazilian and American indexes
### Get historical series of Gold, Bitcoin and Ethereum
### Get historical series of American companies

## Yahoo Finance

In [None]:
# Set YFinance setting

symbol_list_br = ['^BVSP','^IBX50']
symbol_list_eua = ['^DJI','^IXIC','^GSPC','GC=F']
symbol_list_crypto = ['BTC-USD', 'ETH-USD']

name_dict={
    '^BVSP':'Ibovespa',
    '^IBX50':'IBrX50',
    '^DJI':'Dow Jones',
    '^IXIC':'NASDAQ',
    '^GSPC':'S&P 500',
    'GC=F':'Ouro ($)',
    'BTC-USD':'Bitcoin ($)',
    'ETH-USD':'Ethereum ($)'
}

col_names = ['date',
"open",
"high",
"low",
"close",
'cod_yfinance']

drop_col = ['Volume','Dividends','Stock Splits']

In [None]:
# Create empty indexes DataFrames
'''
df_br = pd.DataFrame()
df_eua = pd.DataFrame()
df_crypto = pd.DataFrame()
'''

In [None]:
# Fill brazilian indexes historical series dataset
'''
for ativo in symbol_list_br:
        chamada_api = yf.Ticker(ativo).history(period='max')
        chamada_api['cod_yfinace'] = ativo
        df_br = pd.concat([df_br, chamada_api])
'''

In [None]:
# Tranform data from index to column 0
'''
df_br = df_br.drop(drop_col, axis = 1)
df_br.reset_index(inplace=True)
df_br['Date'] = df_br['Date'].dt.date
df_br.columns = col_names
df_br
'''

In [None]:
# Put indexes names in the DF
df_br['name'] = df_br['cod_yfinance'].map(name_dict)

In [None]:
# Reroder columns
df_br = df_br[['date','name','open','high','low','close','cod_yfinance']]

In [None]:
# Save DataFrame
df_br.to_csv(f'{path}index_br.csv',
                    encoding='ISO-8859-1',
                    sep=';',
                    decimal='.',
                    index=False)

In [None]:
# Read DataFrame
index_br = pd.read_csv(f'{path}index_br.csv',
                    sep = ';',
                    decimal = '.',
                    encoding='ISO-8859-1',
                    index_col=False
)

In [None]:
index_br

In [None]:
# Fill american indexes and gold historical series dataset
'''
for ativo in symbol_list_eua:
        chamada_api = yf.Ticker(ativo).history(period='max')
        chamada_api['cod_yfinance'] = ativo
        df_eua = pd.concat([df_eua, chamada_api])
'''

In [None]:
# Tranform data from index to column 0
'''
df_eua = df_eua.drop(drop_col, axis = 1)
df_eua.reset_index(inplace=True)
df_eua['Date'] = df_eua['Date'].dt.date
df_eua.columns = col_names
df_eua
'''

In [None]:
# Put indexes names in the DF
df_eua['name'] = df_eua['cod_yfinance'].map(name_dict)

In [None]:
# Reorder columns
df_eua = df_eua[['date','name','open','high','low','close','cod_yfinance']]

In [None]:
# Save DataFrame
df_eua.to_csv(f'{path}index_eua.csv',
                    encoding='ISO-8859-1',
                    sep=';',
                    decimal='.',
                    index=False)

In [None]:
# Read DataFrame
index_eua = pd.read_csv(f'{path}index_eua.csv',
                    sep = ';',
                    decimal = '.',
                    encoding='ISO-8859-1',
                    index_col=False)

In [None]:
index_eua

In [None]:
# Fill cripto historical series dataset
'''
for ativo in symbol_list_crypto:
        chamada_api = yf.Ticker(ativo).history(period='max')
        chamada_api['cod_yfinace'] = ativo
        df_crypto = pd.concat([df_crypto, chamada_api])
'''

In [None]:
# Tranform data from index to column 0
'''
df_crypto = df_crypto.drop(drop_col, axis = 1)
df_crypto.reset_index(inplace=True)
df_crypto['Date'] = df_crypto['Date'].dt.date
df_crypto.columns = col_names
df_crypto
'''

In [None]:
# Put indexes names in the DF
df_crypto['name'] = df_crypto['cod_yfinance'].map(name_dict)

In [None]:
# Reorder columns
df_crypto = df_crypto[['date','name','open','high','low','close','cod_yfinance']]

In [None]:
df_crypto.to_csv(f'{path}crypto.csv',
                    encoding='ISO-8859-1',
                    sep=';',
                    decimal='.',
                    index=False)

In [None]:
crypto = pd.read_csv(f'{path}crypto.csv',
                    sep = ';',
                    decimal = '.',
                    encoding='ISO-8859-1',
                    index_col=False)


In [None]:
crypto

# Gathering Data - American Companies

### List of american companies and composition of indexes - S&P500, Dow Jones, Nasdaq

## S&P500

In [None]:
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'

data = pd.read_html(url)

In [None]:
data[0].head()

In [None]:
sp500 = data[0].iloc[:,[0,1,3,4]]

In [None]:
sp500.columns = ['ticker', 'name', 'sector', 'sub_industry']

In [None]:
sp500

In [None]:
sp500.to_csv(f'{path}SP500.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

In [102]:
sp500 = pd.read_csv(f'{path}SP500.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.')

In [103]:
sp500.shape

(503, 4)

## Nasdaq

In [None]:
url = 'https://en.wikipedia.org/wiki/Nasdaq-100'

data = pd.read_html(url)

In [None]:
data[4]
nasdaq = data[4]
nasdaq

In [None]:
nasdaq.columns = ['name','ticker', 'sector', 'sub_industry']

In [None]:
nasdaq.to_csv(f'{path}NASDAQ.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

In [104]:
nasdaq = pd.read_csv(f'{path}NASDAQ.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.')

In [105]:
nasdaq.shape

(102, 4)

## Dow Jones

In [None]:
url = 'https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average'

data = pd.read_html(url)

In [None]:
data[1]
dow_jones = data[1].iloc[:,[0,2,3]]
dow_jones

In [None]:
dow_jones.columns = ['name','ticker', 'sector']

In [None]:
dow_jones.to_csv(f'{path}DOW_JONES.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

In [106]:
dow_jones = pd.read_csv(f'{path}DOW_JONES.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.')

In [107]:
dow_jones.shape

(30, 3)

## Create DataFrame with all companies in american indexes

In [None]:
emp_sp500 = sp500['ticker']
set_sp500 = set(emp_sp500)
len(set_sp500)

In [None]:
emp_nasdaq = nasdaq['ticker']
set_nasdaq = set(emp_nasdaq)
len(set_nasdaq)

In [None]:
# Companies in Nasdaq that aren't in sp500
print(len(set_nasdaq.difference(set_sp500)))
print(set_nasdaq.difference(set_sp500))
list_nasdaq = list(set_nasdaq.difference(set_sp500))
list_nasdaq

In [None]:
# Create DF with all companies in both indexes
sp500_concat = pd.concat([sp500,nasdaq.loc[nasdaq['ticker'].isin(list_nasdaq)]], axis=0, ignore_index=True)

In [None]:
sp500_concat

In [None]:
# Save DF
sp500_concat.to_csv(f'{path}eua_all_companies.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

# Gathering Data - Company's Sector and Industry

### Get sector and sub-sector of all companies using web scraping on the Yahoo Finance website

In [None]:
ibra = pd.read_csv(f'{path}IBRA.csv',
                    encoding='ISO-8859-1',
                    sep=';',
                    decimal='.')

ibra = ibra[['ticker','name','sector']]
ibra[['sector','sub_industry']] = np.nan

In [None]:
ibra

In [None]:
eua = pd.read_csv(f'{path}eua_all_companies.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.')

In [None]:
eua

In [None]:
eua.isnull().sum()

In [None]:
eua[eua['ticker'].str.contains(r'[^a-zA-Z0-9]')]

In [None]:
# Replace every non-alphanumeric character in the ticker (such as '.') with '-',
# in order to get the right ticker for the Yahoo Finance web scraping.
# The result is assigned back: an in-place replace on a selected column is not
# guaranteed to update `eua` itself.
eua['ticker'] = eua['ticker'].replace(r'[^a-zA-Z0-9]', '-', regex=True)

In [None]:
#Runs the webdriver
driver = webdriver.Chrome(options=chromeOptions)

In [None]:
wait = 8

# implicitly_wait configures the element lookup timeout of the whole session
# (it does not pause execution), so it is set once before the loop
driver.implicitly_wait(wait)

# Location of the sector and industry values on the Yahoo Finance profile page
profile_xpaths = {
    'sector': '//*[@id="Col1-0-Profile-Proxy"]/section/div[1]/div/div/p[2]/span[2]',
    'sub_industry': '//*[@id="Col1-0-Profile-Proxy"]/section/div[1]/div/div/p[2]/span[4]',
}

# Iterate with the DataFrame's own row labels instead of a manual counter,
# so each value is written to the row the ticker came from
for index, i in eua['ticker'].items():
    # Get the Yfinance company's url
    url = f'https://finance.yahoo.com/quote/{i}/profile?p={i}'
    driver.get(url)

    # Get company's sector and industry
    for column, xpath in profile_xpaths.items():
        try:
            eua.loc[index, column] = driver.find_element(By.XPATH, xpath).text
        except WebDriverException:
            # Best effort: keep the current value when the field cannot be read from the page.
            # Only Selenium errors are ignored; programming errors still surface.
            pass

### Checking if all companies in IBRA index have sector and industry

In [None]:
ibra[ibra['sector'].isna()]

In [None]:
ibra['sector'].unique()

In [None]:
ibra[ibra['sector'] == '']

In [None]:
ibra.loc[[149,151,200]]

In [None]:
ibra.loc[149,('sector','sub_industry')] = ['Healthcare', 'Drug Manufacturers—Specialty & Generic']
ibra.loc[151,('sector','sub_industry')] = ['Healthcare', 'Drug Manufacturers—Specialty & Generic']
ibra.loc[200,('sector','sub_industry')] = ['Utilities', 'Utilities—Independent Power Producers']

In [None]:
ibra.to_csv(f'{path}IBRA_sector.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

In [None]:
ibra = ibra.rename({'sub_industry':'industry'}, axis=1)

In [None]:
ibra

In [None]:
cod_yfinance = [f'{i}.SA' for i in ibra['ticker']]

In [None]:
ibra['cod_yfinance'] = cod_yfinance

In [None]:
ibra['country'] = 'Brazil'

In [None]:
ibra

### Checking if all companies in eua DataFrame have sector and industry

In [None]:
eua.isnull().sum()

In [None]:
eua['sector'].unique()

In [None]:
# Save Dataframe
eua.to_csv(f'{path}eua_all_companies.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

In [None]:
eua = eua.rename({'sub_industry':'industry'}, axis = 1)

In [None]:
eua['cod_yfinance'] = eua['ticker']

In [None]:
eua['country'] = 'USA'

In [None]:
eua

# Updating IBRA dataset - Obsolete - create a new DataFrame with historical series taken from yfinance for all companies

### (dataset that contains brazilian companies's historical data)

# Create Datasets

### Dataset that contains Brazilian and American companies' characteristics
### Dataset that contains Brazilian and American companies' historical data
### Dataset that contains the characteristics of the Brazilian and American indexes, gold, dollar (R$), Bitcoin and Ethereum
### Dataset that contains the historical data of the Brazilian and American indexes, gold, dollar (R$), Bitcoin and Ethereum

In [None]:
# The historical dataset is too big to upload to GitHub, so it is kept in a separate directory
stock_path = r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\05. Dados\stock_project_datasets/'

## Dataset with all companies's characteristics

In [None]:
ibra.head()

In [None]:
eua.head()

In [None]:
all_companies = pd.concat([ibra, eua], axis=0, ignore_index=True)

In [None]:
all_companies.to_csv(f'{stock_path}all_companies.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

In [None]:
all_companies = pd.read_csv(f'{stock_path}all_companies_rev4.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.')

In [None]:
all_companies

## Dataset with the characteristics of the indexes, gold, dollar (R$), Bitcoin and Ethereum

In [None]:
index_ticker = [
    'IBOV'
    ,'IBRX50'
    ,'NASDAQ'
    ,'DOW JONES'
    ,'SP500'
    ,'GOLD'
    ,'BTC'
    ,'ETH'
    ,'USD-BRL'
]

index_name = [
    'Ibovespa'
    ,'IBrX50'
    ,'Nasdaq 100'
    ,'Dow Jones Industrial Average'
    ,'S&P 500'
    ,'Gold (US$)'
    ,'Bitcoin (US$)'
    ,'Ethereum (US$)'
    ,'Dolar (R$)'
]

index_sector = [
    'Index'
    ,'Index'
    ,'Index'
    ,'Index'
    ,'Index'
    ,'Gold'
    ,'Crypto'
    ,'Crypto'
    ,'Currency'
]

index_industry = [
    'Index'
    ,'Index'
    ,'Index'
    ,'Index'
    ,'Index'
    ,'Gold'
    ,'Crypto'
    ,'Crypto'
    ,'Currency'
]

index_cod = [
    '^BVSP'
    ,'^IBX50'
    ,'NQ=F'
    ,'^DJI'
    ,'^GSPC'
    ,'GC=F'
    ,'BTC-USD'
    ,'ETH-USD'
    ,'BRL=X'
]

index_country = [
    'Brazil'
    ,'Brazil'
    ,'USA'
    ,'USA'
    ,'USA'
    ,'USA'
    ,'USA'
    ,'USA'
    ,'Brazil'
]

index_type = [
    'Index'
    ,'Index'
    ,'Index'
    ,'Index'
    ,'Index'
    ,'Gold'
    ,'Crypto'
    ,'Crypto'
    ,'Currency'
]

In [None]:
# One row per index/asset; each column comes from the matching list defined above
df_index = pd.DataFrame({
    'ticker': index_ticker,
    'name': index_name,
    'sector': index_sector,
    'industry': index_industry,
    'cod_yfinance': index_cod,
    'country': index_country,
    'type': index_type,
})

In [None]:
df_index

In [None]:
df_index.to_csv(f'{stock_path}all_indexes.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

## Unifying the characteristics datasets

In [None]:
df_index

In [None]:
all_companies

In [None]:
all_companies_and_index = pd.concat([all_companies, df_index], axis=0, ignore_index=True)

In [None]:
all_companies_and_index

In [None]:
all_companies_and_index.to_csv(f'{stock_path}all_companies_and_indexes.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

## Dataset with all companies and indexes historical data

In [None]:
# Create empty DataFrame
historical_data = pd.DataFrame()

In [None]:
# Creating DataFrame with historical data of all companies and indexes.
# The per-ticker frames are collected in a list and concatenated once: calling
# pd.concat inside the loop copies the accumulated data on every iteration.
history_frames = []
for ticker in all_companies_and_index['cod_yfinance']:
    aux_df = yf.Ticker(ticker).history(period='max')
    # Identify which asset each row belongs to
    aux_df['cod_yfinance'] = ticker
    history_frames.append(aux_df)

# pd.concat raises on an empty list, so fall back to an empty DataFrame
historical_data = pd.concat(history_frames, axis=0) if history_frames else pd.DataFrame()

In [None]:
historical_data

In [None]:
# Check if got all companies historical data
len(historical_data['cod_yfinance'].unique())

In [None]:
historical_data

In [None]:
historical_data.info()

In [None]:
# Drop 2022-12-12
historical_data = historical_data.drop(historical_data[historical_data['Date'] == datetime.date(2022,12,12)].index, axis=0)

In [None]:
historical_data[historical_data['Date'] == datetime.date(2022,12,9)]

In [None]:
# Save DataFrame
historical_data.to_csv(f'{stock_path}historical_data_complete_rev5.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

In [None]:
# Load DataFrame (same file name used by the save cell above and by the SQL section)
historical_data = pd.read_csv(f'{stock_path}historical_data_complete_rev5.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.')

In [None]:
#Transform index to column Date
historical_data.reset_index(inplace=True)

In [None]:
# Transform date to datetime
historical_data['Date'] = pd.to_datetime(historical_data['Date'], utc=True) 

In [None]:
# Transform datetime to YYYY-MM-DD
historical_data['Date'] = historical_data['Date'].dt.date

In [None]:
len(historical_data['cod_yfinance'].unique())

### Rounding values

In [None]:
historical_data

In [None]:
# Round the price and dividend columns to 3 decimal places
for value_column in ('Open', 'High', 'Low', 'Close', 'Dividends'):
    historical_data[value_column] = historical_data[value_column].round(3)

## Dataset with indexes, gold, dolar (R$), bitcoin and ethereum historical data - Obsolete

In [None]:
# Create empty DataFrame
indexes_historical_data = pd.DataFrame()

In [None]:
# Creating DataFrame with historical data of all indexes.
# The per-ticker frames are collected in a list and concatenated once: calling
# pd.concat inside the loop copies the accumulated data on every iteration.
index_frames = []
for ticker in df_index['cod_yfinance']:
    aux_df = yf.Ticker(ticker).history(start='1927-12-30', end='2022-12-10')
    # Identify which index/asset each row belongs to
    aux_df['cod_yfinance'] = ticker
    index_frames.append(aux_df)

# pd.concat raises on an empty list, so fall back to an empty DataFrame
indexes_historical_data = pd.concat(index_frames, axis=0) if index_frames else pd.DataFrame()


In [None]:
indexes_historical_data

In [None]:
indexes_historical_data.to_csv(f'{stock_path}indexes_historical_data.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

# Working on companies sector and industry

In [None]:
all_companies = pd.read_csv(f'{stock_path}all_companies_and_indexes.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.')

In [None]:
all_companies['sector'].value_counts()

In [None]:
all_companies['industry'].value_counts()

In [None]:
all_companies['industry'] = all_companies['industry'].str.replace("—"," - ")

In [None]:
all_companies[all_companies['industry'].str.contains('-')]

In [None]:
agf = ['QUAL3'
,'VVBR3'
,'AESB3'
,'WIZS3'
,'BRAP3'
,'BRSR6'
,'BRKM5'
,'BRAP4'
,'BRKM3'
,'BRSR3'
,'CGAS5'
,'TRPL4'
,'VIVT3'
,'TAEE11'
,'TAEE4'
,'TAEE3'
,'CGAS3'
,'ITSA4'
,'CSMG3'
,'ENAT3'
,'ITSA3'
,'SANB11'
,'SANB3'
,'SANB4'
,'GRND3'
,'BRSR5'
,'TRPL3'
,'SAPR4'
,'SAPR3'
,'BBSE3'
,'CMIG4'
,'ALUP11'
,'CLSC3'
,'CMIG4'
,'ELET3'
,'BBAS3'
,'BBDC4'
,'ITUB4'
,'AMBP3'
,'CSAN3'
,'CSAN4'
,'OPCT3'
,'SBSP3'
,'SAPR11'
,'VERZ34'
,'OIBR4'
,'TIMS3'
,'VIVT4'
,'TELB4'
,'TELB3'
,'ATTB34'
]


In [None]:
all_companies[all_companies['ticker'].isin(agf)]

In [None]:
all_companies.loc[all_companies['name'].isin(companies)]

In [None]:
all_companies.loc[(all_companies['name'].isin(companies)) | all_companies['ticker'].isin(agf)].shape

In [None]:
# Perennial Industry
perennial=[
'Utilities - Diversified'
,'Utilities - Independent Power Producers'
,'Utilities - Regulated Electric'
,'Utilities - Regulated Gas'
,'Utilities - Regulated Water'
,'Utilities - Renewable'
,'Banks - Diversified'
,'Banks - Regional'
,'Insurance - Diversified'
,'Insurance - Life'
,'Insurance - Property & Casualty'
,'Insurance - Reinsurance'
,'Insurance - Specialty'
,'Insurance Brokers'
,'Healthcare Plans'
,'Telecom Services'
,'Waste Management'
,'Oil & Gas E&P'
,'Oil & Gas Equipment & Services'
,'Oil & Gas Integrated'
,'Oil & Gas Midstream'
,'Oil & Gas Refining & Marketing']

In [None]:
# Perennial subsector
dict_subsector = {
'Utilities - Diversified' : 'Utilities - Electricity'
,'Utilities - Independent Power Producers' : 'Utilities - Electricity'
,'Utilities - Regulated Electric' : 'Utilities - Electricity'
,'Utilities - Regulated Gas' :	'Utilities - Gas'
,'Utilities - Regulated Water' : 'Sanitation'
,'Utilities - Renewable' : 'Utilities - Electricity'
,'Banks - Diversified' : 'Banks'
,'Banks - Regional' : 'Banks'
,'Insurance - Diversified': 'Insurance'
,'Insurance - Life' : 'Insurance'
,'Insurance - Property & Casualty' : 'Insurance'
,'Insurance - Reinsurance' : 'Insurance'
,'Insurance - Specialty' : 'Insurance'
,'Insurance Brokers' : 'Insurance'
,'Healthcare Plans' : 'Health Insurance'
,'Telecom Services' : 'Telecom'
,'Waste Management' : 'Sanitation'
,'Oil & Gas E&P' : 'Oil & Gas'
,'Oil & Gas Equipment & Services' : 'Oil & Gas'
,'Oil & Gas Integrated' : 'Oil & Gas'
,'Oil & Gas Midstream' : 'Oil & Gas'
,'Oil & Gas Refining & Marketing' : 'Oil & Gas'
}

In [None]:
all_companies

In [None]:
mask_perennial = all_companies['industry'].isin(perennial)

In [None]:
# Indetify if the company is in a perennial sector
all_companies.loc[mask_perennial, 'perennial'] = 'Yes'
all_companies.loc[~mask_perennial, 'perennial'] = 'No'

In [None]:
# Create a subsector for perennial companies
all_companies.loc[mask_perennial, 'perennial_subsector'] =  all_companies.loc[mask_perennial, 'industry'].map(dict_subsector)

In [None]:
# If the company is not perennial, subsector = industry
all_companies.loc[~mask_perennial, 'perennial_subsector'] = all_companies.loc[~mask_perennial, 'industry']

In [None]:
all_companies.isnull().sum()

In [None]:
# Save modified DataFrame (`all_companies` is the frame that received the perennial columns above)
all_companies.to_csv(f'{stock_path}all_companies_and_indexes_rev2.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

In [None]:
all_companies_rev2 = pd.read_csv(f'{stock_path}all_companies_and_indexes_rev1.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.')

In [None]:
all_companies_rev2

In [None]:
all_companies_rev1 = all_companies_rev1[['cod_yfinance', 'ticker', 'name', 'sector', 'industry', 'perennial_subsector', 'perennial', 'country', 'type']]

In [None]:
all_companies_rev1

## Get companies in AGF ranking using Selenium

In [None]:
#Runs the webdriver
driver = webdriver.Chrome(options=chromeOptions)

In [None]:
driver.implicitly_wait(wait)

In [None]:
company_name = driver.find_elements(By.CSS_SELECTOR, '.text-secondary')

In [None]:
companies = [i.text for i in company_name]

In [None]:
companies

In [None]:
# Get AGF companies ticker - not working
br_ticker = driver.find_elements(By.XPATH, '//*[contains(concat( " ", @class, " " ), concat( " ", "text-muted", " " ))]')

In [None]:
all_companies.to_csv(f'{stock_path}all_companies_and_indexes_rev2.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

# Create SQL Database

In [None]:
# Import sql_password
load_dotenv(r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\04. GitHub\stocks_project/password.env')
sql_password = os.getenv('sql_password')

In [None]:
# Set SQL configurations
if sql_password is None:
    # Without this check the connection string would contain the literal text "None"
    raise RuntimeError("sql_password is not defined; check the password.env file")

user = "root"
# URL-escape the password so characters such as '@', ':' or '/' do not break the connection string
password = quote_plus(sql_password)
url_banco = "localhost"
nome_db = "stocks_project"
conn_str = f"mysql+pymysql://{user}:{password}@{url_banco}/{nome_db}"

In [None]:
# Create engine object
engine = db.create_engine(conn_str)
print(engine)

In [None]:
all_companies = pd.read_csv(f'{stock_path}all_companies_and_indexes_rev2.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.')

In [None]:
historical_data_aux = pd.read_csv(f'{stock_path}historical_data_complete_rev5.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.')

In [None]:
historical_data

In [None]:
all_companies

In [None]:
# Engine.execute() was removed in SQLAlchemy 2.0: run the statement through an explicit connection.
# The connection URL already selects the stocks_project database.
with engine.connect() as connection:
    connection.execute(db.text("USE stocks_project"))

In [None]:
# Create companies Dataset in SQL
all_companies.to_sql(name='companies', con=engine, if_exists='replace', index=False)

In [None]:
# Create historical data dataset in SQL
historical_data.to_sql(name='historical_data', con=engine, if_exists='replace', index=False) ##if_exists=append


In [None]:
# Import sql_password
load_dotenv(r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\04. GitHub\stocks_project/password.env')
sql_password = os.getenv('sql_password')

In [None]:
# Set SQL configurations
if sql_password is None:
    # Without this check the connection string would contain the literal text "None"
    raise RuntimeError("sql_password is not defined; check the password.env file")

user = "root"
# URL-escape the password so characters such as '@', ':' or '/' do not break the connection string
password = quote_plus(sql_password)
url_banco = "localhost"
nome_db = "stocks_project"
conn_str = f"mysql+pymysql://{user}:{password}@{url_banco}/{nome_db}"

In [None]:
# Create engine object
engine = db.create_engine(conn_str)
print(engine)

# Automate historical data dataset update process

## Testing yfinance historical data

In [None]:
def create_engine(env_path=r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\04. GitHub\stocks_project/password.env'):
    """Create the SQLAlchemy engine used to reach the MySQL server.

    Args:
        env_path (str): Path of the .env file that defines ``sql_password``.
            Defaults to the location used throughout this notebook.

    Returns:
        engine (sqlalchemy.engine): engine built for the ``stocks_project`` database
            on ``localhost`` with the ``root`` user and the pymysql driver.

    Raises:
        ValueError: if ``sql_password`` is not available after loading the .env file.
    """
    # Local import keeps this cell self-contained (urllib is part of the standard library)
    from urllib.parse import quote_plus

    # Import sql_password
    load_dotenv(env_path)
    sql_password = os.getenv('sql_password')
    if sql_password is None:
        # Without this check the URL would silently contain the literal text "None"
        raise ValueError(f"'sql_password' was not found in the environment or in {env_path}")

    # Set SQL configurations
    user = "root"
    url_banco = "localhost"
    nome_db = "stocks_project"
    # The password is URL-escaped so characters such as '@', ':' or '/' do not break the URL
    conn_str = f"mysql+pymysql://{user}:{quote_plus(sql_password)}@{url_banco}/{nome_db}"

    # Create engine object
    return db.create_engine(conn_str)

In [None]:
def add_days_to_date(date, days):
    """Shift a date by a number of days and return it as text.

    Args:
        date (string): Date string in YYYY-MM-DD format.
        days (int): Number of days to add to the date.

    Returns:
        str: The shifted date formatted as YYYY-MM-DD.
    """

    shifted = pd.to_datetime(date) + timedelta(days=days)

    return shifted.strftime("%Y-%m-%d")

In [None]:
def create_update_dataframe(start, end, ticker_list):
    """Create a DataFrame with the last days of historical data.

    Args:
        start (string): Date string in YYYY-MM-DD format - One day after the last update
        end (string): Date string in YYYY-MM-DD format - Today
        ticker_list (iterable): iterable containing yfinance code for the companies and indexes

    Returns:
        df (dataframe): the per-ticker histories stacked row-wise, each row tagged with
            its ticker in the 'cod_yfinance' column. An empty DataFrame is returned
            when ticker_list yields no tickers.
    """

    # Collect the per-ticker frames and concatenate once; concatenating inside the loop
    # copies the accumulated frame on every iteration.
    frames = []
    for ticker in ticker_list:
        history = yf.Ticker(ticker).history(start=start, end=end)
        history['cod_yfinance'] = ticker
        frames.append(history)

    # pd.concat raises on an empty list, so keep the original "empty in, empty out" result
    if not frames:
        return pd.DataFrame()

    return pd.concat(frames, axis=0)

    

In [None]:
def format_update_dataframe(dataframe):
    """Turn the index into a 'Date' column and round Open, High, Low, Close and Dividends to 3 decimal places.

    The input frame is not modified, so the cell that calls this function can be
    re-run without adding a stray 'index' column.

    Args:
        dataframe (DataFrame): DataFrame containing last days historical data. After
            reset_index it must expose a 'Date' column plus the Open, High, Low,
            Close and Dividends columns.

    Returns:
        df (DataFrame): New, formatted DataFrame whose 'Date' column holds
            datetime.date values taken after conversion to UTC.
    """

    # reset_index() returns a new frame, leaving the caller's DataFrame untouched
    formatted = dataframe.reset_index()

    # Transform date to datetime (UTC) and keep only the YYYY-MM-DD part
    formatted['Date'] = pd.to_datetime(formatted['Date'], utc=True).dt.date

    # Round Columns
    rounded_columns = ['Open', 'High', 'Low', 'Close', 'Dividends']
    formatted[rounded_columns] = formatted[rounded_columns].round(3)

    return formatted

In [None]:
def append_update_dataframe(dataframe, table_name='test_dataset', con=None):
    """Append the update DataFrame to a SQL table.

    Args:
        dataframe (DataFrame): DataFrame, already formatted, containing last days historical data.
        table_name (str): Name of the table that receives the rows. Defaults to 'test_dataset'.
        con: Connection or engine handed to DataFrame.to_sql. When omitted, the
            notebook-level `engine` variable is used, so one-argument calls keep working.

    Returns:
        None. The rows are appended to the table (if_exists='append') without the index.
    """

    # Resolve the connection lazily so the global `engine` is only needed as a fallback
    target = engine if con is None else con

    dataframe.to_sql(name=table_name, con=target, if_exists='append', index=False)

In [None]:
def get_start_date(engine):
    """Work out the start date passed to 'create_update_dataframe'.

    Args:
        engine (sqlalchemy.engine): engine that connects to the stocks_project dataset on MySQL Server

    Returns:
        start (str): the day after the latest 'Date' stored in test_dataset, as YYYY-MM-DD
    """

    # The query returns a single cell holding the most recent stored date
    max_date_frame = pd.read_sql(sql = "SELECT MAX(Date) FROM test_dataset", con=engine)
    latest_stored_date = max_date_frame.iloc[0,0]

    return add_days_to_date(latest_stored_date, 1)

In [None]:
def get_end_date():
    """Return today's date, used as the end date in 'create_update_dataframe'.

    Returns:
        today (str): Current local date as a YYYY-MM-DD string
    """
    return datetime.datetime.now().strftime("%Y-%m-%d")

In [None]:
def get_ticker_list(engine):
    """Fetch the tickers passed to 'create_update_dataframe'.

    Args:
        engine (sqlalchemy.engine): engine that connects to the stocks_project dataset on MySQL Server

    Returns:
        ticker_list (Series): the 'cod_yfinance' column of the companies table
    """

    companies = pd.read_sql(sql='SELECT cod_yfinance FROM companies', con=engine)

    return companies['cod_yfinance']

In [None]:
engine = create_engine()
engine

In [None]:
start = get_start_date(engine)
start

In [None]:
end = get_end_date()
end

In [None]:
ticker_list = get_ticker_list(engine)
ticker_list

In [None]:
df = create_update_dataframe(start, end, ticker_list)
df

In [None]:
df = format_update_dataframe(df)
df

In [None]:
append_update_dataframe(df)

In [None]:
# Import sql_password
load_dotenv(r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\04. GitHub\stocks_project/password.env')
sql_password = os.getenv('sql_password')

In [None]:
# Set SQL configurations
user = "root"
password = sql_password
url_banco = "localhost"
nome_db = "stocks_project"
conn_str = f"mysql+pymysql://{user}:{password}@{url_banco}/{nome_db}"

In [None]:
# Create engine object
engine = db.create_engine(conn_str)
print(engine)

In [None]:
type(engine)

In [None]:
last_update = pd.read_sql(sql = "SELECT MAX(Date) FROM historical_data", con=engine)

In [None]:
start = last_update.iloc[0,0]

In [None]:
start

In [None]:
start = add_days_to_date(start, 1)

In [None]:
start

In [None]:
ticker_list = pd.read_sql(sql='SELECT cod_yfinance FROM companies', con=engine)['cod_yfinance']

In [None]:
ticker_list.iloc[-10:-1]

In [None]:
ticker_list.iloc[-10:]

In [None]:
today = datetime.datetime.now()
today = today.strftime("%Y-%m-%d")

In [None]:
today

In [None]:
# Testing yfinance historical data
# Reuses create_update_dataframe (defined earlier in this section) instead of repeating
# its download-and-stack loop inline.
test_df = create_update_dataframe('2022-12-08', '2022-12-12', ticker_list.iloc[:5])

In [None]:
test_df

In [None]:
test_df.reset_index(inplace=True)

In [None]:
test_df

In [None]:
test_df.info()

In [None]:
# Transform date to datetime
test_df['Date'] = pd.to_datetime(test_df['Date'], utc=True) 

In [None]:
# Transform datetime to YYYY-MM-DD
test_df['Date'] = test_df['Date'].dt.date

In [None]:
test_df

In [None]:
test_df['Open'] = test_df['Open'].round(3)
test_df['High'] = test_df['High'].round(3)
test_df['Low'] = test_df['Low'].round(3)
test_df['Close'] = test_df['Close'].round(3)
test_df['Dividends'] = test_df['Dividends'].round(3)

In [None]:
test_df

In [None]:
test_df.to_sql(name='test_dataset', con=engine, if_exists='replace', index=False)

In [None]:
all_companies.to_sql(name='companies', con=engine, if_exists='replace', index=False)

In [None]:
# Testing yfinance historical data
# Reuses create_update_dataframe (defined earlier in this section) instead of repeating
# its download-and-stack loop inline.
# NOTE(review): .loc[0:5] is a label slice and includes label 5, unlike the .iloc[:5]
# used in the earlier test cell - confirm which selection is intended.
test_df = create_update_dataframe('2022-12-09', '2022-12-10', ticker_list.loc[0:5])

In [None]:
test_df

In [None]:
test_df.reset_index(inplace=True)

In [None]:
test_df

In [None]:
test_df.info()

In [None]:
# Transform date to datetime
test_df['Date'] = pd.to_datetime(test_df['Date'], utc=True) 

In [None]:
# Transform datetime to YYYY-MM-DD
test_df['Date'] = test_df['Date'].dt.date

In [None]:
test_df

In [None]:
test_df['Open'] = test_df['Open'].round(3)
test_df['High'] = test_df['High'].round(3)
test_df['Low'] = test_df['Low'].round(3)
test_df['Close'] = test_df['Close'].round(3)
test_df['Dividends'] = test_df['Dividends'].round(3)

In [None]:
test_df

In [None]:
test_df.to_sql(name='test_dataset', con=engine, if_exists='append', index=False)

# Analysis

In [None]:
all_companies = pd.read_csv(f'{stock_path}all_companies_and_indexes_rev2.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.')

In [None]:
historical_data = pd.read_csv(f'{stock_path}historical_data_complete_rev5.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.')

In [None]:
# Boolean masks over all_companies, reused by the filtering cells below
# -- by country
country_br = all_companies['country'].eq('Brazil')
country_usa = all_companies['country'].eq('USA')

# -- by asset type
type_company = all_companies['type'].eq('Company')
type_index = all_companies['type'].eq('Index')
type_gold = all_companies['type'].eq('Gold')
type_crypto = all_companies['type'].eq('Crypto')
type_currency = all_companies['type'].eq('Currency')

# -- rows flagged as perennial
perennial = all_companies['perennial'].eq('Yes')

# -- by perennial subsector
subsector_uti_elec = all_companies['perennial_subsector'].eq('Utilities - Electricity')
subsector_uti_gas = all_companies['perennial_subsector'].eq('Utilities - Gas')
subsector_sanit = all_companies['perennial_subsector'].eq('Sanitation')
subsector_banks = all_companies['perennial_subsector'].eq('Banks')
subsector_insurance = all_companies['perennial_subsector'].eq('Insurance')
subsector_healh_ins = all_companies['perennial_subsector'].eq('Health Insurance')
subsector_telecom = all_companies['perennial_subsector'].eq('Telecom')
subsector_oil = all_companies['perennial_subsector'].eq('Oil & Gas')


In [None]:
historical_data.loc[historical_data['Date'] == '2012-02-22'].head(60)

In [None]:
474210

In [None]:
467086
470795
4794649
482460
1354271

In [None]:
historical_data_aux = historical_data_aux.drop([467086,470795,4794649,482460,1354271])

In [None]:
historical_data_aux.loc[(historical_data_aux['Date'] == '2021-05-02') & (historical_data_aux['cod_yfinance'].isin(all_companies.loc[country_br, 'cod_yfinance']))]

In [None]:
historical_data.loc[(historical_data['Date'] == '2012-01-02') & (historical_data['cod_yfinance'].isin(all_companies.loc[country_br, 'cod_yfinance']))]

In [None]:
historical_data.shape

In [None]:
historical_data.dropna(axis = 0, subset=['Close']).shape

In [None]:
historical_data_aux_1 = historical_data.dropna(axis = 0, subset=['Close'])

In [None]:
historical_data_aux_1.shape

In [None]:
historical_data.loc[(historical_data["Date"] == '2018-07-30') & historical_data['cod_yfinance'].isin(all_companies.loc[type_company & country_br & subsector_banks, 'cod_yfinance']), 'Close'].mean()

In [None]:
all_companies.loc[type_company & country_br & subsector_banks, 'cod_yfinance']

In [None]:
all_companies[subsector_oil]

In [None]:
all_companies.loc[perennial & country_br, 'perennial_subsector'].value_counts()

In [None]:
all_companies['type'].unique()

In [None]:
df_company_br = all_companies[country_br & type_company]
df_company_usa = all_companies[country_usa & type_company]

In [None]:
df_company_usa.shape

In [None]:
all_companies

In [None]:
historical_data

In [None]:
historical_data.loc[historical_data['cod_yfinance'].isin(['^GSPC'])]

In [None]:
historical_data.dtypes

In [None]:
df_company_br

In [None]:
df_br_pivot = pd.pivot_table(historical_data[historical_data['cod_yfinance'].isin(df_company_br["cod_yfinance"].unique())], index='Date', columns='cod_yfinance', values='Close')

In [None]:
df_br_pivot.columns.tolist()

In [None]:
df_br_pivot.loc['2018-01-03',df_br_pivot.loc['2018-01-03'].isnull()]

In [None]:
all_companies.tail(10)

In [None]:
all_companies.loc[[729], 'name'] = 'Dow Jones'

In [None]:
all_companies.tail(10)

In [None]:
all_companies = all_companies.drop('Ticker', axis=1)

# Read and Save

In [96]:
# Save modified DataFrame
all_companies.to_csv(f'{stock_path}all_companies_and_indexes_rev3.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

In [3]:
all_companies = pd.read_csv(f'{stock_path}all_companies_and_indexes_rev2.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.')

In [4]:
historical_data = pd.read_csv(f'{stock_path}historical_data_complete_rev8.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.')

In [None]:
# Save modified DataFrame
historical_data_aux_1.to_csv(f'{stock_path}historical_data_complete_rev6.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

In [None]:
# Import sql_password
load_dotenv(r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\04. GitHub\stocks_project/password.env')
sql_password = os.getenv('sql_password')

In [None]:
# Set SQL configurations
user = "root"
password = sql_password
url_banco = "localhost"
nome_db = "stocks_project"
conn_str = f"mysql+pymysql://{user}:{password}@{url_banco}/{nome_db}"

In [None]:
# Create engine object
engine = db.create_engine(conn_str)
print(engine)

In [None]:
historical_data_aux_1.to_sql(name='historical_data', con=engine, if_exists='replace', index=False)

In [None]:
# Boolean masks over all_companies, rebuilt after the frame was reloaded
# -- by country
country_br = all_companies['country'].eq('Brazil')
country_usa = all_companies['country'].eq('USA')

# -- by asset type
type_company = all_companies['type'].eq('Company')
type_index = all_companies['type'].eq('Index')
type_gold = all_companies['type'].eq('Gold')
type_crypto = all_companies['type'].eq('Crypto')
type_currency = all_companies['type'].eq('Currency')

# -- rows flagged as perennial
perennial = all_companies['perennial'].eq('Yes')

# -- by perennial subsector
subsector_uti_elec = all_companies['perennial_subsector'].eq('Utilities - Electricity')
subsector_uti_gas = all_companies['perennial_subsector'].eq('Utilities - Gas')
subsector_sanit = all_companies['perennial_subsector'].eq('Sanitation')
subsector_banks = all_companies['perennial_subsector'].eq('Banks')
subsector_insurance = all_companies['perennial_subsector'].eq('Insurance')
subsector_healh_ins = all_companies['perennial_subsector'].eq('Health Insurance')
subsector_telecom = all_companies['perennial_subsector'].eq('Telecom')
subsector_oil = all_companies['perennial_subsector'].eq('Oil & Gas')


In [None]:
historical_data.loc[(historical_data['Date'] == '2017-06-15') & (historical_data['cod_yfinance'].isin(['^BVSP']))]

In [None]:
create_update_dataframe('2003-07-09','2003-07-10',['^BVSP'])

In [None]:
format_update_dataframe

In [None]:
mask_tasa4 = historical_data['cod_yfinance'] == 'TASA4.SA'
mask_ibov = historical_data['cod_yfinance'] == '^BVSP'

In [None]:
historical_data.loc[mask_tasa4]

In [None]:
historical_data.loc[mask_ibov]

In [None]:
historical_data.shape

In [None]:
historical_data.shape

In [None]:
historical_data = historical_data.drop(historical_data.loc[(historical_data['Close'] > 70) & (historical_data['cod_yfinance'].isin(['LREN3.SA']))].index)

In [None]:
historical_data.loc[(historical_data['Close'] > 300) & (historical_data['cod_yfinance'].isin(all_companies.loc[type_company & country_br, 'cod_yfinance'])) & (historical_data['cod_yfinance'] != 'GFSA3.SA')].head(50)

In [None]:
historical_data.loc[(historical_data['Close'] > 100) & (historical_data['cod_yfinance'].isin(['GGBR4.SA']))]

In [None]:
historical_data.loc[(historical_data['Close'] > 100) & (historical_data['cod_yfinance'].isin(all_companies.loc[type_company & country_br, 'cod_yfinance'])) & (historical_data['cod_yfinance'] != 'GFSA3.SA'), 'cod_yfinance'].unique()

In [None]:
all_companies

In [None]:
all_companies[all_companies['type'] != 'Company']

In [None]:
historical_data.loc[(historical_data['Close'] > 1000) & historical_data['cod_yfinance'].isin(all_companies.loc[type_company, 'cod_yfinance']), 'cod_yfinance'].unique()

In [None]:
dataset_ibra = pd.read_csv(r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\05. Dados\B3\txt/dataset-IBRA.csv',
                        encoding='ISO-8859-1',
                        sep=';',
                        decimal='.')

In [None]:
dataset_ibra.loc[dataset_ibra['cod_negociacao'] == 'BRKM5']

In [None]:
dataset_ibra.loc[(dataset_ibra['data_pregao'] == '2002-09-04') & (dataset_ibra['cod_negociacao'] == 'BRKM5')]

In [None]:
dataset_ibra.loc[(dataset_ibra['preco_ultimo_negocio'] > 1000)]

In [None]:
dataset_ibra.loc[(dataset_ibra['cod_negociacao'] == 'CMIG4')]

In [None]:
dataset_ibra[dataset_ibra['cod_negociacao'] == 'TUPY3'].max()

In [None]:
historical_data.loc[historical_data['cod_yfinance'].isin(['AGRO3.SA']), 'Close'].max()

In [None]:
all_companies[all_companies['cod_yfinance'] == 'VULC3.SA']

In [None]:
historical_data.loc[(historical_data['Close'] > 100000) & historical_data['cod_yfinance'].isin(['VULC3.SA'])]

In [None]:
historical_data.loc[historical_data['cod_yfinance'].isin(['VULC3.SA'])]

In [None]:
historical_data_modified = historical_data.copy()

In [None]:
pd.to_datetime(historical_data['Date'])

In [None]:
historical_data_modified['Date'] = pd.to_datetime(historical_data_modified['Date'])

In [None]:
date_before = pd.to_datetime('2022-01-01')
date_before

In [None]:
historical_data_modified

In [None]:
mask_date_prev_2022 = historical_data_modified['Date'] < date_before

In [None]:
list_modify = ['BRKM5.SA', 'LREN3.SA', 'VULC3.SA', 'UGPA3.SA', 'RCSL3.SA', 'CMIG4.SA', 'TRPL4.SA']

In [None]:
mask_companies_modify = historical_data_modified['cod_yfinance'].isin(list_modify)

In [None]:
historical_data_modified.loc[mask_date_prev_2022 & mask_companies_modify]

In [None]:
4913040 - 37735

In [None]:
historical_data_modified = historical_data_modified.drop(historical_data_modified.loc[mask_date_prev_2022 & mask_companies_modify].index)

In [None]:
historical_data_modified.reset_index()

In [None]:
historical_data_modified

In [None]:
dataset_ibra['data_pregao'] = pd.to_datetime(dataset_ibra['data_pregao'])

In [None]:
dataset_ibra

In [None]:
mask_date_prev_2022_ibra = dataset_ibra['data_pregao'] < date_before

In [None]:
list_modify_ibra = ['BRKM5', 'LREN3', 'VULC3', 'UGPA3', 'RCSL3', 'CMIG4', 'TRPL4']

In [None]:
mask_companies_modify_ibra = dataset_ibra['cod_negociacao'].isin(list_modify_ibra)

In [None]:
dataset_ibra.loc[(dataset_ibra['data_pregao'] > pd.to_datetime('2021-01-01')) & (dataset_ibra['data_pregao'] < pd.to_datetime('2021-01-03'))]

In [None]:
append_to_complete = dataset_ibra.loc[mask_date_prev_2022_ibra  & mask_companies_modify_ibra].copy()

In [None]:
historical_data

In [None]:
append_to_complete

In [None]:
# NOTE(review): this selection keeps 'Volume', 'Dividends', 'Stock Splits' and 'cod_yfinance'
# but not 'cod_negociacao', while the following cells still read
# append_to_complete['cod_negociacao'] and only then assign 'Dividends', 'Volume' and
# 'cod_yfinance'. Presumably this cell was executed after those cells - TODO confirm the
# intended order (and where 'Stock Splits' comes from) before a Restart & Run All.
append_to_complete = append_to_complete[['data_pregao', 'preco_abertura', 'preco_maximo', 'preco_minimo', 'preco_ultimo_negocio', 'Volume', 'Dividends', 'Stock Splits', 'cod_yfinance']]

In [None]:
cod_yfinance = [f'{i}.SA'for i in append_to_complete['cod_negociacao']]

In [None]:
append_to_complete['Dividends'] = 0

In [None]:
append_to_complete['Volume'] = 0

In [None]:
append_to_complete

In [None]:
append_to_complete['cod_yfinance'] = cod_yfinance

In [None]:
append_to_complete = append_to_complete.drop('cod_negociacao', axis=1)

In [None]:
append_to_complete.columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits', 'cod_yfinance']

In [None]:
append_to_complete

In [None]:
historical_data_modified

In [None]:
4875305 + 27834

In [None]:
# Concatenate the yfinance DataFrame with the B3 data to resolve outliers
historical_data_modified_to_save = pd.concat([historical_data_modified, append_to_complete], axis = 0, ignore_index=True)

In [None]:
historical_data_modified_to_save.shape

In [None]:
historical_data_modified_to_save.shape

In [None]:
historical_data_modified_to_save = historical_data_modified_to_save.drop(historical_data_modified_to_save.loc[(historical_data_modified_to_save['Close'] > 100) & historical_data_modified_to_save['cod_yfinance'].isin(['TUPY3.SA'])].index)

In [None]:
# Save modified DataFrame
historical_data_modified_to_save.to_csv(f'{stock_path}historical_data_complete_rev7.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

In [None]:
# Import sql_password
load_dotenv(r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\04. GitHub\stocks_project/password.env')
sql_password = os.getenv('sql_password')

In [None]:
# Set SQL configurations
user = "root"
password = sql_password
url_banco = "localhost"
nome_db = "stocks_project"
conn_str = f"mysql+pymysql://{user}:{password}@{url_banco}/{nome_db}"

In [None]:
# Create engine object
engine = db.create_engine(conn_str)
print(engine)

In [None]:
historical_data_modified_to_save.to_sql(name='historical_data', con=engine, if_exists='replace', index=False)

In [None]:
historical_data = historical_data_modified_to_save.copy()

In [None]:
historical_data[(historical_data['Date'] > '2012-02-10') & ((historical_data['Date'] < '2012-02-21')) & (historical_data['cod_yfinance'].str.contains(r'\.SA'))]

In [None]:
historical_data = pd.read_csv(f'{stock_path}historical_data_complete_rev6.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.')

In [None]:
historical_data.loc[(historical_data['Close'] > 5000) & historical_data['cod_yfinance'].isin(all_companies.loc[type_company, 'cod_yfinance'])].index

In [None]:
historical_data

In [None]:
historical_data = historical_data.drop(historical_data.loc[(historical_data['Close'] < 0) & historical_data['cod_yfinance'].isin(all_companies.loc[type_company, 'cod_yfinance'])].index)

In [None]:
historical_data.loc[(historical_data['Close'] < 0) & historical_data['cod_yfinance'].isin(all_companies.loc[type_company, 'cod_yfinance'])].index

In [None]:
historical_data.loc[(historical_data['Date'] == '2001-09-12') & (historical_data['cod_yfinance'].isin(all_companies.loc[type_company, 'cod_yfinance']))]

In [None]:
historical_data

In [None]:
# Save modified DataFrame
historical_data.to_csv(f'{stock_path}historical_data_complete_rev8.csv',
                    encoding='UTF-8',
                    sep=';',
                    decimal='.',
                    index=False)

In [93]:
# Import sql_password
load_dotenv(r'C:\Users\Pedro\OneDrive\Desktop\Ironhack\04. GitHub\stocks_project/password.env')
sql_password = os.getenv('sql_password')

In [94]:
# Set SQL configurations
user = "root"
password = sql_password
url_banco = "localhost"
nome_db = "stocks_project"
conn_str = f"mysql+pymysql://{user}:{password}@{url_banco}/{nome_db}"

In [95]:
# Create engine object
engine = db.create_engine(conn_str)
print(engine)

Engine(mysql+pymysql://root:***@localhost/stocks_project)


In [None]:
historical_data.to_sql(name='historical_data', con=engine, if_exists='replace', index=False)

# Analysis on Companies Dataset

In [26]:
# Tickers (without the '.SA' suffix) matched against all_companies['ticker'] in the cells below
top_20_barsi = [
    'AESB3', 'BBDC3', 'BBSE3', 'BRAP3', 'BRKM5',
    'BRSR6', 'CSMG3', 'ENAT3', 'GRND3', 'ITSA4',
    'ITUB3', 'QUAL3', 'PSSA3', 'SANB11', 'SAPR11',
    'TAEE11', 'TRPL4', 'VIVT3', 'WIZS3', 'VBBR3',
]

In [5]:
# Boolean masks over all_companies, reused by the company analysis cells below
# -- by country
country_br = all_companies['country'].eq('Brazil')
country_usa = all_companies['country'].eq('USA')

# -- by asset type
type_company = all_companies['type'].eq('Company')
type_index = all_companies['type'].eq('Index')
type_gold = all_companies['type'].eq('Gold')
type_crypto = all_companies['type'].eq('Crypto')
type_currency = all_companies['type'].eq('Currency')

# -- rows flagged as perennial
perennial = all_companies['perennial'].eq('Yes')

# -- by perennial subsector
subsector_uti_elec = all_companies['perennial_subsector'].eq('Utilities - Electricity')
subsector_uti_gas = all_companies['perennial_subsector'].eq('Utilities - Gas')
subsector_sanit = all_companies['perennial_subsector'].eq('Sanitation')
subsector_banks = all_companies['perennial_subsector'].eq('Banks')
subsector_insurance = all_companies['perennial_subsector'].eq('Insurance')
subsector_healh_ins = all_companies['perennial_subsector'].eq('Health Insurance')
subsector_telecom = all_companies['perennial_subsector'].eq('Telecom')
subsector_oil = all_companies['perennial_subsector'].eq('Oil & Gas')


In [45]:
all_companies.loc[country_usa & type_company, 'sector'].value_counts()

Technology                82
Industrials               72
Financial Services        70
Healthcare                67
Consumer Cyclical         64
Consumer Defensive        36
Real Estate               32
Utilities                 30
Communication Services    28
Energy                    23
Basic Materials           21
Name: sector, dtype: int64

In [50]:
all_companies.loc[country_usa & type_company & (all_companies['sector'] == 'Real Estate' )]

Unnamed: 0,cod_yfinance,ticker,name,sector,industry,perennial_subsector,perennial,country,type
460,CBRE,CBRE,CBRE Group,Real Estate,Real Estate Services,Real Estate Services,No,USA,Company
461,CSGP,CSGP,CoStar Group,Real Estate,Real Estate Services,Real Estate Services,No,USA,Company
462,VICI,VICI,Vici Properties,Real Estate,REIT - Diversified,REIT - Diversified,No,USA,Company
463,PEAK,PEAK,Healthpeak,Real Estate,REIT - Healthcare Facilities,REIT - Healthcare Facilities,No,USA,Company
464,VTR,VTR,Ventas,Real Estate,REIT - Healthcare Facilities,REIT - Healthcare Facilities,No,USA,Company
465,WELL,WELL,Welltower,Real Estate,REIT - Healthcare Facilities,REIT - Healthcare Facilities,No,USA,Company
466,HST,HST,Host Hotels & Resorts,Real Estate,REIT - Hotel & Motel,REIT - Hotel & Motel,No,USA,Company
467,EXR,EXR,Extra Space Storage,Real Estate,REIT - Industrial,REIT - Industrial,No,USA,Company
468,PLD,PLD,Prologis,Real Estate,REIT - Industrial,REIT - Industrial,No,USA,Company
469,PSA,PSA,Public Storage,Real Estate,REIT - Industrial,REIT - Industrial,No,USA,Company


In [None]:
all_companies[country_br & type_company & all_companies['ticker'].str.contains('SAP')]

In [None]:
all_companies.loc[country_br & type_company & (all_companies['perennial_subsector']=='Health Insurance')]

In [28]:
all_companies.loc[country_br & type_company & all_companies['ticker'].isin(top_20_barsi), 'perennial_subsector'].value_counts()

Banks                      5
Insurance                  3
Utilities - Electricity    3
Sanitation                 2
Asset Management           1
Chemicals                  1
Footwear & Accessories     1
Health Insurance           1
Oil & Gas                  1
Specialty Retail           1
Telecom                    1
Name: perennial_subsector, dtype: int64

In [None]:
historical_data[(historical_data['Date'] == '2008-01-02') & (historical_data['cod_yfinance'].isin(all_companies.loc[country_br & type_company & all_companies['ticker'].isin(top_20_barsi), 'cod_yfinance']))]

In [43]:
historical_data.loc[(historical_data['Date'] == '2008-01-02') & (historical_data['cod_yfinance'].isin(all_companies.loc[country_br & type_company, 'cod_yfinance']))]

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,cod_yfinance
23610,2008-01-02,68.088,68.088,61.856,63.299,27080.0,0.00,0.0,TASA4.SA
29374,2008-01-02,17.356,17.356,16.882,17.017,1053300.0,0.00,0.0,EMBR3.SA
133074,2008-01-02,19.581,19.581,17.996,17.996,434400.0,0.00,0.0,FHER3.SA
163907,2008-01-02,41.946,41.955,39.953,40.430,844600.0,0.00,0.0,GOLL4.SA
221914,2008-01-02,5.436,5.654,5.314,5.611,266400.0,0.00,0.0,GUAR3.SA
...,...,...,...,...,...,...,...,...,...
4475087,2008-01-02,2.771,2.771,2.551,2.551,5700.0,0.00,0.0,TAEE11.SA
4778327,2008-01-02,5.439,5.439,5.265,5.265,2444846.0,0.00,0.0,CSMG3.SA
4784091,2008-01-02,8.717,9.036,8.540,8.848,912300.0,0.00,0.0,SBSP3.SA
4798099,2008-01-02,4.006,4.006,4.006,4.006,573224.0,0.24,0.0,ENBR3.SA


In [56]:
# Maps values of the 'industry' column to the label stored in 'pbi_sector_analysis'
dict_subsector_pbi = {
    'Utilities - Diversified': 'Electricity',
    'Utilities - Independent Power Producers': 'Electricity',
    'Utilities - Regulated Electric': 'Electricity',
    'Utilities - Regulated Gas': 'Utilities - Gas',
    'Utilities - Regulated Water': 'Sanitation',
    'Utilities - Renewable': 'Electricity',
    'Banks - Diversified': 'Banks',
    'Banks - Regional': 'Banks',
    'Insurance - Diversified': 'Insurance',
    'Insurance - Life': 'Insurance',
    'Insurance - Property & Casualty': 'Insurance',
    'Insurance - Reinsurance': 'Insurance',
    'Insurance - Specialty': 'Insurance',
    'Insurance Brokers': 'Insurance',
    'Healthcare Plans': 'Health Insurance',
    'Telecom Services': 'Telecom',
    'Waste Management': 'Sanitation',
    'Oil & Gas E&P': 'Oil & Gas',
    'Oil & Gas Equipment & Services': 'Oil & Gas',
    'Oil & Gas Integrated': 'Oil & Gas',
    'Oil & Gas Midstream': 'Oil & Gas',
    'Oil & Gas Refining & Marketing': 'Oil & Gas',
    'Real Estate - Development': 'Construction',
}

In [54]:
all_companies

Unnamed: 0,cod_yfinance,ticker,name,sector,industry,perennial_subsector,perennial,country,type
0,IPG,IPG,The Interpublic Group of Companies,Communication Services,Advertising Agencies,Advertising Agencies,No,USA,Company
1,OMC,OMC,Omnicom Group,Communication Services,Advertising Agencies,Advertising Agencies,No,USA,Company
2,TASA4.SA,TASA4,TAURUS ARMAS,Industrials,Aerospace & Defense,Aerospace & Defense,No,Brazil,Company
3,EMBR3.SA,EMBR3,EMBRAER,Industrials,Aerospace & Defense,Aerospace & Defense,No,Brazil,Company
4,BA,BA,Boeing,Industrials,Aerospace & Defense,Aerospace & Defense,No,USA,Company
...,...,...,...,...,...,...,...,...,...
730,^GSPC,SP500,S&P 500,Index,Index,Index,No,USA,Index
731,GC=F,GOLD,Gold (US$),Gold,Gold,Gold,No,USA,Gold
732,BTC-USD,BTC,Bitcoin (US$),Crypto,Crypto,Crypto,No,USA,Crypto
733,ETH-USD,ETH,Ethereum (US$),Crypto,Crypto,Crypto,No,USA,Crypto


In [58]:
all_companies.loc[country_br & type_company & perennial, 'pbi_sector_analysis'] = all_companies['industry'].map(dict_subsector_pbi)

In [89]:
all_companies.loc[all_companies['pbi_sector_analysis'].isnull(), 'pbi_sector_analysis'] = all_companies['sector']

In [90]:
all_companies.isnull().sum()

cod_yfinance           0
ticker                 0
name                   0
sector                 0
industry               0
perennial_subsector    0
perennial              0
country                0
type                   0
pbi_sector_analysis    0
dtype: int64

In [85]:
# Show the rows selected by the country_usa and type_company masks whose sector is 'Technology'.
usa_technology_rows = country_usa & type_company & all_companies['sector'].eq('Technology')
all_companies.loc[usa_technology_rows]

Unnamed: 0,cod_yfinance,ticker,name,sector,industry,perennial_subsector,perennial,country,type,pbi_sector_analysis
122,CSCO,CSCO,Cisco,Technology,Communication Equipment,Communication Equipment,No,USA,Company,Technology
123,HPE,HPE,Hewlett Packard Enterprise,Technology,Communication Equipment,Communication Equipment,No,USA,Company,Technology
124,JNPR,JNPR,Juniper Networks,Technology,Communication Equipment,Communication Equipment,No,USA,Company,Technology
125,MSI,MSI,Motorola Solutions,Technology,Communication Equipment,Communication Equipment,No,USA,Company,Technology
126,ZBRA,ZBRA,Zebra Technologies,Technology,Communication Equipment,Communication Equipment,No,USA,Company,Technology
...,...,...,...,...,...,...,...,...,...,...
577,PANW,PANW,Palo Alto Networks,Technology,Software - Infrastructure,Software - Infrastructure,No,USA,Company,Technology
578,SPLK,SPLK,Splunk,Technology,Software - Infrastructure,Software - Infrastructure,No,USA,Company,Technology
579,ZS,ZS,Zscaler,Technology,Software - Infrastructure,Software - Infrastructure,No,USA,Company,Technology
580,ENPH,ENPH,Enphase,Technology,Solar,Solar,No,USA,Company,Technology


In [97]:
# Write all_companies to the 'companies' SQL table through `engine`.
# An existing table is replaced, and the DataFrame index is not written as a column.
all_companies.to_sql('companies', engine, index=False, if_exists='replace')

735

In [81]:
# Keep an independent snapshot of the table. A plain assignment only creates
# a second name for the same DataFrame, so any later in-place edit to
# all_companies (e.g. a .loc assignment) would silently change the "rev1"
# snapshot as well. .copy() makes the snapshot a separate object.
all_companies_rev1 = all_companies.copy()

In [109]:
# Preview the first ten companies when ordered alphabetically by name.
all_companies.sort_values(by='name', ascending=True).head(n=10)

Unnamed: 0,cod_yfinance,ticker,name,sector,industry,perennial_subsector,perennial,country,type,pbi_sector_analysis
138,MMM,MMM,3M,Industrials,Conglomerates,Conglomerates,No,USA,Company,Industrials
377,RRRP3.SA,RRRP3,3R PETROLEUM,Energy,Oil & Gas E&P,Oil & Gas,Yes,Brazil,Company,Oil & Gas
14,TTEN3.SA,TTEN3,3TENTOS,Basic Materials,Agricultural Inputs,Agricultural Inputs,No,Brazil,Company,Basic Materials
600,AOS,AOS,A. O. Smith,Industrials,Specialty Industrial Machinery,Specialty Industrial Machinery,No,USA,Company,Industrials
63,ABCB4.SA,ABCB4,ABC BRASIL,Financial Services,Banks - Regional,Banks,Yes,Brazil,Company,Banks
226,ADM,ADM,ADM,Consumer Defensive,Farm Products,Farm Products,No,USA,Company,Consumer Defensive
633,ADP,ADP,ADP,Industrials,Staffing & Employment Services,Staffing & Employment Services,No,USA,Company,Industrials
597,AERI3.SA,AERI3,AERIS,Industrials,Specialty Industrial Machinery,Specialty Industrial Machinery,No,Brazil,Company,Industrials
713,AESB3.SA,AESB3,AES BRASIL,Utilities,Utilities - Renewable,Utilities - Electricity,Yes,Brazil,Company,Electricity
672,AES,AES,AES Corporation,Utilities,Utilities - Diversified,Utilities - Electricity,Yes,USA,Company,Electricity


In [111]:
# Load the rev5 historical price file from stock_path for outlier inspection.
# The file is semicolon-separated with '.' as the decimal mark.
outlier_csv_path = f'{stock_path}historical_data_complete_rev5.csv'
historical_data_outlier = pd.read_csv(
    outlier_csv_path,
    sep=';',
    decimal='.',
    encoding='UTF-8',
)

In [114]:
# Inspect implausibly large closing prices among the '.SA' listings.
# Match '.SA' as a literal suffix of cod_yfinance: the previous
# str.contains('SA') was a substring match, so it also accepted any ticker
# that merely contains the letters "SA" anywhere in its symbol.
is_sa_listing = historical_data_outlier['cod_yfinance'].str.endswith('.SA')
has_extreme_close = historical_data_outlier['Close'] > 100000
historical_data_outlier[has_extreme_close & is_sa_listing]

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,cod_yfinance
851429,2002-09-04,415035.788,415035.788,397652.090,403446.656,584.0,0.0,0.0,BRKM5.SA
851430,2002-09-05,403446.635,413587.125,391133.183,399825.031,984.0,0.0,0.0,BRKM5.SA
851431,2002-09-06,405619.600,417136.300,394030.469,394030.469,288.0,0.0,0.0,BRKM5.SA
851432,2002-09-09,402707.870,407213.145,402707.870,407068.281,64.0,0.0,0.0,BRKM5.SA
851433,2002-09-10,408516.830,415760.036,405619.548,408502.344,144.0,0.0,0.0,BRKM5.SA
...,...,...,...,...,...,...,...,...,...
3939297,2008-06-20,1622227.375,1622227.375,1622227.375,1622227.375,0.0,0.0,0.0,RCSL3.SA
3939298,2008-06-23,1622227.375,1622227.375,1622227.375,1622227.375,0.0,0.0,0.0,RCSL3.SA
3939299,2008-06-24,1622227.375,1622227.375,1622227.375,1622227.375,0.0,0.0,0.0,RCSL3.SA
3939300,2008-06-25,1622227.375,1622227.375,1622227.375,1622227.375,0.0,0.0,0.0,RCSL3.SA


In [115]:
# Display historical_data for inspection.
# NOTE(review): this is not the historical_data_outlier frame read from the rev5 CSV;
# historical_data is defined outside this section — confirm it is the intended frame.
historical_data

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,cod_yfinance
0,1980-03-17,0.000,0.426,0.420,0.420,43200.0,0.0,0.0,IPG
1,1980-03-18,0.000,0.417,0.417,0.417,327600.0,0.0,0.0,IPG
2,1980-03-19,0.000,0.417,0.417,0.417,115200.0,0.0,0.0,IPG
3,1980-03-20,0.000,0.420,0.420,0.420,28800.0,0.0,0.0,IPG
4,1980-03-21,0.000,0.420,0.420,0.420,554400.0,0.0,0.0,IPG
...,...,...,...,...,...,...,...,...,...
4899920,2022-12-05,5.218,5.264,5.214,5.218,0.0,0.0,0.0,BRL=X
4899921,2022-12-06,5.280,5.281,5.220,5.280,0.0,0.0,0.0,BRL=X
4899922,2022-12-07,5.234,5.270,5.210,5.234,0.0,0.0,0.0,BRL=X
4899923,2022-12-08,5.204,5.248,5.194,5.204,0.0,0.0,0.0,BRL=X
