# Brazil Stock Quotes
> ## Data Analysis of [Brazilian Stock Quotes](https://www.kaggle.com/gbonesso/b3-stock-quotes)

In [None]:
from sklearn import preprocessing

import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [None]:
# Load the B3 quote history. The first CSV column becomes the index, and the
# trade date (DATPRE) and expiration date (DATVEN) are parsed as datetimes;
# values that cannot be parsed are coerced to NaT instead of raising.
# NOTE(review): `date_parser` is deprecated in pandas >= 2.0 — confirm the
# pandas version in use before upgrading the environment.
df = pd.read_csv('/kaggle/input/b3-stock-quotes/COTAHIST_A2009_to_A2020_P.csv', 
                 parse_dates=['DATPRE', 'DATVEN'],
                 date_parser=lambda x: pd.to_datetime(x, errors='coerce'),
                 index_col=0)

### Exploring the data

In [None]:
# Preview the first rows of the raw dataset.
df.head()

In [None]:
# List the column names of the raw dataset.
df.columns.values

In [None]:
# Show the dtype pandas assigned to each column.
df.dtypes

In [None]:
# Summary statistics of the raw dataset.
df.describe()

### Columns
> An explanation of the columns can be found in Portuguese [here](http://bvmf.bmfbovespa.com.br/pt-br/download/SeriesHistoricas_Layout.pdf)

| COLUMN | DESCRIPTION | CONTENT |
| :---: | :--- | :--- |
| TIPREG | RECORD TYPE | FIXED 1 |
| DATPRE | TRADE DATE | |
| CODBDI | USED TO CLASSIFY PAPERS IN THE ISSUE OF THE DAILY INFORMATION BULLETIN | |
| CODNEG | PAPER TRADING CODE | |
| TPMERC | MARKET TYPE | MARKET CODE IN WHICH PAPER IS REGISTERED |
| NOMRES | BRIEF NAME OF PAPER ISSUING COMPANY | |
| ESPECI | PAPER SPECIFICATION | |
| PRAZOT | TERM IN MARKET DAYS | |
| MODREF | REFERENCE CURRENCY | CURRENCY USED ON THE TRADE DATE |
| PREABE | OPENING PRICE OF MARKET PAPER | |
| PREMAX | MAX PRICE OF MARKET PAPER | |
| PREMIN | MIN PRICE OF MARKET PAPER | |
| PREMED | MEAN PRICE OF MARKET PAPER | |
| PREULT | LAST PRICE OF MARKET PAPER | |
| PREOFC | BEST PURCHASE PRICE OF MARKET PAPER | |
| PREOFV | BEST SELL PRICE OF MARKET PAPER | |
| TOTNEG | NUMBER OF TRANSACTIONS OF MARKET PAPER | |
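| QUATOT | TOTAL QUANTITY OF SECURITIES TRADED IN THIS MARKET PAPER | |
| VOLTOT | TOTAL TRADED VOLUME OF MARKET PAPER | |
| PREEXE | STRIKE PRICE FOR THE OPTIONS MARKET OR CONTRACT VALUE FOR THE SECONDARY FORWARD MARKET | PREÇO DE EXERCÍCIO PARA O MERCADO DE OPÇÕES OU VALOR DO CONTRATO PARA O MERCADO DE TERMO SECUNDÁRIO |
| INDOPC | INDICATOR OF CORRECTION OF STRIKE PRICES OR CONTRACT VALUES FOR THE OPTIONS OR SECONDARY FORWARD MARKETS | INDICADOR DE CORREÇÃO DE PREÇOS DE EXERCÍCIOS OU VALORES DE CONTRATO PARA OS MERCADOS DE OPÇÕES OU TERMO SECUNDÁRIO |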
| DATVEN | EXPIRATION DATE | |
| FATCOT | QUOTE FACTOR | ‘1’ = UNIT QUOTATION OR ‘1000’ = QUOTATION PER THOUSAND |
| PTOEXE | EXERCISE PRICE IN POINTS FOR OPTIONS REFERRED TO DOLLAR OR CONTRACT VALUE IN POINTS FOR SECONDARY TERM | FOR REFERENCES IN DOLLARS, EACH POINT IS EQUAL TO THE AMOUNT, IN THE CURRENT CURRENCY, OF ONE HUNDREDTH OF THE AVERAGE INTERBANK CLOSING DOLLAR OF THE PREVIOUS DAY, THAT IS, 1 POINT = 1/100 US $ |
| CODISI | PAPER CODE IN THE ISIN SYSTEM OR INTERNAL PAPER CODE | |
| DISMES | PAPER DISTRIBUTION NUMBER | |

### Removing useless data
> TIPREG: this column has a fixed value (1)  
> MODREF: this dataset only contains values in reais (Brazilian currency - R$)  
> INDOPC: should range from 1 to 9, but this column only contains 0 (missing data)

In [None]:
# Drop, in place, the three columns that carry no information in this dataset.
df.drop(columns=['TIPREG', 'MODREF', 'INDOPC'], inplace=True)

### Removing forward trading
> We analyze only current (spot) trades, so records with a forward term are dropped. For clarification, see [here](https://pt.wikipedia.org/wiki/Mercado_a_termo)

In [None]:
# Keep only the rows whose term (PRAZOT) is missing or zero.
df = df[(df.PRAZOT.isna()) | (df.PRAZOT == 0)]

### Removing unknown data
> For some reason, only codes with 5 characters are available on Google

In [None]:
# Keep only the rows whose trading code (CODNEG) has exactly five characters.
df = df[df.CODNEG.str.len() == 5]

### One-hot encoding BDI column

| COLUMN | DESCRIPTION IN PORTUGUESE | DESCRIPTION IN ENGLISH |
| :---: | :--- | :--- |
| BDI_02 | LOTE PADRÃO | STANDARD LOT |
| BDI_06 | CONCORDATÁRIAS | COMPANIES IN COMPOSITION WITH CREDITORS (CONCORDATA) |
| BDI_10 | DIREITOS E RECIBOS | RIGHTS AND RECEIPTS |
| BDI_12 | FUNDOS IMOBILIÁRIOS | REAL ESTATE FUNDS |
| BDI_14 | CERTIFIC. INVESTIMENTO / DEBÊNTURES / TÍTULOS DIVIDA PÚBLICA | INVESTMENT CERTIFICATES / DEBENTURES / PUBLIC DEBT SECURITIES |
| BDI_18 | OBRIGAÇÕES | OBLIGATIONS |
| BDI_22 | BÔNUS (PRIVADOS) | BONUS (PRIVATE) |
| BDI_26 | APÓLICES / BÔNUS / TÍTULOS PÚBLICOS | POLICIES / BONUSES / PUBLIC SECURITIES |
| BDI_32 | EXERCÍCIO DE OPÇÕES DE COMPRA DE ÍNDICE | EXERCISE OF INDEX CALL OPTIONS |
| BDI_33 | EXERCÍCIO DE OPÇÕES DE VENDA DE ÍNDICE | EXERCISE OF INDEX PUT OPTIONS |
| BDI_38 | EXERCÍCIO DE OPÇÕES DE COMPRA | EXERCISE OF CALL OPTIONS |
| BDI_42 | EXERCÍCIO DE OPÇÕES DE VENDA | EXERCISE OF PUT OPTIONS |
| BDI_46 | LEILÃO DE TÍTULOS NÃO COTADOS | AUCTION OF UNLISTED SECURITIES |
| BDI_48 | LEILÃO DE PRIVATIZAÇÃO | PRIVATIZATION AUCTION |
| BDI_50 | LEILÃO | AUCTION |
| BDI_51 | LEILÃO FINOR | FINOR AUCTION |
| BDI_52 | LEILÃO FINAM | FINAM AUCTION |
| BDI_53 | LEILÃO FISET | FISET AUCTION |
| BDI_54 | LEILÃO DE AÇÕES EM MORA | AUCTION OF SHARES IN DEFAULT |
| BDI_56 | VENDAS POR ALVARÁ JUDICIAL | SALES BY JUDICIAL PERMIT |
| BDI_58 | OUTROS | OTHERS |
| BDI_60 | PERMUTA POR AÇÕES | EXCHANGE FOR SHARES |
| BDI_61 | META | GOAL |
| BDI_62 | TERMO | FORWARD |
| BDI_66 | DEBÊNTURES COM DATA DE VENCIMENTO ATÉ 3 ANOS | DEBENTURES WITH MATURITY DATE UP TO 3 YEARS |
| BDI_68 | DEBÊNTURES COM DATA DE VENCIMENTO MAIOR QUE 3 ANOS | DEBENTURES WITH EXPIRATION DATE MORE THAN 3 YEARS |
| BDI_70 | FUTURO COM MOVIMENTAÇÃO CONTÍNUA | FUTURE WITH CONTINUOUS MOVEMENT |
| BDI_71 | FUTURO COM RETENÇÃO DE GANHO | FUTURE WITH GAIN RETENTION |
| BDI_74 | OPÇÕES DE COMPRA DE ÍNDICES | INDEX CALL OPTIONS |
| BDI_75 | OPÇÕES DE VENDA DE ÍNDICES | INDEX PUT OPTIONS |
| BDI_78 | OPÇÕES DE COMPRA | CALL OPTIONS |
| BDI_82 | OPÇÕES DE VENDA | PUT OPTIONS |
| BDI_83 | DEBÊNTURES E NOTAS PROMISSÓRIAS | DEBENTURES AND PROMISSORY NOTES |
| BDI_96 | FRACIONÁRIO | FRACTIONAL |
| BDI_99 | TOTAL GERAL | GRAND TOTAL |

In [None]:
# Expand CODBDI into one indicator column per distinct code (named "BDI_<code>"),
# then swap the original column for those indicator columns.
df_bdi = pd.get_dummies(df['CODBDI'], prefix='BDI')
df.drop(columns=['CODBDI'], inplace=True)
df = pd.concat([df, df_bdi], axis=1)

### One-hot encoding TPMERC column

| COLUMN | DESCRIPTION IN PORTUGUESE | DESCRIPTION IN ENGLISH |
| :---: | :--- | :--- |
| TPMERC_10 | VISTA | SPOT (CASH) |
| TPMERC_12 | EXERCÍCIO DE OPÇÕES DE COMPRA | EXERCISE OF CALL OPTIONS |
| TPMERC_13 | EXERCÍCIO DE OPÇÕES DE VENDA | EXERCISE OF PUT OPTIONS |
| TPMERC_17 | LEILÃO | AUCTION |
| TPMERC_20 | FRACIONÁRIO | FRACTIONAL |
| TPMERC_30 | TERMO | FORWARD |
| TPMERC_50 | FUTURO COM RETENÇÃO DE GANHO | FUTURE WITH GAIN RETENTION |
| TPMERC_60 | FUTURO COM MOVIMENTAÇÃO CONTÍNUA | FUTURE WITH CONTINUOUS MOVEMENT |
| TPMERC_70 | OPÇÕES DE COMPRA | CALL OPTIONS |
| TPMERC_80 | OPÇÕES DE VENDA | PUT OPTIONS |

In [None]:
# Expand TPMERC into one indicator column per distinct market type
# (named "TPMERC_<code>"), then swap the original column for those indicators.
df_market_type = pd.get_dummies(df['TPMERC'], prefix='TPMERC')
df.drop(columns=['TPMERC'], inplace=True)
df = pd.concat([df, df_market_type], axis=1)

### Scaling values
> Converting prices according to their quote factor (FATCOT)

In [None]:
# Price columns that are quoted per FATCOT units.
cols = [
    'PREABE',
    'PREMAX',
    'PREMIN',
    'PREMED',
    'PREULT',
    'PREOFC',
    'PREOFV',
]

# Add a "<column>_fat" companion for each price, divided row-wise by the
# quote factor so that every price is expressed per single unit.
for col in cols:
    df[f'{col}_fat'] = df[col].div(df['FATCOT'])

### Exploring the transformed data

In [None]:
# Preview the first rows after the filtering, encoding and scaling steps.
df.head()

In [None]:
# List the column names, now including the indicator and "_fat" columns.
df.columns.values

In [None]:
# Show the dtype of each column of the transformed dataset.
df.dtypes

In [None]:
# Summary statistics of the transformed dataset.
df.describe()

In [None]:
def group_quotes(min_date=None, max_date=None, min_year=None, max_year=None, data=None):
    """Summarise each ticker's first and last trade inside a date window.

    Parameters
    ----------
    min_date, max_date : optional
        Inclusive bounds applied to ``DATPRE`` before grouping. Any value
        comparable with the column (for example an ISO date string) is
        accepted; ``None`` leaves that side of the window open.
    min_year, max_year : int, optional
        When given, keep only the tickers whose first trade date
        (``min_year``) or last trade date (``max_year``) inside the window
        falls in that calendar year.
    data : pandas.DataFrame, optional
        Frame holding the ``CODNEG``, ``DATPRE`` and ``PREMED`` columns.
        Defaults to the notebook-level ``df``.

    Returns
    -------
    pandas.DataFrame
        One row per ``CODNEG`` with the columns ``CODNEG``, ``DATPRE_MIN``,
        ``DATPRE_MAX``, ``PREMED_MIN`` and ``PREMED_MAX`` (the mean price on
        the first and on the last trade date of the window).
    """
    base = df if data is None else data
    if min_date is not None:
        base = base[base['DATPRE'] >= min_date]
    if max_date is not None:
        base = base[base['DATPRE'] <= max_date]

    summary = base.groupby('CODNEG', as_index=False).agg(
        DATPRE_MIN=('DATPRE', 'min'),
        DATPRE_MAX=('DATPRE', 'max'),
    )

    # Keep a single price per (ticker, date): the first one in row order.
    # Without this, a ticker quoted more than once on its boundary date would
    # add rows to the join and shift every later ticker's price onto the
    # wrong row.
    prices = base[['CODNEG', 'DATPRE', 'PREMED']].drop_duplicates(
        subset=['CODNEG', 'DATPRE'], keep='first')

    # A left join on unique keys keeps exactly one row per ticker, in the
    # same order as `summary`, so each price lands on its own ticker.
    for suffix in ('MIN', 'MAX'):
        date_col = 'DATPRE_' + suffix
        lookup = prices.rename(
            columns={'DATPRE': date_col, 'PREMED': 'PREMED_' + suffix})
        summary = summary.merge(lookup, how='left', on=['CODNEG', date_col])

    if min_year is not None:
        summary = summary[summary['DATPRE_MIN'].dt.year == min_year]
    if max_year is not None:
        summary = summary[summary['DATPRE_MAX'].dt.year == max_year]

    return summary

### Best 30 quotes
> The 30 quotes with the largest price increase, comparing their first mean price since 2015 with their last mean price in 2020

In [None]:
# Rank tickers by price growth: the mean price on the last trade date divided
# by the mean price on the first trade date on or after 2015-01-01, limited
# to tickers whose last trade date falls in 2020.
# .copy() detaches the result from the filtered frame returned by
# group_quotes, so adding columns below does not act on a slice.
df_cod_min_max_dt = group_quotes(min_date='2015-01-01', max_year=2020).copy()
df_cod_min_max_dt['INC'] = df_cod_min_max_dt['PREMED_MAX'] / df_cod_min_max_dt['PREMED_MIN']
# A zero starting price yields +/-inf, which MinMaxScaler rejects; treat
# those ratios as missing instead.
df_cod_min_max_dt['INC'] = df_cod_min_max_dt['INC'].replace([np.inf, -np.inf], np.nan)
# Rescale the ratio to the [0, 1] range; ravel() flattens the (n, 1) result
# back into a single column.
df_cod_min_max_dt['INC'] = preprocessing.MinMaxScaler().fit_transform(
    np.reshape(df_cod_min_max_dt['INC'].values, (-1, 1))).ravel()
df_cod_min_max_dt = df_cod_min_max_dt.sort_values(by='INC', ascending=False)

fig = plt.figure()
ax = fig.add_axes([0,0,2,1])
best_quotes = df_cod_min_max_dt.head(30)
ax.bar(best_quotes['CODNEG'], best_quotes['INC'])
# Rotate the existing tick labels rather than overwriting them, which keeps
# each label tied to its own bar.
ax.tick_params(axis='x', labelrotation=45)
plt.show()

### Plot time series

In [None]:
def plot_quote_timeserie(code):
    """Plot the mean price (PREMED) of one ticker against its trade date.

    ``code`` is compared for equality with the ``CODNEG`` column of the
    notebook-level ``df``; the matching rows are drawn as a line chart.
    """
    is_selected = df['CODNEG'] == code
    quote = df.loc[is_selected, ['PREMED', 'DATPRE']].set_index('DATPRE')

    canvas = plt.figure()
    # [left, bottom, width, height] as fractions of the figure size.
    axis = canvas.add_axes([0, 0, 2, 1])
    axis.plot(quote)
    plt.show()

In [None]:
# Example: plot the mean-price history of the BBDC4 ticker.
plot_quote_timeserie('BBDC4')