In [1]:
import numpy as np
import pandas as pd

In [2]:
def clean_dataframe(df):
    """Convert a raw price table with text cells into a numeric, date-first frame.

    Steps performed:

    * strip the multi-line header prefix from every column name;
    * parse the ``'Data'`` column as day-first dates;
    * turn every other column (the tickers) into numbers, mapping the
      ``'-'`` placeholder to NaN;
    * drop the rows in which no ticker has a value.

    Number format: when at least one ticker cell contains a comma, the
    table is read as comma-decimal (``'1.234,56'`` -> ``1234.56``), so dots
    are removed as thousands separators. When no cell contains a comma,
    dots are kept as decimal points. Stripping them unconditionally turned
    ``'47.88'`` into ``4788`` while leaving ``'47'`` as ``47``.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table with a ``'Data'`` column plus one column per ticker.
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with ``'Data'`` (datetime) first, followed by the numeric
        ticker columns. The original row labels are kept for surviving rows.

    Raises
    ------
    KeyError
        If there is no ``'Data'`` column.
    ValueError
        If a cell cannot be parsed as a number (raised by ``pd.to_numeric``).
    """
    pattern = 'Fechamento\r\najust p/ prov\r\nEm moeda orig\r\n'

    # Rename on a new frame: assigning to ``df.columns`` would also rename
    # the caller's object. ``regex=False`` keeps the match literal on every
    # pandas version.
    short_names = df.columns.str.replace(pattern, '', regex=False)
    df = df.set_axis(short_names, axis=1)

    dates = pd.to_datetime(df['Data'], dayfirst=True)

    # Detach dates
    tickers = [col for col in df.columns if col != 'Data']
    quotes = df[tickers]

    def has_comma(column):
        # Only text cells can carry a decimal comma; NaN and numbers cannot.
        return any(isinstance(cell, str) and ',' in cell for cell in column)

    # Decide the number format once for the whole table, so that a bare
    # '1.000' inside a comma-decimal table is still read as one thousand.
    comma_decimal = any(
        has_comma(quotes.iloc[:, pos]) for pos in range(quotes.shape[1])
    )

    def normalize(cell):
        # Non-text cells (e.g. NaN from empty fields) pass through untouched
        # instead of failing on a missing ``.replace`` method.
        if not isinstance(cell, str):
            return cell
        if cell == '-':
            return np.nan
        if comma_decimal:
            return cell.replace('.', '').replace(',', '.')
        return cell

    numeric = quotes.apply(
        lambda column: pd.to_numeric(column.map(normalize))
    )

    # Reattach dates as the first column
    numeric.insert(0, 'Data', dates)

    # Drop rows in which every ticker is missing (the original author's
    # note: weekends and holidays)
    return numeric.dropna(axis=0, how='all', subset=tickers)

In [3]:
# Location of the raw CSV export, relative to the notebook.
path = './../data/economatica.csv'

# Load the comma-separated file as-is; cleaning happens in a later cell.
close = pd.read_csv(path, sep=',')

In [4]:
# Replace the raw frame with its cleaned version. `close` is rebound here,
# so re-run the loading cell before running this cell a second time.
close = close.pipe(clean_dataframe)

In [5]:
# Show the cleaned frame as the cell's output.
close

Unnamed: 0,Data,RRRP3,TTEN3,QVUM3B,QVQP3B,APPA3,APPA4,ABCB3,ABCB4,ABCB11,...,WWOW3,ILMD3,ILMD4,YBRA3B,YDUQ3,ESTC4,ESTC11,OPZI3B,ZAMP3,ZIVI4
0,1986-01-02,,,,,,,,,,...,,,,,,,,,,
1,1986-01-03,,,,,,,,,,...,,,0.002917,,,,,,,
2,1986-01-06,,,,,,,,,,...,,,0.002858,,,,,,,
3,1986-01-07,,,,,,,,,,...,,,,,,,,,,
4,1986-01-08,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9610,2022-11-03,4788.0,1296.0,,,,,,2151.0,,...,,,,,1607.0,,,,73.0,
9611,2022-11-04,5131.0,1352.0,,,,,,2181.0,,...,,,,,1635.0,,,,742.0,
9612,2022-11-07,4876.0,1274.0,,,,,,2105.0,,...,,,,,1461.0,,,,7.0,
9613,2022-11-08,47.0,1213.0,,,,,,215.0,,...,,,,,1398.0,,,,71.0,
