In [33]:
import os
import shutil
import pandas as pd
from pathlib import Path

def merge_arquivos(lista_paths, file_name, output_dir='bases'):
    """Merge several CSV files into a single CSV file.

    Each input file is read with ';' as separator and 'latin1' encoding,
    skipping its first three lines. Column names are stripped of surrounding
    whitespace and the ``SALDO`` column is converted to a number (decimal
    commas are turned into decimal points first).

    Parameters
    ----------
    lista_paths : iterable of str
        Paths of the CSV files to merge; they are processed in sorted order.
    file_name : str
        Name of the merged file written inside ``output_dir``.
    output_dir : str, default 'bases'
        Folder that receives the merged file; created when missing.

    Returns
    -------
    bool
        True when the merged file was written, False when ``lista_paths``
        was empty and nothing was written.
    """
    paths = sorted(lista_paths)
    if not paths:
        # pd.concat raises ValueError when there is nothing to concatenate
        print(f'{file_name}: no files to merge, skipping')
        return False

    dfs = []
    for file_path in paths:
        df = pd.read_csv(file_path, sep=';', skiprows=3, encoding='latin1')
        # strip the surrounding whitespace from the column names
        df = df.rename(columns=lambda x: x.strip())
        # turn SALDO into a number; when a file has no decimal commas pandas
        # already parses the column as numeric and the .str accessor would fail
        if not pd.api.types.is_numeric_dtype(df['SALDO']):
            df['SALDO'] = df['SALDO'].str.replace(',', '.', regex=False)
        df['SALDO'] = pd.to_numeric(df['SALDO'])
        # collected here, concatenated once after the loop
        dfs.append(df)

    df = pd.concat(dfs, axis=0, ignore_index=True)
    print(df.shape)
    print(df.columns)

    # make sure the destination exists so the write cannot fail on a fresh checkout
    os.makedirs(output_dir, exist_ok=True)
    df.to_csv(os.path.join(output_dir, file_name))
    return True


def prepare_bases_folder():
    """Make sure the 'bases' folder exists and return its relative path.

    The folder is created (including missing parents) only when nothing
    exists at that path yet.
    """
    bases_dir = 'bases'

    if os.path.exists(bases_dir):
        return bases_dir

    Path(bases_dir).mkdir(parents=True, exist_ok=True)
    return bases_dir

In [34]:
# folder that holds the downloaded CSV files (relative to the working directory)
download_folder = 'downloads'
# ensure the output folder exists; its path is the value displayed by the cell
prepare_bases_folder()

'bases'

In [35]:
# one bucket of file paths per category; the key is the file-name suffix
# and every key starts with its own empty list
files = {
    suffix: []
    for suffix in (
        'BANCOS.CSV',
        'CONGLOMERADOS.CSV',
        'CONSORCIOS.CSV',
        'COOPERATIVAS.CSV',
        'LIQUIDACAO.CSV',
        'SOCIEDADES.CSV',
        'COMBINADOS.CSV',
    )
}

In [36]:
# Group every downloaded CSV by category, i.e. by the part of the file name
# that follows the six leading characters (e.g. 199501BANCOS.CSV -> BANCOS.CSV).
for file_name in sorted(os.listdir(download_folder)):
    file_path = os.path.join(download_folder, file_name)

    # only CSV files are of interest; the extension is compared case-insensitively
    if not file_path.lower().endswith('.csv'):
        continue

    print(file_name)
    ano = file_name[:4]
    mes = file_name[4:6]
    categoria = file_name[6:]
    print(ano, mes, file_name, categoria)

    # the extension check above ignores case, so the key lookup must too
    bucket = files.get(categoria.upper())
    if bucket is None:
        # an unexpected file name must not abort the scan with an AttributeError
        print('unknown category, file ignored:', file_name)
        continue

    # keep the cell idempotent: re-running it must not list a path twice
    if file_path not in bucket:
        bucket.append(file_path)

199501BANCOS.CSV
1995 01 199501BANCOS.CSV BANCOS.CSV
199501CONGLOMERADOS.CSV
1995 01 199501CONGLOMERADOS.CSV CONGLOMERADOS.CSV
199501CONSORCIOS.CSV
1995 01 199501CONSORCIOS.CSV CONSORCIOS.CSV
199501COOPERATIVAS.CSV
1995 01 199501COOPERATIVAS.CSV COOPERATIVAS.CSV
199501LIQUIDACAO.CSV
1995 01 199501LIQUIDACAO.CSV LIQUIDACAO.CSV
199501SOCIEDADES.CSV
1995 01 199501SOCIEDADES.CSV SOCIEDADES.CSV
199502BANCOS.CSV
1995 02 199502BANCOS.CSV BANCOS.CSV
199502CONGLOMERADOS.CSV
1995 02 199502CONGLOMERADOS.CSV CONGLOMERADOS.CSV
199502CONSORCIOS.CSV
1995 02 199502CONSORCIOS.CSV CONSORCIOS.CSV
199502COOPERATIVAS.CSV
1995 02 199502COOPERATIVAS.CSV COOPERATIVAS.CSV
199502LIQUIDACAO.CSV
1995 02 199502LIQUIDACAO.CSV LIQUIDACAO.CSV
199502SOCIEDADES.CSV
1995 02 199502SOCIEDADES.CSV SOCIEDADES.CSV
199503BANCOS.CSV
1995 03 199503BANCOS.CSV BANCOS.CSV
199503CONGLOMERADOS.CSV
1995 03 199503CONGLOMERADOS.CSV CONGLOMERADOS.CSV
199503CONSORCIOS.CSV
1995 03 199503CONSORCIOS.CSV CONSORCIOS.CSV
199503COOPERATIVAS.CSV

1999 11 199911BANCOS.CSV BANCOS.CSV
199911CONGLOMERADOS.CSV
1999 11 199911CONGLOMERADOS.CSV CONGLOMERADOS.CSV
199911CONSORCIOS.CSV
1999 11 199911CONSORCIOS.CSV CONSORCIOS.CSV
199911COOPERATIVAS.CSV
1999 11 199911COOPERATIVAS.CSV COOPERATIVAS.CSV
199911LIQUIDACAO.CSV
1999 11 199911LIQUIDACAO.CSV LIQUIDACAO.CSV
199911SOCIEDADES.CSV
1999 11 199911SOCIEDADES.CSV SOCIEDADES.CSV
199912BANCOS.CSV
1999 12 199912BANCOS.CSV BANCOS.CSV
199912CONGLOMERADOS.CSV
1999 12 199912CONGLOMERADOS.CSV CONGLOMERADOS.CSV
199912CONSORCIOS.CSV
1999 12 199912CONSORCIOS.CSV CONSORCIOS.CSV
199912COOPERATIVAS.CSV
1999 12 199912COOPERATIVAS.CSV COOPERATIVAS.CSV
199912LIQUIDACAO.CSV
1999 12 199912LIQUIDACAO.CSV LIQUIDACAO.CSV
199912SOCIEDADES.CSV
1999 12 199912SOCIEDADES.CSV SOCIEDADES.CSV
200001BANCOS.CSV
2000 01 200001BANCOS.CSV BANCOS.CSV
200001CONGLOMERADOS.CSV
2000 01 200001CONGLOMERADOS.CSV CONGLOMERADOS.CSV
200001CONSORCIOS.CSV
2000 01 200001CONSORCIOS.CSV CONSORCIOS.CSV
200001COOPERATIVAS.CSV
2000 01 200001CO

In [None]:
# Merge the files of each category into one CSV; the category key is also
# used as the name of the merged file.
for index_name, file_path in files.items():
    print(index_name)
    # `file_path` holds the list of paths collected for this category
    if not file_path:
        # merge_arquivos hands the frames to pd.concat, which raises
        # ValueError when there is nothing to concatenate
        print('no files found for this category, skipping')
        continue
    merge_arquivos(file_path, index_name)

BANCOS.CSV
['downloads/199501BANCOS.CSV', 'downloads/199502BANCOS.CSV', 'downloads/199503BANCOS.CSV', 'downloads/199504BANCOS.CSV', 'downloads/199505BANCOS.CSV', 'downloads/199506BANCOS.CSV', 'downloads/199507BANCOS.CSV', 'downloads/199508BANCOS.CSV', 'downloads/199509BANCOS.CSV', 'downloads/199510BANCOS.CSV', 'downloads/199511BANCOS.CSV', 'downloads/199512BANCOS.CSV', 'downloads/199601BANCOS.CSV', 'downloads/199602BANCOS.CSV', 'downloads/199603BANCOS.CSV', 'downloads/199604BANCOS.CSV', 'downloads/199605BANCOS.CSV', 'downloads/199606BANCOS.CSV', 'downloads/199607BANCOS.CSV', 'downloads/199608BANCOS.CSV', 'downloads/199609BANCOS.CSV', 'downloads/199610BANCOS.CSV', 'downloads/199611BANCOS.CSV', 'downloads/199612BANCOS.CSV', 'downloads/199701BANCOS.CSV', 'downloads/199702BANCOS.CSV', 'downloads/199703BANCOS.CSV', 'downloads/199704BANCOS.CSV', 'downloads/199705BANCOS.CSV', 'downloads/199706BANCOS.CSV', 'downloads/199707BANCOS.CSV', 'downloads/199708BANCOS.CSV', 'downloads/199709BANCOS.CSV'