In [14]:
import pandas as pd
import glob
import os

'''
Loop through all excel files in the excel folder and convert them to csv
The header row contains:
- 0: Exchange Date
- 1: Close
- 2: %Chg
- 3: Open
- 4: Low
- 5: High
- 6: Volume
- 7: Turnover - USD
Each entry is also the name of the corresponding column.
'''

# Folder holding the source workbooks. It is kept as bytes, so os.listdir()
# below yields bytes entries that are decoded again with os.fsdecode().
directory = os.fsencode('excel_data')

# column headers
expected_columns = ['Exchange Date', 'Close', '%Chg',
                    'Open', 'Low', 'High', 'Volume', 'Turnover - USD']

# DataFrame.to_csv() does not create missing folders, so make sure the output
# folder exists before the first file is written.
os.makedirs('etf_csv', exist_ok=True)

for file in os.listdir(directory):
    filename = os.fsdecode(file)

    # Only .xlsx workbooks are converted. Names starting with '~$' are the
    # lock files Excel leaves next to an open workbook; they end in .xlsx but
    # are not readable workbooks, so they are ignored as well.
    if not filename.endswith('.xlsx') or filename.startswith('~$'):
        continue

    print(f'Processing {filename}')

    # First pass: load the sheet without a header so that every row,
    # including whatever precedes the real table, is available for scanning.
    df = pd.read_excel(f'excel_data/{filename}', header=None)

    # Locate the row whose first eight cells are exactly the expected labels.
    for i, row in df.iterrows():
        if list(row.iloc[:8]) == expected_columns:
            header_row = i
            print(f'Header found at row {header_row} in {filename}')
            break
    else:
        # The loop finished without a break: no row matched.
        print(f'Header not found in {filename}, skipping file')
        continue

    # Second pass: re-read the sheet, skipping everything above the header
    # row so that pandas uses that row as the column names.
    df = pd.read_excel(f'excel_data/{filename}', skiprows=header_row)

    # Keep only the eight expected columns and name the CSV after the
    # workbook (same name without the .xlsx extension).
    csv_name = os.path.splitext(filename)[0]
    df.iloc[:, :8].to_csv(f'etf_csv/{csv_name}.csv', index=False)

Processing GOVT.xlsx
Header found at row 29 in GOVT.xlsx
Processing EFAS.xlsx
Header found at row 32 in EFAS.xlsx
Processing IEV.xlsx
Header found at row 27 in IEV.xlsx
Processing EWG.xlsx
Header found at row 32 in EWG.xlsx
Processing VWOB.xlsx
Header found at row 27 in VWOB.xlsx
Processing SPY.xlsx
Header found at row 31 in SPY.xlsx
Processing SAUS.xlsx
Header found at row 29 in SAUS.xlsx
Processing VEU.xlsx
Header found at row 28 in VEU.xlsx
Processing GHYG.xlsx
Header found at row 28 in GHYG.xlsx
Processing EWJ.xlsx
Header found at row 30 in EWJ.xlsx
Processing IGOV.xlsx
Header found at row 26 in IGOV.xlsx
Processing LQD.xlsx
Header found at row 30 in LQD.xlsx
Processing AGGG.xlsx
Header found at row 29 in AGGG.xlsx
Processing VTI.xlsx
Header found at row 26 in VTI.xlsx
Processing EDIV.xlsx
Header found at row 29 in EDIV.xlsx
Processing IWM.xlsx
Header found at row 31 in IWM.xlsx
Processing HYG.xlsx
Header found at row 28 in HYG.xlsx
Processing PSP.xlsx
Header found at row 32 in PSP

In [15]:
def rename_files_in_folder(folder_path):
    """Rename ticker-named files in *folder_path* to their descriptive names.

    The ticker of a file is the part of its name before the first dot. When
    that ticker appears in the table below, the file is renamed to the
    matching table entry (for example ``SPY.csv`` becomes
    ``SPY.US.Equities.csv``). One line is printed for every directory entry,
    reporting either the rename or the reason the entry was skipped.

    Entries are skipped when:
    - their ticker has no entry in the table,
    - they are not regular files (e.g. a sub-directory named like a ticker),
    - they already carry the target name, so running the function again on
      the same folder is a no-op.

    Args:
        folder_path: Path of the folder whose entries are renamed in place.
    """
    file_names = [
        'ACWI.World.Equities.csv',
        'AGGG.World.Debt.csv',
        'AHYG.Asia.Debt.csv',
        'BWX.World.Debt.csv',
        'EDIV.EmergingMarkets.Equities.csv',
        'EEM.EmergingMarkets.Equities.csv',
        'EFA.World.Equities.csv',
        'EFAS.World.Dividend.Equities.csv',
        'EMB.EmergingMarkets.Debt.csv',
        'EPP.Pacific_ex_Japan.Equities.csv',
        'EWC.Canada.Equities.csv',
        'EWG.Europe.Equities.csv',
        'EWJ.Japan.Equities.csv',
        'EWU.Europe.Equities.csv',
        'EWZ.Brazil.Equities.csv',
        'FXI.China.Equities.csv',
        'GHYG.World.Debt.csv',
        'GLD.World.Commodities.csv',
        'GOVT.US.Debt.csv',
        'HYG.US.Debt.csv',
        'IHYG.Europe.Debt.csv',
        'IGOV.World.Debt.csv',
        'INDA.India.Equities.csv',
        'IWM.US.Equities.csv',
        'IEV.Europe.Equities.csv',
        'IEF.US.Debt.csv',
        'JNK.US.Debt.csv',
        'LQD.US.Debt.csv',
        'PSP.World.PrivateEquity.csv',
        'REET.World.Alternative.csv',
        'SAUS.Australia.Equities.csv',
        'SDY.US.Dividend.Equities.csv',
        'SPY.US.Equities.csv',
        'TLT.US.Debt.csv',
        'URTH.World.Equities.csv',
        'VEU.World.Equities.csv',
        'VTI.US.Equities.csv',
        'VWOB.EmergingMarkets.Debt.csv',
    ]

    # Map each ticker (text before the first dot) to its full target name.
    ticker_to_new_name = {name.split('.')[0]: name for name in file_names}

    for filename in os.listdir(folder_path):
        ticker = filename.split('.')[0]
        new_name = ticker_to_new_name.get(ticker)
        if new_name is None:
            print(f'Skipping: {filename} (no matching new name)')
            continue

        old_file_path = os.path.join(folder_path, filename)
        if not os.path.isfile(old_file_path):
            # A directory that merely shares a ticker's name must not be
            # turned into something that looks like a CSV file.
            print(f'Skipping: {filename} (not a regular file)')
            continue

        if filename == new_name:
            # Already renamed by a previous run; renaming a file onto itself
            # would only produce a misleading "Renamed" message.
            print(f'Skipping: {filename} (already renamed)')
            continue

        new_file_path = os.path.join(folder_path, new_name)
        os.rename(old_file_path, new_file_path)
        print(f'Renamed: {filename} -> {new_name}')

# Run the ticker-based renaming on the 'etf_csv' folder.
folder_path = 'etf_csv'
rename_files_in_folder(folder_path)

Renamed: EDIV.csv -> EDIV.EmergingMarkets.Equities.csv
Renamed: IEV.csv -> IEV.Europe.Equities.csv
Renamed: BWX.csv -> BWX.World.Debt.csv
Renamed: AHYG.csv -> AHYG.Asia.Debt.csv
Renamed: IGOV.csv -> IGOV.World.Debt.csv
Renamed: EFA.csv -> EFA.World.Equities.csv
Renamed: EEM.csv -> EEM.EmergingMarkets.Equities.csv
Renamed: VEU.csv -> VEU.World.Equities.csv
Renamed: IEF.csv -> IEF.US.Debt.csv
Renamed: VWOB.csv -> VWOB.EmergingMarkets.Debt.csv
Renamed: GHYG.csv -> GHYG.World.Debt.csv
Renamed: EWJ.csv -> EWJ.Japan.Equities.csv
Renamed: EMB.csv -> EMB.EmergingMarkets.Debt.csv
Renamed: GLD.csv -> GLD.World.Commodities.csv
Renamed: VTI.csv -> VTI.US.Equities.csv
Renamed: LQD.csv -> LQD.US.Debt.csv
Renamed: IHYG.csv -> IHYG.Europe.Debt.csv
Renamed: EWZ.csv -> EWZ.Brazil.Equities.csv
Renamed: IWM.csv -> IWM.US.Equities.csv
Renamed: URTH.csv -> URTH.World.Equities.csv
Renamed: EWC.csv -> EWC.Canada.Equities.csv
Renamed: SDY.csv -> SDY.US.Dividend.Equities.csv
Renamed: TLT.csv -> TLT.US.Debt.csv
