In [134]:
# Import the required libraries 

import pandas as pd # Make sure that you have openpyxl installed so that you can open newer excel files
import numpy as np
import plotnine as p9
import plotnine3d as p3d
import os 
from zipfile import is_zipfile, ZipFile
from datetime import datetime, date

In [135]:
def extractFolder(inputFilesDir,unzippedFilesDir):
    for file in os.listdir(inputFilesDir):   # get the list of files
        if is_zipfile(inputFilesDir+file): # if it is a zipfile, extract it
            with ZipFile(inputFilesDir+file) as item: # treat the file as a zip
                item.extractall(unzippedFilesDir)  # extract it into a new folder

def cleanTermStructureSheet(filename,foldername,sheetName):
    df = pd.read_excel(foldername+filename, sheet_name=sheetName, header=1, engine='openpyxl')
    df.drop(['Unnamed: 0'], axis=1, inplace=True) # Blank column - artifact of the dataset
    df = df.iloc[8:,:] # Dropping all unnessary columns 
    df.rename(columns={"Main menu": "Tenor"}, inplace=True)
    df.set_index("Tenor", inplace=True)
    return df

def getDateFromFileName(filename):
    dateString = filename.split('_')[2]
    yearString = dateString[:4]
    monthString = dateString[4:6]
    dayString = dateString[6:]
    return date(int(yearString),int(monthString),int(dayString))

def getCountrieslist(inputFilesDir):
    return pd.read_csv(inputFilesDir+"countries.csv").columns.to_list()

def getFilteredFileList(unzippedFilesDir,fileType,sheetName):
    allFiles = os.listdir(unzippedFilesDir)
    filteredList = [file for file in allFiles if fileType in file]
    return filteredList 

In [147]:
inputFilesDir = "inputData/"
unzippedFilesDir = "unzippedFiles/"
fileType = "Term_Structures"
sheetName = "RFR_spot_no_VA"

extractFolder(inputFilesDir,unzippedFilesDir)
countries = getCountrieslist(inputFilesDir)

countriesDict = {}
for country in countries:
    countriesDict[country] = {}

for filename in getFilteredFileList(unzippedFilesDir,fileType,sheetName):
    dateIndex = getDateFromFileName(filename)
    allTermStructures = cleanTermStructureSheet(filename,unzippedFilesDir,sheetName)
    for country in countries:
        countriesDict[country][dateIndex] =  allTermStructures.loc[:,country]



In [149]:
pd.DataFrame(countriesDict['Euro'])

Unnamed: 0_level_0,2023-12-31,2024-02-29,2024-01-31,2023-10-31,2022-12-31,2023-11-30,2023-01-31,2023-04-30,2023-09-30,2023-05-31,2023-08-31,2023-06-30,2023-02-28,2023-03-31,2023-07-31
Tenor,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,0.03357,0.03597,0.03355,0.03911,0.03176,0.03732,0.03261,0.03673,0.04047,0.03739,0.03884,0.03983,0.03622,0.03472,0.03919
2,0.0269,0.03127,0.02761,0.03487,0.03295,0.03207,0.03191,0.03362,0.03701,0.034,0.03517,0.03772,0.03684,0.03315,0.03611
3,0.02439,0.02893,0.02538,0.03263,0.03203,0.02956,0.03008,0.03128,0.03468,0.03152,0.03281,0.03501,0.03517,0.0314,0.03357
4,0.0235,0.02763,0.02443,0.03175,0.03152,0.02845,0.02895,0.02998,0.03352,0.02996,0.03105,0.03282,0.0337,0.03009,0.03188
5,0.02323,0.02685,0.02406,0.03149,0.03131,0.028,0.02829,0.02932,0.03292,0.0291,0.03013,0.03132,0.03274,0.0293,0.03086
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,0.03241,0.03136,0.03131,0.0338,0.0328,0.03315,0.03252,0.03287,0.03374,0.03286,0.03304,0.03275,0.03306,0.03274,0.03306
147,0.03243,0.03137,0.03132,0.0338,0.03281,0.03316,0.03253,0.03288,0.03374,0.03287,0.03305,0.03276,0.03307,0.03275,0.03307
148,0.03244,0.03138,0.03133,0.0338,0.03282,0.03317,0.03254,0.03289,0.03375,0.03288,0.03306,0.03277,0.03308,0.03276,0.03308
149,0.03245,0.03139,0.03135,0.03381,0.03283,0.03318,0.03256,0.0329,0.03375,0.0329,0.03307,0.03278,0.03309,0.03277,0.03309


In [137]:
filename = 'EIOPA_RFR_20231231_Term_Structures.xlsx'
foldername = "unzippedFiles/"
sheetName = "RFR_spot_no_VA"

dateIndex = getDateFromFileName(filename)
allTermStructures = cleanTermStructureSheet(filename,foldername,sheetName)


In [139]:
countriesDict = {}
for country in countries:
    countriesDict[country] = {}

countriesDict[countries[0]][dateIndex] =  allTermStructures.loc[:,countries[0]]

In [146]:
pd.DataFrame(countriesDict[countries[0]])

Unnamed: 0_level_0,2023-12-31
Tenor,Unnamed: 1_level_1
1,0.03357
2,0.0269
3,0.02439
4,0.0235
5,0.02323
...,...
146,0.03241
147,0.03243
148,0.03244
149,0.03245


In [None]:
countriesDict[countries[0]]

In [None]:
tenorStructure = np.arange(1,151,1)
temp = pd.DataFrame(data = {datetime.today():tenorStructure},index = tenorStructure).transpose()
temp

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,141,142,143,144,145,146,147,148,149,150
2024-03-07 08:43:34.623883,1,2,3,4,5,6,7,8,9,10,...,141,142,143,144,145,146,147,148,149,150


In [None]:
temp.append(df.loc[:,countries[0]].transpose())

AttributeError: 'DataFrame' object has no attribute 'append'