In [1]:
# Widen the notebook's main container to 90% of the browser window.
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container {width: 90% !important; }</style>"))

In [2]:
import re
import csv
import sys
import boto3
import requests
import boto3.s3
from zipfile import ZipFile
import xml.etree.ElementTree as ET

## Step 1: Download the xml from this [Link](https://registers.esma.europa.eu/solr/esma_registers_firds_files/select?q=*&fq=publication_date:%5B2021-01-17T00:00:00Z+TO+2021-01-19T23:59:59Z%5D&wt=xml&indent=true&start=0&rows=100)
## Step 2: From the xml, please parse through to the first download link whose file_type is DLTINS and download the zip

In [3]:
# Parse the search-result XML saved in Step 1.
tree = ET.parse('steeley.xml')
root = tree.getroot()

# Pick the first <str name="download_link"> element whose URL mentions DLTINS.
# `.get('name')` and `or ''` tolerate <str> elements that lack a name
# attribute or have no text, which would otherwise raise KeyError/TypeError.
download_Zip = None
for item in root.iter('str'):
    link = item.text or ''
    if item.attrib.get('name') == 'download_link' and 'DLTINS' in link:
        download_Zip = link
        break

# Stop here with a clear message rather than passing None to requests.get().
if download_Zip is None:
    raise ValueError("No DLTINS download_link found in steeley.xml")

print(download_Zip)
fileName = "steeley.zip"

# The timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() prevents an HTTP error page from being saved as the zip.
req = requests.get(download_Zip, timeout=60)
req.raise_for_status()

with open(fileName, 'wb') as file:
    file.write(req.content)

http://firds.esma.europa.eu/firds/DLTINS_20210117_01of01.zip


## Step 3: Extract the xml from the zip.

In [4]:
# List the archive contents, then extract them into the working directory.
# Errors (missing file, corrupt archive) are deliberately left to propagate:
# the next step needs the extracted XML, so printing the error and carrying on
# would only defer the failure or let a stale file from an earlier run be used.
with ZipFile(fileName, 'r') as archive:
    archive.printdir()
    archive.extractall()

File Name                                             Modified             Size
DLTINS_20210117_01of01.xml                     2021-01-17 01:17:12    143278061


## Step 4: Convert the contents of the xml into a CSV with the following header:
##### FinInstrmGnlAttrbts.Id,   FinInstrmGnlAttrbts.FullNm,   FinInstrmGnlAttrbts.ClssfctnTp,   FinInstrmGnlAttrbts.CmmdtyDerivInd,   FinInstrmGnlAttrbts.NtnlCcy,   Issr

In [5]:
# Load the extracted instrument file.
tree = ET.parse('DLTINS_20210117_01of01.xml')
root = tree.getroot()

header = ['FinInstrmGnlAttrbts.Id', 'FinInstrmGnlAttrbts.FullNm', 'FinInstrmGnlAttrbts.ClssfctnTp', 'FinInstrmGnlAttrbts.CmmdtyDerivInd', 'FinInstrmGnlAttrbts.NtnlCcy', 'Issr']
ID, FullNm, ClssfctnTp, CmmdtyDerivInd, NtnlCcy, Issr = 0, 1, 2, 3, 4, 5

# Children of FinInstrmGnlAttrbts that are exported, mapped to their CSV column.
ATTRIBUTE_COLUMNS = {
    'Id': ID,
    'FullNm': FullNm,
    'ClssfctnTp': ClssfctnTp,
    'CmmdtyDerivInd': CmmdtyDerivInd,
    'NtnlCcy': NtnlCcy,
}


def local_name(tag):
    """Return an element tag without its leading '{namespace}' prefix."""
    return tag.rsplit('}', 1)[-1]


OutPutFile = 'steeley.csv'
with open(OutPutFile, 'w', encoding='UTF8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)

    # Start with an empty row so an Issr seen before any FinInstrmGnlAttrbts
    # cannot hit an undefined `data`.
    data = [None] * len(header)

    # Elements are visited in document order: a FinInstrmGnlAttrbts element
    # fills the attribute columns, and the Issr element that follows it
    # completes the row and triggers the write.
    for item in root.iter():
        name = local_name(item.tag)

        if name == 'FinInstrmGnlAttrbts':
            data = [None] * len(header)
            for child in item:
                # Exact local-name match: a tag that merely contains "Id"
                # (or another wanted name) can no longer overwrite a value.
                column = ATTRIBUTE_COLUMNS.get(local_name(child.tag))
                if column is not None:
                    data[column] = child.text

        elif name == 'Issr':
            data[Issr] = item.text
            writer.writerow(data)
            # Reset so these attributes cannot leak into a later row.
            data = [None] * len(header)

## Step 5: Store the csv from step 4 in an AWS S3 bucket

In [6]:
s3 = boto3.client('s3')
bucketName = "steeleye03"

# Create the bucket if needed. Outside us-east-1, S3 requires the target
# region to be passed as a LocationConstraint.
region = s3.meta.region_name
try:
    if region in (None, 'us-east-1'):
        bucket = s3.create_bucket(Bucket=bucketName)
    else:
        bucket = s3.create_bucket(
            Bucket=bucketName,
            CreateBucketConfiguration={'LocationConstraint': region},
        )
except s3.exceptions.BucketAlreadyOwnedByYou:
    # Re-running the notebook: the bucket already exists in this account.
    bucket = None

# Upload the CSV produced in Step 4 under the key "s3_<file name>".
s3.upload_file(OutPutFile, bucketName, "s3_"+OutPutFile)