In [5]:
import requests
import xmltodict
import zipfile
import io
import pandas as pd
import boto3

# Step 1: Download the XML file
url = 'https://registers.esma.europa.eu/solr/esma_registers_firds_files/select?q=*&fq=publication_date:%5B2022-01-01T00:00:00Z+TO+2022-01-31T23:59:59Z%5D&wt=xml&indent=true&start=0&rows=100'
response = requests.get(url)

# Step 2: Parse through the XML file to find the first download link with file_type=DLTINS
xml_data = xmltodict.parse(response.content)
download_links = xml_data['response']['result']['doc']
download_url = None
for link in download_links:
    if 'file_type' in link and link['file_type'] == 'DLTINS':
        download_url = link['download_link']
        break

if download_url is not None:
    # Step 3: Download the zip file and extract the XML file
    response = requests.get(download_url)
    zip_data = zipfile.ZipFile(io.BytesIO(response.content))
    xml_file = zip_data.read(zip_data.namelist()[0])

    # Step 4: Convert the XML file to CSV
    xml_dict = xmltodict.parse(xml_file)
    data = xml_dict['message']['TrdCaptRpt']['Instrmt']
    df = pd.json_normalize(data)
    df = df[['FinInstrmGnlAttrbts.Id', 'FinInstrmGnlAttrbts.FullNm', 'FinInstrmGnlAttrbts.ClssfctnTp',
             'FinInstrmGnlAttrbts.CmmdtyDerivInd', 'FinInstrmGnlAttrbts.NtnlCcy', 'Issr']]

    # Step 5: Upload the CSV to AWS S3
    bucket_name = '<YOUR_BUCKET_NAME>'
    csv_buffer = io.StringIO()
    df.to_csv(csv_buffer, index=False)
    s3 = boto3.resource('s3')
    s3.Object(bucket_name, 'data.csv').put(Body=csv_buffer.getvalue())
else:
    print('No download URL found for file_type=DLTINS')


No download URL found for file_type=DLTINS
