In [None]:
#All imports at the top
import boto3
import pandas as pd
from io import StringIO, BytesIO
from datetime import datetime, timedelta

In [39]:
#Arguments all in a cell for control
arg_date = '2022-05-09'
src_format = '%Y-%m-%d'
src_bucket = 'xetra-1234'
trg_bucket = 'xetra-data-etl-destination'
columns = ['ISIN', 'Date', 'Time', 'StartPrice', 'MaxPrice', 'MinPrice',
           'EndPrice', 'TradedVolume']
key = 'xetra_daily_report_' + datetime.today().strftime("%Y%m%d_%H%M%S") + '.parquet'


In [40]:
arg_date_dt = datetime.strptime(arg_date,src_format).date() - timedelta(days=1)

In [41]:
s3 = boto3.resource('s3')
bucket = s3.Bucket(src_bucket)
objects = [obj for obj in bucket.objects.all() if datetime.strptime(obj.key.split('/')[0], '%Y-%m-%d').date() >= arg_date_dt]

In [42]:
#Remove part of for loop into a fuction
def csv_to_df(filename):
#     count = 0
    csv_obj = bucket.Object(key=filename).get().get('Body').read().decode('utf-8')
#     count = count + 1
#     print(f'Count: {count}', end='\r')
    data = StringIO(csv_obj)
    df = pd.read_csv(data, delimiter=',')
    return df
#Create a list comprehension for the for loop to create the dataframe
df_all = pd.concat([csv_to_df(obj.key) for obj in objects], ignore_index=True)

In [43]:
df_all = df_all.loc[:,columns]

In [44]:
df_all.dropna(inplace=True)

In [45]:
df_all

Unnamed: 0,ISIN,Date,Time,StartPrice,MaxPrice,MinPrice,EndPrice,TradedVolume
0,AT0000A0E9W5,2022-05-08,08:00,15.890,15.960,15.850,15.960,2066
1,DE000A0DJ6J9,2022-05-08,08:00,29.940,29.940,29.820,29.820,947
2,DE000A0D6554,2022-05-08,08:00,12.890,12.950,12.890,12.930,12650
3,DE000A0D9PT0,2022-05-08,08:00,185.850,186.100,185.600,185.600,3994
4,DE000A0HN5C6,2022-05-08,08:00,35.880,35.880,35.880,35.880,10
...,...,...,...,...,...,...,...,...
28393486,GB00BLD4ZP54,2022-12-31,16:46,19.324,19.324,19.324,19.324,0
28393487,LU1923627332,2022-12-31,16:52,12.400,12.400,12.400,12.400,2645
28393488,US98956P1021,2022-12-31,20:30,113.100,113.100,113.100,113.100,0
28393489,US9224171002,2022-12-31,20:30,24.600,24.600,24.600,24.600,0


In [47]:
## Transformations
### Get opening price per ISIN and day
df_all['opening_price'] = df_all.sort_values(by=['Time']).groupby(['ISIN','Date'])['StartPrice'].transform('first')

In [48]:
### Get closing price per ISIN and day
df_all['closing_price'] = df_all.sort_values(by=['Time']).groupby(['ISIN','Date'])['StartPrice'].transform('last')

In [49]:
### Aggregations
df_all = df_all.groupby(['ISIN','Date'],as_index=False).agg(opening_price_eur=('opening_price','min'),closing_price_eur=('closing_price','min'), minimum_price_eur=('MinPrice','min'), maximum_price_eur=('MaxPrice','max'), daily_traded_volume=('TradedVolume','sum'))

In [50]:
### Percent Change Prev Closing
df_all['prev_closing_price'] = df_all.sort_values(by=['Date']).groupby(['ISIN'])['closing_price_eur'].shift(1)

In [51]:
# Percent change of prev closing
df_all['change_prev_closing_%'] = (df_all['closing_price_eur'] - df_all['prev_closing_price']) / df_all['prev_closing_price'] * 100

In [52]:
df_all.drop(columns=['prev_closing_price'],inplace=True)

In [53]:
df_all = df_all.round(decimals=2)

In [54]:
#Filtering by date
df_all = df_all[df_all.Date>=arg_date]

## Saving to S3

In [55]:
out_buffer = BytesIO()
df_all.to_parquet(out_buffer,index=False)
bucket_target = s3.Bucket(trg_bucket) #own bucket name
bucket_target.put_object(Body=out_buffer.getvalue(),Key=key)

s3.Object(bucket_name='xetra-data-etl-destination', key='xetra_daily_report_20230422_171304.parquet')

### Reading the uploaded file

In [56]:
for obj in bucket_target.objects.all():
    print(obj.key)

xetra_daily_report_20230422_151559.parquet
xetra_daily_report_20230422_171304.parquet


In [57]:
prq_obj = bucket_target.Object(key='xetra_daily_report_20230422_171304.parquet').get().get('Body').read()
data = BytesIO(prq_obj)
df_report = pd.read_parquet(data)

In [58]:
df_report

Unnamed: 0,ISIN,Date,opening_price_eur,closing_price_eur,minimum_price_eur,maximum_price_eur,daily_traded_volume,change_prev_closing_%
0,AT000000STR1,2022-05-09,39.00,38.60,38.50,39.05,905,-0.39
1,AT000000STR1,2022-05-10,38.95,38.95,38.95,39.45,147,0.91
2,AT000000STR1,2022-05-11,39.15,39.25,38.65,39.60,914,0.77
3,AT000000STR1,2022-05-12,39.15,39.25,38.65,39.60,914,0.00
4,AT000000STR1,2022-05-13,39.15,39.25,38.65,39.60,914,0.00
...,...,...,...,...,...,...,...,...
758506,XS2434891219,2022-12-27,3.44,3.50,3.44,3.50,0,0.00
758507,XS2434891219,2022-12-28,3.44,3.66,3.42,3.66,0,4.53
758508,XS2434891219,2022-12-29,3.44,3.66,3.42,3.66,0,0.00
758509,XS2434891219,2022-12-30,3.44,3.66,3.42,3.66,0,0.00
