<a href="https://colab.research.google.com/github/tluxxx/PortfolioExperiments/blob/main/ResamplingFinancialTimeSeries01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Resampling of Financial Time Series

In [22]:
!pip install pandas_market_calendars
!pip install holidays



In [23]:
# data management modules
import pandas as pd

# plotting modules
import plotly.graph_objects as go

# market data modules
import yfinance as yf

# service modules
import pandas_market_calendars as mcal
import holidays

In [24]:
# instrument and period under investigation
ticker = '^GDAXI'
start_date = '2020-01-01'
end_date = '2024-05-30'

# fetch the price history for the instrument from yfinance
dax_data = yf.download(ticker, start=start_date, end=end_date)

[*********************100%%**********************]  1 of 1 completed


# 1. Setting the scene and problem statement

In [25]:
# inspect the first 30 rows of the downloaded data
dax_data.head(n=30)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,13233.709961,13425.030273,13225.160156,13385.929688,13385.929688,75341400
2020-01-03,13266.389648,13282.740234,13120.719727,13219.139648,13219.139648,80793400
2020-01-06,13085.490234,13143.110352,12948.169922,13126.990234,13126.990234,71778600
2020-01-07,13199.589844,13283.879883,13166.400391,13226.830078,13226.830078,78282300
2020-01-08,13140.490234,13334.040039,13106.269531,13320.179688,13320.179688,88178800
2020-01-09,13474.759766,13523.339844,13456.780273,13495.05957,13495.05957,86965600
2020-01-10,13533.459961,13548.200195,13483.30957,13483.30957,13483.30957,77941000
2020-01-13,13521.860352,13529.209961,13404.719727,13451.519531,13451.519531,67208300
2020-01-14,13439.200195,13487.900391,13362.200195,13456.490234,13456.490234,74555400
2020-01-15,13444.950195,13458.910156,13388.480469,13432.299805,13432.299805,72046400


In [28]:
# straightforward solution: month-start resampling, first row of each month
dax_data.resample(rule='MS').first()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-01,13233.709961,13425.030273,13225.160156,13385.929688,13385.929688,75341400
2020-02-01,13033.169922,13077.570312,12987.530273,13045.19043,13045.19043,81389100
2020-03-01,12030.269531,12121.599609,11624.629883,11857.870117,11857.870117,207214100
2020-04-01,9610.669922,9686.459961,9498.049805,9544.75,9544.75,132862200
2020-05-01,10543.360352,10578.429688,10426.05957,10466.799805,10466.799805,140425100
2020-06-01,11896.700195,12055.570312,11850.339844,12021.280273,12021.280273,143102700
2020-07-01,12391.719727,12391.719727,12095.110352,12260.570312,12260.570312,81131500
2020-08-01,12374.459961,12698.55957,12365.610352,12646.980469,12646.980469,73270900
2020-09-01,13037.200195,13127.280273,12850.299805,12974.25,12974.25,63053900
2020-10-01,12812.080078,12836.650391,12671.230469,12730.769531,12730.769531,87114100


# 2. Proposed alternatives

In [4]:
def first_trade_day_of_month_1(data):
  ''' Identification of the first trading date of a month
  Method 1:
  plain month-start ('MS') resampling of the data, returning the resampled index
  --> each month is labelled with its 1st calendar day
  --> not the wanted result, since the first *trading* day of the month is required

  Args:
    data: pandas dataframe with a datetime index (e.g. OHLC data).

  Returns:
    DatetimeIndex holding one month-start label per resampled month

  Raises:
    Nothing is raised explicitly by this function.
  '''
  monthly_first = data.resample('MS').first()
  return monthly_first.index

In [5]:
def first_trade_day_of_month_2(data):
  ''' Identification of the first trading date of a month
  Method 2:
  advanced resampling, using the content of the data (e.g. downloaded from yfinance):
  the dates of the dataframe index are put into a separate series, which is
  resampled by month start; the first value of each month is the first date
  of that month for which the data has a row --> first trading day

  The input dataframe is not modified.

  Args:
    data: pandas dataframe with a datetime index (e.g. OHLC data).

  Returns:
    list of trade dates (datetime.date objects), one entry per resampled month

  Raises:
    Nothing is raised explicitly by this function.
  '''
  # keep the dates in a standalone Series instead of writing a 'date' column
  # into the caller's dataframe
  dates = pd.Series(pd.to_datetime(data.index).date, index=data.index)
  return dates.resample('MS').first().to_list()

In [6]:
def first_trade_day_of_month_3(start_date, end_date):
  '''
  Method 3:
  generating lists of:
  - week-days between the two dates (pandas business-day range)
  - holidays for the region under investigation (here: Germany)
  - bank-days = week-days that are not in the list of holidays
  the bank-days are then resampled by month start and the first bank-day
  of each month is returned

  Args:
    start_date, end_date: dates, e.g. 'YYYY-MM-DD' strings (anything accepted
      by pd.Timestamp and pd.date_range)

  Returns:
    list of trade dates (pandas Timestamps), one entry per resampled month

  Raises:
    Nothing is raised explicitly by this function.
  '''
  # take the years via pd.Timestamp so the holiday range does not depend on
  # slicing the first four characters of a string
  first_year = pd.Timestamp(start_date).year
  last_year = pd.Timestamp(end_date).year
  holidays_de = holidays.Germany(years=range(first_year, last_year + 1))

  week_days = pd.date_range(start_date, end_date, freq='B')
  bank_days = pd.to_datetime([day for day in week_days if day not in holidays_de])

  # index and values are both the bank-days, so first() yields the earliest one per month
  first_bank_days = pd.Series(bank_days, index=bank_days).resample('MS').first()
  return first_bank_days.tolist()


In [9]:
def first_trade_day_of_month_4(start_date, end_date, exchange):
  '''
  Method 4:
  importing the schedule of the relevant exchange (dataframe with timestamps of open and close)
  resampling the schedule (monthly), collecting the first entry for each month
  generating a list of the date-part of the timestamps of all first closes of each month

  Args:
    start_date, end_date: dates ('YYYY-MM-DD')
    exchange: code of the stock exchange (e.g. 'XETR') as string

  Returns:
    list of trade dates (datetime.date objects), one entry per resampled month

  Raises:
    Nothing is raised explicitly by this function.
  '''
  # look up the calendar of the requested exchange instead of a hard-coded 'XETR'
  calendar = mcal.get_calendar(exchange)
  schedule = calendar.schedule(start_date, end_date)
  first_sessions = schedule.resample('MS').first()

  # NOTE(review): the date is taken from the 'market_close' timestamp as stored in the
  # schedule -- confirm that its timezone yields the local session date for exchanges
  # other than 'XETR'
  first_closes = pd.to_datetime(first_sessions['market_close'])
  return first_closes.dt.date.to_list()

In [16]:
# first trading day of each month according to each of the four methods
trade_dates_m1 = first_trade_day_of_month_1(dax_data)
trade_dates_m2 = first_trade_day_of_month_2(dax_data)
trade_dates_m3 = first_trade_day_of_month_3(start_date, end_date)
trade_dates_m4 = first_trade_day_of_month_4(start_date, end_date, 'XETR')

# closing prices on those dates
# (only rows of dax_data whose index appears in the respective date list are kept)
close_m1 = dax_data.loc[dax_data.index.isin(trade_dates_m1), 'Close']
close_m2 = dax_data.loc[dax_data.index.isin(trade_dates_m2), 'Close']
close_m3 = dax_data.loc[dax_data.index.isin(trade_dates_m3), 'Close']
close_m4 = dax_data.loc[dax_data.index.isin(trade_dates_m4), 'Close']

In [19]:
# figure title: main title with the instrument / period as a smaller second line
main_title = 'comparision of various methods to define the first trading day of a month'
sub_title = f'instrument {ticker}  from {start_date} to {end_date} '
title = f'{main_title}<br><br><sup>{sub_title}</sup>'

# one line per method: (monthly closes, legend label)
method_traces = [
  (close_m1, 'M1: simple resampling'),
  (close_m2, 'M2: advanced resampling'),
  (close_m3, 'M3: from weekday/holiday lists'),
  (close_m4, 'M4: from stock exchange schedules'),
]

fig = go.Figure()
for closes, label in method_traces:
  fig.add_trace(go.Scatter(x=closes.index, y=closes, name=label))

# size / theme and labelling in a single layout update
fig.update_layout(
  template='plotly_dark',
  autosize=False,
  width=1200,
  height=600,
  title=title,
  xaxis_title='Date',
  yaxis_title=f'close of {ticker}',
  legend_title='method',
)
fig.show()