Copyright (c) Microsoft Corporation.

Licensed under the MIT License.

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Build a daily calendar (date-dimension) table as a pandas DataFrame.
# One row per calendar day between start_dt and end_dt (both inclusive);
# every other column is derived from the 'dt' column.

# set start and end date variables (month-day-year strings)
start_dt = '01-01-2016'
end_dt = '12-31-2023'

# create base date range, one row per day
df = pd.DataFrame({'dt': pd.date_range(start=start_dt, end=end_dt, freq='D')})

# year as int
df['y'] = df['dt'].dt.year

# month as int
df['m'] = df['dt'].dt.month

# calendar day as int
df['d'] = df['dt'].dt.day

# yearmonth as int (yyyymm)
df['ym'] = df['y'] * 100 + df['m']

# date in yyyymmdd as int
df['dt_int'] = df['y'] * 10000 + df['m'] * 100 + df['d']

# day of week name (Monday, Tuesday, ...)
df['dow_name'] = df['dt'].dt.day_name()

# day of week number as int (Monday=0, Sunday=6)
df['dow'] = df['dt'].dt.dayofweek

# day of year number as int
df['doy'] = df['dt'].dt.dayofyear

# month name (January, February, ...)
df['m_name'] = df['dt'].dt.month_name()

# week number of year, using iso conventions (Monday is first DOW).
# Series.dt.week was deprecated in pandas 1.1 and removed in pandas 2.0;
# dt.isocalendar().week is the replacement. It returns the nullable UInt32
# dtype, so cast to int64 to keep the plain integer column the old accessor
# produced.
df['iso_week'] = df['dt'].dt.isocalendar().week.astype('int64')

# quarter number of year (1-4)
df['q'] = ((df['m'] - 1) // 3) + 1

# quarter as string (Q1..Q4)
df['q_t'] = 'Q' + df['q'].astype(str)

# yearquarter as int (yyyyq)
df['yq'] = df['y'] * 10 + df['q']

# year quarter as string (e.g. 2016Q1)
df['yqt'] = df['y'].astype(str) + df['q_t']

# half number of year (1-2)
df['h'] = ((df['q'] - 1) // 2) + 1

# yearhalf as int (yyyyh)
df['yh'] = df['y'] * 10 + df['h']

# yearmonth name (e.g. "January, 2016")
df['ym_name'] = df['m_name'] + ', ' + df['y'].astype(str)

# is weekday (1=True, 0=False); dow 0-4 is Monday-Friday
df['is_weekd'] = np.where(df['dow'] < 5, 1, 0)

# weekdays in yearmonth through date (running count that restarts each month)
df['weekdom'] = df.groupby('ym')['is_weekd'].cumsum()

#timestamp when the calendar table was generated by this script
# df['created_on'] = datetime.now()

In [None]:
# Convert the pandas calendar table `df` into a Spark DataFrame.
# NOTE(review): `spark` is not defined in the visible code; presumably it is
# the SparkSession supplied by the notebook environment. Confirm.
calendar_df = spark.createDataFrame(df)
# Uncomment to preview the converted table in the notebook.
# display(calendar_df)

In [None]:
# Persist the calendar DataFrame as the table "calendardata", using the
# "overwrite" save mode.
(
    calendar_df.write
    .mode("overwrite")
    .saveAsTable("calendardata")
)