In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default styling to the matplotlib figures drawn in this notebook.
sns.set()
# IPython magic: render matplotlib figures inline, below the cell that creates them.
%matplotlib inline

In [None]:
# Seed NumPy's global generator so the simulated payments are the same on every run.
np.random.seed(180180)

# Poisson-distributed payment amounts in a single 'Payment' column.
# The draw order matters: df_1 and df_2 consume the same seeded random stream.
df_1 = pd.Series(np.random.poisson(357, 60), name='Payment').to_frame()
df_2 = pd.Series(np.random.poisson(600, 38), name='Payment').to_frame()

# Assignment 8.1
The DataFrame `df_1` is a collection of payments made by a customer. A payment has been made on every business day, and the last payment in the DataFrame was made on Sep 4, 2018. Create a DatetimeIndex for the DataFrame in which each date matches the corresponding payment date.
<details><summary>Hint 1</summary>
    <p>
        When creating a DatetimeIndex, it is possible to set the frequency to <b>B</b> for business days
    </p>
</details>
<details><summary>Hint 2</summary>
    <p>
        A DatetimeIndex can be generated with <b>pandas.date_range</b> by specifying a start and/or an end datetime, together with a number of periods and/or a frequency
    </p>
</details>
<details><summary>Hint 3</summary>
    <p>
        The index of a DataFrame can be set by assigning an Index object to the <b>index</b> attribute of the DataFrame
    </p>
</details>

In [None]:
# Build the payment dates backwards from the last payment (Sep 4, 2018):
# one business day ('B' = Monday to Friday) per row of df_1.
# pd.date_range is required here because the DatetimeIndex constructor no longer
# accepts start/end/periods (deprecated in pandas 0.24, removed in 1.0).
df_1.index = pd.date_range(end='2018-09-04', periods=len(df_1), freq='B')

# Assignment 8.2
The DataFrame `df_2` is likewise a collection of payments. These are, however, only made on Tuesdays, Saturdays and Sundays; the last payment was again made on Sep 4, 2018. Create a DatetimeIndex for this DataFrame in which each date matches the corresponding payment date.
<details><summary>Hint 1</summary>
    <p>
        Only using specific weekdays is possible through creating a custom business day (<b>pandas.tseries.offsets.CustomBusinessDay</b>) and setting a <b>weekmask</b>
    </p>
</details>
<details><summary>Hint 2</summary>
    <p>
        A DatetimeIndex can be generated with <b>pandas.date_range</b> by specifying a start and/or an end datetime, together with a number of periods and/or a frequency
    </p>
</details>
<details><summary>Hint 3</summary>
    <p>
        The index of a DataFrame can be set by assigning an Index object to the <b>index</b> attribute of the DataFrame
    </p>
</details>

In [None]:
# Custom "business day" calendar in which only Tuesdays, Saturdays and Sundays count.
payment_days = pd.tseries.offsets.CustomBusinessDay(weekmask='Tue Sat Sun')

# Every Tue/Sat/Sun from Jun 10 to Sep 4, 2018 (both inclusive). The number of dates
# must equal the number of rows in df_2, otherwise the assignment raises a ValueError.
# pd.date_range is required here because the DatetimeIndex constructor no longer
# accepts start/end/freq (deprecated in pandas 0.24, removed in 1.0).
df_2.index = pd.date_range(start='2018-06-10', end='2018-09-04', freq=payment_days)

# Assignment 8.3
Find out which of the two DataFrames (`df_1` or `df_2`) holds the largest sum paid in June 2018.
<details><summary>Hint 1</summary>
    <p>
        It is possible to select a subsample of a time series by passing a part of the date to <b>.loc</b>, e.g. <b>YYYY-MM</b> for the year/month part
    </p>
</details>

In [None]:
# Total paid in June 2018 by each DataFrame. Partial-date selection ('YYYY-MM') has
# to go through .loc: selecting rows with df['2018-06'] was deprecated in pandas 1.2
# and removed in 2.0, where the string is looked up as a column label instead.
june_totals = pd.Series({
    'df_1': df_1.loc['2018-06', 'Payment'].sum(),
    'df_2': df_2.loc['2018-06', 'Payment'].sum(),
})

# Name of the DataFrame with the larger June total (the first one wins on a tie).
june_totals.idxmax()

# Assignment 8.4
Find the month/year values between Jan 1, 2011 and Dec 31, 2018 where the last day of the month is a Sunday.
<details><summary>Hint 1</summary>
    <p>
        Creating a DatetimeIndex with frequency <b>M</b> (<b>ME</b> in pandas 2.2 and later) creates values at the end of each month
    </p>
</details>
<details><summary>Hint 2</summary>
    <p>
        A DatetimeIndex has a <b>dayofweek</b> attribute, that is <b>0</b> for Monday and <b>6</b> for Sunday
    </p>
</details>

In [None]:
# Last calendar day of every month from Jan 2011 to Dec 2018.
# pd.date_range replaces the DatetimeIndex(start=..., end=...) constructor form, which
# was removed in pandas 1.0. An explicit MonthEnd offset is used instead of the 'M'
# alias, which newer pandas versions deprecate in favour of 'ME'.
eom = pd.date_range(start='2011-01-01', end='2018-12-31', freq=pd.offsets.MonthEnd())

# Keep the month ends that fall on a Sunday (dayofweek: 0 = Monday ... 6 = Sunday)
# and format them as 'YYYY-MM' strings.
sunday_month_ends = eom[eom.dayofweek == 6].strftime('%Y-%m').values
sunday_month_ends

# Assignment 8.5
The payments in `df_1` represent savings for an investment scheme. The account balance is 0 at the time of the first payment, and every Sunday the maximum possible number of shares is bought. The price of each share is always 1000. Calculate the average daily account balance for the period between the date of the first payment and Sep 4, 2018.
<details><summary>Hint 1</summary>
    <p>
        The method <b>resample</b> can aggregate a timeseries to a different unit of time.
    </p>
</details>
<details><summary>Hint 2</summary>
    <p>
        The <b>expanding</b> method creates an expanding window, allowing us to compute a value over all entries from the first to the current.
    </p>
</details>
<details><summary>Hint 3</summary>
    <p>
        Using <b>asfreq</b> to bring both the total invested amount and a cumulative sum of the payments to a daily level allows us to do a left join and compute the difference between the total amount paid and the total amount invested for any date.
    </p>
</details>

In [None]:
def _whole_shares_value(paid_so_far):
    """Return the largest multiple of 1000 (the share price) covered by the payments so far."""
    return 1000 * (paid_so_far.sum() // 1000)


# Weekly payment totals, then the running total that has been converted into shares
# at each weekly step (an expanding window sees every week from the first to the current).
weekly_paid = df_1.resample('W').sum()
invested_weekly = weekly_paid.expanding().apply(_whole_shares_value, raw=True)

# Carry each weekly invested total forward day by day, up to the last payment date.
invested_daily = invested_weekly.asfreq('D', method='ffill').loc[:'2018-09-04']
amount_invested = invested_daily.rename(columns={'Payment': 'Invested'})

# Cumulative amount paid per calendar day; days without a payment contribute 0.
paid_daily = df_1.asfreq('D').fillna(0).cumsum()

# Days without a matching invested row get an invested amount of 0.
daily_totals = paid_daily.join(amount_invested, how='left').fillna(0)

# Average daily balance = mean of (total paid - total invested).
(daily_totals['Payment'] - daily_totals['Invested']).mean()