In [2]:
import pandas as pd
import numpy as np
# Relative path from notebooks/ to data/
file_path = "../data/google_stock_data.xlsx"

# Read the Excel file
price = pd.read_excel(file_path)

# Round all numerical columns to 2 decimal places
price = price.round(2)

# Rename columns
price.rename(columns={'Date': 'date', 'Close_GOOG': 'close', 'High_GOOG': 'high'
                   ,'Low_GOOG': 'low', 'Open_GOOG': 'open'
                   ,'Volume_GOOG': 'volume'}, inplace=True)

1. Convert the date column to datetime and verify the dtype.

In [3]:
price['date'] = pd.to_datetime(price['date'])
print(price['date'].dtype)

datetime64[ns]


2. Create the following columns:

- year

- month

- day

- weekday (Monday, Tuesday, etc.)

In [4]:
price['year'] = price['date'].dt.year
price['month'] = price['date'].dt.month
price['day'] = price['date'].dt.day
price['weekday'] = price['date'].dt.weekday
price

Unnamed: 0,date,close,high,low,open,volume,year,month,day,weekday
0,2020-01-02,67.90,67.94,66.62,66.62,28132000,2020,1,2,3
1,2020-01-03,67.57,68.16,66.82,66.93,23728000,2020,1,3,4
2,2020-01-06,69.24,69.35,67.04,67.04,34646000,2020,1,6,0
3,2020-01-07,69.19,69.67,69.05,69.42,30054000,2020,1,7,1
4,2020-01-08,69.74,70.10,69.07,69.13,30560000,2020,1,8,2
...,...,...,...,...,...,...,...,...,...,...
1253,2024-12-24,196.93,197.03,194.57,195.54,6809800,2024,12,24,1
1254,2024-12-26,196.46,197.52,195.24,196.10,7907900,2024,12,26,3
1255,2024-12-27,193.41,196.16,191.35,195.84,14693000,2024,12,27,4
1256,2024-12-30,192.07,193.15,189.75,190.25,12209500,2024,12,30,0


3. Select all rows after January 15, 2024.

In [5]:
price.loc[price['date'] > '2024-01-15']

Unnamed: 0,date,close,high,low,open,volume,year,month,day,weekday
1015,2024-01-16,143.10,144.85,142.08,142.46,19198900,2024,1,16,1
1016,2024-01-17,141.92,142.44,139.55,141.94,17884500,2024,1,17,2
1017,2024-01-18,144.00,144.60,142.38,142.47,18876800,2024,1,18,3
1018,2024-01-19,146.96,147.03,144.81,145.31,27181000,2024,1,19,4
1019,2024-01-22,146.71,149.00,146.58,147.70,21829200,2024,1,22,0
...,...,...,...,...,...,...,...,...,...,...
1253,2024-12-24,196.93,197.03,194.57,195.54,6809800,2024,12,24,1
1254,2024-12-26,196.46,197.52,195.24,196.10,7907900,2024,12,26,3
1255,2024-12-27,193.41,196.16,191.35,195.84,14693000,2024,12,27,4
1256,2024-12-30,192.07,193.15,189.75,190.25,12209500,2024,12,30,0


4. Set date as the DataFrame index.

Then:

- Check the index type

- Confirm it’s sorted

In [6]:
price.set_index('date', inplace= True)
price

Unnamed: 0_level_0,close,high,low,open,volume,year,month,day,weekday
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-02,67.90,67.94,66.62,66.62,28132000,2020,1,2,3
2020-01-03,67.57,68.16,66.82,66.93,23728000,2020,1,3,4
2020-01-06,69.24,69.35,67.04,67.04,34646000,2020,1,6,0
2020-01-07,69.19,69.67,69.05,69.42,30054000,2020,1,7,1
2020-01-08,69.74,70.10,69.07,69.13,30560000,2020,1,8,2
...,...,...,...,...,...,...,...,...,...
2024-12-24,196.93,197.03,194.57,195.54,6809800,2024,12,24,1
2024-12-26,196.46,197.52,195.24,196.10,7907900,2024,12,26,3
2024-12-27,193.41,196.16,191.35,195.84,14693000,2024,12,27,4
2024-12-30,192.07,193.15,189.75,190.25,12209500,2024,12,30,0


In [7]:
print(price.index.dtype)
print(price.index.is_monotonic_increasing)

datetime64[ns]
True


5. Resample data to monthly frequency and compute:

- Mean close

- Total volume

In [8]:
price['average_monthly_close']= price['close'].resample('M').transform('mean')
price['total_monthly_volume']= price['volume'].resample('M').transform('sum')
price

Unnamed: 0_level_0,close,high,low,open,volume,year,month,day,weekday,average_monthly_close,total_monthly_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-01-02,67.90,67.94,66.62,66.62,28132000,2020,1,2,3,71.344762,673832000
2020-01-03,67.57,68.16,66.82,66.93,23728000,2020,1,3,4,71.344762,673832000
2020-01-06,69.24,69.35,67.04,67.04,34646000,2020,1,6,0,71.344762,673832000
2020-01-07,69.19,69.67,69.05,69.42,30054000,2020,1,7,1,71.344762,673832000
2020-01-08,69.74,70.10,69.07,69.13,30560000,2020,1,8,2,71.344762,673832000
...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,196.93,197.03,194.57,195.54,6809800,2024,12,24,1,187.639048,447499100
2024-12-26,196.46,197.52,195.24,196.10,7907900,2024,12,26,3,187.639048,447499100
2024-12-27,193.41,196.16,191.35,195.84,14693000,2024,12,27,4,187.639048,447499100
2024-12-30,192.07,193.15,189.75,190.25,12209500,2024,12,30,0,187.639048,447499100


6. Resample to weekly data and compute:

- Last close price of each week

- Max high of each week

In [9]:
price['weekly_last_close'] = price['close'].resample('W').transform('last')
price['weekly_max_high'] = price['high'].resample('W').transform('max')
price

Unnamed: 0_level_0,close,high,low,open,volume,year,month,day,weekday,average_monthly_close,total_monthly_volume,weekly_last_close,weekly_max_high
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-01-02,67.90,67.94,66.62,66.62,28132000,2020,1,2,3,71.344762,673832000,67.57,68.16
2020-01-03,67.57,68.16,66.82,66.93,23728000,2020,1,3,4,71.344762,673832000,67.57,68.16
2020-01-06,69.24,69.35,67.04,67.04,34646000,2020,1,6,0,71.344762,673832000,71.00,71.26
2020-01-07,69.19,69.67,69.05,69.42,30054000,2020,1,7,1,71.344762,673832000,71.00,71.26
2020-01-08,69.74,70.10,69.07,69.13,30560000,2020,1,8,2,71.344762,673832000,71.00,71.26
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,196.93,197.03,194.57,195.54,6809800,2024,12,24,1,187.639048,447499100,193.41,197.52
2024-12-26,196.46,197.52,195.24,196.10,7907900,2024,12,26,3,187.639048,447499100,193.41,197.52
2024-12-27,193.41,196.16,191.35,195.84,14693000,2024,12,27,4,187.639048,447499100,193.41,197.52
2024-12-30,192.07,193.15,189.75,190.25,12209500,2024,12,30,0,187.639048,447499100,189.83,193.15


7. What is the difference between:

- 7-day rolling average of close

- Weekly resampled average close


The 7 day rolling average take 7 days moving window and calculates average. The calculated average could different between any given adjacent dates. In contrast, weekly resampled average is common for all the dates that were resampled to calculate the average.

8. Compute a 7-day rolling average of close.

- Does this reduce the number of rows?

- Why or why not?

In [10]:
price['7day_rolling_avg']= price['close'].rolling(7).mean()
price

Unnamed: 0_level_0,close,high,low,open,volume,year,month,day,weekday,average_monthly_close,total_monthly_volume,weekly_last_close,weekly_max_high,7day_rolling_avg
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-01-02,67.90,67.94,66.62,66.62,28132000,2020,1,2,3,71.344762,673832000,67.57,68.16,
2020-01-03,67.57,68.16,66.82,66.93,23728000,2020,1,3,4,71.344762,673832000,67.57,68.16,
2020-01-06,69.24,69.35,67.04,67.04,34646000,2020,1,6,0,71.344762,673832000,71.00,71.26,
2020-01-07,69.19,69.67,69.05,69.42,30054000,2020,1,7,1,71.344762,673832000,71.00,71.26,
2020-01-08,69.74,70.10,69.07,69.13,30560000,2020,1,8,2,71.344762,673832000,71.00,71.26,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,196.93,197.03,194.57,195.54,6809800,2024,12,24,1,187.639048,447499100,193.41,197.52,193.894286
2024-12-26,196.46,197.52,195.24,196.10,7907900,2024,12,26,3,187.639048,447499100,193.41,197.52,193.742857
2024-12-27,193.41,196.16,191.35,195.84,14693000,2024,12,27,4,187.639048,447499100,193.41,197.52,193.304286
2024-12-30,192.07,193.15,189.75,190.25,12209500,2024,12,30,0,187.639048,447499100,189.83,193.15,193.665714


This does not reduce the number of rows since the seven day window is moving across dates taking one by one over time. the 7 day rolling average is calculated for every day using close values of that day and last 6 days.

9. Using resampled data:

- Find the month with the highest total volume

- Return the month and volume

In [22]:
price['total_monthly_volume'].max()
price.loc[price['total_monthly_volume'].idxmax(), 'month']

3

10. You are given daily stock data. How would you compute monthly returns?

No full code required — outline the steps.

First the date column of the dataframe will be set as the index. then the last close value of each month is selected. Then a new column will be calculated by subtracting the last close of the previous month from the last close of the current month. 

Below is the code.

In [30]:
monthly_returns_df = price['close'].resample('M').last().to_frame()
monthly_returns_df['month'] = monthly_returns_df.index.month
monthly_returns_df['monthly_return'] = monthly_returns_df['close'] - monthly_returns_df['close'].shift(1)
monthly_returns_df

Unnamed: 0_level_0,close,month,monthly_return
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-31,71.22,1,
2020-02-29,66.51,2,-4.71
2020-03-31,57.75,3,-8.76
2020-04-30,66.97,4,9.22
2020-05-31,70.96,5,3.99
2020-06-30,70.2,6,-0.76
2020-07-31,73.64,7,3.44
2020-08-31,81.15,8,7.51
2020-09-30,72.98,9,-8.17
2020-10-31,80.5,10,7.52
