In [1]:
import pandas as pd
import numpy as np
# Relative path from notebooks/ to data/
file_path = "../data/google_stock_data.xlsx"

# Read the Excel file
price = pd.read_excel(file_path)

# Round all numerical columns to 2 decimal places
price = price.round(2)

# Rename columns
price.rename(columns={'Date': 'date', 'Close_GOOG': 'close', 'High_GOOG': 'high'
                   ,'Low_GOOG': 'low', 'Open_GOOG': 'open'
                   ,'Volume_GOOG': 'volume'}, inplace=True)

1. Convert the date column to datetime and verify the dtype.

In [2]:
price['date'] = pd.to_datetime(price['date'])
print(price['date'].dtype)

datetime64[ns]


2. Create the following columns:

- year

- month

- day

- weekday (Monday, Tuesday, etc.)

In [4]:
price['year'] = price['date'].dt.year
price['month'] = price['date'].dt.month
price['day'] = price['date'].dt.day
price['weekday'] = price['date'].dt.weekday
price

Unnamed: 0,date,close,high,low,open,volume,year,month,day,weekday
0,2020-01-02,67.90,67.94,66.62,66.62,28132000,2020,1,2,3
1,2020-01-03,67.57,68.16,66.82,66.93,23728000,2020,1,3,4
2,2020-01-06,69.24,69.35,67.04,67.04,34646000,2020,1,6,0
3,2020-01-07,69.19,69.67,69.05,69.42,30054000,2020,1,7,1
4,2020-01-08,69.74,70.10,69.07,69.13,30560000,2020,1,8,2
...,...,...,...,...,...,...,...,...,...,...
1253,2024-12-24,196.93,197.03,194.57,195.54,6809800,2024,12,24,1
1254,2024-12-26,196.46,197.52,195.24,196.10,7907900,2024,12,26,3
1255,2024-12-27,193.41,196.16,191.35,195.84,14693000,2024,12,27,4
1256,2024-12-30,192.07,193.15,189.75,190.25,12209500,2024,12,30,0


3. Select all rows after January 15, 2024.

In [5]:
price.loc[price['date'] > '2024-01-15']

Unnamed: 0,date,close,high,low,open,volume,year,month,day,weekday
1015,2024-01-16,143.10,144.85,142.08,142.46,19198900,2024,1,16,1
1016,2024-01-17,141.92,142.44,139.55,141.94,17884500,2024,1,17,2
1017,2024-01-18,144.00,144.60,142.38,142.47,18876800,2024,1,18,3
1018,2024-01-19,146.96,147.03,144.81,145.31,27181000,2024,1,19,4
1019,2024-01-22,146.71,149.00,146.58,147.70,21829200,2024,1,22,0
...,...,...,...,...,...,...,...,...,...,...
1253,2024-12-24,196.93,197.03,194.57,195.54,6809800,2024,12,24,1
1254,2024-12-26,196.46,197.52,195.24,196.10,7907900,2024,12,26,3
1255,2024-12-27,193.41,196.16,191.35,195.84,14693000,2024,12,27,4
1256,2024-12-30,192.07,193.15,189.75,190.25,12209500,2024,12,30,0


4. Set date as the DataFrame index.

Then:

- Check the index type

- Confirm it’s sorted

In [6]:
price.set_index('date', inplace= True)
price

Unnamed: 0_level_0,close,high,low,open,volume,year,month,day,weekday
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-02,67.90,67.94,66.62,66.62,28132000,2020,1,2,3
2020-01-03,67.57,68.16,66.82,66.93,23728000,2020,1,3,4
2020-01-06,69.24,69.35,67.04,67.04,34646000,2020,1,6,0
2020-01-07,69.19,69.67,69.05,69.42,30054000,2020,1,7,1
2020-01-08,69.74,70.10,69.07,69.13,30560000,2020,1,8,2
...,...,...,...,...,...,...,...,...,...
2024-12-24,196.93,197.03,194.57,195.54,6809800,2024,12,24,1
2024-12-26,196.46,197.52,195.24,196.10,7907900,2024,12,26,3
2024-12-27,193.41,196.16,191.35,195.84,14693000,2024,12,27,4
2024-12-30,192.07,193.15,189.75,190.25,12209500,2024,12,30,0


In [8]:
print(price.index.dtype)
print(price.index.is_monotonic_increasing)

datetime64[ns]
True


5. esample data to monthly frequency and compute:

- Mean close

- Total volume

In [15]:
price['average_monthly_close']= price['close'].resample('M').transform('mean')
price['total_monthly_volume']= price['volume'].resample('M').transform('sum')
price

Unnamed: 0_level_0,close,high,low,open,volume,year,month,day,weekday,average_monthly_close,total_monthly_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-01-02,67.90,67.94,66.62,66.62,28132000,2020,1,2,3,71.344762,673832000
2020-01-03,67.57,68.16,66.82,66.93,23728000,2020,1,3,4,71.344762,673832000
2020-01-06,69.24,69.35,67.04,67.04,34646000,2020,1,6,0,71.344762,673832000
2020-01-07,69.19,69.67,69.05,69.42,30054000,2020,1,7,1,71.344762,673832000
2020-01-08,69.74,70.10,69.07,69.13,30560000,2020,1,8,2,71.344762,673832000
...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,196.93,197.03,194.57,195.54,6809800,2024,12,24,1,187.639048,447499100
2024-12-26,196.46,197.52,195.24,196.10,7907900,2024,12,26,3,187.639048,447499100
2024-12-27,193.41,196.16,191.35,195.84,14693000,2024,12,27,4,187.639048,447499100
2024-12-30,192.07,193.15,189.75,190.25,12209500,2024,12,30,0,187.639048,447499100


6. Resample to weekly data and compute:

- Last close price of each week

- Max high of each week