In [None]:
import pandas as pd
import numpy as np
# Relative path from notebooks/ to data/
file_path = "../data/google_stock_data.xlsx"

# Read the Excel file
price = pd.read_excel(file_path)

# Round all numerical columns to 2 decimal places
price = price.round(2)

# update the date format to YYYY-MM-DD
price['Date'] = pd.to_datetime(price['Date']).dt.strftime('%Y-%m-%d')

# Rename columns
price.rename(columns={'Date': 'date', 'Close_GOOG': 'close', 'High_GOOG': 'high'
                   ,'Low_GOOG': 'low', 'Open_GOOG': 'open'
                   ,'Volume_GOOG': 'volume'}, inplace=True)

price['date'] = pd.to_datetime(price['date'])
price = price.sort_values('date')

1. Set date as the DataFrame index.

In [2]:
price.set_index('date', inplace=True)

2. Create a daily price range column

In [None]:
price['price_range'] = price['high'] - price['low']

3. Create a column close_5d_avg that contains the 5-day rolling mean of close

In [4]:
price['close_5d_avg'] = price['close'].rolling(5).mean()

4. Create a column volume_7d_avg that shows the 7-day rolling average volume.

In [5]:
price['volume_7d_avg'] = price['volume'].rolling(7).mean()

5. Create a column daily_close_change = today’s close − yesterday’s close.

In [6]:
price['daily_close_change'] = price['close'] - price['close'].shift(1)

6. Find the date with the largest positive daily close change.

In [7]:
#price[price['daily_close_change'] == price['daily_close_change'].max()]['daily_close_change']
price.loc[price['daily_close_change'].idxmax(), ['daily_close_change']]

daily_close_change    15.63
Name: 2024-04-26 00:00:00, dtype: float64

7. Calculate 10-day rolling volatility using the standard deviation of close.

In [8]:
price['rolling_3d_volatility'] = price['daily_close_change'].rolling(3).std()
price

Unnamed: 0_level_0,close,high,low,open,volume,price_range,close_5d_avg,volume_7d_avg,daily_close_change,rolling_3d_volatility
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2020-01-02,67.90,67.94,66.62,66.62,28132000,1.32,,,,
2020-01-03,67.57,68.16,66.82,66.93,23728000,1.34,,,-0.33,
2020-01-06,69.24,69.35,67.04,67.04,34646000,2.31,,,1.67,
2020-01-07,69.19,69.67,69.05,69.42,30054000,0.62,,,-0.05,1.082959
2020-01-08,69.74,70.10,69.07,69.13,30560000,1.03,68.728,,0.55,0.873002
...,...,...,...,...,...,...,...,...,...,...
2024-12-24,196.93,197.03,194.57,195.54,6809800,2.46,192.652,2.548044e+07,1.57,0.910842
2024-12-26,196.46,197.52,195.24,196.10,7907900,2.28,194.036,2.200320e+07,-0.47,1.753292
2024-12-27,193.41,196.16,191.35,195.84,14693000,4.81,194.900,2.065513e+07,-3.05,2.315254
2024-12-30,192.07,193.15,189.75,190.25,12209500,3.40,194.846,1.845100e+07,-1.34,1.312593


8. Group data by month and calculate:

- Average close

- Max high

- Total volume

In [9]:
print(price.index)

DatetimeIndex(['2020-01-02', '2020-01-03', '2020-01-06', '2020-01-07',
               '2020-01-08', '2020-01-09', '2020-01-10', '2020-01-13',
               '2020-01-14', '2020-01-15',
               ...
               '2024-12-17', '2024-12-18', '2024-12-19', '2024-12-20',
               '2024-12-23', '2024-12-24', '2024-12-26', '2024-12-27',
               '2024-12-30', '2024-12-31'],
              dtype='datetime64[ns]', name='date', length=1258, freq=None)


In [10]:
# price['date'] = pd.to_datetime(price['date'])
price['monthly_avg'] = price.resample('M')['close'].transform('mean')
price

Unnamed: 0_level_0,close,high,low,open,volume,price_range,close_5d_avg,volume_7d_avg,daily_close_change,rolling_3d_volatility,monthly_avg
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-01-02,67.90,67.94,66.62,66.62,28132000,1.32,,,,,71.344762
2020-01-03,67.57,68.16,66.82,66.93,23728000,1.34,,,-0.33,,71.344762
2020-01-06,69.24,69.35,67.04,67.04,34646000,2.31,,,1.67,,71.344762
2020-01-07,69.19,69.67,69.05,69.42,30054000,0.62,,,-0.05,1.082959,71.344762
2020-01-08,69.74,70.10,69.07,69.13,30560000,1.03,68.728,,0.55,0.873002,71.344762
...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,196.93,197.03,194.57,195.54,6809800,2.46,192.652,2.548044e+07,1.57,0.910842,187.639048
2024-12-26,196.46,197.52,195.24,196.10,7907900,2.28,194.036,2.200320e+07,-0.47,1.753292,187.639048
2024-12-27,193.41,196.16,191.35,195.84,14693000,4.81,194.900,2.065513e+07,-3.05,2.315254,187.639048
2024-12-30,192.07,193.15,189.75,190.25,12209500,3.40,194.846,1.845100e+07,-1.34,1.312593,187.639048


In [11]:
price['monthly_max_high'] = price.resample('M')['high'].transform('max')
price

Unnamed: 0_level_0,close,high,low,open,volume,price_range,close_5d_avg,volume_7d_avg,daily_close_change,rolling_3d_volatility,monthly_avg,monthly_max_high
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-01-02,67.90,67.94,66.62,66.62,28132000,1.32,,,,,71.344762,74.65
2020-01-03,67.57,68.16,66.82,66.93,23728000,1.34,,,-0.33,,71.344762,74.65
2020-01-06,69.24,69.35,67.04,67.04,34646000,2.31,,,1.67,,71.344762,74.65
2020-01-07,69.19,69.67,69.05,69.42,30054000,0.62,,,-0.05,1.082959,71.344762,74.65
2020-01-08,69.74,70.10,69.07,69.13,30560000,1.03,68.728,,0.55,0.873002,71.344762,74.65
...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,196.93,197.03,194.57,195.54,6809800,2.46,192.652,2.548044e+07,1.57,0.910842,187.639048,202.23
2024-12-26,196.46,197.52,195.24,196.10,7907900,2.28,194.036,2.200320e+07,-0.47,1.753292,187.639048,202.23
2024-12-27,193.41,196.16,191.35,195.84,14693000,4.81,194.900,2.065513e+07,-3.05,2.315254,187.639048,202.23
2024-12-30,192.07,193.15,189.75,190.25,12209500,3.40,194.846,1.845100e+07,-1.34,1.312593,187.639048,202.23


In [13]:
price['monthly_total_volume'] = price.resample('M')['volume'].transform('sum')
price

Unnamed: 0_level_0,close,high,low,open,volume,price_range,close_5d_avg,volume_7d_avg,daily_close_change,rolling_3d_volatility,monthly_avg,monthly_max_high,monthly_total_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-01-02,67.90,67.94,66.62,66.62,28132000,1.32,,,,,71.344762,74.65,673832000
2020-01-03,67.57,68.16,66.82,66.93,23728000,1.34,,,-0.33,,71.344762,74.65,673832000
2020-01-06,69.24,69.35,67.04,67.04,34646000,2.31,,,1.67,,71.344762,74.65,673832000
2020-01-07,69.19,69.67,69.05,69.42,30054000,0.62,,,-0.05,1.082959,71.344762,74.65,673832000
2020-01-08,69.74,70.10,69.07,69.13,30560000,1.03,68.728,,0.55,0.873002,71.344762,74.65,673832000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,196.93,197.03,194.57,195.54,6809800,2.46,192.652,2.548044e+07,1.57,0.910842,187.639048,202.23,447499100
2024-12-26,196.46,197.52,195.24,196.10,7907900,2.28,194.036,2.200320e+07,-0.47,1.753292,187.639048,202.23,447499100
2024-12-27,193.41,196.16,191.35,195.84,14693000,4.81,194.900,2.065513e+07,-3.05,2.315254,187.639048,202.23,447499100
2024-12-30,192.07,193.15,189.75,190.25,12209500,3.40,194.846,1.845100e+07,-1.34,1.312593,187.639048,202.23,447499100


9. Create a boolean column high_volatility_day where:

- daily_range > average daily range

In [18]:
price['average_daily_range'] = price['price_range'].mean()
price['high_volatility_day'] = price['price_range'] > price['average_daily_range']
price

Unnamed: 0_level_0,close,high,low,open,volume,price_range,close_5d_avg,volume_7d_avg,daily_close_change,rolling_3d_volatility,monthly_avg,monthly_max_high,monthly_total_volume,average_daily_range,high_volatility_day
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-01-02,67.90,67.94,66.62,66.62,28132000,1.32,,,,,71.344762,74.65,673832000,2.617607,False
2020-01-03,67.57,68.16,66.82,66.93,23728000,1.34,,,-0.33,,71.344762,74.65,673832000,2.617607,False
2020-01-06,69.24,69.35,67.04,67.04,34646000,2.31,,,1.67,,71.344762,74.65,673832000,2.617607,False
2020-01-07,69.19,69.67,69.05,69.42,30054000,0.62,,,-0.05,1.082959,71.344762,74.65,673832000,2.617607,False
2020-01-08,69.74,70.10,69.07,69.13,30560000,1.03,68.728,,0.55,0.873002,71.344762,74.65,673832000,2.617607,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,196.93,197.03,194.57,195.54,6809800,2.46,192.652,2.548044e+07,1.57,0.910842,187.639048,202.23,447499100,2.617607,False
2024-12-26,196.46,197.52,195.24,196.10,7907900,2.28,194.036,2.200320e+07,-0.47,1.753292,187.639048,202.23,447499100,2.617607,False
2024-12-27,193.41,196.16,191.35,195.84,14693000,4.81,194.900,2.065513e+07,-3.05,2.315254,187.639048,202.23,447499100,2.617607,True
2024-12-30,192.07,193.15,189.75,190.25,12209500,3.40,194.846,1.845100e+07,-1.34,1.312593,187.639048,202.23,447499100,2.617607,True


10. Business Interpretation

What does a rising rolling average and increasing volatility suggest about a stock?

Rising rolling average means show a overall up trend of the data over time. Getting the rolling average reduces the short term noise and highlights long term trends. Increasing volatility shows the decreasing stability of the stock price. Even though the stock price rises over long time, the price swings with increasing magnitudes within short time periods. 

For short term traders, this could mean opportunities to make money through short term trading. Since the stock price goes up over time, they can find lows to by and highs to sell during the swings. Shorting is not preferable here because the stock price goes up over time.  