In [1]:
import pandas as pd
import numpy as np
# Relative path from notebooks/ to data/
file_path = "../data/google_stock_data.xlsx"

# Read the Excel file
price = pd.read_excel(file_path)

# Round all numerical columns to 2 decimal places
price = price.round(2)

# update the date format to YYYY-MM-DD
price['Date'] = pd.to_datetime(price['Date']).dt.strftime('%Y-%m-%d')

# Rename columns
price.rename(columns={'Date': 'date', 'Close_GOOG': 'close', 'High_GOOG': 'high'
                   ,'Low_GOOG': 'low', 'Open_GOOG': 'open'
                   ,'Volume_GOOG': 'volume'}, inplace=True)

price['date'] = pd.to_datetime(price['date'])
price = price.sort_values('date')

price.set_index('date', inplace=True)

1. Round all price columns (open, high, low, close) to integers using apply.

In [2]:
price[['open', 'high', 'low', 'close']].apply(np.int64)

Unnamed: 0_level_0,open,high,low,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-02,66,67,66,67
2020-01-03,66,68,66,67
2020-01-06,67,69,67,69
2020-01-07,69,69,69,69
2020-01-08,69,70,69,69
...,...,...,...,...
2024-12-24,195,197,194,196
2024-12-26,196,197,195,196
2024-12-27,195,196,191,193
2024-12-30,190,193,189,192


2. Convert volume to millions (e.g. 2,100,000 → 2.1) using an apply function.

In [3]:
#price['volume'] = price['volume']/ 1000000
price['volume'].apply(lambda x: x/1000000)

date
2020-01-02    28.1320
2020-01-03    23.7280
2020-01-06    34.6460
2020-01-07    30.0540
2020-01-08    30.5600
               ...   
2024-12-24     6.8098
2024-12-26     7.9079
2024-12-27    14.6930
2024-12-30    12.2095
2024-12-31    14.3552
Name: volume, Length: 1258, dtype: float64

3. Create a column price_movement using apply function:

- "UP" if close > open

- "DOWN" if close < open

- "FLAT" otherwise

In [4]:
price['price_movement'] = 'FLAT'
price.loc[price['close'] > price['open'], 'price_movement'] = 'UP'
price.loc[price['close'] > price['open'], 'price_movement'] = 'DOWN'
price

Unnamed: 0_level_0,close,high,low,open,volume,price_movement
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,67.90,67.94,66.62,66.62,28132000,DOWN
2020-01-03,67.57,68.16,66.82,66.93,23728000,DOWN
2020-01-06,69.24,69.35,67.04,67.04,34646000,DOWN
2020-01-07,69.19,69.67,69.05,69.42,30054000,FLAT
2020-01-08,69.74,70.10,69.07,69.13,30560000,DOWN
...,...,...,...,...,...,...
2024-12-24,196.93,197.03,194.57,195.54,6809800,DOWN
2024-12-26,196.46,197.52,195.24,196.10,7907900,DOWN
2024-12-27,193.41,196.16,191.35,195.84,14693000,FLAT
2024-12-30,192.07,193.15,189.75,190.25,12209500,DOWN


4. Create volatility_level:

- "HIGH" if (high - low) > 3

- "MEDIUM" if between 1 and 3

- "LOW" otherwise

In [5]:
price['volatility'] = price['high'] - price['low']

price['volatility_level'] = 'LOW'
price.loc[price['volatility'] > 3, 'volatility_level'] = 'HIGH'
price.loc[(price['volatility']<= 3) & (price['volatility'] >= 1), 'volatility_level'] = 'MEDIUM'
price

Unnamed: 0_level_0,close,high,low,open,volume,price_movement,volatility,volatility_level
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020-01-02,67.90,67.94,66.62,66.62,28132000,DOWN,1.32,MEDIUM
2020-01-03,67.57,68.16,66.82,66.93,23728000,DOWN,1.34,MEDIUM
2020-01-06,69.24,69.35,67.04,67.04,34646000,DOWN,2.31,MEDIUM
2020-01-07,69.19,69.67,69.05,69.42,30054000,FLAT,0.62,LOW
2020-01-08,69.74,70.10,69.07,69.13,30560000,DOWN,1.03,MEDIUM
...,...,...,...,...,...,...,...,...
2024-12-24,196.93,197.03,194.57,195.54,6809800,DOWN,2.46,MEDIUM
2024-12-26,196.46,197.52,195.24,196.10,7907900,DOWN,2.28,MEDIUM
2024-12-27,193.41,196.16,191.35,195.84,14693000,FLAT,4.81,HIGH
2024-12-30,192.07,193.15,189.75,190.25,12209500,DOWN,3.40,HIGH


5. Create a weekday column from date

In [6]:
price = price.reset_index()
price['weekday'] = price['date'].dt.day_name()
price

Unnamed: 0,date,close,high,low,open,volume,price_movement,volatility,volatility_level,weekday
0,2020-01-02,67.90,67.94,66.62,66.62,28132000,DOWN,1.32,MEDIUM,Thursday
1,2020-01-03,67.57,68.16,66.82,66.93,23728000,DOWN,1.34,MEDIUM,Friday
2,2020-01-06,69.24,69.35,67.04,67.04,34646000,DOWN,2.31,MEDIUM,Monday
3,2020-01-07,69.19,69.67,69.05,69.42,30054000,FLAT,0.62,LOW,Tuesday
4,2020-01-08,69.74,70.10,69.07,69.13,30560000,DOWN,1.03,MEDIUM,Wednesday
...,...,...,...,...,...,...,...,...,...,...
1253,2024-12-24,196.93,197.03,194.57,195.54,6809800,DOWN,2.46,MEDIUM,Tuesday
1254,2024-12-26,196.46,197.52,195.24,196.10,7907900,DOWN,2.28,MEDIUM,Thursday
1255,2024-12-27,193.41,196.16,191.35,195.84,14693000,FLAT,4.81,HIGH,Friday
1256,2024-12-30,192.07,193.15,189.75,190.25,12209500,DOWN,3.40,HIGH,Monday


6. Map weekdays to numbers:

- Monday → 1

- Friday → 5

In [7]:
mapping = {'Monday': 1, 'Tuesday': 2, 'Wednesday': 3, 'Thursday': 4, 'Friday': 5}
price['weekday_number'] = price['weekday'].map(mapping)
price

Unnamed: 0,date,close,high,low,open,volume,price_movement,volatility,volatility_level,weekday,weekday_number
0,2020-01-02,67.90,67.94,66.62,66.62,28132000,DOWN,1.32,MEDIUM,Thursday,4
1,2020-01-03,67.57,68.16,66.82,66.93,23728000,DOWN,1.34,MEDIUM,Friday,5
2,2020-01-06,69.24,69.35,67.04,67.04,34646000,DOWN,2.31,MEDIUM,Monday,1
3,2020-01-07,69.19,69.67,69.05,69.42,30054000,FLAT,0.62,LOW,Tuesday,2
4,2020-01-08,69.74,70.10,69.07,69.13,30560000,DOWN,1.03,MEDIUM,Wednesday,3
...,...,...,...,...,...,...,...,...,...,...,...
1253,2024-12-24,196.93,197.03,194.57,195.54,6809800,DOWN,2.46,MEDIUM,Tuesday,2
1254,2024-12-26,196.46,197.52,195.24,196.10,7907900,DOWN,2.28,MEDIUM,Thursday,4
1255,2024-12-27,193.41,196.16,191.35,195.84,14693000,FLAT,4.81,HIGH,Friday,5
1256,2024-12-30,192.07,193.15,189.75,190.25,12209500,DOWN,3.40,HIGH,Monday,1


7. Create signal column:

- "BUY" if close > open and volume > average volume

- "SELL" if close < open

- "HOLD" otherwise

In [8]:
price.set_index('date', inplace=True)
price['average_volume'] = price['volume'].mean()
price['price_diff'] = price['close'] - price['open']
price['trade_signal'] = "HOLD"
price.loc[(price['price_diff']> 0) & (price['volume'] > price['average_volume']), 'trade_signal'] = "BUY"
price.loc[price['price_diff']< 0, 'trade_signal'] = "SELL"
price


Unnamed: 0_level_0,close,high,low,open,volume,price_movement,volatility,volatility_level,weekday,weekday_number,average_volume,price_diff,trade_signal
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-01-02,67.90,67.94,66.62,66.62,28132000,DOWN,1.32,MEDIUM,Thursday,4,2.741160e+07,1.28,BUY
2020-01-03,67.57,68.16,66.82,66.93,23728000,DOWN,1.34,MEDIUM,Friday,5,2.741160e+07,0.64,HOLD
2020-01-06,69.24,69.35,67.04,67.04,34646000,DOWN,2.31,MEDIUM,Monday,1,2.741160e+07,2.20,BUY
2020-01-07,69.19,69.67,69.05,69.42,30054000,FLAT,0.62,LOW,Tuesday,2,2.741160e+07,-0.23,SELL
2020-01-08,69.74,70.10,69.07,69.13,30560000,DOWN,1.03,MEDIUM,Wednesday,3,2.741160e+07,0.61,BUY
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,196.93,197.03,194.57,195.54,6809800,DOWN,2.46,MEDIUM,Tuesday,2,2.741160e+07,1.39,HOLD
2024-12-26,196.46,197.52,195.24,196.10,7907900,DOWN,2.28,MEDIUM,Thursday,4,2.741160e+07,0.36,HOLD
2024-12-27,193.41,196.16,191.35,195.84,14693000,FLAT,4.81,HIGH,Friday,5,2.741160e+07,-2.43,SELL
2024-12-30,192.07,193.15,189.75,190.25,12209500,DOWN,3.40,HIGH,Monday,1,2.741160e+07,1.82,HOLD


Create strength_score:

- +1 if close > open

- +1 if close > previous day close

- +1 if volume > 7-day average

In [10]:
price['strength_score']= 0
price.loc[price['price_diff'] > 0, 'strength_score'] += 1
price.loc[price['close'] > price['close'].shift(1), 'strength_score'] += 1
price['volume_7d_avg'] = price['volume'].rolling(7).mean()
price.loc[price['volume'] > price['volume_7d_avg'], 'strength_score'] += 1
price

Unnamed: 0_level_0,close,high,low,open,volume,price_movement,volatility,volatility_level,weekday,weekday_number,average_volume,price_diff,trade_signal,strength_score,volume_7d_avg
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-01-02,67.90,67.94,66.62,66.62,28132000,DOWN,1.32,MEDIUM,Thursday,4,2.741160e+07,1.28,BUY,1,
2020-01-03,67.57,68.16,66.82,66.93,23728000,DOWN,1.34,MEDIUM,Friday,5,2.741160e+07,0.64,HOLD,1,
2020-01-06,69.24,69.35,67.04,67.04,34646000,DOWN,2.31,MEDIUM,Monday,1,2.741160e+07,2.20,BUY,2,
2020-01-07,69.19,69.67,69.05,69.42,30054000,FLAT,0.62,LOW,Tuesday,2,2.741160e+07,-0.23,SELL,0,
2020-01-08,69.74,70.10,69.07,69.13,30560000,DOWN,1.03,MEDIUM,Wednesday,3,2.741160e+07,0.61,BUY,2,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,196.93,197.03,194.57,195.54,6809800,DOWN,2.46,MEDIUM,Tuesday,2,2.741160e+07,1.39,HOLD,2,2.548044e+07
2024-12-26,196.46,197.52,195.24,196.10,7907900,DOWN,2.28,MEDIUM,Thursday,4,2.741160e+07,0.36,HOLD,1,2.200320e+07
2024-12-27,193.41,196.16,191.35,195.84,14693000,FLAT,4.81,HIGH,Friday,5,2.741160e+07,-2.43,SELL,0,2.065513e+07
2024-12-30,192.07,193.15,189.75,190.25,12209500,DOWN,3.40,HIGH,Monday,1,2.741160e+07,1.82,HOLD,1,1.845100e+07
