In [1]:
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.animation as animation


import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff


# Step 1: Loading Dataset

In [2]:
# Load the dataset
df = pd.read_csv("Data\Daily Stock Historical\DC_AMZN_daily_historical_data.csv")

# Removing leading and trailing spaces from column names
df.columns = df.columns.str.strip()

# Using a regular expression to replace multiple spaces with a single space in all column names
df.columns = df.columns.str.replace(r'\s+', ' ', regex=True)  

# # Dropping columns that are not needed
df.drop(["Unnamed: 0", "Adj Close"], axis=1, inplace=True)

# Step 2: Overview of Dataset

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3968 entries, 0 to 3967
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    3968 non-null   object 
 1   Open    3968 non-null   float64
 2   High    3968 non-null   float64
 3   Low     3968 non-null   float64
 4   Close   3968 non-null   float64
 5   Volume  3968 non-null   int64  
dtypes: float64(4), int64(1), object(1)
memory usage: 186.1+ KB


In [4]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000


# Step 3: EDA - Missing Values Analysis 

## Step 3)i): EDA - Show Missing Values in each Column

In [5]:
def display_columns_with_null_values(df: pd.DataFrame):
    """
    Displays the total number of null values for each column in the dataframe,
    showing only columns that have null values.
    
    Parameters:
    - df (pd.DataFrame): The dataframe to be checked for null values.
    
    Returns:
    - None: Prints the columns with null values and their counts.
    """
    
    # Get total null values in each column
    total_null_values = df.isnull().sum()
    
    # Filter out columns that don't have any null values
    columns_with_null = total_null_values[total_null_values > 0].sort_values(ascending=False)
    
    # Check if there are any columns with null values
    if not columns_with_null.empty:
        print('-' * 64)
        print("Total null values in each column (only columns with null values)")
        print('-' * 64)
        print(columns_with_null)
    else:
        print('-' * 64)
        print("Total null values in each column (only columns with null values)")
        print('-' * 64)
        print("No columns have null values.")

In [6]:
# Get percentage of null values in each column
null_values_percentage = df.isnull().mean().round(4).mul(100).sort_values(ascending=False)
print('-' * 44)
print("Percentage(%) of null values in each column")
print('-' * 44)
print(null_values_percentage)
print('\n')

# Get total null values in each column
display_columns_with_null_values(df)


--------------------------------------------
Percentage(%) of null values in each column
--------------------------------------------
Date      0.0
Open      0.0
High      0.0
Low       0.0
Close     0.0
Volume    0.0
dtype: float64


----------------------------------------------------------------
Total null values in each column (only columns with null values)
----------------------------------------------------------------
No columns have null values.


## Step 3)ii): EDA - Handling Missing Values

In [7]:
# # Drop rows with missing values
df = df.dropna()


# Step 4: EDA - Duplicate Values Analysis 

## Step 4)i): EDA - Show Duplicate Values Rows

In [8]:
# Get percentage of duplicate rows
total_rows = len(df)
duplicate_rows = df.duplicated().sum()
duplicate_percentage = (duplicate_rows / total_rows) * 100

print('-' * 48)
print("Percentage(%) of duplicate rows in the DataFrame")
print('-' * 48)
print(f"{duplicate_percentage:.2f}%")
print('\n')

# Get total number of duplicate rows
print('-' * 30)
print("Total number of duplicate rows")
print('-' * 30)
print(duplicate_rows)


------------------------------------------------
Percentage(%) of duplicate rows in the DataFrame
------------------------------------------------
0.00%


------------------------------
Total number of duplicate rows
------------------------------
0


## Step 4)ii): EDA - Handling Duplicate Values Rows

In [9]:
# # Drop all duplicate rows
# df_cleaned = df.drop_duplicates()

# print('-' * 22)
# print("Duplicate rows dropped")
# print('-' * 22)
# print(f"Original number of rows: {total_rows}")
# print(f"Number of rows after dropping duplicates: {len(df_cleaned)}")

# Step 6): EDA - Feature Engineering 

## Step 6)i): EDA - Monthly Return

* Definition: The percentage change in the stock price from the beginning to the end of the month.

* Reasoning: Monthly returns offer insights into the stock's performance over a month, which helps to understand its momentum and profitability potential.

In [10]:
df['Monthly_Return'] = df['Close'].pct_change(periods=21)

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,


## Step 6)ii): EDA - Monthly Moving Average (MA)

* Definition: The average closing price over the last 21 trading days (approx. one month).

* Reasoning: Moving averages smooth out daily price fluctuations, giving a clearer trend direction.

In [11]:
df['MA_21'] = df['Close'].rolling(window=21).mean()

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,


## Step 6)iii): EDA - Exponential Moving Average (EMA)

* Definition: Similar to the MA but gives more weight to recent prices.

* Reasoning: EMA captures recent price momentum and responds faster to price changes.

In [12]:
df['EMA_21'] = df['Close'].ewm(span=21, adjust=False).mean()
df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762


## Step 6)iv): EDA - Bollinger Bands

* Definition: Consists of an upper, middle (MA), and lower band to indicate volatility.

* Reasoning: Useful for spotting volatility breakouts or trends.

In [13]:
df['MA_21'] = df['Close'].rolling(window=21).mean()
df['BB_Upper'] = df['MA_21'] + 2*df['Close'].rolling(window=21).std()
df['BB_Lower'] = df['MA_21'] - 2*df['Close'].rolling(window=21).std()

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,BB_Lower
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,


## Step 6)v): EDA - Relative Strength Index (RSI)

* Definition: A momentum oscillator that measures the speed and change of price movements.

* Reasoning: RSI helps identify overbought or oversold conditions in the market.

In [14]:
delta = df['Close'].diff()
gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
df['RSI'] = 100 - (100 / (1 + gain / loss))

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,BB_Lower,RSI
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,,
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,,
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,,
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,,
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,,
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,,
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,,
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,,
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,,
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,,


## Step 6)vi): EDA - Moving Average Convergence Divergence (MACD)

* Definition: The difference between two EMAs (typically 12 and 26 days).

* Reasoning: MACD helps identify potential buy and sell signals based on trend direction.

In [15]:
df['EMA_12'] = df['Close'].ewm(span=12, adjust=False).mean()
df['EMA_26'] = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = df['EMA_12'] - df['EMA_26']

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,BB_Lower,RSI,EMA_12,EMA_26,MACD
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,,,2.718,2.718,0.0
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,,,2.715692,2.716889,-0.001197
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,,,2.739124,2.728082,0.011042
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,,,2.750028,2.73415,0.015878
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,,,2.766639,2.743324,0.023315
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,,,2.768002,2.745708,0.022295
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,,,2.741541,2.734618,0.006922
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,,,2.715534,2.722609,-0.007075
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,,,2.67076,2.700527,-0.029768
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,,,2.655566,2.691007,-0.035441


## Step 6)vii): EDA - Stochastic Oscillator

* Definition: A momentum indicator that compares a specific closing price to a range of its prices over a certain period.

* Reasoning: Identifies overbought and oversold conditions and potential reversals.

In [16]:
df['Lowest_Low_14'] = df['Low'].rolling(window=14).min()
df['Highest_High_14'] = df['High'].rolling(window=14).max()
df['Stochastic_Oscillator'] = (df['Close'] - df['Lowest_Low_14']) / (df['Highest_High_14'] - df['Lowest_Low_14']) * 100

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,BB_Lower,RSI,EMA_12,EMA_26,MACD,Lowest_Low_14,Highest_High_14,Stochastic_Oscillator
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,,,2.718,2.718,0.0,,,
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,,,2.715692,2.716889,-0.001197,,,
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,,,2.739124,2.728082,0.011042,,,
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,,,2.750028,2.73415,0.015878,,,
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,,,2.766639,2.743324,0.023315,,,
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,,,2.768002,2.745708,0.022295,,,
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,,,2.741541,2.734618,0.006922,,,
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,,,2.715534,2.722609,-0.007075,,,
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,,,2.67076,2.700527,-0.029768,,,
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,,,2.655566,2.691007,-0.035441,,,


## Step 6)viii): EDA - Average True Range (ATR)

* Definition: Measures market volatility by decomposing the range of stock prices for each period.

* Reasoning: High ATR indicates strong volatility, often preceding a breakout or breakdown.

In [17]:
df['TR'] = df[['High', 'Low', 'Close']].diff().abs().max(axis=1)
df['ATR'] = df['TR'].rolling(window=14).mean()

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,BB_Lower,RSI,EMA_12,EMA_26,MACD,Lowest_Low_14,Highest_High_14,Stochastic_Oscillator,TR,ATR
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,,,2.718,2.718,0.0,,,,,
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,,,2.715692,2.716889,-0.001197,,,,0.098,
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,,,2.739124,2.728082,0.011042,,,,0.165,
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,,,2.750028,2.73415,0.015878,,,,0.08,
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,,,2.766639,2.743324,0.023315,,,,0.048,
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,,,2.768002,2.745708,0.022295,,,,0.0825,
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,,,2.741541,2.734618,0.006922,,,,0.1915,
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,,,2.715534,2.722609,-0.007075,,,,0.0505,
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,,,2.67076,2.700527,-0.029768,,,,0.1595,
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,,,2.655566,2.691007,-0.035441,,,,0.1475,


## Step 6)ix): EDA - Price Change Feature

* Definition: The percentage change of the close price over each month.

* Reasoning: A straightforward indicator of price direction, helpful for capturing trends.

In [18]:
df['Price_Change'] = df['Close'].pct_change(periods=21)

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,...,RSI,EMA_12,EMA_26,MACD,Lowest_Low_14,Highest_High_14,Stochastic_Oscillator,TR,ATR,Price_Change
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,...,,2.718,2.718,0.0,,,,,,
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,...,,2.715692,2.716889,-0.001197,,,,0.098,,
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,...,,2.739124,2.728082,0.011042,,,,0.165,,
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,...,,2.750028,2.73415,0.015878,,,,0.08,,
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,...,,2.766639,2.743324,0.023315,,,,0.048,,
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,...,,2.768002,2.745708,0.022295,,,,0.0825,,
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,...,,2.741541,2.734618,0.006922,,,,0.1915,,
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,...,,2.715534,2.722609,-0.007075,,,,0.0505,,
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,...,,2.67076,2.700527,-0.029768,,,,0.1595,,
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,...,,2.655566,2.691007,-0.035441,,,,0.1475,,


## Step 6)x): EDA - Volume Moving Average

* Definition: The moving average of the trading volume over a period.

* Reasoning: Tracks how trading activity is changing over time, which can hint at momentum shifts.

In [19]:
df['Volume_MA_21'] = df['Volume'].rolling(window=21).mean()

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,...,EMA_12,EMA_26,MACD,Lowest_Low_14,Highest_High_14,Stochastic_Oscillator,TR,ATR,Price_Change,Volume_MA_21
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,...,2.718,2.718,0.0,,,,,,,
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,...,2.715692,2.716889,-0.001197,,,,0.098,,,
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,...,2.739124,2.728082,0.011042,,,,0.165,,,
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,...,2.750028,2.73415,0.015878,,,,0.08,,,
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,...,2.766639,2.743324,0.023315,,,,0.048,,,
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,...,2.768002,2.745708,0.022295,,,,0.0825,,,
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,...,2.741541,2.734618,0.006922,,,,0.1915,,,
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,...,2.715534,2.722609,-0.007075,,,,0.0505,,,
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,...,2.67076,2.700527,-0.029768,,,,0.1595,,,
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,...,2.655566,2.691007,-0.035441,,,,0.1475,,,


## Step 6)xi): EDA - Momentum Indicator

* Definition: The rate of acceleration of a security’s price or volume.

* Reasoning: A key factor in assessing how quickly price movements are changing.

In [20]:
df['Momentum_21'] = df['Close'] - df['Close'].shift(21)

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,...,EMA_26,MACD,Lowest_Low_14,Highest_High_14,Stochastic_Oscillator,TR,ATR,Price_Change,Volume_MA_21,Momentum_21
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,...,2.718,0.0,,,,,,,,
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,...,2.716889,-0.001197,,,,0.098,,,,
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,...,2.728082,0.011042,,,,0.165,,,,
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,...,2.73415,0.015878,,,,0.08,,,,
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,...,2.743324,0.023315,,,,0.048,,,,
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,...,2.745708,0.022295,,,,0.0825,,,,
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,...,2.734618,0.006922,,,,0.1915,,,,
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,...,2.722609,-0.007075,,,,0.0505,,,,
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,...,2.700527,-0.029768,,,,0.1595,,,,
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,...,2.691007,-0.035441,,,,0.1475,,,,


## Step 6)xii): EDA - On-Balance Volume (OBV)

* Definition: A cumulative total of volume that adds volume on up days and subtracts volume on down days.

* Reasoning: Helps detect whether a stock is being accumulated or distributed.

In [21]:
df['OBV'] = (df['Volume'] * (df['Close'].diff() > 0).astype(int)).cumsum()

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,...,MACD,Lowest_Low_14,Highest_High_14,Stochastic_Oscillator,TR,ATR,Price_Change,Volume_MA_21,Momentum_21,OBV
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,...,0.0,,,,,,,,,0
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,...,-0.001197,,,,0.098,,,,,0
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,...,0.011042,,,,0.165,,,,,221602000
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,...,0.015878,,,,0.08,,,,,221602000
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,...,0.023315,,,,0.048,,,,,353160000
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,...,0.022295,,,,0.0825,,,,,353160000
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,...,0.006922,,,,0.1915,,,,,353160000
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,...,-0.007075,,,,0.0505,,,,,353160000
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,...,-0.029768,,,,0.1595,,,,,353160000
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,...,-0.035441,,,,0.1475,,,,,585506000


## Step 6)xiii): EDA - Cumulative Return

* Definition: The total return on an asset over a set period, calculated from daily returns.

* Reasoning: Tracks overall stock performance, capturing the compounding effect of returns.

In [22]:
df['Cumulative_Return'] = (1 + df['Close'].pct_change()).cumprod()

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,...,Lowest_Low_14,Highest_High_14,Stochastic_Oscillator,TR,ATR,Price_Change,Volume_MA_21,Momentum_21,OBV,Cumulative_Return
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,...,,,,,,,,,0,
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,...,,,,0.098,,,,,0,0.994481
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,...,,,,0.165,,,,,221602000,1.055188
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,...,,,,0.08,,,,,221602000,1.033848
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,...,,,,0.048,,,,,353160000,1.051509
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,...,,,,0.0825,,,,,353160000,1.021155
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,...,,,,0.1915,,,,,353160000,0.955114
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,...,,,,0.0505,,,,,353160000,0.946468
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,...,,,,0.1595,,,,,353160000,0.892016
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,...,,,,0.1475,,,,,585506000,0.946284


## Step 6)xiv): EDA - Rate of Change (ROC)

* Definition: Measures the percentage change in price over a set period.

* Reasoning: Identifies the speed of the price movement, essential for momentum-based strategies.

In [23]:
df['ROC_21'] = df['Close'].pct_change(periods=21)

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,...,Highest_High_14,Stochastic_Oscillator,TR,ATR,Price_Change,Volume_MA_21,Momentum_21,OBV,Cumulative_Return,ROC_21
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,...,,,,,,,,0,,
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,...,,,0.098,,,,,0,0.994481,
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,...,,,0.165,,,,,221602000,1.055188,
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,...,,,0.08,,,,,221602000,1.033848,
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,...,,,0.048,,,,,353160000,1.051509,
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,...,,,0.0825,,,,,353160000,1.021155,
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,...,,,0.1915,,,,,353160000,0.955114,
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,...,,,0.0505,,,,,353160000,0.946468,
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,...,,,0.1595,,,,,353160000,0.892016,
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,...,,,0.1475,,,,,585506000,0.946284,


## Step 6)xv): EDA - Volatility (Standard Deviation)

* Definition: Measures the dispersion of returns for a given security.

* Reasoning: Helps in assessing risk; higher volatility often signals greater risk.

In [24]:
df['Volatility'] = df['Close'].rolling(window=21).std()
df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,...,Stochastic_Oscillator,TR,ATR,Price_Change,Volume_MA_21,Momentum_21,OBV,Cumulative_Return,ROC_21,Volatility
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,...,,,,,,,0,,,
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,...,,0.098,,,,,0,0.994481,,
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,...,,0.165,,,,,221602000,1.055188,,
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,...,,0.08,,,,,221602000,1.033848,,
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,...,,0.048,,,,,353160000,1.051509,,
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,...,,0.0825,,,,,353160000,1.021155,,
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,...,,0.1915,,,,,353160000,0.955114,,
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,...,,0.0505,,,,,353160000,0.946468,,
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,...,,0.1595,,,,,353160000,0.892016,,
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,...,,0.1475,,,,,585506000,0.946284,,


## Step 6)xvi): EDA - Price Gap

* Definition: The difference between the previous day’s close and the current day’s open.

* Reasoning: Identifies potential entry/exit points based on gaps that may indicate momentum shifts.

In [25]:
df['Price_Gap'] = df['Open'] - df['Close'].shift(1)
df.head(50)


Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,...,TR,ATR,Price_Change,Volume_MA_21,Momentum_21,OBV,Cumulative_Return,ROC_21,Volatility,Price_Gap
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,...,,,,,,0,,,,
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,...,0.098,,,,,0,0.994481,,,0.0685
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,...,0.165,,,,,221602000,1.055188,,,0.0245
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,...,0.08,,,,,221602000,1.033848,,,-0.0535
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,...,0.048,,,,,353160000,1.051509,,,-0.0605
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,...,0.0825,,,,,353160000,1.021155,,,-0.012
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,...,0.1915,,,,,353160000,0.955114,,,-0.0695
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,...,0.0505,,,,,353160000,0.946468,,,-0.048
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,...,0.1595,,,,,353160000,0.892016,,,-0.0675
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,...,0.1475,,,,,585506000,0.946284,,,0.0035


## Step 6)xvii): EDA - Lag Features

* Definition: Previous days' values for the same feature, such as lagging the closing price by 1, 2, or 3 days.

* Reasoning: Helps in capturing temporal patterns and relationships across time.

In [26]:
df['Lag_1'] = df['Close'].shift(1)
df['Lag_2'] = df['Close'].shift(2)
df['Lag_3'] = df['Close'].shift(3)

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,...,Volume_MA_21,Momentum_21,OBV,Cumulative_Return,ROC_21,Volatility,Price_Gap,Lag_1,Lag_2,Lag_3
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,...,,,0,,,,,,,
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,...,,,0,0.994481,,,0.0685,2.718,,
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,...,,,221602000,1.055188,,,0.0245,2.703,2.718,
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,...,,,221602000,1.033848,,,-0.0535,2.868,2.703,2.718
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,...,,,353160000,1.051509,,,-0.0605,2.81,2.868,2.703
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,...,,,353160000,1.021155,,,-0.012,2.858,2.81,2.868
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,...,,,353160000,0.955114,,,-0.0695,2.7755,2.858,2.81
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,...,,,353160000,0.946468,,,-0.048,2.596,2.7755,2.858
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,...,,,353160000,0.892016,,,-0.0675,2.5725,2.596,2.7755
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,...,,,585506000,0.946284,,,0.0035,2.4245,2.5725,2.596


## Step 6)xviii): EDA - Cumulative Volume

* Definition: A running total of the stock’s volume over a month.

* Reasoning: Tracks the total trading activity over time, which can be linked to price movements.

In [27]:
df['Cumulative_Volume'] = df['Volume'].cumsum()

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,...,Momentum_21,OBV,Cumulative_Return,ROC_21,Volatility,Price_Gap,Lag_1,Lag_2,Lag_3,Cumulative_Volume
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,...,,0,,,,,,,,145928000
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,...,,0,0.994481,,,0.0685,2.718,,,336124000
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,...,,221602000,1.055188,,,0.0245,2.703,2.718,,557726000
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,...,,221602000,1.033848,,,-0.0535,2.868,2.703,2.718,716580000
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,...,,353160000,1.051509,,,-0.0605,2.81,2.868,2.703,848138000
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,...,,353160000,1.021155,,,-0.012,2.858,2.81,2.868,981820000
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,...,,353160000,0.955114,,,-0.0695,2.7755,2.858,2.81,1172946000
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,...,,353160000,0.946468,,,-0.048,2.596,2.7755,2.858,1330610000
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,...,,353160000,0.892016,,,-0.0675,2.5725,2.596,2.7755,1539462000
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,...,,585506000,0.946284,,,0.0035,2.4245,2.5725,2.596,1771808000


## Step 6)xix): EDA - Monthly High-Low Difference

* Definition: The difference between the highest and lowest price in a month.

* Reasoning: Provides insight into the range of price movements over a month.

In [28]:
df['Monthly_High_Low_Diff'] = df['High'].rolling(window=21).max() - df['Low'].rolling(window=21).min()

df.head(50)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,...,OBV,Cumulative_Return,ROC_21,Volatility,Price_Gap,Lag_1,Lag_2,Lag_3,Cumulative_Volume,Monthly_High_Low_Diff
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,...,0,,,,,,,,145928000,
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,...,0,0.994481,,,0.0685,2.718,,,336124000,
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,...,221602000,1.055188,,,0.0245,2.703,2.718,,557726000,
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,...,221602000,1.033848,,,-0.0535,2.868,2.703,2.718,716580000,
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,...,353160000,1.051509,,,-0.0605,2.81,2.868,2.703,848138000,
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,...,353160000,1.021155,,,-0.012,2.858,2.81,2.868,981820000,
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,...,353160000,0.955114,,,-0.0695,2.7755,2.858,2.81,1172946000,
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,...,353160000,0.946468,,,-0.048,2.596,2.7755,2.858,1330610000,
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,...,353160000,0.892016,,,-0.0675,2.5725,2.596,2.7755,1539462000,
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,...,585506000,0.946284,,,0.0035,2.4245,2.5725,2.596,1771808000,


## Step 6)xx): EDA - Seasonality Features

* Definition: Features like month or quarter that can indicate seasonal patterns.

* Reasoning: Stocks can exhibit seasonal behaviors, so this helps capture those trends

In [29]:
# Convert 'Date' column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Now extract month and quarter
df['Month'] = df['Date'].dt.month
df['Quarter'] = df['Date'].dt.quarter

df.head(50)


Unnamed: 0,Date,Open,High,Low,Close,Volume,Monthly_Return,MA_21,EMA_21,BB_Upper,...,ROC_21,Volatility,Price_Gap,Lag_1,Lag_2,Lag_3,Cumulative_Volume,Monthly_High_Low_Diff,Month,Quarter
0,2009-01-02,2.5675,2.7265,2.5535,2.718,145928000,,,2.718,,...,,,,,,,145928000,,1,1
1,2009-01-05,2.7865,2.787,2.6515,2.703,190196000,,,2.716636,,...,,,0.0685,2.718,,,336124000,,1,1
2,2009-01-06,2.7275,2.911,2.6875,2.868,221602000,,,2.730397,,...,,,0.0245,2.703,2.718,,557726000,,1,1
3,2009-01-07,2.8145,2.8475,2.7675,2.81,158854000,,,2.737633,,...,,,-0.0535,2.868,2.703,2.718,716580000,,1,1
4,2009-01-08,2.7495,2.866,2.729,2.858,131558000,,,2.748576,,...,,,-0.0605,2.81,2.868,2.703,848138000,,1,1
5,2009-01-09,2.846,2.85,2.735,2.7755,133682000,,,2.751023,,...,,,-0.012,2.858,2.81,2.868,981820000,,1,1
6,2009-01-12,2.706,2.715,2.5435,2.596,191126000,,,2.73693,,...,,,-0.0695,2.7755,2.858,2.81,1172946000,,1,1
7,2009-01-13,2.548,2.6645,2.5375,2.5725,157664000,,,2.721982,,...,,,-0.048,2.596,2.7755,2.858,1330610000,,1,1
8,2009-01-14,2.505,2.505,2.407,2.4245,208852000,,,2.694938,,...,,,-0.0675,2.5725,2.596,2.7755,1539462000,,1,1
9,2009-01-15,2.428,2.6115,2.3815,2.572,232346000,,,2.683762,,...,,,0.0035,2.4245,2.5725,2.596,1771808000,,1,1


## Step 9) Aggregating to Quarterly Data

* Conversion from daily to Quarterly data, resample the features

**Features Aggregated on a Quarterly Basis**:
   - **`'Open': 'first'`**: The first open price of the quarter is useful to understand where the market started.

   - **`'High': 'max'`**: The highest price reached during the quarter provides insight into the price ceiling.
   
   - **`'Low': 'min'`**: The lowest price during the quarter gives insight into the floor price.

   - **`'Close': 'last'`**: The last close price of the quarter is critical for determining the end of the trend.

   - **`'Volume': 'sum'`**: Summing the volume over the quarter shows the total trading activity.

   - **`'MA_21': 'mean'`**: The average moving average over the quarter provides smoothed trend information.

   - **`'RSI': 'mean'`**: The average RSI helps to see overall momentum during the quarter.

   - **`'MACD': 'last'`**: The last MACD value is useful for tracking the trend at the end of the quarter.

   - **`'Stochastic_Oscillator': 'mean'`**: Average of this indicator reflects overbought/oversold conditions over the quarter.

   - **`'ATR': 'mean'`**: Average volatility (ATR) over the quarter helps to understand the risk associated with the stock during the period.

   - **`'Momentum_21': 'last'`**: The last momentum indicator at the quarter’s end gives insight into the stock's directional strength.

   - **`'OBV': 'last'`**: OBV is cumulative and reflects the volume flow into or out of a stock by the end of the quarter.

   - **`'Cumulative_Return': 'last'`**: The cumulative return at the end of the quarter captures overall growth.

   - **`'Volatility': 'mean'`**: The mean volatility across the quarter shows the price fluctuations.
   
   - **`'Price_Gap': 'mean'`**: The average price gap provides insight into volatility around the open prices.




In [30]:
# Resample data to quarterly frequency (Q for quarterly) and aggregate accordingly
quarterly_df = df.resample('Q', on='Date').agg({
    'Open': 'first',            # First open price of the quarter
    'High': 'max',              # Highest price during the quarter
    'Low': 'min',               # Lowest price during the quarter
    'Close': 'last',            # Last close price of the quarter
    'Volume': 'sum',            # Total volume traded during the quarter
    'MA_21': 'mean',            # Average moving average over the quarter
    'RSI': 'mean',              # Average RSI over the quarter
    'MACD': 'last',             # Last MACD value of the quarter
    'Stochastic_Oscillator': 'mean',  # Average Stochastic Oscillator
    'ATR': 'mean',              # Average ATR (volatility)
    'Momentum_21': 'last',      # Last momentum value
    'OBV': 'last',              # Last OBV value
    'Cumulative_Return': 'last', # Last cumulative return of the quarter
    'Volatility': 'mean',       # Average volatility over the quarter
    'Price_Gap': 'mean'         # Average price gap over the quarter
}).dropna()


quarterly_df.head()


Unnamed: 0_level_0,Open,High,Low,Close,Volume,MA_21,RSI,MACD,Stochastic_Oscillator,ATR,Momentum_21,OBV,Cumulative_Return,Volatility,Price_Gap
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2009-03-31,2.5675,3.7805,2.3815,3.672,12370496000,3.070679,58.512097,0.136847,65.430878,0.116978,0.5725,6131718000,1.350993,0.211514,0.00145
2009-06-30,3.651,4.428,3.5855,4.183,9494416000,3.910458,55.309698,0.023607,58.383477,0.10284,0.0305,11657492000,1.538999,0.161083,-0.001254
2009-09-30,4.221,4.725,3.7705,4.668,8324122000,4.18242,53.239807,0.126727,53.727712,0.0929,0.6085,15869958000,1.717439,0.157503,-0.003992
2009-12-31,4.625,7.2955,4.4135,6.726,12727852000,5.718096,63.826041,0.116955,69.004807,0.140652,-0.199,22967222000,2.474614,0.328069,0.027391
2010-03-31,6.8125,6.9095,5.691,6.7885,11980988000,6.306039,51.020327,0.142294,48.488807,0.131931,0.512,28442446000,2.497609,0.214745,0.006992


## Step 8) Save Data to CSV


In [31]:
quarterly_df = quarterly_df.reset_index()

quarterly_df.head() 

Unnamed: 0,Date,Open,High,Low,Close,Volume,MA_21,RSI,MACD,Stochastic_Oscillator,ATR,Momentum_21,OBV,Cumulative_Return,Volatility,Price_Gap
0,2009-03-31,2.5675,3.7805,2.3815,3.672,12370496000,3.070679,58.512097,0.136847,65.430878,0.116978,0.5725,6131718000,1.350993,0.211514,0.00145
1,2009-06-30,3.651,4.428,3.5855,4.183,9494416000,3.910458,55.309698,0.023607,58.383477,0.10284,0.0305,11657492000,1.538999,0.161083,-0.001254
2,2009-09-30,4.221,4.725,3.7705,4.668,8324122000,4.18242,53.239807,0.126727,53.727712,0.0929,0.6085,15869958000,1.717439,0.157503,-0.003992
3,2009-12-31,4.625,7.2955,4.4135,6.726,12727852000,5.718096,63.826041,0.116955,69.004807,0.140652,-0.199,22967222000,2.474614,0.328069,0.027391
4,2010-03-31,6.8125,6.9095,5.691,6.7885,11980988000,6.306039,51.020327,0.142294,48.488807,0.131931,0.512,28442446000,2.497609,0.214745,0.006992


In [32]:
quarterly_df.to_csv("Data\EDA Preprocessed Data\EDA_AMZN_Historical_Quarterly.csv", index=False)