In [11]:
### Covered in this section are the following concepts:
# Window Functions
# Shift/Lag Analysis


### Library Imports
import pandas as pd
import yfinance as yf

In [12]:
### Create a Function to Populate the DataFrame
def get_data(tickers, start_date, end_date):
    """Collect one 'Adj Close' price column per ticker into a single table.

    Each symbol in ``tickers`` is fetched separately with ``yf.download`` for
    the given ``start_date``/``end_date``; the 'Adj Close' column of that
    download is stored in an initially empty DataFrame under the symbol's name.

    Returns the table after ``reset_index()``, so the former index becomes an
    ordinary column (shown as ``Date`` in this notebook's output).

    NOTE(review): whether 'Adj Close' is present depends on the installed
    yfinance version and its ``auto_adjust`` default - confirm before upgrading.
    """
    prices = pd.DataFrame()

    for symbol in tickers:
        # One download per symbol; only the adjusted close is kept
        history = yf.download(symbol, start=start_date, end=end_date)
        prices[symbol] = history['Adj Close']

    # Move the index into a regular column before handing the table back
    return prices.reset_index()

# Download settings: the tickers to fetch and the date range passed to get_data
tickers = ['GOOG', 'AAPL', 'NVDA']
start_date, end_date = '2024-01-01', '2024-08-01'
stock_data = get_data(tickers=tickers, start_date=start_date, end_date=end_date)

# After get_data's reset_index() the dates are a regular column; show the last rows
print(stock_data.tail())

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed

          Date        GOOG        AAPL        NVDA
141 2024-07-25  169.160004  217.238556  112.279999
142 2024-07-26  168.679993  217.708008  113.059998
143 2024-07-29  171.130005  217.987686  111.589996
144 2024-07-30  171.860001  218.547043  103.730003
145 2024-07-31  173.149994  221.823242  117.019997





#### Window Functions

- **`DataFrame.rolling()`** - Perform calculations over a fixed-size sliding window across the data.
  - **Use Case:** Calculates the N-day Simple Moving Average (SMA), e.g. a 10-day SMA.

- **`DataFrame.ewm()`** - Apply exponential weighting to data points, giving more weight to recent observations.
  - **Use Case:** Calculates the N-day Exponential Moving Average (EMA), e.g. a 20-day EMA.

- **`DataFrame.expanding()`** - Perform cumulative operations that include all data points up to the current point.
  - **Use Case:** Calculates the cumulative sum of a specified column.


In [13]:
### Window Functions
# Create a function to calculate the 10-day SMA, 20-day EWMA, cumulative sum, and rolling sum of the NVDA column
def window_functions(df, window1, window2, column='NVDA'):
    """Add rolling, exponentially weighted, and expanding statistics for one column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing ``column``. It is modified in place: the four result
        columns are added to (or overwritten on) this same object.
    window1
        Window passed to ``rolling(window=...)`` for the moving average and
        the rolling sum.
    window2
        ``span`` passed to ``ewm(span=..., adjust=False)`` for the
        exponentially weighted mean.
    column : str, default 'NVDA'
        Name of the column to analyse. The default reproduces the original
        NVDA-only behaviour.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now holding:
        ``'10_SMA'``   - rolling mean over ``window1``;
        ``'20_EWMA'``  - exponentially weighted mean with span ``window2``;
        ``'Cum_Sum'``  - expanding (cumulative) sum;
        ``'Roll_Sum'`` - rolling sum over ``window1``.

    Notes
    -----
    The output column names are fixed: they stay ``'10_SMA'`` / ``'20_EWMA'``
    even when ``window1`` / ``window2`` are not 10 and 20.
    """
    # Read the source series once so every statistic is based on the same data
    values = df[column]
    rolling_window = values.rolling(window=window1)

    # Simple moving average over window1 rows
    df['10_SMA'] = rolling_window.mean()
    # Exponentially weighted mean; adjust=False selects the recursive form
    df['20_EWMA'] = values.ewm(span=window2, adjust=False).mean()
    # Running total of every value up to and including the current row
    df['Cum_Sum'] = values.expanding().sum()
    # Sum over the same window used for the moving average
    df['Roll_Sum'] = rolling_window.sum()

    return df

# Add the window statistics using a 10-row rolling window and a span of 20,
# then show the last rows of the result
stock_data_new = window_functions(
    df=stock_data,
    window1=10,
    window2=20,
)
print(stock_data_new.tail())


          Date        GOOG        AAPL        NVDA      10_SMA     20_EWMA  \
141 2024-07-25  169.160004  217.238556  112.279999  121.371000  122.033601   
142 2024-07-26  168.679993  217.708008  113.059998  119.752999  121.178972   
143 2024-07-29  171.130005  217.987686  111.589996  118.067999  120.265736   
144 2024-07-30  171.860001  218.547043  103.730003  115.804999  118.690905   
145 2024-07-31  173.149994  221.823242  117.019997  115.707999  118.531771   

          Cum_Sum     Roll_Sum  
141  13026.623875  1213.709999  
142  13139.683872  1197.529991  
143  13251.273869  1180.679985  
144  13355.003872  1158.049988  
145  13472.023869  1157.079987  


### Shift/Lag Analysis

- **`DataFrame.shift()`** - Shift index by the specified number of periods, with an optional time `freq`.
  - **Use Case:** Useful for creating lagged features in time series data or for aligning data with a previous time step.

- **`DataFrame.diff()`** - Compute the difference between consecutive elements in a DataFrame.
  - **Use Case:** Commonly used to compute changes between periods, such as daily price changes or differences in other time series data.

- **`DataFrame.pct_change()`** - Calculate the percentage change between the current and prior element.
  - **Use Case:** Useful for calculating returns or percentage changes in time series data, such as daily stock returns.

- **`DataFrame.cumsum()`** - Compute the cumulative sum of DataFrame columns.
  - **Use Case:** Helpful for understanding the accumulation of values over time, such as cumulative profit.

- **`DataFrame.cummax()`** - Compute the cumulative maximum of DataFrame columns.
  - **Use Case:** Useful for identifying peak values over time, often used in drawdown analysis.

- **`DataFrame.cummin()`** - Compute the cumulative minimum of DataFrame columns.
  - **Use Case:** Used for monitoring the lowest values over time, which can be useful in various analyses, including drawdown.

- **`DataFrame.cumprod()`** - Compute the cumulative product of DataFrame columns.
  - **Use Case:** Often used in financial analysis to calculate the growth of an investment over time by compounding per-period growth factors (1 + return), rather than raw prices.


In [20]:
### Shift/Lag Functions
# Create a function to perform the above actions
def shift_lag(df, column='GOOG', cumprod_rows=5):
    """Add lag, change, and running-extreme columns for one column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing ``column``. It is modified in place: the six result
        columns are added to (or overwritten on) this same object.
    column : str, default 'GOOG'
        Name of the column to analyse. The default reproduces the original
        GOOG-only behaviour.
    cumprod_rows : int, default 5
        Number of leading rows included in the cumulative product.

    Returns
    -------
    tuple
        ``(df, cumprod_head)`` where ``df`` is the same object that was passed
        in, now holding:
        ``'shifted'``    - the column moved down by one row;
        ``'difference'`` - current value minus the previous row's value;
        ``'pct change'`` - fractional change from the previous row;
        ``'cumsum'``     - running total;
        ``'cummax'``     - highest value seen so far;
        ``'cummin'``     - lowest value seen so far;
        and ``cumprod_head`` is a Series with the cumulative product of the
        first ``cumprod_rows`` raw values of ``column``.
    """
    # Read the source series once so every statistic is based on the same data
    values = df[column]

    # Lag by one row: each row receives the previous row's value
    df['shifted'] = values.shift(periods=1)
    # Row-over-row absolute change
    df['difference'] = values.diff(periods=1)
    # Row-over-row relative change
    df['pct change'] = values.pct_change(periods=1)
    # Running total, running maximum, and running minimum
    df['cumsum'] = values.cumsum()
    df['cummax'] = values.cummax()
    df['cummin'] = values.cummin()

    # Product is taken over the raw values (not returns) and limited to the
    # leading rows
    cumprod_head = values.iloc[:cumprod_rows].cumprod()

    return df, cumprod_head

# Run the shift/lag calculations on the price table and show the
# cumulative product returned alongside the updated table
shift_lag_df, cumprod_first5 = shift_lag(df=stock_data)
#print(shift_lag_df.tail())
print(cumprod_first5)


0    1.394014e+02
1    1.954414e+04
2    2.694806e+06
3    3.698185e+08
4    5.191153e+10
Name: GOOG, dtype: float64
