# Experimenting with Prophet Data Preparation

This notebook demonstrates how to download and transform stock data for use with Prophet.

In [None]:
import yfinance as yf
import pandas as pd 
import plotly.graph_objects as go
from prophet import Prophet

In [None]:
# Show the installed Prophet version so the results can be tied to a specific release
import prophet
prophet.__version__

In [None]:

def download_stock_data(ticker_symbol):
    """
    Fetch the full available price history for one ticker via yfinance.

    Args:
        ticker_symbol: Symbol handed to ``yf.Ticker``.

    Returns:
        The DataFrame produced by ``Ticker.history(period='max')``, or ``None``
        when anything goes wrong (the reason is printed rather than raised).
    """
    try:
        history = yf.Ticker(ticker_symbol).history(period='max')

        # An empty frame means nothing came back for this symbol
        if history.empty:
            raise ValueError(f"No data downloaded for {ticker_symbol}")

        print(f"Downloaded {len(history)} days of {ticker_symbol} data")

        # Make sure the price/volume columns used later are all present
        absent = [name for name in ('Open', 'Close', 'Volume') if name not in history.columns]
        if absent:
            raise ValueError(f"Missing required columns: {absent}")
    except Exception as err:
        # Best-effort contract: report the problem and signal failure with None
        print(f"Error downloading {ticker_symbol}: {str(err)}")
        return None

    return history

def transform_stock_data(df, ticker):
    """
    Transform stock data for Prophet model

    Builds a new frame holding the two columns Prophet requires ('ds' and 'y')
    plus a handful of features derived from the price history.

    Args:
        df: Price history indexed by date with 'Open', 'Close' and 'Volume'
            columns (the shape returned by download_stock_data), or None.
            The date index may be named 'Date' or 'Datetime'.
        ticker: Ticker symbol. Currently unused; kept so existing callers
            keep working.

    Returns:
        None if df is None, otherwise a DataFrame with the columns
        ds, y, volume, open, returns, ma20, ma50, volatility, day_of_week.
        Forward-filling cannot fill leading gaps, so the first 'returns' value
        and the warm-up rows of the rolling columns remain NaN.

    Raises:
        KeyError: if neither a 'Date' nor a 'Datetime' column exists after the
            index is reset, or if a required price column is missing.
    """
    if df is None:
        return None

    # Reset index to make the date a column
    df = df.reset_index()

    # yfinance names the index 'Date' for daily bars and 'Datetime' for
    # intraday bars; accept either instead of hard-coding 'Date'.
    date_col = next((col for col in ('Date', 'Datetime') if col in df.columns), None)
    if date_col is None:
        raise KeyError("Expected a 'Date' or 'Datetime' column/index in the stock data")

    # Prophet requires columns named 'ds' (date) and 'y' (target)
    prophet_df = pd.DataFrame()
    prophet_df['ds'] = pd.to_datetime(df[date_col]).dt.tz_localize(None)  # Remove timezone
    prophet_df['y'] = df['Close'].astype(float)    # Ensure float

    # Additional features (candidates for extra regressors)
    prophet_df['volume'] = df['Volume'].astype(float)
    prophet_df['open'] = df['Open'].astype(float)

    # Calculate returns (row-over-row fractional change of the close)
    prophet_df['returns'] = prophet_df['y'].pct_change()

    # Calculate moving averages
    prophet_df['ma20'] = prophet_df['y'].rolling(window=20).mean()
    prophet_df['ma50'] = prophet_df['y'].rolling(window=50).mean()

    # Calculate volatility (20-day rolling standard deviation)
    prophet_df['volatility'] = prophet_df['y'].rolling(window=20).std()

    # Add day of week as a feature (Monday=0 ... Sunday=6)
    prophet_df['day_of_week'] = prophet_df['ds'].dt.dayofweek

    # Forward-fill gaps; values before the first valid observation stay NaN
    prophet_df = prophet_df.ffill()

    return prophet_df

## Download Stock Data

In [None]:
# Download the full price history for QQQ
ticker = "QQQ"
df = download_stock_data(ticker)

# download_stock_data() signals failure by returning None; stop here with a clear
# message instead of failing below with an AttributeError on None.
if df is None:
    raise RuntimeError(f"Could not download data for {ticker}; see the error printed above")

print("\nRaw data sample:")
df.head()

## Transform Data for Prophet

Prophet requires specific column names (read docs here: https://facebook.github.io/prophet/docs/quick_start.html):
- 'ds': date column (should be without timezone)
- 'y': target variable

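We'll also compute additional features that could later be registered as extra regressors (via `Prophet.add_regressor`); the models below do not use them yet.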

In [None]:
# Transform data into Prophet format
prophet_df = transform_stock_data(df, ticker)

# Sanity check when reading the output: no Saturdays or Sundays are expected,
# i.e. day_of_week (Monday=0) should never be 5 or 6
print("\nTransformed data sample (Prophet format):")
prophet_df.tail(10)

## Available Features

Let's look at all the features we've created for Prophet:

In [None]:
# Column names of the transformed frame, shown as a plain Python list
print("Available features:")
list(prophet_df.columns)

# Predict with Prophet
* Simple to use, but mostly a black-box approach
* "By default, Prophet uses a linear model for its forecast."
* [Ivan] Good for understanding the long-term trend, but not for short-term forecasting
* Prophet detects changepoints automatically

In [None]:
# Multiplicative mode: seasonal effects scale with the trend instead of being added to it
m = Prophet(seasonality_mode = 'multiplicative')
# Include US public holidays as holiday effects
m.add_country_holidays(country_name='US')
# Custom monthly cycle: 30.5-day period modelled with 5 Fourier terms
m.add_seasonality(name='monthly', period=30.5, fourier_order=5)
# Only 'ds' and 'y' are expected to be used here, since no column was
# registered with add_regressor — TODO confirm against the Prophet docs
m.fit(prophet_df)

In [None]:
# Names of the holidays the fitted model picked up (added via add_country_holidays above)
m.train_holiday_names


In [None]:
# Make dates for prediction
future = m.make_future_dataframe(periods=365)
future.tail(365)

In [None]:
# Remove weekends using pandas datetime weekday
future = future[future['ds'].dt.dayofweek < 5]  # 5 = Saturday, 6 = Sunday
future.tail(365)

In [None]:
# Predict for every date in `future`, then show the last rows of the point
# forecast (yhat) together with its lower/upper interval bounds
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()


In [None]:
# Let's omit static graphs
# fig1 = m.plot(forecast)
# fig2 = m.plot_components(forecast)

In [None]:
# Dynamic graphs: interactive Plotly view of the forecast
from prophet.plot import plot_plotly, plot_components_plotly

plot_plotly(m, forecast)

In [None]:
# Interactive breakdown of the forecast into its components
plot_components_plotly(m, forecast)

In [None]:
# Try to identify the changepoints
from prophet.plot import add_changepoints_to_plot
fig = m.plot(forecast)
a = add_changepoints_to_plot(fig.gca(), m, forecast)


In [None]:
# Refit with a large changepoint_prior_scale; per the Prophet docs a larger
# value makes the trend more flexible. This model has no holidays or custom
# monthly seasonality, unlike the first one.
# NOTE: this rebinds `m` and `forecast`, so earlier plot cells re-run after
# this point will show this model instead of the first one.
m = Prophet(changepoint_prior_scale=0.5)
forecast = m.fit(prophet_df).predict(future)
fig = m.plot(forecast)

In [None]:
# Refit with a very small changepoint_prior_scale; per the Prophet docs a
# smaller value makes the trend less flexible. No holidays or custom monthly
# seasonality are configured on this model.
# NOTE: this rebinds `m` and `forecast` again.
m = Prophet(changepoint_prior_scale=0.001)
forecast = m.fit(prophet_df).predict(future)
fig = m.plot(forecast)

In [None]:
# Specify the changepoint dates by hand instead of relying on automatic detection.
# Dates presumably taken from (verify):
# https://en.wikipedia.org/wiki/List_of_largest_daily_changes_in_the_S%26P_500_Index
# NOTE: this rebinds `m` and `forecast` again.
m = Prophet(changepoints=['2009-01-20','2011-08-08','2018-02-05','2020-03-16','2022-09-13'])
forecast = m.fit(prophet_df).predict(future)
fig = m.plot(forecast)

In [None]:
# Show the built-in documentation for registering extra regressors on a model
help(Prophet.add_regressor) 

In [None]:
# Show the built-in documentation for Prophet.fit
help(Prophet.fit)