In [18]:
# --- 1. INSTALL LIBRARIES ---
# This part installs all the necessary packages for the project.
print("--- Installing libraries... ---")
!pip install yfinance pytrends pandas scikit-learn plotly nbformat streamlit pyngrok joblib -q

# --- 2. SETUP AND IMPORTS ---
print("\n--- Importing libraries and setting up... ---")
import pandas as pd
import yfinance as yf
from pytrends.request import TrendReq
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import GradientBoostingClassifier
import joblib
import time
from pyngrok import ngrok

# --- 3. DATA COLLECTION & PREPROCESSING ---
print("\n--- Starting Data Collection & Preprocessing... ---")
# Fetch financial data
tickers = ['^VIX', 'SPY']
start_date = '2019-01-01'
end_date = pd.to_datetime('today').strftime('%Y-%m-%d')
financial_data = yf.download(tickers, start=start_date, end=end_date, progress=False)
vix_data = financial_data['Close']['^VIX'].to_frame('VIX')
spy_data = financial_data['Close']['SPY'].to_frame('SPY')
daily_data = vix_data.join(spy_data).ffill()

# Fetch Google Trends data
pytrends = TrendReq(hl='en-US', tz=360)
keywords = ["stock market crash", "recession", "buy the dip", "market volatility", "inflation"]
timeframe = f'{start_date} {end_date}'
trends_data = pd.DataFrame()
for keyword in keywords:
    try:
        pytrends.build_payload([keyword], cat=0, timeframe=timeframe, geo='', gprop='')
        trend_df = pytrends.interest_over_time()
        if not trend_df.empty:
            trends_data[keyword] = trend_df[keyword]
    except Exception as e:
        print(f"Skipping '{keyword}': {e}")
if 'isPartial' in trends_data.columns:
    trends_data = trends_data.drop(columns=['isPartial'])

# Resample and align data
weekly_data = daily_data.resample('W-MON').agg({'VIX': 'mean', 'SPY': 'last'}).ffill()
df = weekly_data.join(trends_data, how='inner')

# Create the Fear Index
scaler = MinMaxScaler()
fear_columns = [col for col in df.columns if col in keywords]
df[fear_columns] = scaler.fit_transform(df[fear_columns])
df['Fear_Index'] = df[fear_columns].mean(axis=1)

# Create features and target variable
df['SPY_Returns'] = df['SPY'].pct_change()
df['Fear_Index_Lag1'] = df['Fear_Index'].shift(1)
df['VIX_Rolling_Mean_4W'] = df['VIX'].rolling(window=4).mean()
df['Target'] = (df['VIX'].shift(-1) > df['VIX_Rolling_Mean_4W'].shift(-1)).astype(int)
df_model = df.dropna()
print("--- Data processing complete. ---")

# --- 4. MODEL TRAINING ---
print("\n--- Training the predictive model... ---")
features = ['Fear_Index_Lag1', 'SPY_Returns']
X = df_model[features]
y = df_model['Target']
split_index = int(len(X) * 0.8)
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]
gb_classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
gb_classifier.fit(X_train, y_train)
print("--- Model training complete. ---")

# --- 5. SAVE THE MODEL AND SCALER ---
print("\n--- Saving model and scaler files... ---")
joblib.dump(gb_classifier, 'volatility_predictor_model.joblib')
joblib.dump(scaler, 'google_trends_scaler.joblib')
print("--- Files saved. ---")

# --- 6. DEFINE AND WRITE THE STREAMLIT APP FILE ---
print("\n--- Writing the Streamlit app file (app.py)... ---")

# Define the app code as a multi-line string
app_code = """
import streamlit as st
import pandas as pd
import yfinance as yf
from pytrends.request import TrendReq
import plotly.graph_objects as go
import joblib
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from datetime import datetime, timedelta

st.set_page_config(page_title="Volatility Fear & Greed Index", page_icon="📈", layout="wide")

@st.cache_resource
def load_model():
    model = joblib.load('volatility_predictor_model.joblib')
    scaler = joblib.load('google_trends_scaler.joblib')
    return model, scaler

try:
    model, scaler = load_model()
    model_loaded = True
except FileNotFoundError:
    model_loaded = False

@st.cache_data(ttl=3600)
def get_live_data():
    end_date = datetime.today()
    start_date = end_date - timedelta(days=90)
    financial_data = yf.download(['^VIX', 'SPY'], start=start_date.strftime('%Y-%m-%d'), end=end_date.strftime('%Y-%m-%d'), progress=False)
    vix_data = financial_data['Close']['^VIX'].to_frame('VIX')
    spy_data = financial_data['Close']['SPY'].to_frame('SPY')
    daily_data = vix_data.join(spy_data).ffill()
    weekly_data = daily_data.resample('W-MON').agg({'VIX': 'mean', 'SPY': 'last'}).ffill()
    pytrends = TrendReq(hl='en-US', tz=360)
    keywords = ["stock market crash", "recession", "buy the dip", "market volatility", "inflation"]
    timeframe = f'{start_date.strftime("%Y-%m-%d")} {end_date.strftime("%Y-%m-%d")}'
    trends_list = []
    for keyword in keywords:
        try:
            pytrends.build_payload([keyword], cat=0, timeframe=timeframe, geo='', gprop='')
            trend_df = pytrends.interest_over_time()
            if not trend_df.empty and keyword in trend_df.columns:
                trends_list.append(trend_df[[keyword]])
        except Exception:
            pass
    if not trends_list: return None, None
    trends_data = pd.concat(trends_list, axis=1).ffill().bfill()
    live_df = weekly_data.join(trends_data, how='inner')
    return live_df, keywords

st.title("📈 Fear & Greed Index: Predicting Short-Term Volatility")

if not model_loaded:
    st.error("Model files not found. Please ensure the notebook has run completely to generate the .joblib files.")
else:
    st.header("🔮 Volatility Prediction for Next Week")
    live_df, keywords = get_live_data()
    if live_df is not None and not live_df.empty and len(live_df) > 1:
        latest_data = live_df.iloc[[-1]]
        fear_columns = [col for col in latest_data.columns if col in keywords]
        # Ensure that fear_columns is not empty before proceeding
        if fear_columns:
            latest_data_scaled = scaler.transform(latest_data[fear_columns])
            latest_data['Fear_Index'] = np.mean(latest_data_scaled, axis=1)
            latest_data['SPY_Returns'] = live_df['SPY'].pct_change().iloc[-1]
            features_for_prediction = pd.DataFrame({
                'Fear_Index_Lag1': [latest_data['Fear_Index'].iloc[0]],
                'SPY_Returns': [latest_data['SPY_Returns'].iloc[0]]
            })
            prediction = model.predict(features_for_prediction)[0]
            prediction_proba = model.predict_proba(features_for_prediction)[0][1]
            if prediction == 1:
                st.error(f"🔴 ALERT: High Volatility Expected (Probability: {prediction_proba:.1%})", icon="🚨")
            else:
                st.success(f"🟢 NORMAL: Low Volatility Expected (Probability of Spike: {prediction_proba:.1%})", icon="✅")
            st.markdown(f"**Last Data Point:** Week of {latest_data.index[0].strftime('%Y-%m-%d')}")
            st.dataframe(latest_data[['VIX', 'Fear_Index', 'SPY_Returns']].style.format("{:.2f}"))
        else:
            st.warning("Could not find any Google Trends keywords in the live data to create the Fear Index.")
    else:
        st.warning("Could not fetch sufficient live data to make a prediction.")
    st.header("📊 Live Data Visualization")
    if live_df is not None:
        viz_df = live_df.copy()
        fear_cols_viz = [col for col in viz_df.columns if col in keywords]
        if fear_cols_viz:
            viz_df_scaled = scaler.transform(viz_df[fear_cols_viz])
            viz_df['Fear_Index'] = np.mean(viz_df_scaled, axis=1)
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=viz_df.index, y=viz_df['VIX'], name='VIX Index', line=dict(color='royalblue')))
            fig.add_trace(go.Scatter(x=viz_df.index, y=viz_df['Fear_Index'], name='Fear Index', line=dict(color='firebrick', dash='dash'), yaxis='y2'))
            fig.update_layout(title="Live VIX vs. Fear Index (Last 90 Days)", template='plotly_white', yaxis=dict(title='VIX Level'), yaxis2=dict(title='Fear Index', overlaying='y', side='right'), legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1))
            st.plotly_chart(fig, use_container_width=True)
"""

# Write the string to the app.py file
with open("app.py", "w") as f:
    f.write(app_code)

print("--- App file written successfully. ---")


# --- 7. RUN THE STREAMLIT APP ---
print("\n--- Starting the dashboard... ---")
ngrok.kill()
# Paste your ngrok token here
AUTHTOKEN = "2pLiaBO193VuGDE4XGFXw20j6y0_Te98D498t5ABemXS7hYY"
ngrok.set_auth_token(AUTHTOKEN)
# Run streamlit in background
!nohup streamlit run app.py &
time.sleep(5)
# Open a tunnel to the streamlit port
public_url = ngrok.connect(8501)
print("="*50)
print("✅ Your app is live! Click the link below to view it.")
print(public_url)
print("="*50)

--- Installing libraries... ---

--- Importing libraries and setting up... ---

--- Starting Data Collection & Preprocessing... ---



YF.download() has changed argument auto_adjust default to True


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future 

--- Data processing complete. ---

--- Training the predictive model... ---
--- Model training complete. ---

--- Saving model and scaler files... ---
--- Files saved. ---

--- Writing the Streamlit app file (app.py)... ---
--- App file written successfully. ---

--- Starting the dashboard... ---
Downloading ngrok ...


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



nohup: appending output to 'nohup.out'
✅ Your app is live! Click the link below to view it.
NgrokTunnel: "https://eb58d39b21ef.ngrok-free.app" -> "http://localhost:8501"
