In [None]:
# Inputs
# Ticker symbol under study: passed to prep_data.IndicatorConfig(ticker=...) and
# used to pick per-ticker columns such as Volume_<TICKER> and views_<TICKER>.
TICKER: str = "AAPL"
# Forwarded as `drop_tickers` to prep_data.prep_data(); presumably controls whether
# the other tickers' columns are dropped from the prepared frame — TODO confirm.
DROP_TICKERS: bool = False

In [None]:
import glob

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import norm, cauchy

import strat_defs # custom functions
import prep_data

In [None]:
def _find_latest_gt_file(pattern):
    """Glob ``pattern`` and pick the match whose third "_"-separated piece sorts highest.

    For paths shaped like ``data/gt_<freq>_<stamp>.csv`` that piece is
    ``<stamp>.csv`` (presumably a date stamp, so the highest value is the most
    recent pull — TODO confirm the naming scheme).

    Returns:
        (matches, latest): every matching path and the selected one.

    Raises:
        FileNotFoundError: if nothing matches, instead of the opaque
            "max() arg is an empty sequence" from calling max() on an empty list.
    """
    matches = glob.glob(pattern)
    if not matches:
        raise FileNotFoundError(f"No Google Trends files match {pattern!r}")
    return matches, max(matches, key=lambda f: f.split("_")[2])


# Monthly and weekly files carry start_date/end_date columns; daily and adjusted carry date.
gt_monthly_files, gt_monthly_latest = _find_latest_gt_file('data/gt_monthly_*.csv')
gt_monthly_raw = pd.read_csv(gt_monthly_latest, parse_dates=['start_date','end_date'])

gt_weekly_files, gt_weekly_latest = _find_latest_gt_file('data/gt_weekly_*.csv')
gt_weekly_raw = pd.read_csv(gt_weekly_latest, parse_dates=['start_date','end_date'])

gt_daily_files, gt_daily_latest = _find_latest_gt_file('data/gt_daily_*.csv')
gt_daily_raw = pd.read_csv(gt_daily_latest, parse_dates=['date'])

gt_adjusted_files, gt_adjusted_latest = _find_latest_gt_file('data/gt_adjusted_*.csv')
gt_adjusted_raw = pd.read_csv(gt_adjusted_latest, parse_dates=['date'])

# Remaining raw inputs come from the project loader (note it also returns its own gt_adjusted).
stocks_df, wiki_pageviews, ffr_raw, weather, gt_adjusted = prep_data.load_data()

In [None]:
# Build the prepared frame for TICKER from the raw inputs.
indicator_config = prep_data.IndicatorConfig(ticker=TICKER)
prepd_data = prep_data.prep_data(
    stocks_df, wiki_pageviews, ffr_raw, weather, gt_adjusted,
    config=indicator_config,
    drop_tickers=DROP_TICKERS,
)

In [None]:
# Derive an extended frame rather than aliasing prepd_data: .assign returns a new
# DataFrame, so the weekday column is not silently added to prepd_data itself.
prepd_data_ext = prepd_data.assign(day_of_week_name=prepd_data['Date'].dt.day_name())

# Columns worth eyeballing side by side to sanity-check date alignment.
review_cols = ['Date', 'day_of_week_name', 'Adj Close_SPY', 'index_Trump', 'Target']

prepd_data_ext[review_cols]

In [None]:
selected_kw = "Trump"

# Monthly rows for the keyword, oldest first, tagged with the date range that
# appears (quoted) inside the pytrends request parameters.
is_kw_monthly = gt_monthly_raw['search_term'] == selected_kw
for_monthly = (
    gt_monthly_raw.loc[is_kw_monthly]
    .sort_values(by='start_date')
    .assign(
        params_date_range=lambda d: d['pytrends_params'].str.extract(
            r'"(\d{4}-\d{2}-\d{2} \d{4}-\d{2}-\d{2})"'
        )[0]
    )
)
# Keep only the rows from the greatest date range; pulls repeated with the same
# range are not de-duplicated, so the line can look chunky in that case.
newest_range = max(for_monthly['params_date_range'])
for_monthly = for_monthly.loc[for_monthly['params_date_range'] == newest_range]

# Daily adjusted index for the keyword, in date order.
is_kw_adjusted = gt_adjusted_raw['search_term'] == selected_kw
for_daily = gt_adjusted_raw.loc[is_kw_adjusted].sort_values(by='date')

# Raw daily index with a weekday label for the box plot.
is_kw_daily = gt_daily_raw['search_term'] == selected_kw
for_daily_bp = (
    gt_daily_raw.loc[is_kw_daily]
    .sort_values(by='date')
    .assign(day_of_week=lambda d: d['date'].dt.day_name())
)

fig1 = px.line(
    for_monthly,
    x="start_date",
    y="index",
    labels={'start_date':'Month'},
    title=f'{selected_kw} Monthly index',
)
fig2 = px.line(
    for_daily,
    x="date",
    y="index",
    labels={'date':'Date','index':'Scaled index'},
    title=f'{selected_kw} Daily scaled index',
)
fig3 = px.box(
    for_daily_bp,
    x="day_of_week",
    y="index",
    labels={'day_of_week':'Day of week'},
    title=f'{selected_kw} index by Day of week',
)
# Force calendar order on the x axis instead of order of first appearance.
fig3.update_xaxes(
    categoryorder='array',
    categoryarray=['Sunday','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday'],
)

for figure in (fig1, fig2, fig3):
    figure.show()

In [None]:
# Every column whose name contains "Volume_" (per-ticker volume, e.g. Volume_<TICKER>).
vol_cols = [name for name in prepd_data.columns if 'Volume_' in name]

# Sum each volume column over all rows, then order from largest to smallest.
volume_totals = prepd_data[vol_cols].sum()
column_sums = volume_totals.sort_values(ascending=False)

# Strip the prefix so the axis shows bare ticker symbols.
ticker_labels = column_sums.index.str.replace('Volume_', '')

px.bar(
    x=ticker_labels,
    y=column_sums.values,
    labels={'x': 'Ticker', 'y': 'Total Volume'},
    title="Total Volume by Ticker",
)

In [None]:
# column_sums is sorted descending, so the last ten entries are the lowest-volume tickers.
column_sums.iloc[-10:]

In [None]:
data = prepd_data['Daily_Return'].dropna()

# Bin with numpy first so the exact edges and the (uniform) bin width are known
hist_values, bin_edges = np.histogram(data, bins=300, density=False)
bin_width = bin_edges[1] - bin_edges[0]

# 100 evenly spaced evaluation points spanning the histogram's range
x = np.linspace(bin_edges[0], bin_edges[-1], 100)

# Hand Plotly the same start/end/size so its bins match np.histogram's
hist = go.Histogram(x=data, xbins=dict(start=bin_edges[0], end=bin_edges[-1], size=bin_width), name="Histogram")

# Fit both distributions to the returns
mu, std = norm.fit(data)
loc, scale = cauchy.fit(data)

# A density times (sample size * bin width) is an expected count per bin, which puts
# the curves on the same y scale as the count histogram.
pdf_norm = norm.pdf(x, mu, std) * len(data) * bin_width
pdf_cauchy = cauchy.pdf(x, loc, scale) * len(data) * bin_width

# Create curves
curve_norm = go.Scatter(x=x, y=pdf_norm, mode='lines', name="Fitted Normal")
curve_cauchy = go.Scatter(x=x, y=pdf_cauchy, mode='lines', name="Fitted Cauchy")

fig = go.Figure([hist, curve_norm, curve_cauchy])
fig.update_layout(
    # Title follows TICKER (it used to hard-code "SPY"), matching the other Daily_Return plots
    title=f"{TICKER} Daily Return Histogram with Fitted Normal and Cauchy Distributions",
    xaxis_title="Value",
    yaxis_title="Frequency"
)
fig.show()

In [None]:
# # Compute correlation matrix (slow/ laggy if looking at tickers)
# exclude_vars = ("Open","High","Low","Close","Volume","day_of_week","streak","movement")
# corr = prepd_data.select_dtypes(include='number')
# corr = corr.drop(columns=[col for col in corr.columns if "Target" in col])
# corr = corr.drop(columns=[
#     col for col in corr.columns 
#     if any(col.startswith(prefix) for prefix in exclude_vars) and col != "Adj Close_"+TICKER
# ])
# corr = corr.corr().round(3)

# # Plot the correlation matrix as a heatmap (px.imshow takes the wide matrix directly)
# fig = px.imshow(corr, text_auto=True, color_continuous_scale="RdBu", zmin=-1, zmax=1, title="Correlation Heatmap")

# fig.update_layout(width=1000,height=800)
# fig.show()

In [None]:
# One scatter of Daily_Return against each explanatory column.
# Each spec is (x column, figure title, x-axis label).
scatter_specs = [
    (f'Volume_{TICKER}', f"{TICKER} Daily Return vs Volume", "Volume"),
    (f'views_{TICKER}', f"{TICKER} Daily Return vs Wiki Page Views", "Views"),
    ('index_inflation', f'{TICKER} Daily Return vs Google Trends "Inflation" index', "Index"),
]

for x_column, plot_title, x_label in scatter_specs:
    fig = px.scatter(x=prepd_data[x_column], y=prepd_data['Daily_Return'])
    fig.update_layout(title=plot_title, xaxis_title=x_label, yaxis_title="Daily Return")
    fig.show()