In [None]:
import pyro as py
from data_retrieval.database_io import DB_handler
import pandas as pd
import numpy as np
import yfinance as yf

In [None]:
def calc_return(ticker, buy_date, sell_date):
    """Return the cumulative growth of one unit invested in ``ticker``.

    Daily prices are fetched with ``yf.download`` and converted into the
    running product of day-over-day close ratios.

    Args:
        ticker: Symbol passed to ``yf.download``.
        buy_date: Forwarded to ``yf.download`` as ``start``.
        sell_date: Forwarded to ``yf.download`` as ``end``.

    Returns:
        list: One value per downloaded row. The first entry is always ``1``;
        each later entry is the previous entry multiplied by that day's
        close divided by the previous close. When fewer than two rows are
        downloaded the result is ``[1]``.
    """
    data = yf.download(ticker, start=buy_date, end=sell_date, interval="1d")

    close = data["Close"]
    # With MultiIndex columns (field, ticker) the "Close" selection is still a
    # frame keyed by ticker; with flat columns it is already the price series.
    if isinstance(close, pd.DataFrame):
        close = close[ticker]

    # Carry the last known close across missing quotes. Without this a single
    # NaN price turns every later value of the running product into NaN.
    close = close.ffill()

    daily_ratio = (close / close.shift(1)).to_numpy()[1:]
    return [1, *np.cumprod(daily_ratio).tolist()]

In [None]:
# Database handle; the next cell reads the valuation records through it.
dbh = DB_handler()

In [None]:
# Raw valuation records that the rest of the notebook ranks.
# NOTE(review): the meaning of the argument 0 is defined inside DB_handler and
# is not visible in this notebook - confirm before changing it.
start = dbh.valuations.analysis_valuations(0)

In [None]:
# Tabulate the raw records and order them by target date.
targets = (
    pd.DataFrame(start)
    .sort_values(by="target_date")
)

In [None]:
# Predicted gain as a multiple: target_usd relative to valuation.
targets["pred_gain"] = targets["target_usd"].div(targets["valuation"])

In [None]:
# Change column type to string for column: 'target_date'
# (the month derivation later in this cell splits the values via the `.str` accessor).
targets = targets.astype({'target_date': 'string'})

# Derive column 'month' from column: 'target_date'
def month(target_date):
    """Build a ``YYYYMM`` key from dash-separated date strings.

    Args:
        target_date: pandas Series of strings such as ``"2022-01-02"``
            (the ``.str`` accessor is used on it).

    Returns:
        Series with the first two dash-separated fields concatenated,
        e.g. ``"202201"`` for ``"2022-01-02"``.
    """
    fields = target_date.str.split("-")
    year_part = fields.str.get(0)
    month_part = fields.str.get(1)
    return year_part + month_part

# `DataFrame.insert` raises ValueError when the column already exists, which
# made this cell fail on a second run. Drop any previous copy first so the
# cell can be re-executed safely.
if "month" in targets.columns:
    targets = targets.drop(columns="month")
targets.insert(2, "month", month(targets["target_date"]))

In [None]:
# Keep only rows whose predicted gain is at most 2x.
# NOTE(review): presumably an outlier cap - confirm the threshold.
targets = targets.loc[targets["pred_gain"].le(2)]

In [None]:
# Per month, keep the five rows with the highest predicted gain.
ranked = targets.sort_values(by="pred_gain", ascending=False)
invest = ranked.groupby("month").head(5)


In [None]:
# Display only: the selection ordered by month (the result is not stored).
invest.sort_values(by="month")

In [None]:
# Keep months up to and including 202404; YYYYMM strings compare
# lexicographically in chronological order.
invest = invest.loc[invest["month"].le("202404")]

In [None]:
from datetime import datetime
from datetime import timedelta

In [None]:
# Inspect the selected recommendations.
invest

In [None]:
from collections import defaultdict
from time import sleep
# One growth curve per selected row, grouped by ticker. Every position is
# opened on the 1st of the month after the recommendation month and held
# until the fixed end date.
gains = defaultdict(list)
for row_label, pick in invest.iterrows():
    symbol = pick["ticker"]
    # First-of-month + 31 days always lands inside the following month; only
    # its year and month are kept and the day is pinned to 01.
    following_month = datetime.strptime(pick["month"], "%Y%m") + timedelta(days=31)
    buy_date = following_month.strftime("%Y-%m") + "-01"
    gains[symbol].append(calc_return(symbol, buy_date, "2025-04-30"))
    sleep(1)  # pause between downloads

In [None]:
# Find the longest curve, reporting each new maximum as it is encountered.
longest = 0
for symbol, runs in gains.items():
    for run in runs:
        if len(run) > longest:
            longest = len(run)
            print(symbol, longest)

# Left-pad shorter curves with NaN so every curve has `longest` entries and
# all curves line up at their final entry.
gains_same_length = defaultdict(list)
for symbol, runs in gains.items():
    for run in runs:
        missing = longest - len(run)
        gains_same_length[symbol].append([np.nan] * missing + run if missing > 0 else run)

# Collapse multiple curves of one ticker into their NaN-aware mean.
gains_together = defaultdict(list)
for symbol, runs in gains.items():
    padded = gains_same_length[symbol]
    gains_together[symbol] = padded[0] if len(runs) <= 1 else np.nanmean(padded, axis=0)

In [None]:
# Stack the equal-length per-ticker curves into one 2-D array (one row per ticker).
arr = np.array([curve for curve in gains_together.values()])

# Average down the rows for each position, skipping the NaN padding.
mean_vals = np.nanmean(arr, axis=0)


In [None]:
# Benchmark: growth of one unit held in SPY over the plotting window.
spy = yf.download('SPY', start='2022-02-01', end='2025-05-13', interval="1d")
spy_close = spy["Close"]["SPY"]

# Day-over-day close ratio; the first row has no predecessor and is NaN.
daily_ratio = spy_close / spy_close.shift(1)
spy["gain"] = daily_ratio

# Running product of the daily ratios. Missing ratios count as 1 (no change),
# so the curve starts at 1 on the first row.
inv_spy_a = daily_ratio.fillna(1).cumprod().tolist()

In [None]:
# Number of positions in the averaged strategy curve; compare with the SPY row
# count before combining the two.
len(mean_vals)

In [None]:
# Inspect the downloaded SPY frame, including the derived "gain" column.
spy

In [None]:
# Frame for plotting: the SPY growth curve indexed by the SPY trading dates.
plot_ = pd.DataFrame({"SPY": inv_spy_a}, index=spy.index)

In [None]:
# `mean_vals` has one entry per position of the longest strategy curve, which
# need not equal the number of SPY rows; assigning the raw array then raises
# "Length of values does not match length of index". Align by position from
# the first row and leave any uncovered tail as NaN.
# NOTE(review): this assumes position 0 of `mean_vals` corresponds to the first
# SPY row - confirm the two windows start on the same day.
recom = pd.Series(np.nan, index=plot_.index, dtype="float64")
overlap = min(len(recom), len(mean_vals))
recom.iloc[:overlap] = np.asarray(mean_vals, dtype="float64")[:overlap]
plot_["RECOM"] = recom

In [None]:
# Inspect the combined SPY / RECOM frame before plotting.
plot_

In [None]:
import plotly.express as px
# Both columns are growth multiples of the starting investment (they start at
# 1), not percentages, so the value axis is labelled accordingly.
px.line(
    plot_,
    x=plot_.index,
    y=["SPY", "RECOM"],
    template="plotly_dark",
    labels={"value": "Growth of 1 unit invested", "variable": "Strategy"},
)

In [None]:
# Final value of each ticker's (averaged) growth curve.
for symbol, curve in gains_together.items():
    print(symbol, curve[-1])

In [None]:
from collections import defaultdict
from time import sleep
# Same collection as before, but each position is closed 365 days after the
# first day of its recommendation month instead of on a fixed date.
gains = defaultdict(list)
for row_label, pick in invest.iterrows():
    symbol = pick["ticker"]
    month_start = datetime.strptime(pick["month"], "%Y%m")
    # First-of-month + 31 days always lands inside the following month.
    buy_date = (month_start + timedelta(days=31)).strftime("%Y-%m") + "-01"
    sell_date = month_start + timedelta(days=365)  # handed over as a datetime, not a string
    gains[symbol].append(calc_return(symbol, buy_date, sell_date))

In [None]:
# Inspect the per-ticker growth curves collected in the previous cell.
gains

In [None]:
from datetime import datetime, timedelta
import numpy as np
from collections import defaultdict

# Align every recommendation's growth curve on a shared trading-day axis that
# starts at the earliest buy date, then average the curves per ticker.

# 1. Buy date of every recommendation: first day of the following month
start_dates = []
for index, row in invest.iterrows():
    # First-of-month + 31 days always lands inside the following month.
    shifted = datetime.strptime(row["month"], "%Y%m") + timedelta(days=31)
    start_dates.append(datetime(shifted.year, shifted.month, 1))

# 2. Reference date = earliest of all buy dates
ref_start = min(start_dates)
ref_start_str = ref_start.strftime("%Y-%m-%d")

# 3. Build aligned gain series (each held for at most 365 days, capped at today)
aligned_gains = defaultdict(list)
for (index, row), start_date in zip(invest.iterrows(), start_dates):
    ticker = row["ticker"]
    start_str = start_date.strftime("%Y-%m-%d")
    end_date = min(datetime.today(), start_date + timedelta(days=365))
    gain = calc_return(ticker, start_str, end_date.strftime("%Y-%m-%d"))

    # The curve holds one value per trading day, so the lead-in padding has to
    # be counted in trading days as well; calendar days would push later
    # recommendations too far to the right. `np.busday_count` counts Mon-Fri
    # days in [ref_start, start_date). Exchange holidays are not excluded, so
    # a small drift of a few positions per year remains.
    offset_days = int(np.busday_count(ref_start_str, start_str))
    aligned_gains[ticker].append([np.nan] * offset_days + gain)

# 4. Determine the longest aligned series
max_length = max(len(g) for series in aligned_gains.values() for g in series)

# 5. Pad all series to the same length at the end
gains_same_length = defaultdict(list)
for ticker, series in aligned_gains.items():
    for gain in series:
        gains_same_length[ticker].append(gain + [np.nan] * (max_length - len(gain)))

# 6. Average across all series of each ticker, ignoring the NaN padding
gains_together = {}
for ticker, series in gains_same_length.items():
    gains_together[ticker] = np.nanmean(series, axis=0) if len(series) > 1 else series[0]


In [None]:
# Stack the equal-length per-ticker curves into one 2-D array (one row per ticker).
arr = np.array([curve for curve in gains_together.values()])

# Average down the rows for each position, skipping the NaN padding.
mean_vals = np.nanmean(arr, axis=0)


In [None]:
# Inspect the averaged strategy curve.
mean_vals

In [None]:
# Frame for plotting: the SPY growth curve indexed by the SPY trading dates.
plot_ = pd.DataFrame({"SPY": inv_spy_a}, index=spy.index)

In [None]:
# `mean_vals` has one entry per position of the longest aligned curve, which
# need not equal the number of SPY rows; assigning the raw array then raises
# "Length of values does not match length of index". Align by position from
# the first row and leave any uncovered tail as NaN.
# NOTE(review): this assumes position 0 of `mean_vals` corresponds to the first
# SPY row - confirm the two windows start on the same day.
recom = pd.Series(np.nan, index=plot_.index, dtype="float64")
overlap = min(len(recom), len(mean_vals))
recom.iloc[:overlap] = np.asarray(mean_vals, dtype="float64")[:overlap]
plot_["RECOM"] = recom

In [None]:
import plotly.express as px
# Both columns are growth multiples of the starting investment (they start at
# 1), not percentages, so the value axis is labelled accordingly.
px.line(
    plot_,
    x=plot_.index,
    y=["SPY", "RECOM"],
    template="plotly_dark",
    labels={"value": "Growth of 1 unit invested", "variable": "Strategy"},
)