In [1]:
import pandas as pd
import datetime
from datetime import date
import yfinance as yf
import os
import numpy as np

#Portfolio libraries
from plotly.io import show
from sklearn.model_selection import GridSearchCV, train_test_split
from skfolio.prior import FactorModel, EmpiricalPrior
from skfolio import Population, RatioMeasure, RiskMeasure
from skfolio.cluster import HierarchicalClustering, LinkageMethod
from skfolio.distance import KendallDistance, PearsonDistance
from skfolio.metrics import make_scorer
from skfolio.model_selection import (
    CombinatorialPurgedCV,
    WalkForward,
    cross_val_predict,
    optimal_folds_number,
)
from skfolio.optimization import (
    HierarchicalEqualRiskContribution,
    HierarchicalRiskParity,
)
from skfolio.preprocessing import prices_to_returns

# .py module import
import trading

# Seed NumPy's global random generator once, up front, so NumPy-driven
# randomness in the cells below is repeatable from run to run.
np.random.seed(123)

In [2]:
# Download the quotes
def get_quotes(tickers, start_date, end_date):
    """Download quotes for each ticker and stack them into a single frame.

    Parameters
    ----------
    tickers : str or iterable of str
        Symbols to download. A single string is treated as one symbol
        (instead of being iterated character by character). Repeated
        symbols are downloaded only once; the first occurrence fixes the
        symbol's position in the result.
    start_date, end_date
        Forwarded unchanged to ``yf.download`` as ``start`` and ``end``.

    Returns
    -------
    pandas.DataFrame
        The per-ticker frames concatenated with ``pd.concat`` on a dict, so
        the ticker symbol becomes the outermost index level.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when ``tickers`` is empty.
    """
    if isinstance(tickers, str):
        tickers = [tickers]

    # dict.fromkeys keeps first-seen order while dropping repeats, so every
    # symbol costs exactly one network round-trip.
    unique_tickers = dict.fromkeys(tickers)

    df_dict = {
        ticker: yf.download(ticker, start=start_date, end=end_date)
        for ticker in unique_tickers
    }
    return pd.concat(df_dict)

# Quote window: fixed start on 2020-01-01; the end is today's date as a 'YYYY-MM-DD' string
start_date = datetime.datetime(year=2020, month=1, day=1)
end_date = date.today().isoformat()

def _read_last_line(path):
    """Return the final line of the file at ``path`` as decoded text."""
    with open(path, 'rb') as f:
        try:  # catch OSError in case of a one line file
            # Start just before the final byte (skipping a trailing newline)
            # and walk backwards until the previous line break is consumed.
            f.seek(-2, os.SEEK_END)
            while f.read(1) != b'\n':
                f.seek(-2, os.SEEK_CUR)
        except OSError:
            # Seeking before the start of the file: there is at most one line.
            f.seek(0)
        return f.readline().decode()


def data():
    """Assemble the ticker universe, download quotes and build return sets.

    The universe is a fixed list of symbols, plus every value in the
    ``Asset`` column of the "Trading Log" collection (when it has rows),
    plus the whitespace-separated symbols on the last line of ``pair.txt``.
    Blank entries are ignored and repeated symbols are kept only once, in
    first-seen order. The resulting list is printed.

    Reads the module-level ``start_date`` and ``end_date`` for the download
    window.

    Returns
    -------
    tuple
        ``(X, X_train, X_test)`` where ``X`` is the output of
        ``prices_to_returns`` on the 'Adj Close' prices (one column per
        ticker) and ``X_train`` / ``X_test`` are its unshuffled 67% / 33%
        split.
    """
    # Read the file first and close it straight away so the handle is not
    # held open during the database query and the downloads.
    last_line = _read_last_line('pair.txt')

    tickers = ['AMZN', 'AAPL', 'MSFT', 'GOOG', 'NVDA', 'TSM', 'IBM']
    db = trading.get_database()
    trading_df = pd.DataFrame(list(db["Trading Log"].find()))

    # Get the data
    if not trading_df.empty:
        tickers.extend(trading_df['Asset'])

    # split() with no argument drops empty strings, so an empty file or
    # consecutive spaces never produce a blank ticker.
    tickers.extend(last_line.split())

    # Drop repeats while keeping first-seen order.
    tickers = list(dict.fromkeys(tickers))
    print(tickers)

    all_data = get_quotes(tickers, start_date, end_date)
    # Move the ticker level of the index into the columns.
    adj_close_prices = all_data['Adj Close'].unstack(level=0)
    X = prices_to_returns(adj_close_prices)
    X_train, X_test = train_test_split(X, test_size=0.33, shuffle=False)
    return X, X_train, X_test

# Run the loader once: X holds all returns, X_train / X_test are its
# unshuffled 67% / 33% split. The later cells read X_train and X_test as globals.
X,  X_train, X_test = data()
# Bare expression so the notebook renders the returns frame.
X

Pinged your deployment. You successfully connected to MongoDB!
['AMZN', 'AAPL', 'MSFT', 'GOOG', 'NVDA', 'TSM', 'IBM', 'EXPD', 'BAH', 'WLK', 'PPG', 'LEN', 'WTRG', 'BKH', 'BKH', 'WTRG']


[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%*******

Unnamed: 0_level_0,AMZN,AAPL,MSFT,GOOG,NVDA,TSM,IBM,EXPD,BAH,WLK,PPG,LEN,WTRG,BKH
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-01-03,-0.012139,-0.009722,-0.012452,-0.004907,-0.016006,-0.032978,-0.007975,-0.006126,0.010709,-0.022007,-0.014514,0.011826,0.007193,-0.003783
2020-01-06,0.014886,0.007968,0.002585,0.024657,0.004193,-0.011540,-0.001787,-0.013356,0.016100,0.017617,-0.004627,0.027979,-0.003463,0.000392
2020-01-07,0.002092,-0.004703,-0.009118,-0.000624,0.012107,0.016205,0.000671,-0.002603,-0.005823,-0.007565,-0.007824,-0.015676,0.005863,-0.002487
2020-01-08,-0.007809,0.016086,0.015928,0.007880,0.001875,0.007373,0.008346,-0.003262,0.008854,-0.004251,0.002577,0.007875,0.004102,-0.001444
2020-01-09,0.004799,0.021241,0.012493,0.011044,0.010983,0.008170,0.010568,0.005630,0.027815,-0.002650,0.001947,0.001736,0.004730,0.010120
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-19,0.012561,0.007015,-0.000823,-0.002367,0.013730,-0.004704,0.017032,-0.013907,-0.035315,-0.019301,-0.014268,-0.051600,-0.001656,0.005543
2024-12-20,0.007300,0.018816,-0.000984,0.017185,0.030762,0.013204,-0.002501,-0.011176,0.019664,-0.000706,0.005773,-0.002312,0.004975,-0.001378
2024-12-23,0.000622,0.003065,-0.003092,0.015703,0.036897,0.051468,-0.006402,-0.008791,-0.000610,0.010333,-0.000499,-0.000579,-0.003025,0.008108
2024-12-24,0.017729,0.011478,0.009374,0.008062,0.003938,-0.004967,0.011175,0.006244,-0.010754,0.004808,0.006908,0.000000,0.004138,0.009240


In [3]:
def portfolio_model():
    """Grid-search an HRP and a HERC model on the module-level ``X_train``.

    Both models use CVaR as risk measure. The search tries every combination
    of two distance estimators (Pearson, Kendall) and three linkage methods
    (single, Ward, complete), evaluated on walk-forward splits of 252
    training and 9 test observations and scored by CVaR ratio.

    Returns
    -------
    tuple
        ``(model_hrp, model_herc, cv)``: the ``best_estimator_`` of each
        search and the walk-forward splitter used by both.
    """
    walk_forward = WalkForward(train_size=252, test_size=9)

    hrp_candidate = HierarchicalRiskParity(
        risk_measure=RiskMeasure.CVAR,
        hierarchical_clustering_estimator=HierarchicalClustering(),
    )
    herc_candidate = HierarchicalEqualRiskContribution(
        risk_measure=RiskMeasure.CVAR,
        hierarchical_clustering_estimator=HierarchicalClustering(),
    )

    search_space = {
        "distance_estimator": [PearsonDistance(), KendallDistance()],
        "hierarchical_clustering_estimator__linkage_method": [
            LinkageMethod.SINGLE,
            LinkageMethod.WARD,
            LinkageMethod.COMPLETE,
        ],
    }
    hrp_search = GridSearchCV(
        estimator=hrp_candidate,
        cv=walk_forward,
        n_jobs=-1,
        param_grid=search_space,
        scoring=make_scorer(RatioMeasure.CVAR_RATIO),
    )

    # First pass: tune HRP and keep its winner before the search is re-used.
    hrp_search.fit(X_train)
    best_hrp = hrp_search.best_estimator_

    # Second pass: same grid, splitter and scorer with the HERC estimator
    # swapped in. NOTE(review): set_params presumably returns the same search
    # object, so the HRP winner must be captured before this refit - confirm.
    herc_search = hrp_search.set_params(estimator=herc_candidate)
    herc_search.fit(X_train)
    best_herc = herc_search.best_estimator_

    return best_hrp, best_herc, walk_forward

# Tune both models on the training returns; the walk-forward splitter is kept
# so the same scheme can be handed to cross_predict below.
model_hrp, model_herc, cv = portfolio_model()

In [4]:
def cross_predict(cv):
    """Cross-validate the tuned HRP and HERC models on the held-out returns.

    Uses the module-level ``model_hrp``, ``model_herc`` and ``X_test``.

    Parameters
    ----------
    cv
        Cross-validation splitter forwarded to ``cross_val_predict``.

    Returns
    -------
    tuple
        ``(population, pred_hrp, pred_herc)``: a ``Population`` holding both
        predictions (HRP first), followed by the two predictions themselves.
    """
    def _predict(estimator, label):
        # Same call for both models; only the estimator and the name differ.
        return cross_val_predict(
            estimator,
            X_test,
            cv=cv,
            n_jobs=-1,
            portfolio_params=dict(name=label),
        )

    pred_hrp = _predict(model_hrp, "HRP")
    pred_herc = _predict(model_herc, "HERC")
    return Population([pred_hrp, pred_herc]), pred_hrp, pred_herc

# Cross-validated predictions of both tuned models on the held-out returns,
# reusing the splitter returned by portfolio_model, then plot them.
population, pred_hrp, pred_herc = cross_predict(cv)
population.plot_cumulative_returns()


In [5]:
def _portfolio_report_lines(ptf):
    """Yield, one at a time, the lines printed for a single portfolio."""
    rule = "=" * 25
    yield rule
    yield " " * 8 + ptf.name
    yield rule
    yield f"CVaR : {ptf.cvar:0.2%}"
    yield f"Mean-CVaR ratio : {ptf.cvar_ratio:0.4f}"
    yield "\n"


def statistics_report(population):
    """Print a CVaR banner for every portfolio and return the summary.

    For each portfolio in ``population`` this prints its name between two
    rules, its CVaR as a percentage and its CVaR ratio, followed by blank
    lines.

    Returns
    -------
    The result of ``population.summary()``.
    """
    for ptf in population:
        # Lines are produced lazily, so each attribute is read right before
        # its line is printed.
        for line in _portfolio_report_lines(ptf):
            print(line)

    return population.summary()

# Print the per-portfolio CVaR figures, then keep the summary returned by the report.
summary = statistics_report(population)
# Bare expression so the notebook renders the summary.
summary

        HRP
CVaR : 2.00%
Mean-CVaR ratio : 0.0277


        HERC
CVaR : 2.25%
Mean-CVaR ratio : 0.0063




Unnamed: 0,HRP,HERC
Mean,0.055%,0.014%
Annualized Mean,13.98%,3.55%
Variance,0.0061%,0.0079%
Annualized Variance,1.54%,1.99%
Semi-Variance,0.0035%,0.0044%
Annualized Semi-Variance,0.89%,1.11%
Standard Deviation,0.78%,0.89%
Annualized Standard Deviation,12.40%,14.12%
Semi-Deviation,0.59%,0.66%
Annualized Semi-Deviation,9.41%,10.55%


In [6]:
def portfolio_ratio(model):
    """Save the latest row of ``model.weights_per_observation`` to the database.

    The last row is turned into a dict, extended with a ``Date`` field (the
    row's index label converted with ``pd.to_datetime``) and an ``_id`` field,
    and inserted into the "Portfolio" collection. ``_id`` is one more than
    the ``_id`` of the document returned by sorting on ``_id`` descending,
    or 1 when no document is returned.

    Parameters
    ----------
    model
        Any object exposing ``weights_per_observation`` with ``.iloc``
        row access.
    """
    latest_row = model.weights_per_observation.iloc[-1]
    weights = latest_row.to_dict()
    as_of = pd.to_datetime(latest_row.name)

    collection = trading.get_database()["Portfolio"]
    newest = collection.find_one(sort=[("_id", -1)])  # document with the highest _id, if any
    next_id = newest["_id"] + 1 if newest else 1  # continue the sequence or start at 1

    # Field order is kept: weights first, then Date, then _id.
    record = {**weights, 'Date': as_of, "_id": next_id}
    collection.insert_many([record])

# Store the latest weights of the HRP prediction in the "Portfolio" collection.
# Every run of this cell inserts a new document with the next _id.
portfolio_ratio(model=pred_hrp)

Pinged your deployment. You successfully connected to MongoDB!
