In [1]:
import datetime
import itertools
import logging
import os
import time
import warnings

import matplotlib.pyplot as plt #not needed to run program
import numpy as np
import pandas as pd
import requests
import yfinance as yf
from bs4 import BeautifulSoup
from pypfopt import risk_models, expected_returns, EfficientFrontier, objective_functions
from tqdm import tqdm
# import plotly.express as px
# import plotly.graph_objects as go

In [2]:
# logging.basicConfig opens the log file immediately and fails with
# FileNotFoundError when its parent folder is missing (e.g. a fresh checkout),
# so make sure the folder exists first.
os.makedirs('logs', exist_ok=True)
logging.basicConfig(format = '%(asctime)s:%(levelname)s :%(message)s',
                        datefmt = '%Y-%m-%d %H:%M:%S',
                        filename = 'logs/run_main.log',
                        level=logging.INFO)

**1. Get tickers**

In [3]:
def get_dax_tickers():
    """
    Retrieves the tickers of companies listed in the DAX index from Wikipedia and returns a list of tickers.

    Returns:
        tickers_dax (list): A list of tickers of companies listed in the DAX index.

    Raises:
        Exception: If the request itself fails (connection error, timeout, TLS error).
        Exception: If the website answers with a non-success HTTP status.
        Exception: If the table with id 'constituents' cannot be found or parsed.
        Exception: If the 'Ticker' column cannot be extracted from the table.

    """
    url = 'https://en.wikipedia.org/wiki/DAX'

    # Certificate verification is left at the requests default (enabled) and a
    # timeout keeps the notebook from hanging forever on a stalled connection.
    try:
        response = requests.get(url, timeout=30)
    except requests.exceptions.RequestException as e:
        raise Exception(f'Website {url} does not exist') from e

    # Checked outside the try block so this message is not replaced by the
    # connection-error message above.
    if not response.ok:
        raise Exception(f'Cannot reach website {url}')
    soup = BeautifulSoup(response.text, 'html.parser')

    # Locate and parse the 'constituents' table
    wiki_table_id = 'constituents'
    constituents_table = soup.find('table', {'id': wiki_table_id, 'class': 'wikitable'})
    if constituents_table is None:
        raise Exception(f'Table {wiki_table_id} cannot be found in {url}')
    try:
        tables = pd.read_html(str(constituents_table))
    except Exception as e:
        raise Exception(f'Table {wiki_table_id} cannot be found in {url}') from e

    # Extract the ticker column from the table
    extract_cols = 'Ticker'
    try:
        tickers_dax = tables[0][extract_cols].tolist()
    except KeyError as e:
        raise Exception(f'Column {extract_cols} cannot be found in table {wiki_table_id}') from e

    logging.info('DAX40 tickers loaded successfully')

    return tickers_dax

In [75]:
def get_custom_tickers():
    """
    Asks the user to input a list of tickers separated by commas and returns the list of tickers.

    The prompt is repeated until at least 2 distinct, non-empty tickers are entered.
    Tickers are upper-cased and stripped of whitespace; empty entries (e.g. from a
    trailing comma) and duplicates are dropped, keeping the first occurrence.

    Returns:
    - tickers_custom (list): The list of custom tickers entered by the user.

    """
    while True:
        raw_answer = input("Enter a list of tickers separated by commas. Use tickers from https://finance.yahoo.com/")

        tickers_custom = []
        for ticker in raw_answer.split(','):
            ticker = ticker.strip().upper()
            # An empty string is not a ticker and a repeated ticker adds no asset,
            # so neither may count towards the 2-ticker minimum.
            if ticker and ticker not in tickers_custom:
                tickers_custom.append(ticker)

        if len(tickers_custom) >= 2:
            return tickers_custom

        print("Invalid input. Please enter at least 2 tickers.")



In [76]:
def ask_user_tickers():
    """
    Lets the user choose between the DAX40 constituents and a custom ticker list.

    The yes/no question is repeated until the (stripped, lower-cased) answer is
    exactly 'yes' or 'no'.

    Returns:
    - tickers_portfolio (list): Result of get_dax_tickers() for 'yes',
      result of get_custom_tickers() for 'no'.

    """
    question = "Do you want to use DAX40 constituents as default tickers? (yes/no): "

    answer = input(question).strip().lower()
    while answer not in ('yes', 'no'):
        print("Invalid answer. Please enter either 'yes' or 'no'.")
        answer = input(question).strip().lower()

    if answer == 'yes':
        return get_dax_tickers()  # default universe
    return get_custom_tickers()  # user-supplied universe


**2. Download monthly data from the Yahoo Finance API**

In [8]:
def calculate_stock_returns(prices):
    """
    Calculates the series of returns from a series of prices.

    Returns are simple period-over-period percentage changes; the first
    observation (which has no predecessor) and any other NaN results are dropped.

    Args:
        prices (pd.Series): A pandas Series containing the prices.

    Returns:
        returns (pd.Series): A pandas Series containing the calculated returns.

    Raises:
        Exception: If the returns cannot be calculated; the original error is
            attached as the cause.

    """
    try:
        returns = prices.pct_change().dropna()
    except Exception as e:
        # `except Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # through, and chaining keeps the real reason visible in the traceback.
        raise Exception("Error while calculating returns") from e
    return returns


In [77]:
def convert_datetime_index_to_date(pandas_series):
    """
    Return a copy of a Series/DataFrame whose datetime index is reduced to the date part.

    The time-of-day component of every index entry is discarded (via the
    index's `.date` attribute) and the result is stored as a DatetimeIndex
    again. The input object itself is left untouched.

    Args:
        pandas_series (pandas.Series or pandas.DataFrame): Object with a DatetimeIndex.

    Returns:
        pandas.Series or pandas.DataFrame: A copy of the input with the converted index.

    Raises:
        Exception: If an AttributeError occurs during the conversion process
            (e.g. the index is not a DatetimeIndex).

    """
    try:
        date_index = pd.to_datetime(pandas_series.index.date)
    except AttributeError as e:
        raise Exception(f'Could not convert index of a dataframe to date. Error message: {e}') from e

    # Work on a copy so the caller's object does not silently change its index.
    converted = pandas_series.copy()
    converted.index = date_index
    return converted

In [79]:
def get_single_stock_returns(ticker, start_date="2001-04-01", end_date="2023-01-01"):
    """
    Downloads monthly closing prices for one ticker and converts them to returns.

    Args:
        ticker (str): Ticker symbol passed to yfinance.
        start_date (str, optional): Start of the download window. Defaults to "2001-04-01".
        end_date (str, optional): End of the download window. Defaults to "2023-01-01".

    Returns:
        Returns computed by calculate_stock_returns from the 'Close' column,
        with the index passed through convert_datetime_index_to_date.

    Raises:
        ValueError: If the downloaded 'Close' series is empty.

    """
    price_history = yf.Ticker(ticker).history(start=start_date, end=end_date, interval='1mo')
    monthly_close = price_history.Close

    # An empty series means the download produced no usable price data.
    if monthly_close.empty:
        raise ValueError(f'{ticker} either does not exist or does not have data for a given time period')

    monthly_returns = calculate_stock_returns(monthly_close)
    return convert_datetime_index_to_date(monthly_returns)


In [81]:
def get_stock_info(ticker):
    """
    Looks up name, country and sector of a stock via yfinance.

    Each value is lower-cased and then capitalised, so only its first
    character is upper case.

    Args:
        ticker (str): Ticker symbol of the stock.

    Returns:
        tuple: (stock_name, stock_country, stock_sector) as strings.

    Raises:
        ValueError: If anything goes wrong while retrieving or formatting the
            information (including a missing 'longName', 'country' or 'sector' entry).

    """
    info_fields = ('longName', 'country', 'sector')
    try:
        info = yf.Ticker(ticker).info
        stock_name, stock_country, stock_sector = (
            info[field].lower().capitalize() for field in info_fields
        )
    except Exception as e:
        raise ValueError(f"An error occurred while retrieving stock information for {ticker}: {str(e)}")
    return stock_name, stock_country, stock_sector


In [82]:
def get_batch_stock_data(tickers, start_date="2001-04-01", end_date="2023-01-01"):
    """
    Downloads metadata and monthly returns for a list of tickers.

    Tickers for which get_stock_info or get_single_stock_returns raises a
    ValueError are skipped and reported; any other exception propagates.

    Args:
        tickers (list): Ticker symbols to download.
        start_date (str, optional): Start of the download window. Defaults to "2001-04-01".
        end_date (str, optional): End of the download window. Defaults to "2023-01-01".

    Returns:
        tuple: (stock_data, tickers_fail) where
            stock_data (dict): Maps each successful ticker to a dict with the keys
                'Name', 'Country', 'Sector' and 'Monthly_prices'. Note that
                'Monthly_prices' holds the output of get_single_stock_returns
                (i.e. returns); the key name is kept for backward compatibility.
            tickers_fail (list): Tickers that were skipped.

    """
    stock_data = {}
    tickers_success = []
    tickers_fail = []

    with tqdm(total = len(tickers)) as pbar:
        for single_ticker in tickers:
            pbar.set_postfix_str(single_ticker)
            try:
                stock_name, stock_country, stock_sector = get_stock_info(single_ticker)
                stock_returns = get_single_stock_returns(single_ticker, start_date=start_date, end_date=end_date)
            except ValueError as e:
                tickers_fail.append(single_ticker)
                # Keep the reason in the log instead of discarding it.
                logging.warning(f'{single_ticker} skipped: {e}')
            else:
                stock_data[single_ticker] = {'Name': stock_name, 'Country': stock_country,
                                             'Sector': stock_sector, 'Monthly_prices': stock_returns
                                             }
                tickers_success.append(single_ticker)
            pbar.update()
            time.sleep(0.5)  # pause between tickers (presumably to go easy on the API)

    logging.info(f'Successfully downloaded stocks: {tickers_success}')
    # Only warn when something actually failed, so a clean run leaves no warning in the log.
    if tickers_fail:
        logging.warning(f'Non-existent tickers or no data for given time interval: {tickers_fail}')

    return stock_data, tickers_fail

In [83]:
def get_date_input(question):
    """
    Prompts until the user enters a date in 'YYYY-MM-DD' format.

    Args:
        question (str): Prompt shown to the user.

    Returns:
        datetime.datetime: The parsed date.

    """
    parsed_date = None
    while parsed_date is None:
        try:
            parsed_date = datetime.datetime.strptime(input(question), "%Y-%m-%d")
        except ValueError:
            # strptime rejects anything that does not match the format; ask again
            print("Invalid date format. Please use the format 'YYYY-MM-DD'.")
    return parsed_date

In [84]:
def ask_custom_date_range():
    """
    Asks for a start and an end date until the end date lies after the start date.

    Returns:
        tuple: (start_date, end_date) as 'YYYY-MM-DD' strings.

    """
    date_format = "%Y-%m-%d"
    while True:
        range_start = get_date_input("Enter start date in 'YYYY-MM-DD' format:")
        range_end = get_date_input("Enter end date in 'YYYY-MM-DD' format:")

        if not (range_end > range_start):
            print("Invalid date range. Start date must be before end date.")
            continue

        return range_start.strftime(date_format), range_end.strftime(date_format)

In [85]:
def ask_user_date_range():
    """
    Lets the user choose between the default date range and a custom one.

    The yes/no question is repeated until the (stripped, lower-cased) answer is
    exactly 'yes' or 'no'.

    Returns:
        tuple: (start_date, end_date). ("2001-04-01", "2023-01-01") for 'yes',
        the result of ask_custom_date_range() for 'no'.

    """
    question = "Do you want to use default date range? (yes/no): "
    while True:
        choice = input(question).strip().lower()
        if choice in ('yes', 'no'):
            break
        print("Invalid answer. Please enter either 'yes' or 'no'.")

    if choice == 'no':
        start_date, end_date = ask_custom_date_range()
    else:
        start_date, end_date = "2001-04-01", "2023-01-01"

    return start_date, end_date

In [87]:
def full_stock_data_retrieval():
    """
    Runs one complete retrieval round: ask for tickers, ask for a date range, download.

    Returns:
        tuple: (stock_data, tickers_failed) as produced by get_batch_stock_data.

    """
    selected_tickers = ask_user_tickers()
    range_start, range_end = ask_user_date_range()
    downloaded, failed = get_batch_stock_data(
        selected_tickers,
        start_date=range_start,
        end_date=range_end,
    )
    return downloaded, failed

In [93]:
def ask_full_stock_data_retrieval():
    """
    Retrieves stock data and offers a retry whenever some tickers failed.

    After each retrieval round with failed tickers the user is asked whether to
    enter the tickers and date range again. The yes/no question is repeated
    (without downloading anything again) until the stripped, lower-cased answer
    is 'yes' or 'no'.

    Returns:
        dict: The stock data of the last retrieval round.

    """
    while True:
        stock_data, tickers_failed = full_stock_data_retrieval()

        if len(tickers_failed) == 0:
            return stock_data

        question = f"Tickers {tickers_failed} either does not exist or they have no data for given time range. Would you like to provide ticker's list and date range once again? (yes/no): "

        # Re-ask only the question on an invalid answer; falling through to the
        # outer loop would trigger a full new round of prompts and downloads.
        while True:
            answer = input(question).strip().lower()
            if answer in ('yes', 'no'):
                break
            print("Invalid answer. Please enter either 'yes' or 'no'.")

        if answer == 'no':
            return stock_data

        print("Enter the required data once again")


In [94]:
# Interactive step: prompts for tickers and a date range, then downloads the data.
stock_data = ask_full_stock_data_retrieval()

100%|██████████| 5/5 [00:04<00:00,  1.16it/s, BMW.DE]


In [96]:
# Prints the whole nested dict: per ticker the metadata plus the series stored under 'Monthly_prices'.
print(stock_data)

{'META': {'Name': 'Meta platforms, inc.', 'Country': 'United states', 'Sector': 'Communication services', 'Monthly_prices': 2012-07-01   -0.301929
2012-08-01   -0.168125
2012-09-01    0.199336
2012-10-01   -0.025392
2012-11-01    0.326386
                ...   
2022-08-01    0.024073
2022-09-01   -0.167250
2022-10-01   -0.313384
2022-11-01    0.267711
2022-12-01    0.018967
Name: Close, Length: 126, dtype: float64}, 'ADS.DE': {'Name': 'Adidas ag', 'Country': 'Germany', 'Sector': 'Consumer cyclical', 'Monthly_prices': 2001-05-01    0.024273
2001-06-01    0.047284
2001-07-01    0.066887
2001-08-01   -0.034197
2001-09-01   -0.240343
                ...   
2022-08-01   -0.116329
2022-09-01   -0.198274
2022-10-01   -0.167396
2022-11-01    0.235199
2022-12-01    0.042532
Name: Close, Length: 260, dtype: float64}, 'ZAL.DE': {'Name': 'Zalando se', 'Country': 'Germany', 'Sector': 'Consumer cyclical', 'Monthly_prices': 2014-11-01    0.356757
2014-12-01    0.015936
2015-01-01   -0.058824
2015-02-

**3. Minimum variance portfolio** <br>
*Machine learning approach - tuning hyperparameters to achieve portfolio with least variance*

In [21]:
def mvp_name(cov_est, mu_est, penalty):
    """
    Builds the label 'mvp_<cov>_<mu>_<penalty>' for a minimum variance portfolio.

    Args:
        cov_est (str): The covariance estimation method.
        mu_est (str): The mean estimation method.
        penalty (float): The penalty value, inserted into the name as-is.

    Returns:
        str: The generated portfolio name, e.g. 'mvp_lw_mean_0.5'.

    Raises:
        KeyError: If the provided covariance or mean estimation method is not implemented.
    """
    # short codes used in the portfolio name
    cov_codes = dict(
        sample_cov='sample',
        semicovariance='semi',
        exp_cov='exp',
        ledoit_wolf='lw',
        ledoit_wolf_constant_variance='lwcv',
        ledoit_wolf_single_factor='lwsf',
        ledoit_wolf_constant_correlation='lwcc',
        oracle_approximating='oa',
    )
    mu_codes = dict(
        mean_historical_return='mean',
        ema_historical_return='ema',
        capm_return='capm',
    )

    try:
        cov_code = cov_codes[cov_est]
        mu_code = mu_codes[mu_est]
    except KeyError as k:
        raise KeyError(f'{k} is not implemented')

    return f'mvp_{cov_code}_{mu_code}_{penalty}'


In [22]:
def mvp_weights(dataframe_returns, cov_est="sample_cov", mu_est="mean_historical_return", penalty=0):
    """
    Computes the minimum variance portfolio weights.

    Expected returns and the covariance matrix are estimated from the returns
    with frequency=12; the covariance matrix is passed through
    fix_nonpositive_semidefinite before the optimisation.

    Parameters:
        dataframe_returns (pandas.DataFrame): DataFrame containing asset returns.
        cov_est (str, optional): Method to estimate the covariance matrix. Defaults to "sample_cov".
        mu_est (str, optional): Method to estimate expected returns. Defaults to "mean_historical_return".
        penalty (float, optional): L2 regularization penalty (gamma). Defaults to 0.

    Returns:
        The cleaned asset weights of the minimum variance portfolio, as returned
        by EfficientFrontier.clean_weights().

    Raises:
        ValueError: If the DataFrame has less than 2 columns.
        Exception: If an error occurs during the computation, an exception is raised with an error message.
    """
    n_assets = dataframe_returns.shape[1]
    if n_assets < 2:
        raise ValueError("DataFrame must have at least 2 columns.")

    try:
        # estimates for expected returns
        mu = expected_returns.return_model(
            prices=dataframe_returns, returns_data=True, frequency=12, method=mu_est
        )
        # estimates for the covariance matrix, repaired if not positive semidefinite
        cov_mat_fix = risk_models.fix_nonpositive_semidefinite(
            matrix=risk_models.risk_matrix(
                prices=dataframe_returns, returns_data=True, frequency=12, method=cov_est
            )
        )

        frontier = EfficientFrontier(expected_returns=mu, cov_matrix=cov_mat_fix)
        frontier.add_objective(objective_functions.L2_reg, gamma=penalty)
        frontier.min_volatility()
        return frontier.clean_weights()

    except Exception as e:
        raise Exception(f'Error message: {e}')

In [23]:
def calculate_returns(portfolio_weights, stock_returns):
    """
    Calculate the portfolio return as the dot product of weights and stock returns.

    Both inputs are sorted by their index first and then multiplied by position.

    Args:
        portfolio_weights (dict, list or array-like): The weights of the assets in the portfolio.
        stock_returns (pandas.Series): The returns of the stocks in the portfolio.

    Returns:
        float or None: The calculated portfolio return. None is returned (after printing
                      a message) if an AttributeError, KeyError or TypeError occurs.

    """
    try:
        weight_vector = pd.Series(portfolio_weights).sort_index().to_numpy()
        return_vector = stock_returns.sort_index().to_numpy()
        return np.dot(weight_vector, return_vector)
    except (AttributeError, KeyError, TypeError) as e:
        print(f"An error occurred while calculating portfolio returns: {e}")
    return None

In [24]:
# TODO: consider using daily instead of monthly data?

In [25]:
def mvp_oos_returns(dataframe_returns, is_period=24, cov_est="sample_cov", mu_est="mean_historical_return", penalty=0):
    """
    Calculates the out-of-sample portfolio returns with a rolling in-sample window.

    For every observation after the first `is_period` rows, the portfolio weights
    are estimated on the preceding `is_period` rows and applied to that
    observation's returns.

    Args:
        dataframe_returns (pandas.DataFrame): Historical returns data as a DataFrame.
        is_period (int, optional): Length of the in-sample period. Defaults to 24.
        cov_est (str, optional): Covariance estimation method. Defaults to "sample_cov".
        mu_est (str, optional): Mean estimation method. Defaults to "mean_historical_return".
        penalty (float, optional): Penalty value. Defaults to 0.

    Returns:
        pandas.Series: Out-of-sample portfolio returns, indexed by the out-of-sample
            dates and named via mvp_name. Periods for which the calculation failed
            are recorded as 0 and logged as a warning.

    Raises:
        KeyError: If the provided covariance or mean estimation method is not implemented.
    """
    port_name = mvp_name(cov_est=cov_est, mu_est=mu_est, penalty=penalty)

    n = dataframe_returns.shape[0]  # number of observations
    n_is = is_period  # length of in-sample period
    n_oos = n - n_is  # length of out-of-sample period

    date_oos = dataframe_returns.iloc[n_is:n].index
    port_oos_returns = []

    for i in range(n_oos):
        stock_returns_is = dataframe_returns.iloc[i:i + n_is]  # rolling estimation window
        stock_returns_oos = dataframe_returns.iloc[n_is + i]  # observation right after the window

        # If the weights cannot be calculated (e.g. due to problems with the covariance
        # matrix), record zero as return. `except Exception` keeps KeyboardInterrupt
        # working, and the warning leaves a trace of the substituted zero.
        try:
            port_weights = mvp_weights(dataframe_returns=stock_returns_is, cov_est=cov_est, mu_est=mu_est, penalty=penalty)
            port_return = calculate_returns(portfolio_weights=port_weights, stock_returns=stock_returns_oos)
        except Exception as e:
            logging.warning(f'{port_name}: return for {date_oos[i]} set to 0 because the calculation failed: {e}')
            port_return = 0
        port_oos_returns.append(port_return)

    port_oos_returns = pd.Series(data=port_oos_returns, index=date_oos, name=port_name)

    return port_oos_returns


In [26]:
def mvp_tune(dataframe_returns, is_period=24, parameter_values = {'cov_est':["sample_cov"], 'mu_est':['mean_historical_return'], 'penalty':[0]}):
    """
    Evaluate the minimum variance portfolio for every combination of parameter values.

    Parameters:
        dataframe_returns (DataFrame): Historical returns data.
        is_period (int): Length of the in-sample period passed to mvp_oos_returns.
        parameter_values (dict): Maps 'cov_est', 'mu_est' and 'penalty' to the lists
            of values to try; the full cartesian product is evaluated.

    Returns:
        DataFrame: One column of out-of-sample returns per successfully evaluated
        combination. Combinations that raise are reported via print and skipped.

    """
    # text shown next to the progress bar for the combination being evaluated
    progress_label = 'Covariance matrix: {cov_est} | Expected retuns: {mu_est} | Penalty value: {penalty}'
    grid = list(itertools.product(*parameter_values.values()))
    candidate_returns = {}

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")

        with tqdm(total = len(grid)) as pbar:
            for combination in grid:
                parameters_selected = dict(zip(parameter_values.keys(), combination))
                pbar.set_postfix_str(progress_label.format(**parameters_selected))

                try:
                    oos_returns = mvp_oos_returns(
                        dataframe_returns=dataframe_returns,
                        is_period=is_period,
                        **parameters_selected,
                    )
                except Exception as e:
                    print(f"Error occurred for hyperparameters: {parameters_selected}. Error: {str(e)}")
                else:
                    candidate_returns[oos_returns.name] = oos_returns

                pbar.update()

    return pd.DataFrame(candidate_returns)

In [27]:
# Hyperparameter grid for mvp_tune: every combination of the listed values is
# evaluated (4 covariance estimators x 1 return estimator x 3 penalties = 12 portfolios).
hyperparameter_values = {
    'cov_est': ["sample_cov", "semicovariance", "ledoit_wolf", "oracle_approximating"],
    'mu_est': ["mean_historical_return"],
    'penalty': [0, 0.25, 0.5]  # [0, 0.25, 0.5, 1, 2, 4, 8]
}  # user provides input

In [28]:
# NOTE(review): `dax_monthly_returns` is not defined in any cell visible in this notebook;
# presumably a DataFrame of monthly returns with one column per ticker built from
# `stock_data` - confirm and add the defining cell so Restart & Run All works.
mvp_candidate_returns = mvp_tune(dataframe_returns=dax_monthly_returns, is_period = 24, parameter_values=hyperparameter_values)

100%|██████████| 12/12 [02:09<00:00, 10.80s/it, Covariance matrix: oracle_approximating | Expected retuns: mean_historical_return | Penalty value: 0.5] 


In [29]:
# Out-of-sample returns, one column per tuned portfolio.
display(mvp_candidate_returns)

Unnamed: 0,mvp_sample_mean_0,mvp_sample_mean_0.25,mvp_sample_mean_0.5,mvp_semi_mean_0,mvp_semi_mean_0.25,mvp_semi_mean_0.5,mvp_lw_mean_0,mvp_lw_mean_0.25,mvp_lw_mean_0.5,mvp_oa_mean_0,mvp_oa_mean_0.25,mvp_oa_mean_0.5
2003-05-01,0.019900,0.023876,0.028242,0.017342,0.033161,0.033875,0.023602,0.027046,0.030295,0.027293,0.026116,0.029779
2003-06-01,0.016596,0.031017,0.037606,-0.000008,0.039488,0.042424,0.031598,0.034582,0.043008,0.030246,0.032887,0.041706
2003-07-01,0.019016,0.059385,0.068175,0.030809,0.060447,0.069504,0.030264,0.065494,0.072366,0.020663,0.064576,0.071539
2003-08-01,0.034575,0.029469,0.033019,0.014690,0.017713,0.023563,0.026726,0.032652,0.033972,0.025291,0.032187,0.033971
2003-09-01,-0.022327,-0.031509,-0.031408,-0.000282,-0.023405,-0.028622,-0.030075,-0.031326,-0.031157,-0.029543,-0.031303,-0.031214
...,...,...,...,...,...,...,...,...,...,...,...,...
2022-08-01,-0.042446,-0.041518,-0.042887,-0.044041,-0.039023,-0.040126,-0.039097,-0.042855,-0.042811,-0.038644,-0.042635,-0.042858
2022-09-01,-0.037660,-0.049696,-0.051441,-0.025717,-0.045181,-0.050340,-0.048406,-0.051322,-0.053279,-0.049007,-0.050807,-0.052871
2022-10-01,0.055966,0.049120,0.059318,0.045932,0.076843,0.078298,0.040012,0.058408,0.066224,0.041171,0.056573,0.064778
2022-11-01,0.080850,0.087558,0.090059,0.050627,0.081202,0.085331,0.083872,0.089345,0.091153,0.083486,0.088897,0.091112


**4. Plot results**

In [30]:
def annual_expected_returns(monthly_returns):
    """
    Annualise monthly returns: mean monthly return times 12, rounded to four decimals.

    Parameters:
        monthly_returns: Object providing a `.mean()` method (e.g. a pandas Series)
            holding monthly returns.

    Returns:
        float or None: The annualised expected return, or None (after printing a
                       message) if an error occurs during the calculation.
    """
    months_per_year = 12
    try:
        return np.round(monthly_returns.mean() * months_per_year, 4)
    except Exception as e:
        print(f"An error occurred: {e}")
    return None


In [31]:
def annual_volatility(monthly_returns):
    """
    Annualise the volatility of monthly returns.

    The standard deviation of the monthly returns is scaled by sqrt(12) and
    rounded to four decimal places.

    Parameters:
        monthly_returns: Sized object providing a `.std()` method (e.g. a pandas
            Series) holding monthly returns.

    Returns:
        float or None: The annualised volatility, or None (after printing a
                       message) if an error occurs during the calculation.

    Raises:
        ValueError: If the length of monthly_returns is less than 2.
    """
    n_observations = len(monthly_returns)
    if n_observations < 2:
        raise ValueError("At least two stock returns are required to calculate volatility.")

    try:
        return np.round(monthly_returns.std()*np.sqrt(12), 4)
    except Exception as e:
        print(f"An error occurred: {e}")
    return None


In [32]:
def risk_metrics(monthly_returns):
    """
    Calculate annualised expected return and volatility for every column.

    Args:
        monthly_returns (pandas.DataFrame): DataFrame containing the monthly returns,
            one column per portfolio/asset.

    Returns:
        pandas.DataFrame: One row per input column with the columns
                          - 'name': the input column label
                          - 'expected_returns': result of annual_expected_returns
                          - 'volatility': result of annual_volatility
    """
    labels = monthly_returns.columns

    expected_returns_pa = {}
    for label in labels:
        expected_returns_pa[label] = annual_expected_returns(monthly_returns[label])

    volatility_pa = {}
    for label in labels:
        volatility_pa[label] = annual_volatility(monthly_returns[label])

    # the dict keys become the index; expose them as a regular 'name' column
    return (
        pd.DataFrame({"expected_returns": expected_returns_pa, "volatility": volatility_pa})
        .rename_axis("name")
        .reset_index()
    )

In [33]:
# Annualised expected return and volatility of every candidate portfolio.
mvp_candidate_metrics = risk_metrics(monthly_returns=mvp_candidate_returns)

In [34]:
# Plain-text view of the metrics table (one row per candidate portfolio).
print(mvp_candidate_metrics)

                    name  expected_returns  volatility
0      mvp_sample_mean_0            0.1150      0.1485
1   mvp_sample_mean_0.25            0.1252      0.1506
2    mvp_sample_mean_0.5            0.1279      0.1570
3        mvp_semi_mean_0            0.1042      0.1594
4     mvp_semi_mean_0.25            0.1266      0.1532
5      mvp_semi_mean_0.5            0.1269      0.1605
6          mvp_lw_mean_0            0.1243      0.1433
7       mvp_lw_mean_0.25            0.1305      0.1548
8        mvp_lw_mean_0.5            0.1293      0.1620
9          mvp_oa_mean_0            0.1227      0.1436
10      mvp_oa_mean_0.25            0.1262      0.1547
11       mvp_oa_mean_0.5            0.1287      0.1610


In [35]:
# plot it

In [36]:
# Keep the candidate(s) with the lowest volatility; ties yield more than one row.
mvp_best = mvp_candidate_metrics[mvp_candidate_metrics.volatility == mvp_candidate_metrics.volatility.min()]

In [37]:
# # #portfolio with least volatility
# mvp_best = df_mvp[df_mvp.volatility == df_mvp.volatility.min()]

In [38]:
# Metrics of the individual stocks, restricted to the out-of-sample dates of the candidates.
# NOTE(review): `dax_monthly_returns` is not defined in any cell visible in this notebook - confirm its origin.
dax_metrics = risk_metrics(monthly_returns=dax_monthly_returns.loc[mvp_candidate_returns.index,])

In [39]:
# Left join: keeps every row of dax_metrics and attaches the info columns where 'name' matches 'Ticker'.
# NOTE(review): `dax_constituents_info` is not defined in any cell visible in this notebook - confirm its origin.
dax_constituents_metrics = dax_metrics.merge(dax_constituents_info, how = "left", left_on = "name", right_on = "Ticker")

In [40]:
# Preview of the first five merged rows.
print(dax_constituents_metrics.head(5))

      name  expected_returns  volatility   Ticker     Name  \
0   ADS.DE            0.1466      0.2640   ADS.DE   Adidas   
1   AIR.DE            0.2156      0.3476   AIR.DE   Airbus   
2   ALV.DE            0.1399      0.2787   ALV.DE  Allianz   
3   BAS.DE            0.1224      0.2595   BAS.DE     BASF   
4  BAYN.DE            0.1208      0.2562  BAYN.DE    Bayer   

                Sector  
0                Other  
1  Aerospace & Defence  
2   Financial Services  
3            Chemicals  
4      Pharmaceuticals  


In [41]:
# Risk-return scatter of the individual stocks (coloured by sector) with the
# lowest-volatility portfolio added as a star marker.
# NOTE(review): this cell uses `px` and `go`, but the plotly imports in the imports
# cell are commented out - re-enable them before running this cell.
fig = px.scatter(
    dax_constituents_metrics, 
    x = "volatility", 
    y = "expected_returns", 
    color = "Sector",
    color_discrete_sequence=px.colors.qualitative.Set1,
    labels={"expected_returns": "Expected returns p.a.", "volatility": "Volatility p.a."},
    hover_name = "Name",
    hover_data={"expected_returns":True, "volatility":True,"Sector":False})
fig.update_layout(
    title_text="Out-of-sample Risk-return matrix", 
    title_x=0.5, 
    font={'size': 15}, 
    hoverlabel = {"font_size": 15}, 
    width=1200, 
    height=700)
# Overlay the selected minimum variance portfolio
fig.add_trace(
    go.Scatter(
        x=mvp_best.volatility, 
        y=mvp_best.expected_returns,
        mode='markers',
        marker_symbol="star",
        name='Minimum Variance Portfolio',
        marker=dict(line=dict(color="black", width=3)),
        hovertemplate='<b>Minimum variance portfolio</b> <br><br>Volatility p.a.=%{x} <br>Expected returns p.a.=%{y}'
        ))
# Applies to all traces, i.e. the stock markers and the portfolio star alike
fig.update_traces(marker_size=30)
fig.show()

# TODO: structure the code (e.g. move the functions into modules in a separate folder)