# Portfolio Optimization
---

## Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import pandas_datareader as web
import yfinance as yf
import plotly.graph_objects as go
import plotly.io as pio
import datetime as dt
import ssl
import certifi
pio.templates.default = "plotly_white"

## Introduction

The main goal of this project is to perform a portfolio optimization. This means that we want to maximize the mean return of a portfolio $R$ of $p$ different assets under a given risk $\sigma ^2 _{max}$. 

In short we want to solve :
$$
\begin{align}
\max_{w} \quad & \mathbb{E}[w^T R] \\
\text{st} \quad & \mathbb{V}[w^T \phi] < \sigma_{max}^2
\end{align}
$$

Where $R = (R^1, \dots, R^p)^T$ is a vector whose values are the rate of return of the different assets which make up the market. 

For an asset $R^i$, we have $$\begin{align} R^i_t = \frac {P^i_t - P^i_{t-1}}{P^i_{t-1}} \end{align}$$ with $P^i_t$ that denotes the price of the $i$-th asset of our portfolio at time $t$.

Few computations lead to :

$$
\begin{align}
\mathbb{E}[w^T R] = w^T \mathbb E \\
\mathbb{V}[w^T \phi] = w^T \Sigma _R w
\end{align}
$$

with $\Sigma _R$ the covariance matrix of the different return rates.

## Step 1 : Getting the data

For this project, I choosed to use the data available in ``yfinance``. The first step consists in getting the prices of different assets through one year.

In [2]:
def get_data(assets, start_date, end_date):
    """
    Return a dictionnary data which contains the price of different assets from start_date to end_date.
    """
    data = {}
    for asset in assets:
        success = False
        attempts = 0
        while not success and attempts < 5:  # Try 5 times
            try:
                data[asset] = yf.download(asset, start=start_date, end=end_date)['Adj Close']
                success = True
            except Exception as e:
                attempts += 1
                print(f"Failed to get ticker '{asset}' reason: {e}. Attempt {attempts}/5")
    return data

In [3]:
# Assets of the portfolio
assets = ['AAPL', 'AMZN', 'GOOG', 'META', 'NFLX', 'MSFT', 'TSLA', 'NVDA', 'PYPL', 'ADBE']

# Number of days
N = 365

start_date = dt.datetime.now() - dt.timedelta(days=N)
end_date = dt.datetime.now()

data = get_data(assets, start_date, end_date)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Now, we can plot the data of the different prices.

In [4]:
fig = go.Figure()
for a in assets:
    fig.add_trace(go.Scatter(x=data[a].index, y=data[a], name=a))
fig.update_layout(title='Evolution of the price of the assets', yaxis_title='Price in $')
fig.show()

## Step 2 : computation of the returns

In this step, we'll compute the different values of the return rates $R^i_t$ and plot them. For data frames, the method ``.pct_change()`` makes this task really simple.

In [5]:
def compute_return(data) :
    data = pd.DataFrame(data)
    returns = data.pct_change()
    return returns

In [6]:
returns = compute_return(data)

fig = go.Figure()
for a in assets:
    fig.add_trace(go.Scatter(x=returns[a].index, y=returns[a], name=a))
fig.update_layout(title='Evolution of the price of the return rates', yaxis_title='Price in $')
fig.show()

On this graph, one can observe occurences of $R^i$. 

What we do now is to estimate the mean, the standard deviation of each asset and the correlation between each asset.

In [7]:
returns_mean = returns.mean()
returns_std = returns.std() 

print("moyenne des différents rendements :\n",returns_mean, '\n')
print("écart type des différents rendements :\n",returns_std, '\n')

moyenne des différents rendements :
 AAPL    0.001176
AMZN    0.001512
GOOG    0.000945
META    0.002692
NFLX    0.002583
MSFT    0.001355
TSLA    0.000213
NVDA    0.004660
PYPL    0.001149
ADBE    0.000120
dtype: float64 

écart type des différents rendements :
 AAPL    0.014087
AMZN    0.018037
GOOG    0.017717
META    0.023090
NFLX    0.020184
MSFT    0.012471
TSLA    0.034276
NVDA    0.032695
PYPL    0.021911
ADBE    0.021744
dtype: float64 



In [8]:
corr_matrix = returns.corr()

fig = go.Figure(data=go.Heatmap(z=corr_matrix, x=assets, y=assets, colorscale='Darkmint'))
fig.update_layout(
    margin=dict(l=350, r=350, t=50, b=50),
)
fig.update_layout(title='Matrice de corrélation', yaxis_title='Actifs', xaxis_title='Actifs')
fig.show()

Then, we compute the values of the portfolio given the values $w_i$.

In [16]:
def compute_portfolio_value(weights, returns) :
    return np.dot(returns.mean(), weights) * len(returns)

def compute_portfolio_risk(weights, returns) :
    return np.sqrt(np.dot(weights.T, np.dot(returns.cov(), weights))) * np.sqrt(len(returns))

w_test = 1/len(assets) * np.ones(len(assets))
portfolio_return = compute_portfolio_value(w_test, returns)
portfolio_risk = compute_portfolio_risk(w_test, returns)

initial_portfolio_value = np.array([data[a][0] for a in assets]).sum()

print('value of the portfolio', np.round((1+portfolio_return) * (initial_portfolio_value)),'$')
print('portfolio risk', np.round(portfolio_risk, 4))

value of the portfolio 3320.0 $
portfolio risk 0.2151



Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`



## Step 4 : Optimization of the portfolio

With all those informations, we can now begin to optimize our portfolio to solve the problem introduced previously.

In [17]:
def generate_random_weights(n) :
    weights = np.random.dirichlet(np.ones(n), size=1)
    return weights[0]

def generate_random_portfolio(returns) :
    weights = generate_random_weights(len(returns.columns))

    portfolio_return = compute_portfolio_value(weights, returns)
    portfolio_risk = compute_portfolio_risk(weights, returns)   

    return weights, portfolio_return, portfolio_risk

def generate_random_portfolios(returns, n) :
    portfolios = []
    for i in range (n) :
        weights, portfolio_return, portfolio_risk = generate_random_portfolio(returns)
        portfolios.append([weights, portfolio_return, portfolio_risk])
        portfolios_df = pd.DataFrame(portfolios, columns=['weights', 'returns', 'risk'])
    return portfolios_df

In [18]:
N = 10000
portfolios_df = generate_random_portfolios(returns, N)

### Visualisation of results

In [20]:
fig = go.Figure()
fig.update_layout(
    margin=dict(l=200, r=200, t=50, b=50 )
)
fig.add_trace(go.Scatter(x=portfolios_df['risk'], y=portfolios_df['returns'], mode='markers', marker=dict(color=portfolios_df['returns']/portfolios_df['risk'], showscale=True, colorscale='RdBu', size=7), name='Random portfolios'))

best_sharpe_ratio_index = (portfolios_df['returns']/portfolios_df['risk']).idxmax()
fig.add_trace(go.Scatter(x=[portfolios_df['risk'][best_sharpe_ratio_index]], y=[portfolios_df['returns'][best_sharpe_ratio_index]], mode='markers', marker=dict(color='yellow', size=15, symbol='star'), name='Best Sharpe Ratio'))

lowest_risk_index = portfolios_df['risk'].idxmin()
fig.add_trace(go.Scatter(x=[portfolios_df['risk'][lowest_risk_index]], y=[portfolios_df['returns'][lowest_risk_index]], mode='markers', marker=dict(color='yellow', size=15, symbol='x'), name='Minimal Risk'))

fig.update_layout(title='efficient frontier', yaxis_title='yields', xaxis_title='risk', legend=dict(yanchor='top', y=1, xanchor='left', x=0.05))
fig.show()
