In [2]:
import numpy as np

# Create a vector representing daily returns of a stock
daily_returns = np.array([0.02, -0.01, 0.015, 0.03, -0.005])

# Create a matrix representing returns of multiple stocks over 5 days
returns_matrix = np.array([
    [0.02, -0.01, 0.015],
    [0.03, 0.01, -0.02],
    [0.015, 0.005, -0.01],
    [0.03, -0.005, 0.02],
    [-0.01, 0.02, 0.01]
])

print("Daily Returns Vector:", daily_returns)
print("Returns Matrix:\n", returns_matrix)


Daily Returns Vector: [ 0.02  -0.01   0.015  0.03  -0.005]
Returns Matrix:
 [[ 0.02  -0.01   0.015]
 [ 0.03   0.01  -0.02 ]
 [ 0.015  0.005 -0.01 ]
 [ 0.03  -0.005  0.02 ]
 [-0.01   0.02   0.01 ]]


In [3]:
# Calculate the mean return for each stock (column-wise mean)
mean_returns = returns_matrix.mean(axis=0) # axis = 0 calculates mean along columns
print("Mean Returns for Each Stock:", mean_returns)

# Dot product: calculate portfolio returns assuming equal weights for each stock
weights = np.array([1/3, 1/3, 1/3])  # Equal weighting
# Dot product of weights and mean returns gives 1D array with each elm representing portfolio returns on a specific day
portfolio_return = np.dot(returns_matrix, weights)
print("Portfolio Returns over 5 Days:", portfolio_return)


Mean Returns for Each Stock: [0.017 0.004 0.003]
Portfolio Returns over 5 Days: [0.00833333 0.00666667 0.00333333 0.015      0.00666667]


In [4]:
import scipy.stats as stats

# Simulate daily returns for a stock using a normal distribution
# loc - mean of distribution set to 0.1%
# scale - 0.02 - 2% is typical volatility
# size - 252 days of trading
# generates synthetic data with normal distribution
daily_returns = np.random.normal(loc=0.001, scale=0.02, size=252)  # 252 trading days

# Calculate expected return and standard deviation
expected_return = np.mean(daily_returns)
volatility = np.std(daily_returns)

print("Expected Daily Return:", expected_return)
print("Volatility (Standard Deviation):", volatility)


Expected Daily Return: 0.00039963308319122196
Volatility (Standard Deviation): 0.02057809660917751


In [5]:
# Perform a t-test to check if the average return is significantly different from zero
t_stat, p_value = stats.ttest_1samp(daily_returns, 0)
print("T-statistic:", t_stat)
print("P-value:", p_value)

# Interpret result
if p_value < 0.05:
    print("Reject null hypothesis: Returns are significantly different from zero.")
else:
    print("Fail to reject null hypothesis: Returns are not significantly different from zero.")


T-statistic: 0.3076756257812521
P-value: 0.7585843154441377
Fail to reject null hypothesis: Returns are not significantly different from zero.


In [6]:
import sympy as sp

# Define a function for portfolio profit (hypothetical)
w1, w2 = sp.symbols('w1 w2')
profit = 0.1 * w1 + 0.15 * w2 - 0.05 * w1**2 - 0.1 * w2**2

# Calculate partial derivatives (gradients)
profit_w1 = sp.diff(profit, w1)
profit_w2 = sp.diff(profit, w2)

print("Partial derivative with respect to w1:", profit_w1)
print("Partial derivative with respect to w2:", profit_w2)


Partial derivative with respect to w1: 0.1 - 0.1*w1
Partial derivative with respect to w2: 0.15 - 0.2*w2


In [7]:
# Initialize weights
w1_val, w2_val = 0.5, 0.5
learning_rate = 0.1

for i in range(10):
    # Compute gradients
    grad_w1 = profit_w1.subs({w1: w1_val, w2: w2_val})
    grad_w2 = profit_w2.subs({w1: w1_val, w2: w2_val})

    # Update weights
    w1_val += learning_rate * grad_w1
    w2_val += learning_rate * grad_w2

    print(f"Iteration {i+1}: w1 = {w1_val}, w2 = {w2_val}")


Iteration 1: w1 = 0.505000000000000, w2 = 0.505000000000000
Iteration 2: w1 = 0.509950000000000, w2 = 0.509900000000000
Iteration 3: w1 = 0.514850500000000, w2 = 0.514702000000000
Iteration 4: w1 = 0.519701995000000, w2 = 0.519407960000000
Iteration 5: w1 = 0.524504975050000, w2 = 0.524019800800000
Iteration 6: w1 = 0.529259925299500, w2 = 0.528539404784000
Iteration 7: w1 = 0.533967326046505, w2 = 0.532968616688320
Iteration 8: w1 = 0.538627652786040, w2 = 0.537309244354554
Iteration 9: w1 = 0.543241376258180, w2 = 0.541563059467462
Iteration 10: w1 = 0.547808962495598, w2 = 0.545731798278113


In [8]:
import yfinance as yf
import numpy as np
import pandas as pd

# Fetch daily closing prices for multiple stocks
tickers = ["AAPL", "MSFT", "GOOGL"]
stock_data = yf.download(tickers, start="2023-01-01", end="2023-12-31")['Close']
print(stock_data.head())


[*********************100%***********************]  3 of 3 completed

Ticker                           AAPL      GOOGL        MSFT
Date                                                        
2023-01-03 00:00:00+00:00  125.070000  89.120003  239.580002
2023-01-04 00:00:00+00:00  126.360001  88.080002  229.100006
2023-01-05 00:00:00+00:00  125.019997  86.199997  222.309998
2023-01-06 00:00:00+00:00  129.619995  87.339996  224.929993
2023-01-09 00:00:00+00:00  130.149994  88.019997  227.119995





In [9]:
# Calculate daily returns for each stock
daily_returns = stock_data.pct_change().dropna()
print("Daily Returns:\n", daily_returns.head())


Daily Returns:
 Ticker                         AAPL     GOOGL      MSFT
Date                                                   
2023-01-04 00:00:00+00:00  0.010314 -0.011670 -0.043743
2023-01-05 00:00:00+00:00 -0.010605 -0.021344 -0.029638
2023-01-06 00:00:00+00:00  0.036794  0.013225  0.011785
2023-01-09 00:00:00+00:00  0.004089  0.007786  0.009736
2023-01-10 00:00:00+00:00  0.004456  0.004544  0.007617


In [10]:
# Define equal weights for each stock in the portfolio
weights = np.array([1/3, 1/3, 1/3])

# Calculate portfolio returns using dot product
portfolio_returns = daily_returns.dot(weights)
print("Portfolio Daily Returns:\n", portfolio_returns.head())


Portfolio Daily Returns:
 Date
2023-01-04 00:00:00+00:00   -0.015033
2023-01-05 00:00:00+00:00   -0.020529
2023-01-06 00:00:00+00:00    0.020601
2023-01-09 00:00:00+00:00    0.007204
2023-01-10 00:00:00+00:00    0.005539
dtype: float64


In [11]:
# Calculate correlation matrix for the daily returns of each stock
# The correlation coefficient is a measure of the linear relationship between two variables (in this case, the returns of two stocks). It ranges from -1 to 1:
# 1 means a perfect positive correlation (they move exactly in the same direction).
# -1 means a perfect negative correlation (they move in exactly opposite directions).
# 0 means no correlation (their movements are not related).
correlation_matrix = daily_returns.corr()
print("Correlation Matrix:\n", correlation_matrix)

# Calculate covariance matrix
# Covariance measures how two variables change together. Unlike correlation, it is not normalized and depends on the units of the variables:
# A positive covariance indicates that the returns of two stocks tend to move in the same direction.
# A negative covariance indicates that the returns of two stocks move in opposite directions.
# A zero covariance suggests no relationship in their movements.
covariance_matrix = daily_returns.cov()
print("Covariance Matrix:\n", covariance_matrix)


Correlation Matrix:
 Ticker      AAPL     GOOGL      MSFT
Ticker                              
AAPL    1.000000  0.528196  0.547336
GOOGL   0.528196  1.000000  0.508790
MSFT    0.547336  0.508790  1.000000
Covariance Matrix:
 Ticker      AAPL     GOOGL      MSFT
Ticker                              
AAPL    0.000158  0.000127  0.000109
GOOGL   0.000127  0.000367  0.000154
MSFT    0.000109  0.000154  0.000251


In [12]:
# Calculate statistical summaries for AAPL returns
aapl_returns = daily_returns['AAPL']

print("AAPL Return Statistics:")
print("Mean:", aapl_returns.mean())
print("Volatility (Standard Deviation):", aapl_returns.std())
print("Skewness:", aapl_returns.skew())
print("Kurtosis:", aapl_returns.kurt()) 
# Kurtosis is a statistical measure that describes the shape of a distribution's 
# tails in relation to its overall shape. Specifically, it helps to determine how much 
# of a distribution's variance is due to extreme values or outliers. Kurtosis provides insight
# into the "tailedness" of the data, which is important in finance for understanding the likelihood of extreme returns.


AAPL Return Statistics:
Mean: 0.0018124480592667167
Volatility (Standard Deviation): 0.012565588337602261
Skewness: 0.01305411665456395
Kurtosis: 1.4318522846810735


In [13]:
from scipy import stats

# Perform a t-test to determine if the average daily return of AAPL is significantly different from zero
t_stat, p_value = stats.ttest_1samp(aapl_returns, 0)
print("T-statistic:", t_stat)
print("P-value:", p_value)

if p_value < 0.05:
    print("Reject null hypothesis: Returns are significantly different from zero.")
else:
    print("Fail to reject null hypothesis: Returns are not significantly different from zero.")


T-statistic: 2.27605323381091
P-value: 0.023696019651616464
Reject null hypothesis: Returns are significantly different from zero.


In [14]:
# Define the function to calculate the Sharpe ratio
# Higher Sharpe Ratio: Indicates a better risk-adjusted return. It suggests that the investment generates higher returns for each unit of risk, which is generally desirable for investors.
# Lower Sharpe Ratio: Indicates a less favorable risk-adjusted return, meaning the investment may either generate lower returns or involve higher risk relative to its return.

# Parameters:
# weights: A vector (array) containing the weights of each asset in the portfolio. These weights indicate the proportion of the total investment allocated to each asset.
# returns: A DataFrame of historical returns for each asset.
# risk_free_rate: The risk-free rate of return, typically set to 0 if unspecified. This is the rate of return you would expect from a theoretically "risk-free" investment, like U.S. Treasury bonds.

def sharpe_ratio(weights, returns, risk_free_rate=0.0):
    portfolio_return = np.dot(weights, returns.mean()) * 252  # Annualized return
    portfolio_volatility = np.sqrt(np.dot(weights.T, np.dot(returns.cov() * 252, weights)))  # Annualized volatility
    return (portfolio_return - risk_free_rate) / portfolio_volatility


In [15]:
from scipy.optimize import minimize
# The minimize function is a general-purpose optimization function. We use it to find the weights that maximize the Sharpe ratio by minimizing the negative Sharpe ratio (more on this below).

# Constraints: weights must sum to 1
constraints = ({'type': 'eq', 'fun': lambda x: np.sum(x) - 1})

# Bounds: each weight must be between 0 and 1
# creates a tuple with (0,1) for the number of assets we have. 
# If you have a portfolio with three assets (tickers = ["AAPL", "MSFT", "GOOGL"]), then:
# ((0, 1), (0, 1), (0, 1))
bounds = tuple((0, 1) for _ in range(len(tickers)))

# Initial guess (equal weights)
# For example, if tickers has 3 assets, the result would be [0.333, 0.333, 0.333], representing an equal allocation to each asset.
# The purpose of the initial guesses is that in optimization we need a starting point for the algthm to begin its search for an optimal solution
initial_guess = np.array([1/len(tickers)] * len(tickers))

# Optimization to maximize the Sharpe ratio
"""
minimize:

minimize is a function from the scipy.optimize library that is used for optimization.
It minimizes a given objective function by adjusting the input variables (in this case, asset weights) within specified constraints and bounds.
lambda w: -sharpe_ratio(w, daily_returns):

This is the objective function that minimize will try to minimize.
lambda w: -sharpe_ratio(w, daily_returns) is a lambda function (an anonymous function) that takes w (the weights of assets) as input.
sharpe_ratio(w, daily_returns) calculates the Sharpe ratio for a portfolio with weights w, using daily_returns as the data for asset returns.
-sharpe_ratio(w, daily_returns): The negative sign is used because minimize can only minimize functions, but we actually want to maximize the Sharpe ratio. By minimizing the negative Sharpe ratio, we effectively maximize the Sharpe ratio itself.
initial_guess:

This is the starting point for the optimization algorithm.
initial_guess is an array of initial weights (typically set to equal weights) that serves as the initial allocation for the portfolio.
method='SLSQP':

This specifies the optimization algorithm to use. 'SLSQP' stands for Sequential Least Squares Programming.
SLSQP is well-suited for problems with constraints and bounds, making it a good choice for portfolio optimization where weights must sum to 1 and lie between 0 and 1.
bounds=bounds:

bounds define the permissible range for each weight. In this case, bounds are set so each weight lies between 0 and 1.
This prevents short-selling (weights below 0) and over-leveraging (weights above 1), enforcing that each asset’s allocation is between 0% and 100% of the total portfolio.
constraints=constraints:

constraints ensure that the solution meets specific requirements.
In this case, constraints enforces that the sum of all weights equals 1, ensuring that the portfolio is fully invested (100% of the capital is allocated across the assets).
"""
result = minimize(lambda w: -sharpe_ratio(w, daily_returns), initial_guess, 
                  method='SLSQP', bounds=bounds, constraints=constraints)

optimal_weights = result.x
print("Optimal Weights:", optimal_weights)
print("Maximized Sharpe Ratio:", -result.fun)


Optimal Weights: [0.63909511 0.08875517 0.27214972]
Maximized Sharpe Ratio: 2.448694541717261
