In [1]:
import random
import warnings

import numpy as np
import pandas as pd
import yfinance as yf
from scipy.optimize import minimize

In [2]:
# Load the per-stock input table from dataset.csv.
# Later cells read its 'stock' and 'Date' columns; the saved output also shows
# 'adj_close', 'bbr' and 'pnlog' columns.
dataset = pd.read_csv('dataset.csv')

In [3]:
# List the distinct ticker symbols present in the 'stock' column.
dataset['stock'].unique()

array(['AAPL', 'ABBV', 'AMZN', 'DB', 'DIS', 'FB', 'GOOG', 'HAL', 'HSBC',
       'JPM', 'KO', 'MCD', 'MSFT', 'PFE', 'XOM'], dtype=object)

In [48]:
# Display the loaded table.
# NOTE(review): this cell carries execution count 48 and its saved output shows 'Date'
# as the index, i.e. it was last run after the later cell that calls set_index('Date').
# On a fresh top-to-bottom run 'Date' is still a regular column at this point.
dataset

Unnamed: 0_level_0,stock,adj_close,bbr,pnlog
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-02,AAPL,99.945885,58.343195,0.010336
2015-01-05,AAPL,97.130241,65.384615,0.014229
2015-01-06,AAPL,97.139420,73.206751,0.017950
2015-01-07,AAPL,98.501518,67.732123,0.000000
2015-01-08,AAPL,102.286186,62.257496,0.005537
...,...,...,...,...
2020-06-03,XOM,49.240002,,
2020-06-04,XOM,49.099998,37.168142,-0.011122
2020-06-05,XOM,53.080002,39.331027,-0.017159
2020-06-08,XOM,54.740002,100.000000,0.000000


In [4]:
def get_data(tickers, start_date, end_date):
    """Download daily prices per ticker and return their simple returns in percent.

    Parameters
    ----------
    tickers : iterable of str
        Symbols, each passed individually to ``yf.download``.
    start_date, end_date : str
        Forwarded unchanged as the ``start`` / ``end`` arguments of ``yf.download``.

    Returns
    -------
    pandas.DataFrame
        Long-format frame that keeps the index of the downloaded data and has the
        columns ``ticker`` and ``return``, where ``return`` is
        ``(adj_close_t / adj_close_{t-1} - 1) * 100``. Rows with a NaN return
        (such as the first observation of each ticker) are dropped.

    Notes
    -----
    A ticker whose download comes back empty is skipped with a ``RuntimeWarning``
    instead of silently vanishing from the result. If no ticker yields data, an
    empty frame with the two expected columns is returned.
    """
    # initialise list to store ticker data frames
    frames = []

    for ticker in tickers:
        # Download the stock price data with yfinance
        data = yf.download(ticker, start=start_date, end=end_date, interval='1d')

        # A failed download yields an empty frame; make the omission visible.
        if data.empty:
            warnings.warn(
                f"No price data returned for {ticker!r}; skipping it.",
                RuntimeWarning,
                stacklevel=2,
            )
            continue

        adj_close = data["Adj Close"]

        # Keep only what downstream cells use: the symbol and its percentage return
        returns = pd.DataFrame(index=data.index)
        returns["ticker"] = ticker
        returns["return"] = ((adj_close / adj_close.shift(1)) - 1) * 100
        frames.append(returns)

    # pd.concat raises on an empty list, so return an empty, correctly-shaped frame
    if not frames:
        return pd.DataFrame(columns=["ticker", "return"])

    # Concatenate the data frames vertically and drop rows without a return
    result = pd.concat(frames)
    result = result.dropna()

    return result

In [5]:
# Universe of symbols whose daily returns are downloaded.
# NOTE(review): the saved output of this cell reports a failed download for 'FB'.
ticker = [
    'AAPL', 'ABBV', 'AMZN', 'DB', 'DIS',
    'FB', 'GOOG', 'HAL', 'HSBC', 'JPM',
    'KO', 'MCD', 'MSFT', 'PFE', 'XOM',
]

# Long-format percentage returns for every symbol over the sample window
stocks = get_data(ticker, start_date='2015-01-01', end_date='2020-06-01')
stocks

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- FB: No timezone found, symbol may be delisted
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%*********

Unnamed: 0_level_0,ticker,return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-05,AAPL,-2.817138
2015-01-06,AAPL,0.009416
2015-01-07,AAPL,1.402210
2015-01-08,AAPL,3.842208
2015-01-09,AAPL,0.107264
...,...,...
2020-05-22,XOM,0.089747
2020-05-26,XOM,2.937226
2020-05-27,XOM,0.718814
2020-05-28,XOM,-2.595172


In [6]:
# Turn the Date index into a regular column so it can serve as the pivot key
stocks = stocks.reset_index()

# Reshape long -> wide: one row per Date, one return column per ticker.
# rename_axis(None, axis=1) removes the 'ticker' label left on the column axis,
# and the final reset_index() puts Date back as an ordinary column.
inverted_df = (
    stocks
    .pivot(index='Date', columns='ticker', values='return')
    .rename_axis(None, axis=1)
    .reset_index()
)
inverted_df

Unnamed: 0,Date,AAPL,ABBV,AMZN,DB,DIS,GOOG,HAL,HSBC,JPM,KO,MCD,MSFT,PFE,XOM
0,2015-01-05,-2.817138,-1.881902,-2.051729,-5.415165,-1.461330,-2.084562,-1.975252,-2.294461,-3.104502,0.000000,-1.104450,-0.919588,-0.542612,-2.736207
1,2015-01-06,0.009416,-0.494983,-2.283333,-1.596111,-0.530419,-2.317709,-0.981616,-1.456851,-2.592912,0.759377,0.184360,-1.467726,0.834418,-0.531587
2,2015-01-07,1.402210,4.041660,1.059974,1.057833,1.022978,-0.171323,2.687191,0.772298,0.152622,1.248189,1.742411,1.270517,1.368544,1.013242
3,2015-01-08,3.842208,1.045858,0.683602,0.209339,1.034136,0.315304,2.159561,0.328465,2.234624,1.209622,0.372265,2.941820,2.040827,1.664457
4,2015-01-09,0.107264,-2.735444,-1.174861,-1.671307,0.490457,-1.295055,-1.019665,-0.523797,-1.738701,-1.103200,-1.218719,-0.840491,0.461530,-0.140940
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1355,2020-05-22,0.643816,-0.989040,-0.402982,-0.543476,0.161245,0.543205,-3.255436,-5.860658,-0.776325,-0.309937,-0.361986,0.043620,0.644144,0.089747
1356,2020-05-26,-0.677351,-1.509222,-0.616361,12.158464,2.482630,0.467937,2.415891,3.656931,7.097365,2.354005,0.233167,-1.057138,-0.026683,2.937226
1357,2020-05-27,0.435703,-0.804769,-0.473604,5.724720,0.479539,0.057868,2.695866,1.511978,5.792103,1.388570,1.558084,0.132174,-0.213385,0.718814
1358,2020-05-28,0.044029,0.055564,-0.385415,-1.958519,-3.933184,-0.078283,-0.656293,-2.399664,-1.489591,0.770382,0.538043,-0.225529,2.058264,-2.595172


Unnamed: 0,Date,pnlog,AAPL,ABBV,AMZN,DB,DIS,GOOG,HAL,HSBC,JPM,KO,MCD,MSFT,PFE,XOM
0,2015-01-05,0.014229,-2.817138,-1.881902,-2.051729,-5.415165,-1.461330,-2.084562,-1.975252,-2.294461,-3.104502,0.000000,-1.104450,-0.919588,-0.542612,-2.736207
1,2015-01-05,0.000000,-2.817138,-1.881902,-2.051729,-5.415165,-1.461330,-2.084562,-1.975252,-2.294461,-3.104502,0.000000,-1.104450,-0.919588,-0.542612,-2.736207
2,2015-01-05,0.013374,-2.817138,-1.881902,-2.051729,-5.415165,-1.461330,-2.084562,-1.975252,-2.294461,-3.104502,0.000000,-1.104450,-0.919588,-0.542612,-2.736207
3,2015-01-05,0.008442,-2.817138,-1.881902,-2.051729,-5.415165,-1.461330,-2.084562,-1.975252,-2.294461,-3.104502,0.000000,-1.104450,-0.919588,-0.542612,-2.736207
4,2015-01-05,0.010258,-2.817138,-1.881902,-2.051729,-5.415165,-1.461330,-2.084562,-1.975252,-2.294461,-3.104502,0.000000,-1.104450,-0.919588,-0.542612,-2.736207
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20395,2020-05-29,-0.006908,-0.097425,2.932348,1.718795,-1.292608,0.471095,0.860428,-2.972733,-2.289113,-2.553582,-0.870682,-0.618718,1.019845,0.026220,0.954721
20396,2020-05-29,-0.079283,-0.097425,2.932348,1.718795,-1.292608,0.471095,0.860428,-2.972733,-2.289113,-2.553582,-0.870682,-0.618718,1.019845,0.026220,0.954721
20397,2020-05-29,-0.008783,-0.097425,2.932348,1.718795,-1.292608,0.471095,0.860428,-2.972733,-2.289113,-2.553582,-0.870682,-0.618718,1.019845,0.026220,0.954721
20398,2020-05-29,-0.013148,-0.097425,2.932348,1.718795,-1.292608,0.471095,0.860428,-2.972733,-2.289113,-2.553582,-0.870682,-0.618718,1.019845,0.026220,0.954721


In [7]:
# dataset = dataset[~dataset['stock'].isin(['JPM', 'KO', 'MCD', 'MSFT', 'PFE', 'XOM'])]

In [8]:
# Download settings for the benchmark series: bar size and date window
interval = '1d'
start = '2015-01-01'
end = '2020-06-09'

In [9]:
# Download the 'SPY' price history used as the S&P 500 series in this notebook.
# The bar size comes from the `interval` setting defined alongside `start`/`end`
# instead of repeating the '1d' literal, so the configuration cell is the single
# place to change it.
sp500 = yf.download('SPY', start=start, end=end, interval=interval)
sp500

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-02,206.380005,206.880005,204.179993,205.429993,176.134644,121465900
2015-01-05,204.169998,204.369995,201.350006,201.720001,172.953720,169632600
2015-01-06,202.089996,202.720001,198.860001,199.820007,171.324646,209151400
2015-01-07,201.419998,202.720001,200.880005,202.309998,173.459534,125346700
2015-01-08,204.009995,206.160004,203.990005,205.899994,176.537613,147217800
...,...,...,...,...,...,...
2020-06-02,306.549988,308.130005,305.100006,308.079987,293.203827,74267200
2020-06-03,310.239990,313.220001,309.940002,312.179993,297.105835,92567600
2020-06-04,311.109985,313.000000,309.079987,311.359985,296.325500,75794400
2020-06-05,317.230011,321.269989,317.160004,319.339996,303.920166,150524700


In [10]:
# Give the adjusted-close column an identifier-friendly name
sp500 = sp500.rename({"Adj Close": "adjclose"}, axis="columns")
sp500

Unnamed: 0_level_0,Open,High,Low,Close,adjclose,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-01-02,206.380005,206.880005,204.179993,205.429993,176.134644,121465900
2015-01-05,204.169998,204.369995,201.350006,201.720001,172.953720,169632600
2015-01-06,202.089996,202.720001,198.860001,199.820007,171.324646,209151400
2015-01-07,201.419998,202.720001,200.880005,202.309998,173.459534,125346700
2015-01-08,204.009995,206.160004,203.990005,205.899994,176.537613,147217800
...,...,...,...,...,...,...
2020-06-02,306.549988,308.130005,305.100006,308.079987,293.203827,74267200
2020-06-03,310.239990,313.220001,309.940002,312.179993,297.105835,92567600
2020-06-04,311.109985,313.000000,309.079987,311.359985,296.325500,75794400
2020-06-05,317.230011,321.269989,317.160004,319.339996,303.920166,150524700


In [11]:
# Keep only the adjusted close. The explicit .copy() makes `sp500` an independent
# frame, so adding a column to it later does not trigger pandas'
# SettingWithCopyWarning (visible in the saved output of the return calculation).
sp500 = sp500[['adjclose']].copy()


In [12]:
# Inspect the frame after reducing it to the single 'adjclose' column.
sp500

Unnamed: 0_level_0,adjclose
Date,Unnamed: 1_level_1
2015-01-02,176.134644
2015-01-05,172.953720
2015-01-06,171.324646
2015-01-07,173.459534
2015-01-08,176.537613
...,...
2020-06-02,293.203827
2020-06-03,297.105835
2020-06-04,296.325500
2020-06-05,303.920166


In [13]:
# Daily simple return in percent: (adjclose_t / adjclose_{t-1} - 1) * 100.
# The first row has no previous close and therefore stays NaN.
sp500['SP500_return'] = sp500['adjclose'].div(sp500['adjclose'].shift(1)).sub(1).mul(100)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sp500['SP500_return'] = (sp500['adjclose'] / sp500['adjclose'].shift(1) - 1) * 100


In [14]:
# Inspect the frame after adding the 'SP500_return' column.
sp500

Unnamed: 0_level_0,adjclose,SP500_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-02,176.134644,
2015-01-05,172.953720,-1.805961
2015-01-06,171.324646,-0.941913
2015-01-07,173.459534,1.246107
2015-01-08,176.537613,1.774523
...,...,...
2020-06-02,293.203827,0.828005
2020-06-03,297.105835,1.330818
2020-06-04,296.325500,-0.262645
2020-06-05,303.920166,2.562947


In [15]:
# Parse the 'Date' column of `dataset` as datetimes and promote it to the index.
# The guard keeps the cell re-runnable: once 'Date' is already the index, the
# column no longer exists and an unconditional conversion would raise KeyError.
if 'Date' in dataset.columns:
    dataset = dataset.assign(Date=pd.to_datetime(dataset['Date'])).set_index('Date')

# Inner-join the index series with the per-stock rows on their shared Date index
# (use how='left' / 'outer' for the other join types).
# NOTE(review): `df` is reassigned by the later Fama-French merge cell, so this join
# result does not appear to be used further in the visible notebook.
df = sp500.join(dataset, how='inner')


In [16]:
# Read the Fama-French daily factor file into a DataFrame.
# skiprows=4 ignores the first four lines of the file and skipfooter=1 drops the last
# one (presumably descriptive header text and a trailing note — verify against the CSV).
# skipfooter is only supported by the python parser engine, hence engine='python'.
# index_col=0 uses the first column as the index; a later cell parses it as %Y%m%d dates.
ff = pd.read_csv('F-F_Research_Data_Factors_daily.CSV', skiprows= 4, skipfooter=1, engine='python', index_col=0)



In [17]:
# Replace the raw YYYYMMDD index with a proper datetime index named 'Date'
ff = ff.set_index(pd.to_datetime(ff.index, format='%Y%m%d').rename('Date'))


In [18]:
# Inspect the factor table after the index has been converted to dates.
ff

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1926-07-01,0.10,-0.24,-0.28,0.009
1926-07-02,0.45,-0.32,-0.08,0.009
1926-07-06,0.17,0.27,-0.35,0.009
1926-07-07,0.09,-0.59,0.03,0.009
1926-07-08,0.21,-0.36,0.15,0.009
...,...,...,...,...
2021-01-25,0.28,-0.07,-0.45,0.000
2021-01-26,-0.32,0.00,0.03,0.000
2021-01-27,-2.53,1.30,1.47,0.000
2021-01-28,0.92,-1.86,-0.79,0.000


In [19]:
# df = ff.join(inverted_df, how='inner') 

In [20]:
# Attach the factor columns to the wide return table by Date.
# how='right' keeps every row of inverted_df, whether or not ff has that date.
df = ff.merge(inverted_df, on='Date', how='right')

In [21]:
# Inspect the merged factor / per-ticker return table.
df

Unnamed: 0,Date,Mkt-RF,SMB,HML,RF,AAPL,ABBV,AMZN,DB,DIS,GOOG,HAL,HSBC,JPM,KO,MCD,MSFT,PFE,XOM
0,2015-01-05,-1.84,0.34,-0.68,0.0,-2.817138,-1.881902,-2.051729,-5.415165,-1.461330,-2.084562,-1.975252,-2.294461,-3.104502,0.000000,-1.104450,-0.919588,-0.542612,-2.736207
1,2015-01-06,-1.04,-0.78,-0.30,0.0,0.009416,-0.494983,-2.283333,-1.596111,-0.530419,-2.317709,-0.981616,-1.456851,-2.592912,0.759377,0.184360,-1.467726,0.834418,-0.531587
2,2015-01-07,1.19,0.18,-0.64,0.0,1.402210,4.041660,1.059974,1.057833,1.022978,-0.171323,2.687191,0.772298,0.152622,1.248189,1.742411,1.270517,1.368544,1.013242
3,2015-01-08,1.81,-0.12,-0.28,0.0,3.842208,1.045858,0.683602,0.209339,1.034136,0.315304,2.159561,0.328465,2.234624,1.209622,0.372265,2.941820,2.040827,1.664457
4,2015-01-09,-0.85,0.01,-0.48,0.0,0.107264,-2.735444,-1.174861,-1.671307,0.490457,-1.295055,-1.019665,-0.523797,-1.738701,-1.103200,-1.218719,-0.840491,0.461530,-0.140940
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1355,2020-05-22,0.27,0.47,-0.87,0.0,0.643816,-0.989040,-0.402982,-0.543476,0.161245,0.543205,-3.255436,-5.860658,-0.776325,-0.309937,-0.361986,0.043620,0.644144,0.089747
1356,2020-05-26,1.23,0.04,4.59,0.0,-0.677351,-1.509222,-0.616361,12.158464,2.482630,0.467937,2.415891,3.656931,7.097365,2.354005,0.233167,-1.057138,-0.026683,2.937226
1357,2020-05-27,1.54,0.61,3.62,0.0,0.435703,-0.804769,-0.473604,5.724720,0.479539,0.057868,2.695866,1.511978,5.792103,1.388570,1.558084,0.132174,-0.213385,0.718814
1358,2020-05-28,-0.41,-1.47,-2.41,0.0,0.044029,0.055564,-0.385415,-1.958519,-3.933184,-0.078283,-0.656293,-2.399664,-1.489591,0.770382,0.538043,-0.225529,2.058264,-2.595172


In [22]:
# df = df.sort_values(by=['stock']).sort_index()


In [23]:
# stock=list(set(df['stock']))
# data = []  # list to hold dataframes
# for i in range(0,len(stock)):
#     stock_data=df[df['stock']==stock[i]]
#     data.append(stock_data)


In [24]:
# data=pd.concat(data)

In [25]:
# data

In [26]:
# data['stock_return'] = (data['adjclose'] / data['adjclose'].shift(1)-1) * 100

In [27]:
# data['y'] = data['stock_return'] - (data['RF'] / 100)


In [28]:
# data

In [29]:
# data.dropna(inplace=True)

In [30]:
# Symbols used from here on; unlike the download list, this one omits 'FB'
ticker = [
    'AAPL', 'ABBV', 'AMZN', 'DB', 'DIS', 'GOOG', 'HAL',
    'HSBC', 'JPM', 'KO', 'MCD', 'MSFT', 'PFE', 'XOM',
]

In [31]:
# Per-symbol regression target: the symbol's return minus the 'RF' column
for symbol in ticker:
    df[symbol + '_target'] = df[symbol].sub(df['RF'])

In [32]:
# Inspect the table after adding one '<ticker>_target' column per symbol.
df

Unnamed: 0,Date,Mkt-RF,SMB,HML,RF,AAPL,ABBV,AMZN,DB,DIS,...,DIS_target,GOOG_target,HAL_target,HSBC_target,JPM_target,KO_target,MCD_target,MSFT_target,PFE_target,XOM_target
0,2015-01-05,-1.84,0.34,-0.68,0.0,-2.817138,-1.881902,-2.051729,-5.415165,-1.461330,...,-1.461330,-2.084562,-1.975252,-2.294461,-3.104502,0.000000,-1.104450,-0.919588,-0.542612,-2.736207
1,2015-01-06,-1.04,-0.78,-0.30,0.0,0.009416,-0.494983,-2.283333,-1.596111,-0.530419,...,-0.530419,-2.317709,-0.981616,-1.456851,-2.592912,0.759377,0.184360,-1.467726,0.834418,-0.531587
2,2015-01-07,1.19,0.18,-0.64,0.0,1.402210,4.041660,1.059974,1.057833,1.022978,...,1.022978,-0.171323,2.687191,0.772298,0.152622,1.248189,1.742411,1.270517,1.368544,1.013242
3,2015-01-08,1.81,-0.12,-0.28,0.0,3.842208,1.045858,0.683602,0.209339,1.034136,...,1.034136,0.315304,2.159561,0.328465,2.234624,1.209622,0.372265,2.941820,2.040827,1.664457
4,2015-01-09,-0.85,0.01,-0.48,0.0,0.107264,-2.735444,-1.174861,-1.671307,0.490457,...,0.490457,-1.295055,-1.019665,-0.523797,-1.738701,-1.103200,-1.218719,-0.840491,0.461530,-0.140940
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1355,2020-05-22,0.27,0.47,-0.87,0.0,0.643816,-0.989040,-0.402982,-0.543476,0.161245,...,0.161245,0.543205,-3.255436,-5.860658,-0.776325,-0.309937,-0.361986,0.043620,0.644144,0.089747
1356,2020-05-26,1.23,0.04,4.59,0.0,-0.677351,-1.509222,-0.616361,12.158464,2.482630,...,2.482630,0.467937,2.415891,3.656931,7.097365,2.354005,0.233167,-1.057138,-0.026683,2.937226
1357,2020-05-27,1.54,0.61,3.62,0.0,0.435703,-0.804769,-0.473604,5.724720,0.479539,...,0.479539,0.057868,2.695866,1.511978,5.792103,1.388570,1.558084,0.132174,-0.213385,0.718814
1358,2020-05-28,-0.41,-1.47,-2.41,0.0,0.044029,0.055564,-0.385415,-1.958519,-3.933184,...,-3.933184,-0.078283,-0.656293,-2.399664,-1.489591,0.770382,0.538043,-0.225529,2.058264,-2.595172


In [33]:
# Names of the per-symbol target columns created earlier
columns_subset = [f'{symbol}_target' for symbol in ticker]

# Row-wise mean of those columns, i.e. an equally weighted average per date
df['portfolio_target'] = df[columns_subset].mean(axis=1)

In [34]:
# Inspect the table after adding the 'portfolio_target' column.
df

Unnamed: 0,Date,Mkt-RF,SMB,HML,RF,AAPL,ABBV,AMZN,DB,DIS,...,GOOG_target,HAL_target,HSBC_target,JPM_target,KO_target,MCD_target,MSFT_target,PFE_target,XOM_target,portfolio_target
0,2015-01-05,-1.84,0.34,-0.68,0.0,-2.817138,-1.881902,-2.051729,-5.415165,-1.461330,...,-2.084562,-1.975252,-2.294461,-3.104502,0.000000,-1.104450,-0.919588,-0.542612,-2.736207,-2.027778
1,2015-01-06,-1.04,-0.78,-0.30,0.0,0.009416,-0.494983,-2.283333,-1.596111,-0.530419,...,-2.317709,-0.981616,-1.456851,-2.592912,0.759377,0.184360,-1.467726,0.834418,-0.531587,-0.890405
2,2015-01-07,1.19,0.18,-0.64,0.0,1.402210,4.041660,1.059974,1.057833,1.022978,...,-0.171323,2.687191,0.772298,0.152622,1.248189,1.742411,1.270517,1.368544,1.013242,1.333453
3,2015-01-08,1.81,-0.12,-0.28,0.0,3.842208,1.045858,0.683602,0.209339,1.034136,...,0.315304,2.159561,0.328465,2.234624,1.209622,0.372265,2.941820,2.040827,1.664457,1.434435
4,2015-01-09,-0.85,0.01,-0.48,0.0,0.107264,-2.735444,-1.174861,-1.671307,0.490457,...,-1.295055,-1.019665,-0.523797,-1.738701,-1.103200,-1.218719,-0.840491,0.461530,-0.140940,-0.885923
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1355,2020-05-22,0.27,0.47,-0.87,0.0,0.643816,-0.989040,-0.402982,-0.543476,0.161245,...,0.543205,-3.255436,-5.860658,-0.776325,-0.309937,-0.361986,0.043620,0.644144,0.089747,-0.741004
1356,2020-05-26,1.23,0.04,4.59,0.0,-0.677351,-1.509222,-0.616361,12.158464,2.482630,...,0.467937,2.415891,3.656931,7.097365,2.354005,0.233167,-1.057138,-0.026683,2.937226,2.136919
1357,2020-05-27,1.54,0.61,3.62,0.0,0.435703,-0.804769,-0.473604,5.724720,0.479539,...,0.057868,2.695866,1.511978,5.792103,1.388570,1.558084,0.132174,-0.213385,0.718814,1.357404
1358,2020-05-28,-0.41,-1.47,-2.41,0.0,0.044029,0.055564,-0.385415,-1.958519,-3.933184,...,-0.078283,-0.656293,-2.399664,-1.489591,0.770382,0.538043,-0.225529,2.058264,-2.595172,-0.732526


In [36]:
from sklearn.linear_model import LinearRegression

In [37]:

# Regressors: the three factor columns from the Fama-French table
X = df[['Mkt-RF', 'SMB', 'HML']]

# Response: the row-wise mean of the per-symbol target columns
y = df['portfolio_target']

# Ordinary least squares fit (LinearRegression.fit returns the fitted estimator)
model = LinearRegression().fit(X, y)

# In-sample fitted values, stored next to the inputs as column 'FFF'
predicted_values = model.predict(X)
df['FFF'] = predicted_values

df

Unnamed: 0,Date,Mkt-RF,SMB,HML,RF,AAPL,ABBV,AMZN,DB,DIS,...,HAL_target,HSBC_target,JPM_target,KO_target,MCD_target,MSFT_target,PFE_target,XOM_target,portfolio_target,FFF
0,2015-01-05,-1.84,0.34,-0.68,0.0,-2.817138,-1.881902,-2.051729,-5.415165,-1.461330,...,-1.975252,-2.294461,-3.104502,0.000000,-1.104450,-0.919588,-0.542612,-2.736207,-2.027778,-1.951976
1,2015-01-06,-1.04,-0.78,-0.30,0.0,0.009416,-0.494983,-2.283333,-1.596111,-0.530419,...,-0.981616,-1.456851,-2.592912,0.759377,0.184360,-1.467726,0.834418,-0.531587,-0.890405,-0.991980
2,2015-01-07,1.19,0.18,-0.64,0.0,1.402210,4.041660,1.059974,1.057833,1.022978,...,2.687191,0.772298,0.152622,1.248189,1.742411,1.270517,1.368544,1.013242,1.333453,1.092371
3,2015-01-08,1.81,-0.12,-0.28,0.0,3.842208,1.045858,0.683602,0.209339,1.034136,...,2.159561,0.328465,2.234624,1.209622,0.372265,2.941820,2.040827,1.664457,1.434435,1.787702
4,2015-01-09,-0.85,0.01,-0.48,0.0,0.107264,-2.735444,-1.174861,-1.671307,0.490457,...,-1.019665,-0.523797,-1.738701,-1.103200,-1.218719,-0.840491,0.461530,-0.140940,-0.885923,-0.905170
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1355,2020-05-22,0.27,0.47,-0.87,0.0,0.643816,-0.989040,-0.402982,-0.543476,0.161245,...,-3.255436,-5.860658,-0.776325,-0.309937,-0.361986,0.043620,0.644144,0.089747,-0.741004,0.115549
1356,2020-05-26,1.23,0.04,4.59,0.0,-0.677351,-1.509222,-0.616361,12.158464,2.482630,...,2.415891,3.656931,7.097365,2.354005,0.233167,-1.057138,-0.026683,2.937226,2.136919,1.822883
1357,2020-05-27,1.54,0.61,3.62,0.0,0.435703,-0.804769,-0.473604,5.724720,0.479539,...,2.695866,1.511978,5.792103,1.388570,1.558084,0.132174,-0.213385,0.718814,1.357404,1.949361
1358,2020-05-28,-0.41,-1.47,-2.41,0.0,0.044029,0.055564,-0.385415,-1.958519,-3.933184,...,-0.656293,-2.399664,-1.489591,0.770382,0.538043,-0.225529,2.058264,-2.595172,-0.732526,-0.566943


In [None]:
# predicted_values = model.predict(X)

In [None]:
# data['FittedValues'] = predicted_values

In [None]:
# data

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RF,adjclose,SP500_return,stock,adj_close,bbr,pnlog,stock_return,y,FittedValues
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2015-01-05,-1.84,0.34,-0.68,0.0,172.953659,-1.805987,GOOG,512.463013,52.854812,-0.002559,-1.805987,-1.805987,-1.831597
2015-01-06,-1.04,-0.78,-0.30,0.0,171.324631,-0.941887,GOOG,500.585632,68.195719,0.017585,-0.941887,-0.941887,-0.912424
2015-01-07,1.19,0.18,-0.64,0.0,173.459595,1.246151,GOOG,499.727997,54.057592,-0.004112,1.246151,1.246151,1.139709
2015-01-08,1.81,-0.12,-0.28,0.0,176.537628,1.774496,GOOG,501.303680,71.653543,0.015942,1.774496,1.774496,1.781081
2015-01-09,-0.85,0.01,-0.48,0.0,175.122910,-0.801369,GOOG,494.811493,53.968254,-0.006977,-0.801369,-0.801369,-0.826077
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-06-01,0.52,-0.02,0.45,0.0,290.796021,0.404196,ABBV,90.699997,44.786730,-0.016372,0.404196,0.404196,0.512048
2020-06-02,0.81,-0.05,0.45,0.0,293.203827,0.828005,ABBV,91.040001,68.914286,0.012053,0.828005,0.828005,0.798266
2020-06-03,1.42,0.11,2.66,0.0,297.105865,1.330828,ABBV,90.889999,37.832311,-0.027009,1.330828,1.330828,1.372320
2020-06-04,-0.34,-0.24,2.98,0.0,296.325470,-0.262666,ABBV,93.040001,38.028169,-0.033180,-0.262666,-0.262666,-0.298249


In [None]:
# data['stock'].unique()

array(['GOOG', 'AMZN', 'HAL', 'FB', 'DIS', 'XOM', 'MSFT', 'DB', 'KO',
       'JPM', 'MCD', 'HSBC', 'AAPL', 'PFE', 'ABBV'], dtype=object)

In [None]:
# transformed_data.to_csv('transformed.csv')

In [47]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize

# The nine symbols that enter the minimum-variance portfolio
selected_stocks = [
    'AAPL', 'ABBV', 'AMZN',
    'DB', 'DIS', 'GOOG',
    'HAL', 'HSBC', 'JPM',
]

# Their return columns from `df`, one column per symbol
selected_data = df.loc[:, selected_stocks]

# Sample covariance of those return columns
covariance_matrix = selected_data.cov()

# Define the objective function for portfolio optimization
def objective(weights, covariance_matrix):
    """Return the portfolio variance ``weights' @ covariance_matrix @ weights``.

    Parameters
    ----------
    weights : numpy.ndarray
        Portfolio weights, one entry per asset.
    covariance_matrix : array-like
        Square covariance matrix whose order matches ``weights``.

    Returns
    -------
    The quadratic form computed with ``np.dot``.
    """
    # Inner product first (Sigma . w), then the outer one (w' . (Sigma . w))
    sigma_times_weights = np.dot(covariance_matrix, weights)
    return np.dot(weights.T, sigma_times_weights)

# Define the constraint for portfolio weights summing to 1
def constraint(weights):
    """Equality-constraint residual: zero exactly when the weights sum to one."""
    total_weight = np.sum(weights)
    return total_weight - 1.0

# Start the solver from the equally weighted portfolio
initial_weights = np.ones(len(selected_stocks)) / len(selected_stocks)

# Bounds for the portfolio weights (0 <= weight <= 1)
bounds = [(0, 1)] * len(selected_stocks)

# Equality constraint: the weights must sum to 1
constraint_eq = {'type': 'eq', 'fun': constraint}

# Run the portfolio optimization to find the minimum variance portfolio
result = minimize(objective, initial_weights, args=(covariance_matrix,), method='SLSQP', bounds=bounds, constraints=constraint_eq)

# Do not report weights from a run the solver itself flags as unsuccessful
if not result.success:
    raise RuntimeError(f"Minimum-variance optimization failed: {result.message}")

# Get the optimal portfolio weights
optimal_weights = result.x

# Print the optimal weights for each stock
for stock, weight in zip(selected_stocks, optimal_weights):
    print(f"{stock}: {weight}")

# Weighted sum of the selected return columns per row, i.e. one portfolio return per date
portfolio_return = np.sum(selected_data * optimal_weights, axis=1)

# Print the portfolio return
print("Portfolio Return:", portfolio_return)


AAPL: 0.016086469605548475
ABBV: 0.1584895565587243
AMZN: 0.10361587928933512
DB: 0.0
DIS: 0.2186875645099934
GOOG: 0.06470742480025873
HAL: 1.204650257585066e-16
HSBC: 0.43841310523614024
JPM: 0.0
Portfolio Return: 0      -2.016555
1      -1.219559
2       1.324160
3       0.688956
4      -0.759730
          ...   
1355   -2.687129
1356    1.862489
1357    0.601873
1358   -1.947669
1359   -0.203605
Length: 1360, dtype: float64


In [52]:
# Write the wide per-ticker return table to stocks.csv.
# NOTE(review): inverted_df has a default integer index after reset_index(), and to_csv
# writes the index by default, so the file gets an unnamed leading column — confirm
# that is intended (index=False would omit it).
inverted_df.to_csv('stocks.csv')

In [None]:
# Regressors: the three factor columns, taken fresh from the current `df`
X2 = df[['Mkt-RF', 'SMB', 'HML']]

# Response: the row-wise mean of the per-symbol target columns
y = df['portfolio_target']

# Fit and predict on X2, the matrix defined in this cell, rather than on the `X`
# left over from the earlier regression cell.
model.fit(X2, y)

predicted_values = model.predict(X2)

# NOTE(review): same factor columns and target as the earlier regression that fills
# 'FFF', so 'FFFF' is expected to duplicate it — confirm whether a different model
# was intended here.
df['FFFF'] = predicted_values


df

In [None]:
# Robust-optimization experiment: for every kappa and every perturbation index, run an
# SLSQP problem with weights bounded to [0, 1] and summing to 1 that minimizes
# -np.dot(weights.T, factor_returns), then print the weights and the objective value.
# NOTE(review): `factor_returns` is not defined anywhere in the visible notebook, so this
#   cell raises NameError on a fresh kernel unless it is defined elsewhere — confirm.
# NOTE(review): `perturbation_matrix` is computed but never used afterwards, and the
#   optimization itself reads neither `kappa` nor `i`, so every iteration appears to
#   solve the same problem.
# NOTE(review): the `objective` / `constraint` definitions inside the loop rebind the
#   module-level functions of the same names from the minimum-variance cell (`objective`
#   there also takes a covariance matrix); re-running that cell after this one needs
#   those definitions restored.
num_perturbations = 10

kappa_values = [0.2, 0.5, 0.8]

# Perform robust portfolio optimization with different perturbation matrices and kappa values
for kappa in kappa_values:
    print(f"Kappa: {kappa}")
    
    for i in range(num_perturbations):
        print(f"Perturbation {i + 1}/{num_perturbations}")
        
        # Diagonal matrix built from kappa times the absolute values of the first column
        # of factor_returns (assumes factor_returns is a 2-D array — TODO confirm)
        perturbation_matrix = np.diag(kappa * np.abs(factor_returns[:, 0]))  # Adjust the perturbation matrix based on your factor model
        
        # Objective: negative of weights . factor_returns, so minimizing maximizes the product
        def objective(weights):
            portfolio_return = np.dot(weights.T, factor_returns)
            return -portfolio_return

        # Equality-constraint residual: zero when the weights sum to 1
        def constraint(weights):
            return np.sum(weights) - 1.0

        # Initial guess: equal weights across selected_stocks (defined in an earlier cell)
        initial_weights = np.ones(len(selected_stocks)) / len(selected_stocks)

        # Define the bounds for portfolio weights (0 <= weight <= 1)
        bounds = [(0, 1)] * len(selected_stocks)

        # Define the equality constraint for portfolio weights summing to 1
        constraint_eq = {'type': 'eq', 'fun': constraint}

        # Run the portfolio optimization to find the maximum return portfolio
        result = minimize(objective, initial_weights, method='SLSQP', bounds=bounds, constraints=constraint_eq)

        # Get the optimal portfolio weights
        optimal_weights = result.x

        # Print the optimal weights for each stock
        for stock, weight in zip(selected_stocks, optimal_weights):
            print(f"{stock}: {weight}")
        
        # Calculate the portfolio return
        # NOTE(review): the [0, 0] index requires the dot product to be 2-D — confirm
        # against the actual shape of factor_returns.
        portfolio_return = np.dot(optimal_weights.T, factor_returns)[0, 0]
        
        # Print the portfolio return
        print("Portfolio Return:", portfolio_return)
        print()


In [None]:
# begin_date = "2018-01-01"  # "2015-01-01"
# end_date = "2019-12-20"  # "2019-12-20" #"2017-12-31"
# period = f"{begin_date}/{end_date}"


In [None]:
# import pandas as pd
# import numpy as np
# from scipy.linalg import solve
# import statsmodels.api as sm
# from sklearn.metrics import mean_squared_error
# import matplotlib.pyplot as plt
# import seaborn as sns

# # assuming 'dataset', 'fama_lib', and 'SP500_index' are pandas DataFrame or Series

# stock_prices = dataset['adjusted'][period]
# stock_prices.index = pd.to_datetime(stock_prices.index)

# X = np.log(stock_prices).diff().dropna()

# N = X.shape[1]  # number of stocks
# T = X.shape[0]  # number of days

# F_FamaFrench = fama_lib.loc[X.index] / 100

# f_SP500 = np.log(SP500_index).diff().dropna()

# BBrMkt = dataset['BBr'].rolling(window=period).mean().fillna(method='bfill') / 100
# PNlogMkt = dataset['PNlog'].rolling(window=period).mean().fillna(method='bfill')

# SentIndx = PNlogMkt.loc[X.index]

# FFS = pd.concat([fama_lib.loc[X.index], SentIndx], axis=1).fillna(method='bfill')

# SPS = pd.concat([f_SP500.loc[X.index], SentIndx], axis=1).fillna(method='bfill')

# T_trn = round(0.5 * T)
# X_trn = X.iloc[:T_trn]
# X_tst = X.iloc[T_trn:]
# F_FamaFrench_trn = F_FamaFrench.iloc[:T_trn]
# F_FamaFrench_tst = F_FamaFrench.iloc[T_trn:]
# f_SP500_trn = f_SP500.iloc[:T_trn]
# f_SP500_tst = f_SP500.iloc[T_trn:]
# SentIndx_trn = SentIndx.iloc[:T_trn]
# SentIndx_tst = SentIndx.iloc[T_trn:]
# FFS_trn = FFS.iloc[:T_trn]
# FFS_tst = FFS.iloc[T_trn:]
# SPS_trn = SPS.iloc[:T_trn]
# SPS_tst = SPS.iloc[T_trn:]

# Sigma_SCM = X_trn.cov()

# # 1-factor model SP500
# model = sm.OLS(X_trn, sm.add_constant(f_SP500_trn))
# results = model.fit()
# Gamma = results.params.T
# alpha = Gamma[0]
# beta = Gamma[1:]
# resid = results.resid
# Psi_sp500 = np.cov(resid, rowvar=False)
# Sigma_SP500 = f_SP500_trn.var() * np.outer(beta, beta) + np.diag(np.diag(Psi_sp500))

# # and so on for the other models

# Sigma_true = X_tst.cov()

# errors = {
#     'SCM': np.linalg.norm(Sigma_SCM - Sigma_true, ord='fro'),
#     'SP500': np.linalg.norm(Sigma_SP500 - Sigma_true, ord='fro'),
#     # ...
# }

# # barplot of errors
# plt.bar(errors.keys(), errors.values(), color='aquamarine')
# plt.title('Error in estimation of covariance matrix')
# plt.show()

# ref = np.linalg.norm(Sigma_SCM - Sigma_true, ord='fro')**2
# PRIAL = 100 * (ref - np.array(list(errors.values()))**2) / ref

# # barplot of PRIAL
# plt.bar(errors.keys(), PRIAL, color='bisque')
# plt.title('PRIAL for estimation of covariance matrix')
# plt.show()
