# Import data

In [120]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import os
import openpyxl
import scipy.optimize as minimizer 

In [109]:
# Import data from files
# The project root may be overridden with the BIGDATA_PROJECT_DIR environment
# variable so the notebook is not tied to one machine; when the variable is
# unset the previous hard-coded location is used, so behaviour is unchanged.
PROJECT_DIR = os.environ.get('BIGDATA_PROJECT_DIR', '/Users/talhajamal/Desktop/Code/BigData1Project')
os.chdir(PROJECT_DIR)  # the relative "data/..." paths below resolve against this directory
data = pd.read_csv("data/Returns_Data.csv")
# Convert the 'date' column to datetimes; dayfirst=False reads ambiguous dates month-first.
data['date'] = pd.to_datetime(data['date'], dayfirst=False)
characteristics = pd.read_csv("data/Stock_Characteristics_Data.csv")
dictionary = pd.read_excel("data/StockDataDictionary.xlsx")

In [110]:
# Reshape the long `data` table into one wide frame per field
# (rows: date, columns: ticker). The target names on the left line up
# one-to-one with the source column names on the right.
(prices, volume, returns, shares_outstanding,
 value_weighted_returns, equal_weighted_returns) = (
    data.pivot(index='date', columns='ticker', values=field)
    for field in ('PRC', 'VOL', 'RET', 'SHROUT', 'vwretd', 'ewretd')
)

# Express returns in percent before anything downstream consumes them.
returns = returns.mul(100)

# Per-ticker descriptive statistics of the percent returns.
returns_summary = returns.describe()

# Column labels of the price frame, i.e. the tickers (a pandas Index).
tickers = prices.columns

# Data Preprocessing

In [111]:
# Running total of the percent returns down the date axis, per ticker.
# NOTE(review): this is an additive sum of period returns, not a compounded
# cumulative return — confirm that the additive approximation is intended.
returns.cumsum(axis=0)

ticker,ADI,ADP,ADSK,AFL,AIZ,AMAT,AMP,APH,AXP,BBY,...,TRV,TSN,UNM,VFC,VLO,VZ,WAT,WY,WYNN,XOM
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04,0.2850,0.0234,1.0232,2.8541,2.3066,2.5825,2.7306,-0.5197,0.9872,1.8500,...,-0.1003,-0.2445,2.1516,0.1912,6.8060,0.4528,-0.5326,2.6426,9.8403,1.4078
2010-01-05,0.1271,-0.5136,-0.4961,5.7551,5.2907,1.8133,5.0626,-2.6094,0.7673,4.3879,...,-2.4693,1.6346,3.6060,2.0991,9.3773,0.6331,-1.8956,4.7655,15.9222,1.7983
2010-01-06,-0.0627,-0.7483,-0.2588,6.6335,5.0009,1.6019,6.4593,-2.2314,2.3838,3.6114,...,-3.8882,6.6867,4.4958,1.6979,11.8841,-2.2013,-1.7146,3.6598,14.6105,2.6626
2010-01-07,-0.8548,-0.7954,0.2937,7.7068,6.2279,0.5426,7.2084,-2.8959,3.9986,5.2010,...,-2.4489,8.4424,6.7006,3.6313,12.6284,-2.7965,-1.2877,3.2126,16.7461,2.3484
2010-01-08,-0.2799,-0.9366,3.3549,6.7050,6.2917,4.4113,7.5682,-2.1601,3.9271,1.2771,...,-2.5928,8.2173,5.3583,3.1044,11.2564,-2.7335,-1.2059,2.2917,16.0296,1.9472
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-12-24,225.2769,209.1564,318.5352,134.8974,210.7661,266.2223,249.6355,215.6821,168.8371,215.7204,...,157.3226,228.5158,105.6305,230.4820,248.2550,132.9342,175.7468,164.4529,229.2066,21.8604
2020-12-28,225.4223,209.1847,317.9579,135.0799,211.3392,265.6832,249.7831,216.2008,169.6978,215.7497,...,157.8280,227.8830,106.4060,232.3294,247.1954,133.1721,175.7266,164.0393,230.1507,22.1969
2020-12-29,224.7173,208.1995,317.4440,134.6928,210.7019,264.9762,249.3041,215.1226,169.5119,213.9755,...,157.2245,226.5161,106.8587,231.5219,247.8489,132.8839,175.2530,162.9120,229.1461,21.0709
2020-12-30,226.4714,207.5933,317.8801,135.2871,211.8337,268.1921,250.5100,216.2126,170.6038,214.5908,...,157.7521,226.6578,107.9853,233.1618,250.1393,131.7446,175.1269,163.8721,228.3500,21.8705


In [112]:
# Pairwise Pearson correlation between the ticker columns of `returns`.
correlation_matrix = returns.corr(method='pearson')
# Bare last expression so the notebook renders the matrix.
correlation_matrix

ticker,ADI,ADP,ADSK,AFL,AIZ,AMAT,AMP,APH,AXP,BBY,...,TRV,TSN,UNM,VFC,VLO,VZ,WAT,WY,WYNN,XOM
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ADI,1.000000,0.575610,0.552164,0.523202,0.462259,0.714050,0.596292,0.637304,0.546807,0.347137,...,0.483073,0.326923,0.517449,0.465644,0.437207,0.338292,0.495225,0.534595,0.464222,0.506499
ADP,0.575610,1.000000,0.528904,0.605848,0.540483,0.559727,0.655543,0.622024,0.616486,0.355851,...,0.624318,0.363147,0.581940,0.528556,0.489928,0.485502,0.553371,0.597151,0.416860,0.575303
ADSK,0.552164,0.528904,1.000000,0.463746,0.398231,0.552088,0.550950,0.568078,0.474242,0.314827,...,0.401518,0.288148,0.437308,0.434490,0.407097,0.283416,0.484981,0.470147,0.405759,0.404498
AFL,0.523202,0.605848,0.463746,1.000000,0.633143,0.490498,0.711044,0.608352,0.690189,0.346125,...,0.637511,0.412584,0.724434,0.548912,0.569525,0.408948,0.501116,0.598684,0.503274,0.619953
AIZ,0.462259,0.540483,0.398231,0.633143,1.000000,0.444370,0.623686,0.526663,0.547776,0.333194,...,0.655542,0.345027,0.618616,0.448169,0.448457,0.391155,0.429497,0.504014,0.376574,0.515368
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VZ,0.338292,0.485502,0.283416,0.408948,0.391155,0.312622,0.429567,0.400548,0.384200,0.231419,...,0.456158,0.290179,0.382645,0.346429,0.310185,1.000000,0.360842,0.387749,0.228160,0.412483
WAT,0.495225,0.553371,0.484981,0.501116,0.429497,0.480138,0.552548,0.573233,0.509605,0.286880,...,0.455313,0.281526,0.479949,0.454820,0.426990,0.360842,1.000000,0.502558,0.381953,0.470176
WY,0.534595,0.597151,0.470147,0.598684,0.504014,0.506892,0.649698,0.584423,0.608739,0.371413,...,0.570950,0.358299,0.598621,0.524152,0.497598,0.387749,0.502558,1.000000,0.458862,0.571841
WYNN,0.464222,0.416860,0.405759,0.503274,0.376574,0.454122,0.497309,0.481443,0.522615,0.284438,...,0.417731,0.299607,0.498535,0.438643,0.464208,0.228160,0.381953,0.458862,1.000000,0.484089


In [113]:
# Covariance Matrix
# Pairwise covariance between the ticker columns of `returns`. Because the
# returns were multiplied by 100 earlier, the entries are in percent-squared.
covariance_matrix = returns.cov()
# Bare last expression so the notebook renders the matrix.
covariance_matrix

ticker,ADI,ADP,ADSK,AFL,AIZ,AMAT,AMP,APH,AXP,BBY,...,TRV,TSN,UNM,VFC,VLO,VZ,WAT,WY,WYNN,XOM
ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ADI,3.262099,1.431482,2.258351,1.684421,1.371195,2.754325,2.335061,1.830164,1.806743,1.582277,...,1.230018,1.088972,2.044468,1.533855,1.971979,0.676519,1.468850,1.935924,2.466527,1.371915
ADP,1.431482,1.895913,1.649152,1.486982,1.222238,1.645976,1.957046,1.361792,1.552912,1.236547,...,1.211896,0.922177,1.752877,1.327340,1.684646,0.740185,1.251271,1.648574,1.688536,1.187972
ADSK,2.258351,1.649152,5.128014,1.871921,1.481065,2.670058,2.705062,2.045397,1.964669,1.799203,...,1.281829,1.203408,2.166338,1.794467,2.302181,0.710622,1.803537,2.134633,2.703053,1.373699
AFL,1.684421,1.486982,1.871921,3.177357,1.853532,1.867275,2.748022,1.724180,2.250687,1.557037,...,1.602031,1.356339,2.824852,1.784502,2.535201,0.807126,1.466890,2.139662,2.639058,1.657266
AIZ,1.371195,1.222238,1.481065,1.853532,2.697303,1.558646,2.220862,1.375284,1.645819,1.381003,...,1.517803,1.045060,2.222543,1.342421,1.839300,0.711301,1.158381,1.659673,1.819391,1.269354
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VZ,0.676519,0.740185,0.710622,0.807126,0.711301,0.739261,1.031245,0.705162,0.778237,0.646654,...,0.712041,0.592556,0.926829,0.699577,0.857685,1.225971,0.656120,0.860807,0.743175,0.684932
WAT,1.468850,1.251271,1.803537,1.466890,1.158381,1.683953,1.967371,1.496761,1.530997,1.188938,...,1.054110,0.852644,1.724189,1.362220,1.751098,0.656120,2.696820,1.654728,1.845218,1.157943
WY,1.935924,1.648574,2.134633,2.139662,1.659673,2.170542,2.824339,1.863104,2.232854,1.879340,...,1.613849,1.324902,2.625611,1.916697,2.491496,0.860807,1.654728,4.020034,2.706502,1.719453
WYNN,2.466527,1.688536,2.703053,2.639058,1.819391,2.853131,3.171963,2.251910,2.812598,2.111700,...,1.732441,1.625497,3.208270,2.353446,3.410284,0.743175,1.845218,2.706502,8.654118,2.135687


In [114]:
# Display the per-ticker descriptive statistics produced by returns.describe().
returns_summary

ticker,ADI,ADP,ADSK,AFL,AIZ,AMAT,AMP,APH,AXP,BBY,...,TRV,TSN,UNM,VFC,VLO,VZ,WAT,WY,WYNN,XOM
count,2769.0,2769.0,2769.0,2769.0,2769.0,2769.0,2769.0,2769.0,2769.0,2769.0,...,2769.0,2769.0,2769.0,2769.0,2769.0,2769.0,2769.0,2769.0,2769.0,2769.0
mean,0.082171,0.075467,0.115515,0.049235,0.077092,0.096573,0.091032,0.078342,0.06206,0.076931,...,0.057306,0.082338,0.039803,0.083894,0.09024,0.047957,0.063519,0.059052,0.082285,0.007568
std,1.806128,1.376921,2.264512,1.782514,1.642347,2.135688,2.168156,1.589992,1.829424,2.523674,...,1.409776,1.844263,2.18758,1.823818,2.497278,1.107236,1.6422,2.005002,2.941788,1.499687
min,-16.6149,-15.2526,-15.8599,-16.4312,-19.736,-20.3576,-23.8221,-13.8317,-14.8187,-28.5866,...,-20.8004,-14.4893,-22.3684,-12.9217,-19.2209,-6.6205,-12.3133,-22.6734,-24.4346,-12.2248
25%,-0.7688,-0.5183,-0.9586,-0.6547,-0.6793,-0.9567,-0.8548,-0.624,-0.6426,-1.0363,...,-0.5536,-0.7703,-0.813,-0.6923,-1.1099,-0.57,-0.6591,-0.8601,-1.2707,-0.6616
50%,0.0759,0.0948,0.1326,0.0925,0.1009,0.0799,0.1077,0.0876,0.0741,0.0983,...,0.0913,0.1376,0.0828,0.0913,0.0862,0.0668,0.0988,0.0697,-0.009,-0.0115
75%,0.9748,0.7446,1.2205,0.7626,0.8852,1.2083,1.0872,0.8089,0.8641,1.3289,...,0.708,0.9873,1.0329,0.951,1.3251,0.6503,0.832,1.0041,1.3712,0.6996
max,14.6978,11.8036,16.1985,26.1766,14.1484,13.8122,22.6358,14.7586,21.8823,21.4796,...,13.2903,22.7026,26.2042,14.003,31.2025,7.6802,12.6927,25.3155,27.6883,12.6868


In [115]:
# Scale each ticker's mean per-period percent return by 252
# (presumably the number of trading days in a year — confirm).
annualized_mean_returns = returns.mean().mul(252)
annualized_mean_returns

ticker
ADI     20.707110
ADP     19.017709
ADSK    29.109750
AFL     12.407273
AIZ     19.427180
          ...    
VZ      12.085243
WAT     16.006814
WY      14.881151
WYNN    20.735841
XOM      1.907246
Length: 100, dtype: float64

In [116]:
# Scale each ticker's per-period standard deviation by sqrt(252)
# (252 is presumably the number of trading days in a year — confirm).
annualized_std_dev = returns.std().mul(np.sqrt(252))
annualized_std_dev

ticker
ADI     28.671394
ADP     21.857950
ADSK    35.948012
AFL     28.296536
AIZ     26.071447
          ...    
VZ      17.576825
WAT     26.069115
WY      31.828424
WYNN    46.699441
XOM     23.806789
Length: 100, dtype: float64

# Portfolio Calculations

In [117]:
# Keep an independent copy of the frame in its current orientation.
df_returns = returns.copy()
# Flip rows and columns so the portfolio helpers below can average along axis=1.
# WARNING: this rebinds `returns`, so the cell is not idempotent — running it a
# second time transposes back and also overwrites `df_returns`.
returns = returns.transpose()

In [119]:
def portfolio_return(weights, returns, periods_per_year=252):
    """Return the annualised weighted-average return of a portfolio.

    Parameters
    ----------
    weights : array-like
        One weight per asset, ordered like the rows of ``returns``.
    returns : array-like or DataFrame
        Per-period returns with one row per asset and one column per period
        (the mean is taken along ``axis=1``).
    periods_per_year : int or float, default 252
        Factor that scales the per-period mean to a yearly figure. The default
        keeps the previously hard-coded value (presumably trading days per
        year for daily data).

    Returns
    -------
    float
        Sum over assets of ``mean return * weight``, times ``periods_per_year``.
    """
    mean_per_asset = np.mean(returns, axis=1)
    return np.sum(mean_per_asset * weights) * periods_per_year

def portfolio_vol(weights, returns, periods_per_year=252):
    """Return the annualised volatility (standard deviation) of a portfolio.

    Parameters
    ----------
    weights : array-like
        One weight per asset, ordered like the rows of ``returns``. Plain
        lists and tuples are accepted as well as NumPy arrays.
    returns : array-like or DataFrame
        Per-period returns with one row per asset and one column per period
        (``np.cov`` treats each row as a variable by default).
    periods_per_year : int or float, default 252
        Factor that scales the per-period covariance to a yearly figure. The
        default keeps the previously hard-coded value.

    Returns
    -------
    float
        ``sqrt(w' * (cov * periods_per_year) * w)``.
    """
    # Coerce to an ndarray so sequences without a `.T` attribute also work.
    w = np.asarray(weights)
    annual_cov = np.cov(returns) * periods_per_year
    return np.sqrt(np.dot(w.T, np.dot(annual_cov, w)))