In [1]:
import sys
sys.path.insert(0, '..')
%load_ext autoreload
%autoreload 2
%aimport std_func

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Mean-Variance Analysis - Minimum Variance

Mean-variance analysis is a mathematical framework that examplifies the trade-off between return and risk. It is used to create diversified portfolios based on investors’ expectation. There are one main approach used in this report. We have the minimum volatility portfolio that concentrates on minimizing the risk of the portfolio. Mimimum variance portfolio can help us compare the correlation of simple sample covariance, covariance generated using cosine similarity distances and covariance generated using factor model in Sent-LDA.

### Monthly Returns
Since we will generate cosine similarity estimates in the next notebook using business description of companies from 2016 to 2018. We only consider monthly returns before 2019.

In [3]:
#returns_data = pd.read_csv('../data/returns_2018_top5_SIC.csv', 
#                      parse_dates = ['DATE'], 
#                      usecols = ["trt1m", "CIK", "name", "DATE"])

In [4]:
# TAKE OUT THE DATA before 2019 and reset index
#end_date = pd.to_datetime('2019-01-01')
#returns = returns_data.loc[returns_data['DATE'] < end_date]

# drop the missing value
#returns = returns.dropna()
# group the data by name and date
#group_returns = returns.groupby(['name', 'DATE'])
# calculate the percentage return
#group_returns_pct = group_returns.agg({"trt1m": (lambda x:  x/100)})
#r_selected = group_returns_pct.unstack()
# drop the first index
#r_selected.columns = r_selected.columns.droplevel(0)

In [5]:
# there are some rows containing more than one float number (array of multiple returns)
#for i in range(len(r_selected)):
#    if type(r_selected.iloc[i,1]) == np.ndarray:
#        r_selected = r_selected.drop(index = r_selected.index[i])
# convert object to float
#r_selected = r_selected.astype(float)
# drop missing values rows
#r_selected = r_selected.dropna()
#r_selected.to_csv("filtered_r.csv")
# get the mean of all 
#mu = r_selected.mean(axis = 1)
# compute the covariance matrix 
#cov = r_selected.T.cov()

In [6]:
r_selected = pd.read_csv("data/filtered_r.csv")
# get the mean of all 
r_selected.set_index("name", inplace = True)
mu = r_selected.mean(axis = 1)
# compute the covariance matrix 
cov = r_selected.T.cov()

In [7]:
## Cosine Similarity

In [8]:
df = pd.read_csv('../data/preprocessed.csv',
                 usecols = ['reportingDate', 'name', 'CIK',
                           'coDescription_stopwords', 'SIC', 'SIC_desc'])
df = df.set_index(df.name)

### Perform Mean-Variance Analysis

In [9]:
pip install dataframe_image

Note: you may need to restart the kernel to use updated packages.


In [10]:
pip install PyPortfolioOpt

Note: you may need to restart the kernel to use updated packages.


In [11]:
from pypfopt import EfficientFrontier
from pypfopt import risk_models
from pypfopt import expected_returns
from pypfopt import objective_functions
from pypfopt import plotting

In [12]:
# get the names of the companies in the pharmaceutical preparations industry
Pharm = df[df.SIC == 2834]
Pharm_list = Pharm.index

In [13]:
# get the companies name that match return data and business description data
SET = (set(Pharm_list) & set(r_selected.index))
LIST = [*SET, ]

#### Mean and Covariance for Returns in the Pharmaceutical Preparations Industry

In [14]:
mu_Pharm = mu[LIST]
mu_Pharm

name
ARDELYX, INC.                            -0.027414
CELLECTAR BIOSCIENCES, INC.              -0.075946
ALEXION PHARMACEUTICALS INC              -0.007589
PRESTIGE BRANDS HOLDINGS, INC.           -0.014350
SERES THERAPEUTICS, INC.                 -0.035758
                                            ...   
IONIS PHARMACEUTICALS INC                 0.040746
ZYNERBA PHARMACEUTICALS, INC.            -0.003030
CORMEDIX INC.                             0.031226
AKORN INC                                -0.044988
BIODELIVERY SCIENCES INTERNATIONAL INC    0.025782
Length: 124, dtype: float64

In [15]:
tmp = cov[LIST].T
cov_Pharm = tmp[LIST]
cov_Pharm

name,"ARDELYX, INC.","CELLECTAR BIOSCIENCES, INC.",ALEXION PHARMACEUTICALS INC,"PRESTIGE BRANDS HOLDINGS, INC.","SERES THERAPEUTICS, INC.",TREVENA INC,KARYOPHARM THERAPEUTICS INC.,"ATHERSYS, INC / NEW",HORIZON PHARMA PLC,PORTOLA PHARMACEUTICALS INC,...,AMICUS THERAPEUTICS INC,"ACTINIUM PHARMACEUTICALS, INC.","CATALYST PHARMACEUTICALS, INC.","PULMATRIX, INC.",PFENEX INC.,IONIS PHARMACEUTICALS INC,"ZYNERBA PHARMACEUTICALS, INC.",CORMEDIX INC.,AKORN INC,BIODELIVERY SCIENCES INTERNATIONAL INC
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"ARDELYX, INC.",0.036588,0.002618,0.010281,0.006181,0.005588,0.015577,0.010180,0.009007,0.011769,0.010888,...,0.006035,0.007307,0.005124,-0.000965,0.008321,0.014650,0.025486,0.016880,0.017637,-0.010666
"CELLECTAR BIOSCIENCES, INC.",0.002618,0.033018,-0.000421,0.001340,0.011397,0.003419,0.001872,0.007644,-0.003084,0.010387,...,0.007192,0.001572,0.008495,0.053345,0.003222,-0.000076,0.015640,-0.009426,-0.000512,0.003848
ALEXION PHARMACEUTICALS INC,0.010281,-0.000421,0.012712,0.002937,0.005804,0.009493,0.004195,0.001423,0.004890,0.009967,...,0.006752,0.002777,-0.001091,0.011443,0.001218,0.001960,0.002605,0.011167,0.007724,-0.000073
"PRESTIGE BRANDS HOLDINGS, INC.",0.006181,0.001340,0.002937,0.007043,0.006858,0.000967,0.001386,0.003148,0.003539,0.005748,...,0.002534,0.008591,0.001227,0.008177,0.002524,0.003926,0.006555,-0.001771,0.009009,0.003494
"SERES THERAPEUTICS, INC.",0.005588,0.011397,0.005804,0.006858,0.036740,0.010376,0.008612,0.009622,0.000411,0.011225,...,0.004647,0.001480,0.013368,0.013652,0.000535,-0.002446,0.004337,0.008517,0.004016,0.004308
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
IONIS PHARMACEUTICALS INC,0.014650,-0.000076,0.001960,0.003926,-0.002446,0.006784,0.008911,-0.002020,0.009003,0.008131,...,0.000188,0.003204,-0.000591,-0.009051,0.005312,0.029279,0.013580,0.003016,0.004382,-0.004476
"ZYNERBA PHARMACEUTICALS, INC.",0.025486,0.015640,0.002605,0.006555,0.004337,0.013980,0.008041,0.006055,0.000944,0.012758,...,0.006013,0.013983,0.015183,0.034465,0.003190,0.013580,0.060301,-0.006719,0.014366,-0.003132
CORMEDIX INC.,0.016880,-0.009426,0.011167,-0.001771,0.008517,0.010391,0.008000,0.005163,0.006018,0.004303,...,-0.004717,0.016194,-0.000194,0.029467,-0.012006,0.003016,-0.006719,0.142607,-0.005619,-0.007417
AKORN INC,0.017637,-0.000512,0.007724,0.009009,0.004016,0.002312,0.000825,0.012292,0.002804,0.012785,...,0.011761,0.017652,0.005256,0.003398,0.004266,0.004382,0.014366,-0.005619,0.037979,-0.001695
