In [1]:
import pandas as pd

def highlight(val):
    color = 'blue' if val in Same else 'black'
    return 'color: %s' % color

## Porfolio Analysis Results
In this section, the porfolio weight table for each industry in terms of three kinds of estimates are displayed, and the companies that exist in all three constructed portfolios are hilighted.

In [2]:
columns = pd.MultiIndex.from_product([["Prepackaged Software", 
                                       "Pharmaceutical Preparations", "Real Estate Investment Trusts", 
                                       "State Commercial Banks",],
                                      ['Sample', 'Cosine Similarity', 'Factor Model']])

data = [[0.6,1.1,2.1,1.2,1.2,0.4,0.5,0.6,0.3,1.2,1.1,0.9],
        [2.4,2.9,15.9,2.1,2.6,12.2,1.8,1.7,8.0,2.7,2.2,20.8],
        [-0.57,-0.30,0.00,-0.35,-0.32,-0.13,-0.80,-0.81,-0.22,-0.28,-0.38,-0.05]]

methods = ["Expected Annual Return", "Annual Volatility", "Sharpe Ratio"]


import pandas as pd
df = pd.DataFrame(data, index = methods, columns = columns).T.round(2)

import seaborn as sns
cm = sns.light_palette("#5CCDC6", n_colors = 35, as_cmap=True)

df.style.background_gradient(cmap=cm)

Unnamed: 0,Unnamed: 1,Expected Annual Return,Annual Volatility,Sharpe Ratio
Prepackaged Software,Sample,0.6,2.4,-0.57
Prepackaged Software,Cosine Similarity,1.1,2.9,-0.3
Prepackaged Software,Factor Model,2.1,15.9,0.0
Pharmaceutical Preparations,Sample,1.2,2.1,-0.35
Pharmaceutical Preparations,Cosine Similarity,1.2,2.6,-0.32
Pharmaceutical Preparations,Factor Model,0.4,12.2,-0.13
Real Estate Investment Trusts,Sample,0.5,1.8,-0.8
Real Estate Investment Trusts,Cosine Similarity,0.6,1.7,-0.81
Real Estate Investment Trusts,Factor Model,0.3,8.0,-0.22
State Commercial Banks,Sample,1.2,2.7,-0.28


### Prepackaged Software (mass reproduction of software)

In [3]:
sample_software = pd.read_csv("data/min_vol_sample_Prepackaged_Software.csv")
cos_sim_software = pd.read_csv("data/min_vol_cos_sim_Prepackaged_Software.csv")
factor_model_software = pd.read_csv("data/min_vol_factor_model_Prepackaged_Software.csv")

sample_software = sample_software.sort_values(by=["Weight"], ascending=False).reset_index(drop=True)
cos_sim_software = cos_sim_software.sort_values(by=["Weight"], ascending=False).reset_index(drop=True)
factor_model_software = factor_model_software.sort_values(by=["Weight"], ascending=False).reset_index(drop=True)

columns = pd.MultiIndex.from_product([['Sample Estimate', 'Cosine Similarity Estimate', 'Factor Model Estimate'], 
                                      ['Company Name', 'Weight']])
software = pd.concat([pd.concat([sample_software, cos_sim_software], axis=1), factor_model_software], axis=1)
software.columns = columns

Same = (set(sample_software.Company_Name) & set(cos_sim_software.Company_Name)) & set(factor_model_software.Company_Name)

In [4]:
software.style.applymap(highlight)

Unnamed: 0_level_0,Sample Estimate,Sample Estimate,Cosine Similarity Estimate,Cosine Similarity Estimate,Factor Model Estimate,Factor Model Estimate
Unnamed: 0_level_1,Company Name,Weight,Company Name,Weight,Company Name,Weight
0,AWARE INC /MA/,0.2,"BLACK KNIGHT, INC.",0.2,"POLARITYTE, INC.",0.2
1,"BLACK KNIGHT, INC.",0.2,ORACLE CORP,0.16315,"2U, INC.",0.17282
2,"ACI WORLDWIDE, INC.",0.11314,ANSYS INC,0.1539,MICROSTRATEGY INC,0.12752
3,ORACLE CORP,0.0917,ULTIMATE SOFTWARE GROUP INC,0.1035,"ENDURANCE INTERNATIONAL GROUP HOLDINGS, INC.",0.11228
4,"NUANCE COMMUNICATIONS, INC.",0.08608,NATIONAL INSTRUMENTS CORP,0.09372,ANSYS INC,0.06752
5,COMMVAULT SYSTEMS INC,0.07381,"Q2 HOLDINGS, INC.",0.0619,TABLEAU SOFTWARE INC,0.05614
6,"QUALYS, INC.",0.06668,"NUANCE COMMUNICATIONS, INC.",0.05947,QUMU CORP,0.04725
7,QUMU CORP,0.05153,"ACI WORLDWIDE, INC.",0.04754,"BRIDGELINE DIGITAL, INC.",0.03593
8,"ENDURANCE INTERNATIONAL GROUP HOLDINGS, INC.",0.02554,GSE SYSTEMS INC,0.04031,NATIONAL INSTRUMENTS CORP,0.02857
9,MICROSTRATEGY INC,0.0216,REALPAGE INC,0.02937,REALPAGE INC,0.02637


### Pharmaceutical Preparations

In [5]:
sample_pharm = pd.read_csv("data/min_vol_sample_Pharmaceutical_Preparations.csv")
cos_sim_pharm = pd.read_csv("data/min_vol_cos_sim_Pharmaceutical_Preparations.csv")
factor_model_pharm = pd.read_csv("data/min_vol_factor_model_Pharmaceutical_Preparations.csv")

In [6]:
sample_pharm = sample_pharm.sort_values(by=["Weight"], ascending=False).reset_index(drop=True)
cos_sim_pharm = cos_sim_pharm.sort_values(by=["Weight"], ascending=False).reset_index(drop=True)
factor_model_pharm = factor_model_pharm.sort_values(by=["Weight"], ascending=False).reset_index(drop=True)

In [7]:
columns = pd.MultiIndex.from_product([['Sample Estimate', 'Cosine Similarity Estimate', 'Factor Model Estimate'], 
                                      ['Company Name', 'Weight']])
pharm = pd.concat([pd.concat([sample_pharm, cos_sim_pharm], axis=1), factor_model_pharm], axis=1)
pharm.columns = columns
Same = (set(sample_pharm.Company_Name) & set(cos_sim_pharm.Company_Name)) & set(factor_model_pharm.Company_Name)

In [8]:
pharm.style.applymap(highlight)

Unnamed: 0_level_0,Sample Estimate,Sample Estimate,Cosine Similarity Estimate,Cosine Similarity Estimate,Factor Model Estimate,Factor Model Estimate
Unnamed: 0_level_1,Company Name,Weight,Company Name,Weight,Company Name,Weight
0,"MERCK & CO., INC.",0.2,ZOETIS INC.,0.2,ZOETIS INC.,0.15867
1,JOHNSON & JOHNSON,0.17878,PFIZER INC,0.2,NATURES SUNSHINE PRODUCTS INC,0.12507
2,BRISTOL MYERS SQUIBB CO,0.12824,JOHNSON & JOHNSON,0.18756,JOHNSON & JOHNSON,0.106
3,"ASSEMBLY BIOSCIENCES, INC.",0.05775,"MERCK & CO., INC.",0.13753,"TELIGENT, INC.",0.08773
4,"PROPHASE LABS, INC.",0.0512,BIOSPECIFICS TECHNOLOGIES CORP,0.07394,ARENA PHARMACEUTICALS INC,0.07682
5,ORAMED PHARMACEUTICALS INC.,0.04982,BIOMARIN PHARMACEUTICAL INC,0.04572,PROGENICS PHARMACEUTICALS INC,0.07063
6,STEMLINE THERAPEUTICS INC,0.04273,BRISTOL MYERS SQUIBB CO,0.03719,FLEXION THERAPEUTICS INC,0.06494
7,"IMPRIMIS PHARMACEUTICALS, INC.",0.04181,LILLY ELI & CO,0.03562,"ACLARIS THERAPEUTICS, INC.",0.05848
8,PFENEX INC.,0.03777,XENCOR INC,0.02108,ANI PHARMACEUTICALS INC,0.05767
9,BIODELIVERY SCIENCES INTERNATIONAL INC,0.0368,"PACIRA PHARMACEUTICALS, INC.",0.01883,XOMA CORP,0.04907


### Real Estate Investment Trusts

In [9]:
sample_real_estate = pd.read_csv("data/min_vol_sample_Real_Estate_Investment_Trusts.csv")
cos_sim_real_estate = pd.read_csv("data/min_vol_cos_sim_Real_Estate_Investment_Trusts.csv")
factor_model_real_estate = pd.read_csv("data/min_vol_factor_model_Real_Estate_Investment_Trusts.csv")

In [10]:
sample_real_estate = sample_real_estate.sort_values(by=["Weight"], ascending=False).reset_index(drop=True)
cos_sim_real_estate = cos_sim_real_estate.sort_values(by=["Weight"], ascending=False).reset_index(drop=True)
factor_model_real_estate = factor_model_real_estate.sort_values(by=["Weight"], ascending=False).reset_index(drop=True)

In [11]:
columns = pd.MultiIndex.from_product([['Sample Estimate', 'Cosine Similarity Estimate', 'Factor Model Estimate'], 
                                      ['Company Name', 'Weight']])
real_estate = pd.concat([pd.concat([sample_real_estate, cos_sim_real_estate], axis=1), factor_model_real_estate], axis=1)
real_estate.columns = columns
Same = (set(sample_real_estate.Company_Name) & set(cos_sim_real_estate.Company_Name)) & set(factor_model_real_estate.Company_Name)

In [12]:
real_estate.style.applymap(highlight)

Unnamed: 0_level_0,Sample Estimate,Sample Estimate,Cosine Similarity Estimate,Cosine Similarity Estimate,Factor Model Estimate,Factor Model Estimate
Unnamed: 0_level_1,Company Name,Weight,Company Name,Weight,Company Name,Weight
0,GREAT AJAX CORP.,0.2,EQUITY COMMONWEALTH,0.16327,EQUITY RESIDENTIAL,0.12677
1,EQUITY COMMONWEALTH,0.2,SUN COMMUNITIES INC,0.14907,CYRUSONE INC.,0.08523
2,HMG COURTLAND PROPERTIES INC,0.12513,GREAT AJAX CORP.,0.13806,HOWARD HUGHES CORP,0.08041
3,PUBLIC STORAGE,0.10938,EQUINIX INC,0.07068,KILROY REALTY CORP,0.07864
4,ARES COMMERCIAL REAL ESTATE CORP,0.09107,"GAMING & LEISURE PROPERTIES, INC.",0.06734,LAMAR ADVERTISING CO/NEW,0.07446
5,CIM COMMERCIAL TRUST CORP,0.05461,PUBLIC STORAGE,0.06339,OUTFRONT MEDIA INC.,0.073
6,IMPAC MORTGAGE HOLDINGS INC,0.05108,DUKE REALTY CORP,0.05369,AMERICAN CAMPUS COMMUNITIES INC,0.06287
7,CROWN CASTLE INTERNATIONAL CORP,0.04875,HIGHWOODS PROPERTIES INC,0.05347,LTC PROPERTIES INC,0.04618
8,LADDER CAPITAL CORP,0.0442,"MFA FINANCIAL, INC.",0.05101,FRANKLIN STREET PROPERTIES CORP /MA/,0.04327
9,ALEXANDERS INC,0.02285,ANNALY CAPITAL MANAGEMENT INC,0.05094,EQUITY COMMONWEALTH,0.04242


### State Commercial Banks (commercial banking)

In [13]:
real_estate.style.applymap(highlight)

Unnamed: 0_level_0,Sample Estimate,Sample Estimate,Cosine Similarity Estimate,Cosine Similarity Estimate,Factor Model Estimate,Factor Model Estimate
Unnamed: 0_level_1,Company Name,Weight,Company Name,Weight,Company Name,Weight
0,GREAT AJAX CORP.,0.2,EQUITY COMMONWEALTH,0.16327,EQUITY RESIDENTIAL,0.12677
1,EQUITY COMMONWEALTH,0.2,SUN COMMUNITIES INC,0.14907,CYRUSONE INC.,0.08523
2,HMG COURTLAND PROPERTIES INC,0.12513,GREAT AJAX CORP.,0.13806,HOWARD HUGHES CORP,0.08041
3,PUBLIC STORAGE,0.10938,EQUINIX INC,0.07068,KILROY REALTY CORP,0.07864
4,ARES COMMERCIAL REAL ESTATE CORP,0.09107,"GAMING & LEISURE PROPERTIES, INC.",0.06734,LAMAR ADVERTISING CO/NEW,0.07446
5,CIM COMMERCIAL TRUST CORP,0.05461,PUBLIC STORAGE,0.06339,OUTFRONT MEDIA INC.,0.073
6,IMPAC MORTGAGE HOLDINGS INC,0.05108,DUKE REALTY CORP,0.05369,AMERICAN CAMPUS COMMUNITIES INC,0.06287
7,CROWN CASTLE INTERNATIONAL CORP,0.04875,HIGHWOODS PROPERTIES INC,0.05347,LTC PROPERTIES INC,0.04618
8,LADDER CAPITAL CORP,0.0442,"MFA FINANCIAL, INC.",0.05101,FRANKLIN STREET PROPERTIES CORP /MA/,0.04327
9,ALEXANDERS INC,0.02285,ANNALY CAPITAL MANAGEMENT INC,0.05094,EQUITY COMMONWEALTH,0.04242


In [14]:
sample_banks = pd.read_csv("data/min_vol_sample_State_Commercial_Banks.csv")
cos_sim_banks = pd.read_csv("data/min_vol_cos_sim_State_Commercial_Banks.csv")
factor_model_banks = pd.read_csv("data/min_vol_factor_model_State_Commercial_Banks.csv")

In [15]:
sample_banks = sample_banks.sort_values(by=["Weight"], ascending=False).reset_index(drop=True)
cos_sim_banks = cos_sim_banks.sort_values(by=["Weight"], ascending=False).reset_index(drop=True)
factor_model_banks = factor_model_banks.sort_values(by=["Weight"], ascending=False).reset_index(drop=True)

In [16]:
columns = pd.MultiIndex.from_product([['Sample Estimate', 'Cosine Similarity Estimate', 'Factor Model Estimate'], 
                                      ['Company Name', 'Weight']])
banks = pd.concat([pd.concat([sample_banks, cos_sim_banks], axis=1), factor_model_banks], axis=1)
banks.columns = columns
Same = (set(sample_banks.Company_Name) & set(cos_sim_banks.Company_Name)) & set(factor_model_banks.Company_Name)

In [17]:
banks.style.applymap(highlight)

Unnamed: 0_level_0,Sample Estimate,Sample Estimate,Cosine Similarity Estimate,Cosine Similarity Estimate,Factor Model Estimate,Factor Model Estimate
Unnamed: 0_level_1,Company Name,Weight,Company Name,Weight,Company Name,Weight
0,INVESTAR HOLDING CORP,0.1944,BANNER CORP,0.2,"CAROLINA TRUST BANCSHARES, INC.",0.06034
1,GUARANTY FEDERAL BANCSHARES INC,0.17724,INVESTAR HOLDING CORP,0.16789,SUMMIT FINANCIAL GROUP INC,0.05077
2,VILLAGE BANK & TRUST FINANCIAL CORP.,0.13994,CITIZENS & NORTHERN CORP,0.11305,"ATLANTIC CAPITAL BANCSHARES, INC.",0.0468
3,"RELIANT BANCORP, INC.",0.12273,BANK OF NEW YORK MELLON CORP,0.09816,HOWARD BANCORP INC,0.04524
4,"CAROLINA TRUST BANCSHARES, INC.",0.11786,INDEPENDENT BANK CORP /MI/,0.0954,"INDEPENDENT BANK GROUP, INC.",0.04493
5,BANK OF NEW YORK MELLON CORP,0.09533,EAST WEST BANCORP INC,0.08342,COMMERCE BANCSHARES INC /MO/,0.04119
6,CITIZENS & NORTHERN CORP,0.05375,ENTERPRISE FINANCIAL SERVICES CORP,0.07078,UNITED BANKSHARES INC/WV,0.04042
7,FIRST COMMUNITY CORP /SC/,0.05076,S&T BANCORP INC,0.05201,TEXAS CAPITAL BANCSHARES INC/TX,0.03947
8,MACKINAC FINANCIAL CORP /MI/,0.02478,BANK OF HAWAII CORP,0.04935,OHIO VALLEY BANC CORP,0.03818
9,"FAUQUIER BANKSHARES, INC.",0.02143,HOWARD BANCORP INC,0.02931,EAST WEST BANCORP INC,0.03521


### Crude Petroleum and Natural Gas
Since there is no optimal portfolio generating from sample estimate and cosine similarity estimate for the Crude Petroleum and Natural Gas industry, we only display the portfolio weights for factor model estimate.

In [21]:
factor_model_crude = pd.read_csv("data/min_vol_factor_model_Crude_Petroleum_and_Natural_Gas.csv")

sample_crude = sample_crude.sort_values(by=["Weight"], ascending=False).reset_index(drop=True)
factor_model_crude = factor_model_crude.sort_values(by=["Weight"], ascending=False).reset_index(drop=True)

In [24]:
columns = pd.MultiIndex.from_product([['Cosine Similarity Estimate'], 
                                      ['Company Name', 'Weight']])

factor_model_crude.columns = columns

In [25]:
factor_model_crude.style.applymap(highlight)

Unnamed: 0_level_0,Cosine Similarity Estimate,Cosine Similarity Estimate
Unnamed: 0_level_1,Company Name,Weight
0,CALIFORNIA RESOURCES CORP,0.13625
1,KOSMOS ENERGY LTD.,0.12827
2,ANTERO RESOURCES CORP,0.07803
3,CALLON PETROLEUM CO,0.06983
4,SM ENERGY CO,0.06395
5,PANHANDLE OIL & GAS INC,0.05509
6,WHITING PETROLEUM CORP,0.05473
7,PEDEVCO CORP,0.05452
8,ISRAMCO INC,0.0435
9,CONCHO RESOURCES INC,0.04303
