<a href="https://colab.research.google.com/github/justinballas/WorkAutomation/blob/main/BICFR_selector_v1_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from glob import glob
from sklearn import preprocessing 

**Overall Rank**

In [None]:
# ---------------------------------------------------------------------------
# Overall rank: score every open fund on all selected metrics, then pick the
# highest-scoring fund in each Morningstar category.
# ---------------------------------------------------------------------------

# Pick the first 'NWD*' file in the working directory, skipping the .xlsx files
# this notebook exports itself (read_csv cannot parse them, so they are never a
# valid input and would otherwise be matched on a re-run).
candidates = [path for path in glob('NWD*') if not path.lower().endswith('.xlsx')]
if not candidates:
    raise FileNotFoundError("No input file matching 'NWD*' found in the working directory")

# skipfooter drops the last 13 lines of the file (presumably an export footer --
# confirm against the data source). It is only supported by the python parser,
# so the engine is requested explicitly instead of relying on pandas' fallback.
df = pd.read_csv(candidates[0], skipfooter=13, engine='python').set_index('Name')

# keep only funds that are not flagged as closed to new investors
df = df[df['Closed to \nNew Inv'] != 'Yes']


#enter columns to use for ranking with their associated weights
rank_and_weight = {
    'Morningstar \nRating \nOverall': 1,
    'Morningstar \nRating \n3 Yr': 1,
    'Morningstar \nRating \n5 Yr': 1,
    'Morningstar \nRating \n10 Yr': 1,
    'Total Ret \n% Rank Cat \n1 Yr (Mo-End)': 1,
    'Total Ret \n% Rank Cat \n3 Yr (Mo-End)': 1,
    'Total Ret \n% Rank Cat \n5 Yr (Mo-End)': 1,
    'Total Ret \n% Rank Cat \n10 Yr (Mo-End)': 1,
    'Sharpe Ratio \n3 Yr (Mo-End) \nRisk \nCurrency': 1,
    'Manager \nTenure \n(Average)': 1,
    'Prospectus \nNet Expense \nRatio': 1
}

#columns where the lower value is better (category percentile ranks and expense ratio)
#defined here so this cell runs on a fresh kernel without relying on a later cell
reverse_cols = [
    'Total Ret \n% Rank Cat \n1 Yr (Mo-End)',
    'Total Ret \n% Rank Cat \n3 Yr (Mo-End)',
    'Total Ret \n% Rank Cat \n5 Yr (Mo-End)',
    'Total Ret \n% Rank Cat \n10 Yr (Mo-End)',
    'Prospectus \nNet Expense \nRatio',
]

#Return all selected metrics [y/n]?
#n for only selected fund and asset class
return_ans = 'n'

#name of excel file when exported
file_name = 'Best_in_class_selections'

columns = list(rank_and_weight.keys())
category_col = df['Morningstar Category']

#selecting columns to scale
df2 = df.loc[:, columns]

#scaling each selected column to the 0-1 range
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
scaled_df = pd.DataFrame(
    min_max_scaler.fit_transform(df2.values),
    index=df.index,
    columns=columns,
)

#Reversing the score of columns where the lower score is better.
#This must happen BEFORE weighting: reversing afterwards would compute
#1 - weight*x instead of weight*(1 - x) for any weight other than 1.
lower_is_better = [col for col in columns if col in reverse_cols]
scaled_df[lower_is_better] = 1 - scaled_df[lower_is_better]

#applying weight
for col, weight in rank_and_weight.items():
    scaled_df[col] = scaled_df[col] * weight

#calculating overall score (missing metrics are skipped by sum, i.e. count as 0)
scaled_df['Overall Score'] = scaled_df[columns].sum(axis=1)

#ranking overall score: the highest score receives the highest rank number
scaled_df['Overall Rank'] = scaled_df['Overall Score'].rank(ascending=True)

#adding back morningstar categories
scaled_df['Morningstar Category'] = category_col

#creating list of asset classes to iterate over
#(rows without a category are ignored; they would yield an empty group below)
asset_classes = list(category_col.dropna().unique())

#getting highest ranked fund per asset class
max_asset_classes = {}
for asset_class in asset_classes:
    class_df = scaled_df[scaled_df['Morningstar Category'] == asset_class]
    max_asset_classes[asset_class] = class_df['Overall Rank'].idxmax()

#creating dataframe: full original rows for 'y', otherwise asset class -> fund name
if return_ans == 'y':
    export_df = df.loc[list(max_asset_classes.values())]
else:
    export_df = pd.DataFrame.from_dict(max_asset_classes, orient='index')

#exporting to excel
export_df.to_excel(file_name + '.xlsx')

export_df

**Bifurcating Morningstar Ratings and secondary columns**

In [None]:
# ---------------------------------------------------------------------------
# Bifurcated selection: per Morningstar category, keep the fund(s) with the
# highest summed (scaled) Morningstar ratings; when several funds tie, break
# the tie with a weighted secondary score.
# ---------------------------------------------------------------------------

# Take the first entry in the working directory, skipping the .xlsx files this
# notebook exports itself (read_csv cannot parse them, so they are never a valid
# input and would otherwise be picked up on a re-run).
# NOTE(review): glob('*') still matches any other file or folder present --
# confirm the working directory only holds the fund export.
candidates = [path for path in glob('*') if not path.lower().endswith('.xlsx')]
if not candidates:
    raise FileNotFoundError('No input file found in the working directory')
file_ = candidates[0]

# skipfooter drops the last 13 lines of the file (presumably an export footer --
# confirm against the data source). It is only supported by the python parser,
# so the engine is requested explicitly instead of relying on pandas' fallback.
df = pd.read_csv(file_, index_col='Name', skipfooter=13, engine='python')

# keep only funds not flagged as closed to new investors; copy so the column
# assignment below modifies an independent frame rather than a filtered view
df = df[df['Closed to \nNew Inv'] != 'Yes'].copy()

# shorten category labels; the order matters ('US Fund ' must go before 'Fund ')
for label_part in ('US Fund ', 'US Insurance ', 'Fund '):
    df['Morningstar Category'] = df['Morningstar Category'].str.replace(label_part, '', regex=False)

ratings_cols = [
    'Morningstar \nRating \nOverall',
    'Morningstar \nRating \n3 Yr',
    'Morningstar \nRating \n5 Yr',
    'Morningstar \nRating \n10 Yr'
]

#enter columns to use for ranking with their associated weights
rank_and_weight = {
    'Total Ret \n% Rank Cat \n1 Yr (Mo-End)': 1,
    'Total Ret \n% Rank Cat \n3 Yr (Mo-End)': 1,
    'Total Ret \n% Rank Cat \n5 Yr (Mo-End)': 1,
    'Total Ret \n% Rank Cat \n10 Yr (Mo-End)': 1,
    'Sharpe Ratio \n3 Yr (Mo-End) \nRisk \nCurrency': 1/3,
    'Manager \nTenure \n(Average)': 1/3,
    'Prospectus \nNet Expense \nRatio': 1/3
}

#Making a list of all columns where the lower value is better
#(named explicitly so it does not depend on the order of rank_and_weight)
reverse_cols = [
    'Total Ret \n% Rank Cat \n1 Yr (Mo-End)',
    'Total Ret \n% Rank Cat \n3 Yr (Mo-End)',
    'Total Ret \n% Rank Cat \n5 Yr (Mo-End)',
    'Total Ret \n% Rank Cat \n10 Yr (Mo-End)',
    'Prospectus \nNet Expense \nRatio',
]

#Return all selected metrics [y/n]?
#n for only selected fund and asset class
return_ans = 'y'

#name of excel file when exported: text before the first '.' of the input name
base_name = file_.split('.')[0]
file_name = f'{base_name}_BICFs'

columns = ratings_cols + list(rank_and_weight.keys())

category_col = df['Morningstar Category']

#selecting columns to scale
df2 = df.loc[:, columns]

#scaling each selected column to the 0-1 range
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
scaled_df = pd.DataFrame(
    min_max_scaler.fit_transform(df2.values),
    index=df.index,
    columns=columns,
)

#Reversing the score of columns where the lower score is better.
#This must happen BEFORE weighting: reversing afterwards computes 1 - weight*x,
#which for the 1/3-weighted expense ratio lands in [2/3, 1] instead of [0, 1/3]
#and unfairly penalises funds whose expense ratio is missing.
lower_is_better = [col for col in columns if col in reverse_cols]
scaled_df[lower_is_better] = 1 - scaled_df[lower_is_better]

#applying weight (rating columns are not in rank_and_weight and stay unweighted)
for col, weight in rank_and_weight.items():
    scaled_df[col] = scaled_df[col] * weight

#calculating overall rating and the tie-breaking secondary score
#(missing values are skipped by sum, i.e. count as 0)
scaled_df['Overall Rating'] = scaled_df[ratings_cols].sum(axis=1)
scaled_df['Secondary Score'] = scaled_df[list(rank_and_weight.keys())].sum(axis=1)

#adding back morningstar categories
scaled_df['Morningstar Category'] = category_col

#creating list of asset classes to iterate over
asset_classes = list(category_col.dropna().unique())

#getting highest rated fund(s) per asset class; ties are all kept at this stage
max_asset_classes = []
for asset_class in asset_classes:
    class_df = scaled_df[scaled_df['Morningstar Category'] == asset_class]
    bicf = class_df[class_df['Overall Rating'] == class_df['Overall Rating'].max()]
    max_asset_classes.append(bicf)

export_df = pd.concat(max_asset_classes)

#splitting off asset classes that still have more than one candidate
is_tied = export_df['Morningstar Category'].duplicated(keep=False)
secondary_df = export_df[is_tied]
export_df = export_df[~is_tied]

#breaking ties with the secondary score
max_asset_classes = []
for asset_class in secondary_df['Morningstar Category'].unique():
    class_df = secondary_df[secondary_df['Morningstar Category'] == asset_class]
    bicf = class_df['Secondary Score'].idxmax()
    #double brackets keep the row as a one-row DataFrame with its dtypes intact
    max_asset_classes.append(class_df.loc[[bicf]])

#Adding the tie-break winners back. The check must be "any", not "more than
#one": with exactly one tied asset class its rows were already removed above
#and would otherwise be lost from the result. Skipped only when there were no
#ties at all (pd.concat cannot take an empty list).
if max_asset_classes:
    secondary_df = pd.concat(max_asset_classes)
    export_df = pd.concat([export_df, secondary_df]).sort_values(by='Morningstar Category')

#creating dataframe: keep all metrics for 'y', otherwise only the fund names
if return_ans != 'y':
    export_df = pd.DataFrame({'BICF': export_df.index}).set_index('BICF')

#exporting to excel
export_df.to_excel(file_name + '.xlsx')

export_df

**Bifurcated selection, except for Large Blend, where the fund with the lowest expense ratio is selected as the index fund pick**

In [None]:
# ---------------------------------------------------------------------------
# Bifurcated selection with a special case for Large Blend: every category is
# decided by the summed (scaled) Morningstar ratings, except 'Large Blend',
# which is decided by the expense ratio. Ties are broken with a weighted
# secondary score.
# ---------------------------------------------------------------------------

# Take the first entry in the working directory, skipping the .xlsx files this
# notebook exports itself (read_csv cannot parse them, so they are never a valid
# input and would otherwise be picked up on a re-run).
# NOTE(review): glob('*') still matches any other file or folder present --
# confirm the working directory only holds the fund export.
candidates = [path for path in glob('*') if not path.lower().endswith('.xlsx')]
if not candidates:
    raise FileNotFoundError('No input file found in the working directory')
file_ = candidates[0]

# skipfooter drops the last 13 lines of the file (presumably an export footer --
# confirm against the data source). It is only supported by the python parser,
# so the engine is requested explicitly instead of relying on pandas' fallback.
df = pd.read_csv(file_, index_col='Name', skipfooter=13, engine='python')

# keep only funds not flagged as closed to new investors; copy so the column
# assignment below modifies an independent frame rather than a filtered view
df = df[df['Closed to \nNew Inv'] != 'Yes'].copy()

# shorten category labels; the order matters ('US Fund ' must go before 'Fund ')
for label_part in ('US Fund ', 'US Insurance ', 'Fund '):
    df['Morningstar Category'] = df['Morningstar Category'].str.replace(label_part, '', regex=False)

ratings_cols = [
    'Morningstar \nRating \nOverall',
    'Morningstar \nRating \n3 Yr',
    'Morningstar \nRating \n5 Yr',
    'Morningstar \nRating \n10 Yr'
]

#enter columns to use for ranking with their associated weights
rank_and_weight = {
    'Total Ret \n% Rank Cat \n1 Yr (Mo-End)': 1,
    'Total Ret \n% Rank Cat \n3 Yr (Mo-End)': 1,
    'Total Ret \n% Rank Cat \n5 Yr (Mo-End)': 1,
    'Total Ret \n% Rank Cat \n10 Yr (Mo-End)': 1,
    'Sharpe Ratio \n3 Yr (Mo-End) \nRisk \nCurrency': 1/3,
    'Manager \nTenure \n(Average)': 1/3,
    'Prospectus \nNet Expense \nRatio': 1/3
}

#Making a list of all columns where the lower value is better
#(named explicitly so it does not depend on the order of rank_and_weight)
reverse_cols = [
    'Total Ret \n% Rank Cat \n1 Yr (Mo-End)',
    'Total Ret \n% Rank Cat \n3 Yr (Mo-End)',
    'Total Ret \n% Rank Cat \n5 Yr (Mo-End)',
    'Total Ret \n% Rank Cat \n10 Yr (Mo-End)',
    'Prospectus \nNet Expense \nRatio',
]

#Return all selected metrics [y/n]?
#n for only selected fund and asset class
return_ans = 'y'

#name of excel file when exported: text before the first '.' of the input name
base_name = file_.split('.')[0]
file_name = f'{base_name}_BICFs'

columns = ratings_cols + list(rank_and_weight.keys())

category_col = df['Morningstar Category']

#selecting columns to scale
df2 = df.loc[:, columns]

#scaling each selected column to the 0-1 range
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
scaled_df = pd.DataFrame(
    min_max_scaler.fit_transform(df2.values),
    index=df.index,
    columns=columns,
)

#Reversing the score of columns where the lower score is better.
#This must happen BEFORE weighting: reversing afterwards computes 1 - weight*x,
#which for the 1/3-weighted expense ratio lands in [2/3, 1] instead of [0, 1/3]
#and unfairly penalises funds whose expense ratio is missing.
lower_is_better = [col for col in columns if col in reverse_cols]
scaled_df[lower_is_better] = 1 - scaled_df[lower_is_better]

#applying weight (rating columns are not in rank_and_weight and stay unweighted)
for col, weight in rank_and_weight.items():
    scaled_df[col] = scaled_df[col] * weight

#calculating overall rating and the tie-breaking secondary score
#(missing values are skipped by sum, i.e. count as 0)
scaled_df['Overall Rating'] = scaled_df[ratings_cols].sum(axis=1)
scaled_df['Secondary Score'] = scaled_df[list(rank_and_weight.keys())].sum(axis=1)

#adding back morningstar categories
scaled_df['Morningstar Category'] = category_col

#creating list of asset classes to iterate over
asset_classes = list(category_col.dropna().unique())

#getting best fund(s) per asset class; ties are all kept at this stage
max_asset_classes = []
for asset_class in asset_classes:
    class_df = scaled_df[scaled_df['Morningstar Category'] == asset_class]
    if asset_class == 'Large Blend':
        #the expense-ratio score is reversed above, so its maximum marks the
        #fund(s) with the lowest expense ratio in the class
        selection_col = 'Prospectus \nNet Expense \nRatio'
    else:
        selection_col = 'Overall Rating'
    bicf = class_df[class_df[selection_col] == class_df[selection_col].max()]
    max_asset_classes.append(bicf)

export_df = pd.concat(max_asset_classes)

#splitting off asset classes that still have more than one candidate
is_tied = export_df['Morningstar Category'].duplicated(keep=False)
secondary_df = export_df[is_tied]
export_df = export_df[~is_tied]

#breaking ties with the secondary score
max_asset_classes = []
for asset_class in secondary_df['Morningstar Category'].unique():
    class_df = secondary_df[secondary_df['Morningstar Category'] == asset_class]
    bicf = class_df['Secondary Score'].idxmax()
    #double brackets keep the row as a one-row DataFrame with its dtypes intact
    max_asset_classes.append(class_df.loc[[bicf]])

#Adding the tie-break winners back. The check must be "any", not "more than
#one": with exactly one tied asset class its rows were already removed above
#and would otherwise be lost from the result. Skipped only when there were no
#ties at all (pd.concat cannot take an empty list).
if max_asset_classes:
    secondary_df = pd.concat(max_asset_classes)
    export_df = pd.concat([export_df, secondary_df]).sort_values(by='Morningstar Category')

#creating dataframe: keep all metrics for 'y', otherwise only the fund names
if return_ans != 'y':
    export_df = pd.DataFrame({'BICF': export_df.index}).set_index('BICF')

#excel export is disabled in this cell; uncomment the next line to write the file
# export_df.to_excel(file_name + '.xlsx')

export_df