In [1]:
import os
from io import StringIO

import ipywidgets as widgets
import numpy as np
import pandas as pd
import requests
from IPython.display import display
from sklearn.metrics.pairwise import cosine_similarity as cs
from sklearn.preprocessing import OrdinalEncoder

In [2]:
def get_data(filename):
    """Download a CSV file from the project's GitHub repository as a DataFrame.

    The file is fetched from ``Data/<filename>`` in the ``nuel-emeka/RECOMMENDER``
    repository through the GitHub contents API, requesting the raw file body.

    Parameters
    ----------
    filename : str
        Name of the CSV file inside the repository's ``Data`` folder.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV. The first CSV column is read as the index and then
        moved back into a regular column by ``reset_index``.

    Raises
    ------
    requests.HTTPError
        If GitHub answers with an error status (missing file, bad credentials,
        rate limit), instead of silently parsing the error body as CSV.
    """
    owner = 'nuel-emeka'
    repo = 'RECOMMENDER'
    path = 'Data/{}'.format(filename)

    headers = {'accept': 'application/vnd.github.v3.raw'}

    # SECURITY: credentials must never be committed with the notebook. The token
    # is read from the GITHUB_TOKEN environment variable; any token that was
    # previously hardcoded here must be treated as leaked and revoked.
    token = os.environ.get('GITHUB_TOKEN')
    if token:
        headers['authorization'] = 'token {}'.format(token)

    # send a request
    r = requests.get(
        'https://api.github.com/repos/{owner}/{repo}/contents/{path}'.format(
            owner=owner, repo=repo, path=path),
        headers=headers,
        timeout=30,  # do not let a stalled connection hang the kernel forever
    )
    # Fail loudly on 4xx/5xx rather than feeding an error message to read_csv.
    r.raise_for_status()

    # convert string to StringIO object
    string_io_obj = StringIO(r.text)

    # Load data to df
    df = pd.read_csv(string_io_obj, sep=",", index_col=0)
    df = df.reset_index()
    return df

In [3]:
# Load the cleaned plan table; `data` is re-encoded by the cells that follow.
data = get_data('Clean_data.csv')
# Keep an untouched copy so recommendations can be printed with the original labels.
data_copy = data.copy()
data.head()

Unnamed: 0,Name,Plan,Premium,Premium Tier,geographical coverage,family Planning,Mental health,Dental care,Telemedicine service,Cash back benefit?,ANC delivery coverage,eye care cost limits,Gym membership,Annual Routine Medical Screening
0,Integrated Health Care Family,Ebony,,Tier 1,Nationwide,No,No,Yes,No,No,No,Yes,No,No
1,Integrated Health Care Family,Pine,,Tier 1,Nationwide,No,No,Yes,No,No,No,No,No,No
2,Integrated Health Care Family,Iroko,,Tier 2,Nationwide,Yes,No,Yes,No,No,Yes,No,No,No
3,Integrated Health Care Family,Teak,,Tier 3,Nationwide,Yes,No,Yes,No,No,Yes,No,No,Yes
4,Integrated Health Care Family,Marble,,Tier 4,Nationwide,Yes,Yes,Yes,No,No,Yes,Yes,No,Yes


In [4]:
def yesNo_encode(value):
    """Map a Yes/No answer to 1/0 and leave every other value untouched.

    Matching ignores case and surrounding whitespace. Values that are not
    strings (NaN for a missing cell, or numbers that were already encoded when
    the cell is re-run) are returned unchanged instead of raising
    ``AttributeError`` on ``.lower()``.

    Parameters
    ----------
    value : object
        A single cell value.

    Returns
    -------
    object
        ``1`` for "yes", ``0`` for "no", otherwise ``value`` itself.
    """
    if not isinstance(value, str):
        return value

    answer = value.strip().lower()
    if answer == 'yes':
        return 1
    if answer == 'no':
        return 0
    return value

In [5]:
# Encode the Yes/No answers as 1/0 for every column from position 4 onward.
for col in data.columns[4:]:
    data[col] = data[col].apply(yesNo_encode)
data.head()

Unnamed: 0,Name,Plan,Premium,Premium Tier,geographical coverage,family Planning,Mental health,Dental care,Telemedicine service,Cash back benefit?,ANC delivery coverage,eye care cost limits,Gym membership,Annual Routine Medical Screening
0,Integrated Health Care Family,Ebony,,Tier 1,Nationwide,0,0,1,0,0,0,1,0,0
1,Integrated Health Care Family,Pine,,Tier 1,Nationwide,0,0,1,0,0,0,0,0,0
2,Integrated Health Care Family,Iroko,,Tier 2,Nationwide,1,0,1,0,0,1,0,0,0
3,Integrated Health Care Family,Teak,,Tier 3,Nationwide,1,0,1,0,0,1,0,0,1
4,Integrated Health Care Family,Marble,,Tier 4,Nationwide,1,1,1,0,0,1,1,0,1


In [6]:
# One-hot encode the coverage area into `location_*` indicator columns.
# dtype=int pins the indicators to 0/1: pandas >= 2.0 would otherwise produce
# booleans, which mixes dtypes in the feature rows used for cosine similarity.
data = pd.get_dummies(data, columns=['geographical coverage'], prefix=['location'], dtype=int)
data.head()

Unnamed: 0,Name,Plan,Premium,Premium Tier,family Planning,Mental health,Dental care,Telemedicine service,Cash back benefit?,ANC delivery coverage,eye care cost limits,Gym membership,Annual Routine Medical Screening,location_Lagos,location_Nationwide
0,Integrated Health Care Family,Ebony,,Tier 1,0,0,1,0,0,0,1,0,0,0,1
1,Integrated Health Care Family,Pine,,Tier 1,0,0,1,0,0,0,0,0,0,0,1
2,Integrated Health Care Family,Iroko,,Tier 2,1,0,1,0,0,1,0,0,0,0,1
3,Integrated Health Care Family,Teak,,Tier 3,1,0,1,0,0,1,0,0,1,0,1
4,Integrated Health Care Family,Marble,,Tier 4,1,1,1,0,0,1,1,0,1,0,1


In [7]:
# Fit an ordinal encoder on the tier labels and show the category order it learned.
encode = OrdinalEncoder()
encode.fit(data[['Premium Tier']])
encode.categories_

[array(['Tier 1', 'Tier 2', 'Tier 3', 'Tier 4'], dtype=object)]

In [8]:
# Replace the tier labels with their ordinal codes (the encoder is refit on the same column here).
data['Premium Tier'] = encode.fit_transform(data[['Premium Tier']])
data.head()

Unnamed: 0,Name,Plan,Premium,Premium Tier,family Planning,Mental health,Dental care,Telemedicine service,Cash back benefit?,ANC delivery coverage,eye care cost limits,Gym membership,Annual Routine Medical Screening,location_Lagos,location_Nationwide
0,Integrated Health Care Family,Ebony,,0.0,0,0,1,0,0,0,1,0,0,0,1
1,Integrated Health Care Family,Pine,,0.0,0,0,1,0,0,0,0,0,0,0,1
2,Integrated Health Care Family,Iroko,,1.0,1,0,1,0,0,1,0,0,0,0,1
3,Integrated Health Care Family,Teak,,2.0,1,0,1,0,0,1,0,0,1,0,1
4,Integrated Health Care Family,Marble,,3.0,1,1,1,0,0,1,1,0,1,0,1


In [9]:
# Keep only the columns from position 3 onward; `df` is the default feature table of predict().
df = data.iloc[:, 3:]
df.head()

Unnamed: 0,Premium Tier,family Planning,Mental health,Dental care,Telemedicine service,Cash back benefit?,ANC delivery coverage,eye care cost limits,Gym membership,Annual Routine Medical Screening,location_Lagos,location_Nationwide
0,0.0,0,0,1,0,0,0,1,0,0,0,1
1,0.0,0,0,1,0,0,0,0,0,0,0,1
2,1.0,1,0,1,0,0,1,0,0,0,0,1
3,2.0,1,0,1,0,0,1,0,0,1,0,1
4,3.0,1,1,1,0,0,1,1,0,1,0,1


In [11]:
# Load the HMO survey responses that top_3_index() uses to rank HMOs.
rating_data = get_data("HMO ratings - Form responses 1.csv")

In [13]:
# Preview the first survey responses.
rating_data.head()

Unnamed: 0,Timestamp,Do you have health insurance through any HMO in Nigeria?,What is the name of your HMO?,Rate from 1 to 5 for the responsiveness of the HMO to your needs,Rate from 1 - 5 for the degree of flexibility with health facilities under your HMO.,Rate from 1 - 5 how well you are treated in health facilities because of your HMO.,Rate from 1 - 5 your level of satisfaction with your HMO
0,29/03/2021 06:25:46,Yes,AVON HEALTHCARE LIMITED,3.0,3.0,,3.0
1,29/03/2021 06:32:22,Yes,AVON HEALTHCARE LIMITED,4.0,4.0,3.0,4.0
2,29/03/2021 06:37:06,No. If No submit the form,NOVOHEALTH HMO,2.0,2.0,2.0,1.0
3,29/03/2021 06:44:15,Yes,AXA MANSARD HEALTHCARE LTD,3.0,3.0,4.0,3.0
4,29/03/2021 06:56:38,Yes,AXA MANSARD HEALTHCARE LTD,5.0,5.0,5.0,5.0


In [23]:
def cosine_sim(response, data):
    """Return the index labels of the five rows of `data` most similar to `response`.

    Each row of `data` is compared with `response` using cosine similarity
    (a larger score means a closer match). Rows are ranked by descending score
    and the labels of the first five are returned as a NumPy array.
    """
    query = [response]
    labels = list(data.index)
    # One similarity call per row, so every score is computed exactly as a 1x1 comparison.
    scores = [cs([data.loc[label, :].values], query)[0][0] for label in labels]

    ranking = pd.DataFrame({'HMO index': labels, 'cosine similarity': scores})
    ranking = ranking.sort_values('cosine similarity', ascending=False)
    ranking = ranking.set_index('HMO index')

    return ranking.index[:5].values

In [31]:
def clean_ratings(ratings_data, hmo):
    """Return up to three of the given HMOs, best rated first.

    Each survey response is scored as the sum of the four rating columns at
    positions 3-6 of `ratings_data`. Responses are grouped by upper-cased,
    stripped HMO name and the scores are averaged per HMO.

    The input frame is NOT modified: all work happens on derived objects, so
    the function can be called repeatedly on the same survey data. The mean is
    taken over the score column only, which avoids the ``TypeError`` that
    ``groupby(...).mean()`` raises on text columns in pandas >= 2.0.

    Parameters
    ----------
    ratings_data : pandas.DataFrame
        Survey responses. The HMO name is read from the column
        'What is the name of your HMO?' (or 'Name' if already renamed).
    hmo : iterable of str
        Candidate HMO names; compared case-insensitively.

    Returns
    -------
    list of str
        Upper-cased names of the candidates that have ratings, sorted by mean
        score in descending order, truncated to three entries.
    """
    wanted = [name.upper() for name in hmo]

    # rename/dropna both return new frames, leaving the caller's data intact.
    ratings = (
        ratings_data
        .rename(columns={'What is the name of your HMO?': 'Name'})
        .dropna(subset=['Name'])
    )
    names = ratings['Name'].map(lambda name: name.upper().strip())
    # Sum of the four rating answers per response (missing answers count as 0).
    scores = ratings.iloc[:, [3, 4, 5, 6]].sum(axis=1)
    mean_scores = scores.groupby(names).mean()

    rated = [name for name in wanted if name in mean_scores.index]
    ranked = mean_scores.loc[rated].sort_values(ascending=False)

    return ranked.index[:3].to_list()


def top_5_dict(result, source=None):
    """Group recommended plan indexes by the HMO that offers them.

    Parameters
    ----------
    result : sequence
        Index labels of the recommended plans, best match first.
    source : pandas.DataFrame, optional
        Table with a 'Name' column indexed by those labels. Defaults to the
        notebook-level `data` frame.

    Returns
    -------
    tuple (dict, list)
        ``hmo_dict`` maps each upper-cased, stripped HMO name to the list of
        its plan indexes in `result` order. Keys are inserted in first-seen
        order, so the key order is the same in every session (a ``set`` of
        strings would be ordered by randomized hashes).
        ``hmo_names`` is the HMO name of every entry of `result`, in order.
    """
    if source is None:
        source = data

    hmo_names = [name.upper().strip() for name in source.loc[result, 'Name']]

    hmo_dict = {}
    for name, index in zip(hmo_names, result):
        hmo_dict.setdefault(name, []).append(index)

    return hmo_dict, hmo_names


def top_3_index(top5):
    """Pick three plan indexes from `top5`, favouring the best-rated HMOs.

    HMOs are ordered by the ranking returned by clean_ratings(); HMOs missing
    from that ranking are appended afterwards in the order they occur in
    `top5`. The plan indexes of the first three HMOs are concatenated and the
    first three indexes are returned.
    """
    plans_by_hmo, hmo_names = top_5_dict(top5)
    ranked = clean_ratings(rating_data, plans_by_hmo.keys())

    # HMOs absent from the rating ranking go to the back of the queue.
    for name in hmo_names:
        if name not in ranked:
            ranked.append(name)

    picks = []
    for name in ranked[:3]:
        picks.extend(plans_by_hmo.get(name))

    return picks[:3]

def print_top_3(result, source=None):
    """Print the full record of every recommended plan.

    Parameters
    ----------
    result : iterable
        Index labels of the plans to print (three, as produced by
        top_3_index()).
    source : pandas.DataFrame, optional
        Table the records are read from. Defaults to the notebook-level
        `data_copy` frame, which still holds the original labels.

    Returns
    -------
    None
        The function only prints.
    """
    if source is None:
        source = data_copy

    # The header used to announce "TOP FIVE" although three plans are printed.
    print('TOP THREE HMO RECOMMENDED BASED ON YOUR REQUIREMENTS ARE')
    print('-------------------------------------------------------')
    print('_______________________________________________________')
    for hmo in result:
        print(source.loc[hmo, :])
        print('------------------------------------------------------- \n')
        

#print_top_3(top_3_index(top_5))

In [25]:

def predict(test, df=df):
    """Filter the plans by location and tier, rank them, and print the recommendations.

    `test[0]` is the requested tier code and `test[-2]` the Lagos indicator.
    When the Lagos indicator is 0, plans flagged `location_Lagos` are removed.
    For tier codes 0, 1 and 2, plans with a higher 'Premium Tier' are removed;
    any other tier code keeps every remaining plan. The remaining plans are
    ranked with cosine_sim(), narrowed with top_3_index() and handed to
    print_top_3(), whose return value is returned.
    """
    tier = test[0]
    loc = test[-2]

    if loc == 0:
        lagos_only = df[df['location_Lagos'] == 1].index
        df = df.drop(lagos_only, axis=0)

    # Only the three lower tiers cap the search; anything else searches every tier.
    if tier in (0, 1, 2):
        above_budget = df[df['Premium Tier'] > tier].index
        df = df.drop(above_budget, axis=0)

    results = cosine_sim(test, df)
    index = top_3_index(results)

    return print_top_3(index)


# <font color='red'>Function and Markdown for USER INPUT</font> <font color='green'>COSINE SIMILARITY Algorithm</font>

In [26]:
def request(wants):
    """Return the user's choices with the complementary location flag appended.

    The last entry of `wants` is a 0/1 flag; the result carries one extra
    trailing entry holding its opposite (0 when the flag is 1, otherwise 1).

    The input is copied first, so the caller's sequence is never modified and
    calling the function twice with the same object gives the same result.

    Parameters
    ----------
    wants : sequence
        Selected values, ending with the 0/1 location flag.

    Returns
    -------
    list
        A new list: the values of `wants` followed by the complementary flag.
    """
    test = list(wants)
    test.append(0 if test[-1] == 1 else 1)
    return test


def _yes_no_dropdown(description):
    """Build an enabled YES/NO dropdown (YES -> 1, NO -> 0) that starts on YES."""
    return widgets.Dropdown(
        options=[('YES', 1), ('NO', 0)],
        value=1,
        description=description,
        disabled=False,
    )


# Tier choice: the values match the ordinal codes of 'Premium Tier'.
tier = widgets.Dropdown(
    options=[('Tier 1', 0), ('Tier 2', 1), ('Tier 3', 2), ('Tier 4', 3)],
    value=0,
    description='TIER',
    disabled=False,
)

# One YES/NO selector per benefit.
family_plan = _yes_no_dropdown('FAMILY PLANNING')
mental_health = _yes_no_dropdown('MENTAL HEALTH')
dental_care = _yes_no_dropdown('DENTAL CARE')
telemed = _yes_no_dropdown('TELEMED')
cbb = _yes_no_dropdown('CASH BACK BENEFITS')
ANC = _yes_no_dropdown('ANC DELIVERY COVERAGE')
eye_care = _yes_no_dropdown('EYE CARE')
gym = _yes_no_dropdown('GYM')
annual_rms = _yes_no_dropdown('ANNUAL ROUTINE MEDICAL SCREENING')

# Coverage area: 1 selects Lagos, 0 selects Nationwide.
loc = widgets.Dropdown(
    options=[('Lagos', 1), ('Nationwide', 0)],
    value=1,
    description='STATE',
    disabled=False,
)

display(
    tier, family_plan, mental_health, dental_care, telemed, cbb, ANC,
    eye_care, gym, annual_rms, loc,
)

Dropdown(description='TIER', options=(('Tier 1', 0), ('Tier 2', 1), ('Tier 3', 2), ('Tier 4', 3)), value=0)

Dropdown(description='FAMILY PLANNING', options=(('YES', 1), ('NO', 0)), value=1)

Dropdown(description='MENTAL HEALTH', options=(('YES', 1), ('NO', 0)), value=1)

Dropdown(description='DENTAL CARE', options=(('YES', 1), ('NO', 0)), value=1)

Dropdown(description='TELEMED', options=(('YES', 1), ('NO', 0)), value=1)

Dropdown(description='CASH BACK BENEFITS', options=(('YES', 1), ('NO', 0)), value=1)

Dropdown(description='ANC DELIVERY COVERAGE', options=(('YES', 1), ('NO', 0)), value=1)

Dropdown(description='EYE CARE', options=(('YES', 1), ('NO', 0)), value=1)

Dropdown(description='GYM', options=(('YES', 1), ('NO', 0)), value=1)

Dropdown(description='ANNUAL ROUTINE MEDICAL SCREENING', options=(('YES', 1), ('NO', 0)), value=1)

Dropdown(description='STATE', options=(('Lagos', 1), ('Nationwide', 0)), value=1)

## Run the cell below AFTER MAKING YOUR CHANGES ABOVE to print your recommendation

In [32]:
# Read the current value of every dropdown, in the order they are displayed above.
needs = [
    widget.value
    for widget in (
        tier, family_plan, mental_health, dental_care, telemed,
        cbb, ANC, eye_care, gym, annual_rms, loc,
    )
]
test = request(needs)
predict(test)

TOP FIVE HMO RECOMMENDED BASED ON YOUR REQUIREMENTS ARE
-------------------------------------------------------
_______________________________________________________
Name                                RELIANCE HMO LIMITED
Plan                                         Alexandrite
Premium                                           72,000
Premium Tier                                      Tier 3
geographical coverage                         Nationwide
family Planning                                       No
Mental health                                        Yes
Dental care                                          Yes
Telemedicine service                                 Yes
Cash back benefit?                                   Yes
ANC delivery coverage                                 No
eye care cost limits                                 Yes
Gym membership                                       Yes
Annual Routine Medical Screening                     Yes
Name: 36, dtype: object
----------