# Dating Algorithm Recommender System

In [1]:
# libraries
import pandas as pd
import random
import numpy as np

# Creating Data

## Profile Questions

In [74]:
# Creating a Dataset of men and women
men = pd.DataFrame()

women = pd.DataFrame()

# Number of users
num = 1000

# Dating profile questions for each
qs = ['Q1', 'Q2', 'Q3', 'Q4', 'Q5']

# Answers to profile questions
ans = ['A', 'B', 'C', 'D', 'E']

for q in qs:
    
    # Making them categorical for preprocessing later
    men[q] = pd.Categorical(random.choices(ans, k=num), categories=ans)
    
    women[q] = pd.Categorical(random.choices(ans, k=num), categories=ans)
    
    # IDs
    men['id'] = ["m"+str(i) for i in range(num)]
    
    women['id'] = ["w"+str(i) for i in range(num)]
    
# Setting index
men.set_index('id', inplace=True)

women.set_index('id', inplace=True)

# Showing the DFs
display(men)

display(women)

Unnamed: 0_level_0,Q1,Q2,Q3,Q4,Q5
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
m0,E,C,A,A,A
m1,A,A,E,A,A
m2,B,C,A,D,B
m3,B,C,E,D,C
m4,C,D,D,B,B
...,...,...,...,...,...
m995,D,D,D,E,C
m996,D,E,B,E,E
m997,C,E,D,C,E
m998,D,E,D,E,A


Unnamed: 0_level_0,Q1,Q2,Q3,Q4,Q5
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
w0,B,A,B,E,A
w1,B,E,E,A,A
w2,E,E,B,D,E
w3,E,E,C,B,A
w4,A,E,D,B,C
...,...,...,...,...,...
w995,C,C,A,E,E
w996,E,A,B,B,D
w997,D,D,D,A,C
w998,C,D,D,E,D


## Swipe Behavior

In [75]:
# Creating match ratings between users
ratings = pd.DataFrame(index=men.index, columns=women.index)

for i in ratings.columns:
    ratings[i] = random.choices([0,1,"unseen"], k=num)
    
ratings

id,w0,w1,w2,w3,w4,w5,w6,w7,w8,w9,...,w990,w991,w992,w993,w994,w995,w996,w997,w998,w999
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
m0,1,1,1,0,1,unseen,1,0,unseen,1,...,unseen,0,1,0,0,unseen,0,unseen,1,0
m1,unseen,0,1,unseen,unseen,unseen,unseen,1,1,0,...,0,unseen,unseen,1,0,unseen,unseen,unseen,0,0
m2,0,1,unseen,unseen,unseen,1,1,unseen,1,0,...,unseen,unseen,0,0,1,0,0,1,1,unseen
m3,0,unseen,unseen,unseen,0,1,unseen,1,unseen,unseen,...,unseen,0,unseen,unseen,unseen,1,unseen,unseen,0,unseen
m4,unseen,1,0,1,unseen,0,1,unseen,0,unseen,...,1,unseen,unseen,0,0,1,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
m995,0,unseen,unseen,1,unseen,1,1,1,unseen,0,...,1,0,unseen,unseen,1,0,unseen,1,1,0
m996,unseen,0,1,1,0,0,unseen,0,1,0,...,unseen,0,unseen,0,1,0,0,1,1,1
m997,0,unseen,unseen,0,0,unseen,0,1,1,1,...,unseen,unseen,unseen,unseen,0,unseen,1,1,unseen,0
m998,1,0,1,0,1,unseen,0,1,1,0,...,unseen,1,unseen,unseen,1,1,0,0,unseen,0


# Finding Similar Users

### Finding Users who had the most unseen
Will make recommendations for them.

In [12]:
# Man
m_user = ratings.T.apply(pd.Series.value_counts).T.sort_values(
    by="unseen", 
    ascending=False
).iloc[0]

m_user

1         327
0         284
unseen    389
Name: m543, dtype: int64

In [10]:
# Woman
w_user = ratings.apply(pd.Series.value_counts).T.sort_values(by="unseen", ascending=False).iloc[0]

w_user

1         318
0         301
unseen    381
Name: w261, dtype: int64

## Users that haven't been seen yet by the user above

In [13]:
# Man's unseen users 
m_nrate = ratings.T[ratings.T[m_user.name]=="unseen"].index

m_nrate

Index(['w2', 'w6', 'w8', 'w9', 'w16', 'w17', 'w18', 'w19', 'w21', 'w24',
       ...
       'w976', 'w979', 'w981', 'w985', 'w988', 'w989', 'w991', 'w997', 'w998',
       'w999'],
      dtype='object', name='id', length=389)

In [14]:
# Woman's unseen users 
w_nrate = ratings[ratings[w_user.name]=="unseen"].index

w_nrate

Index(['m0', 'm6', 'm10', 'm14', 'm23', 'm24', 'm26', 'm27', 'm29', 'm30',
       ...
       'm968', 'm969', 'm970', 'm976', 'm978', 'm979', 'm980', 'm982', 'm986',
       'm992'],
      dtype='object', name='id', length=381)

## Top 10 Similar Users

__Simple Data Preprocessing__

In [15]:
# First need to replace the answers with their numerical values
n_men = men.apply(lambda x: x.cat.codes)

n_women = women.apply(lambda x: x.cat.codes)

In [16]:
display(n_men)
display(n_women)

Unnamed: 0_level_0,Q1,Q2,Q3,Q4,Q5
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
m0,3,1,0,3,0
m1,0,1,3,2,1
m2,4,1,3,4,2
m3,4,2,0,3,1
m4,1,4,1,3,3
...,...,...,...,...,...
m995,0,2,4,1,0
m996,1,0,0,3,4
m997,3,3,4,2,2
m998,1,2,0,4,0


Unnamed: 0_level_0,Q1,Q2,Q3,Q4,Q5
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
w0,4,0,0,4,2
w1,3,0,1,3,2
w2,4,2,2,3,1
w3,0,0,3,3,1
w4,3,2,2,1,2
...,...,...,...,...,...
w995,3,2,4,2,3
w996,2,2,4,2,1
w997,4,4,4,1,0
w998,1,1,3,2,4


In [17]:
# Similar men
m_sim = n_men.T.corrwith(
    n_men.T[m_user.name]
).sort_values(
    ascending=False
)[1:11]

m_sim

id
m869    1.000000
m935    1.000000
m564    1.000000
m543    1.000000
m470    1.000000
m812    0.976803
m945    0.963087
m405    0.963087
m308    0.963087
m311    0.963087
dtype: float64

In [18]:
# Simliar women
w_sim = n_women.T.corrwith(n_women.T[w_user.name]).sort_values(ascending=False)[1:11]

w_sim

id
w441    1.000000
w676    1.000000
w169    0.979958
w617    0.968822
w302    0.966988
w788    0.966988
w986    0.966988
w652    0.966988
w431    0.964286
w519    0.945611
dtype: float64

## The Similar Users' Ratings for the Unseen Users

In [19]:
# Similar mens' ratings
msim_rate = ratings.loc[list(m_sim.index)][m_nrate]

msim_rate

id,w2,w6,w8,w9,w16,w17,w18,w19,w21,w24,...,w976,w979,w981,w985,w988,w989,w991,w997,w998,w999
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
m869,1,1,1,unseen,unseen,0,unseen,unseen,1,unseen,...,0,unseen,unseen,1,1,0,0,0,0,unseen
m935,1,0,0,1,0,unseen,1,1,0,unseen,...,unseen,0,unseen,unseen,1,unseen,unseen,1,unseen,0
m564,1,unseen,1,unseen,1,unseen,unseen,0,unseen,1,...,0,unseen,1,0,1,0,0,unseen,unseen,unseen
m543,unseen,unseen,unseen,unseen,unseen,unseen,unseen,unseen,unseen,unseen,...,unseen,unseen,unseen,unseen,unseen,unseen,unseen,unseen,unseen,unseen
m470,1,unseen,0,1,unseen,unseen,1,unseen,0,0,...,0,unseen,0,0,unseen,unseen,unseen,1,1,unseen
m812,0,unseen,unseen,1,0,1,1,0,0,unseen,...,1,0,unseen,unseen,1,0,unseen,unseen,unseen,1
m945,0,unseen,0,0,1,0,1,unseen,1,unseen,...,1,unseen,0,1,unseen,unseen,unseen,1,0,0
m405,unseen,0,0,unseen,1,0,0,unseen,unseen,1,...,1,unseen,1,0,unseen,0,0,unseen,0,0
m308,0,unseen,0,unseen,1,0,1,1,unseen,1,...,0,unseen,unseen,0,1,0,1,unseen,unseen,1
m311,0,1,0,unseen,0,1,unseen,unseen,0,1,...,0,unseen,1,1,1,1,unseen,0,1,0


In [20]:
# Similar womens' ratings
wsim_rate = ratings[list(w_sim.index)].T[w_nrate]

wsim_rate

id,m0,m6,m10,m14,m23,m24,m26,m27,m29,m30,...,m968,m969,m970,m976,m978,m979,m980,m982,m986,m992
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
w441,0,0,1,0,0,unseen,unseen,unseen,unseen,unseen,...,unseen,unseen,0,unseen,1,unseen,0,unseen,unseen,0
w676,unseen,unseen,0,0,unseen,unseen,0,1,unseen,1,...,1,unseen,unseen,0,1,1,0,unseen,0,unseen
w169,1,0,1,1,unseen,1,unseen,0,1,1,...,unseen,0,1,1,unseen,1,1,0,1,1
w617,1,1,unseen,1,1,0,unseen,0,1,1,...,0,unseen,unseen,0,0,unseen,unseen,0,0,unseen
w302,1,unseen,1,unseen,unseen,unseen,0,unseen,0,0,...,0,1,1,0,1,1,unseen,unseen,unseen,unseen
w788,1,1,unseen,unseen,unseen,unseen,0,1,1,unseen,...,1,1,unseen,unseen,0,0,0,0,0,1
w986,unseen,unseen,1,unseen,1,1,unseen,1,unseen,unseen,...,0,0,unseen,1,1,1,1,0,0,1
w652,unseen,0,unseen,0,0,unseen,0,unseen,1,unseen,...,0,unseen,unseen,0,1,0,1,0,0,unseen
w431,1,0,1,1,1,0,unseen,0,unseen,1,...,unseen,0,0,1,0,0,unseen,1,1,unseen
w519,unseen,1,1,unseen,unseen,1,1,unseen,1,unseen,...,0,1,0,unseen,1,1,unseen,1,0,0


# Comparing Methods of Prediction

In [43]:
# Man predictions
m_predict = pd.DataFrame()

# Filling the unseen value with NaNs for calculation purposes
msim_rate.replace(
    "unseen", 
    np.nan, 
    inplace=True
)

# Average
m_predict['avg'] = msim_rate.mean()

# Frequency
m_predict['freq'] = msim_rate.mode().T[0]

# Median
m_predict['median'] = msim_rate.median()

m_predict

Unnamed: 0_level_0,avg,freq,median
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
w2,0.500000,0.0,0.5
w6,0.500000,0.0,0.5
w8,0.250000,0.0,0.0
w9,0.750000,1.0,1.0
w16,0.571429,1.0,1.0
...,...,...,...
w989,0.166667,0.0,0.0
w991,0.250000,0.0,0.0
w997,0.600000,1.0,1.0
w998,0.400000,0.0,0.0


In [44]:
# Woman predictions
w_predict = pd.DataFrame()

# Filling the unseen value with NaNs for calculation purposes
wsim_rate.replace("unseen", np.nan, inplace=True)

# Average
w_predict['avg'] = wsim_rate.mean()

# Frequency
w_predict['freq'] = wsim_rate.mode().T[0]

# Median
w_predict['median'] = wsim_rate.median()

w_predict

Unnamed: 0_level_0,avg,freq,median
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
m0,0.833333,1.0,1.0
m6,0.428571,0.0,0.0
m10,0.857143,1.0,1.0
m14,0.500000,0.0,0.5
m23,0.600000,1.0,1.0
...,...,...,...
m979,0.625000,1.0,1.0
m980,0.500000,0.0,0.5
m982,0.285714,0.0,0.0
m986,0.250000,0.0,0.0


# Handling a New User

## 1. Onboarding a New User
New user will answer questions to determine other profiles most similar to them.

In [78]:
# Dataframe of new user
m_new_user = pd.DataFrame(
    [random.choices(ans, k=5)],
    columns=men.columns,
    index=['m'+str(int(men.index[-1][1:])+1)] # New ID for the new user
)

# Categorizing the answers
m_new_user = m_new_user.apply(lambda x: pd.Categorical(x, categories=ans))

# The New User and their answers
m_new_user

Unnamed: 0,Q1,Q2,Q3,Q4,Q5
m1000,B,A,A,D,E


## 2. Finding Similar Users
These similar users' compatibility ratings will be used to find better recommendations for the new user.

In [79]:
# Categorizing the new user's answers to fit with the rest of the users
m_new_user = m_new_user.apply(lambda x: x.cat.codes, axis=1)

m_new_user

Unnamed: 0,Q1,Q2,Q3,Q4,Q5
m1000,1,0,0,3,4


In [80]:
# First need to replace the answers with their numerical values
n_men = men.apply(lambda x: x.cat.codes)

# Getting the correlation score and sorting by most correlated
new_sim = n_men.corrwith(m_new_user.iloc[0], axis=1).sort_values(ascending=False)[:10]

# The most similar users to the new user
new_sim

id
m401    0.974250
m8      0.973124
m695    0.963343
m523    0.963343
m945    0.963343
m768    0.956296
m380    0.954786
m325    0.953959
m232    0.943701
m508    0.941469
dtype: float64

## 3. Getting the Similar Users' Aggregated Ratings

In [81]:
# Getting the similar users' ratings
new_sim_rate = ratings.T[new_sim.index]

# Filling in unseen values with nan for calculation purposes
new_sim_rate.replace("unseen", np.nan, inplace=True)

# Finding top recommended based on the median of the similar users' rating
msim_rate.mean().sort_values(ascending=False)

id
w685    1.0
w605    1.0
w75     1.0
w34     1.0
w189    1.0
       ... 
w817    0.0
w691    0.0
w979    0.0
w211    0.0
w254    0.0
Length: 389, dtype: float64

__These are the top recommended users that may be the most compatible with the new user__

# Helper Functions
These functions will consolidate the process above.

In [105]:
def matchMan(men_df, women_df, ratings, new_man_answers, num_sim=10):
    """
    This function will return the most likely compatible women based on a few given
    dataframes for a new male user.  Will use the top N similar users' compatibility 
    ratings to find the potentially most compatible women.
    """
    
    # First need to replace the DF answers with their numerical values
    men_df = men_df.apply(lambda x: x.cat.codes)

    women_df = women_df.apply(lambda x: x.cat.codes)
    
    # Dataframe of new user
    new_man = pd.DataFrame(
        [new_man_answers],
        columns=men_df.columns,
        index=['m'+str(int(men_df.index[-1][1:])+1)] # Getting the new man's id 
    )
    
    # Categorical answers to the profile questions
    ans = ['A', 'B', 'C', 'D', 'E']
    
    # Categorizing the answers
    new_man = new_man.apply(
        lambda x: pd.Categorical(x, categories=ans)
    ).apply(
        lambda x: x.cat.codes, axis=1
    )
        
    # Getting the top N similar users
    sim_men = men_df.corrwith(
        new_man.iloc[0], 
        axis=1
    ).sort_values(ascending=False)[:num_sim].index
    
    # Getting the similar users' ratings
    sim_rate = ratings.T[sim_men]
    
    # Filling in unseen values with nan for calculation purposes
    sim_rate.replace("unseen", np.nan, inplace=True)
    
    # The potentially most compatible women for the new man
    most_comp = sim_rate.mean(axis=1).sort_values(ascending=False)
    
    return most_comp

In [127]:
# New man's answers
new_man_answers = random.choices(ans, k=5)

print(new_man_answers)

recs = matchMan(
    men, 
    women, 
    ratings, 
    new_man_answers, 
    num_sim=10
)

['E', 'C', 'D', 'A', 'E']


In [130]:
recs[:20]

id
w468    1.000000
w533    1.000000
w320    1.000000
w230    1.000000
w699    1.000000
w770    1.000000
w688    1.000000
w178    1.000000
w623    1.000000
w281    1.000000
w621    1.000000
w964    1.000000
w62     1.000000
w64     1.000000
w622    1.000000
w110    0.888889
w941    0.888889
w519    0.875000
w920    0.875000
w313    0.875000
dtype: float64

In [107]:
def matchWoman(men_df, women_df, ratings, new_woman_answers, num_sim=10):
    """
    This function will return the most likely compatible men based on a few given
    dataframes for a new female user.  Will use the top N similar users' compatibility 
    ratings to find the potentially most compatible men.
    """
    # First need to replace the DF answers with their numerical values
    men_df = men_df.apply(lambda x: x.cat.codes)

    women_df = women_df.apply(lambda x: x.cat.codes)
    
    # Dataframe of new user
    new_woman = pd.DataFrame(
        [new_woman_answers],
        columns=women_df.columns,
        index=['m'+str(int(women_df.index[-1][1:])+1)] # Getting the new woman's id 
    )
    
    # Categorical answers to the profile questions
    ans = ['A', 'B', 'C', 'D', 'E']
    
    # Categorizing the answers
    new_woman = new_woman.apply(
        lambda x: pd.Categorical(x, categories=ans)
    ).apply(
        lambda x: x.cat.codes, axis=1
    )
        
    # Getting the top N similar users
    sim_women = women_df.corrwith(
        new_woman.iloc[0], 
        axis=1
    ).sort_values(ascending=False)[:num_sim].index
    
    # Getting the similar users' ratings
    sim_rate = ratings[sim_women]
    
    # Filling in unseen values with nan for calculation purposes
    sim_rate.replace("unseen", np.nan, inplace=True)
    
    # The potentially most compatible men for the new woman
    most_comp = sim_rate.mean(axis=1).sort_values(ascending=False)
    
    return most_comp

In [110]:
# New woman's answers
new_woman_answers = random.choices(ans, k=5)

matchWoman(
    men, 
    women, 
    ratings, 
    new_woman_answers, 
    num_sim=10)

id
m625    1.0
m727    1.0
m809    1.0
m124    1.0
m652    1.0
       ... 
m970    0.0
m480    0.0
m296    0.0
m299    0.0
m729    0.0
Length: 1000, dtype: float64

# Predicting Compatibility Rating 
Between Two Users (man and woman)

In [111]:
def predictComp(m, w, men_df=men, women_df=women, ratings=ratings, num_sim=10):
    """
    Compares two existing users with one another and returns
    their compatibility score or at least one existing user with
    a new set of answers to also return a compatibility score.  
    Defaults to the already established DFs.
    """
    
    try:
        answers = list(men_df.loc[m].values)
        
        comp_score = matchMan(
            men_df, 
            women_df, 
            ratings, 
            answers, 
            num_sim=10
        )[w]
        
        return comp_score
    
    except:
        answers = list(women_df.loc[w].values)
        
        comp_score = matchWoman(
            men_df, 
            women_df, 
            ratings, 
            answers, 
            num_sim=10
        )[m]
        
        return comp_score

    finally:
        if type(m)==list:
            
            comp_score = matchMan(
                men_df, 
                women_df, 
                ratings, 
                m, 
                num_sim=10
            )[w]
        
            return comp_score
        
        elif type(w)==list:
            
            comp_score = matchWoman(
                men_df, 
                women_df, 
                ratings, 
                answers, 
                num_sim=10
            )[m]

            return comp_score

In [112]:
# Example with two existing users
print("Two existing:", predictComp('m4', 'w13'))


# Answers to profile questions
ans = ['A', 'B', 'C', 'D', 'E']

# New user answers (man or woman)
new_user_ans = random.choices(ans, k=5)

# Example with an already existing male user
print("New female user:", predictComp('m20', new_user_ans))

# Example with an already existing female user
print("New male user:", predictComp(new_user_ans, 'w333'))

Two existing: 0.3333333333333333
New female user: 0.42857142857142855
New male user: 0.16666666666666666


# Comparing Predictions with Actual Values

## 1. Find a Woman User
With the least amount of unseen

In [114]:
# Finding the right user
user = ratings.apply(pd.Series.value_counts).T.sort_values(by="unseen", ascending=False).iloc[-1].name

# Getting their ratings and filling in unseen with Nans
user_ratings = ratings[user].replace("unseen", np.nan)

user_ratings

id
m0      NaN
m1      1.0
m2      1.0
m3      NaN
m4      0.0
       ... 
m995    NaN
m996    1.0
m997    1.0
m998    1.0
m999    0.0
Name: w8, Length: 1000, dtype: float64

# 2. Getting Predictions
For this user

In [115]:
# User's answers
user_ans = list(women.loc[user].values)

preds = matchWoman(
    men, 
    women, 
    ratings, 
    user_ans, 
    num_sim=10
)

preds

id
m730    1.0
m35     1.0
m881    1.0
m935    1.0
m725    1.0
       ... 
m904    0.0
m326    0.0
m418    0.0
m610    0.0
m351    0.0
Length: 1000, dtype: float64

# 3. Comparing Predictions with Actual Values

In [118]:
# Combining both
comb = pd.concat([user_ratings, preds], axis=1).rename(columns={"w8":"actual",
                                                                0:"preds"})

# Dropping Nans
comb.dropna(inplace=True)

comb

Unnamed: 0_level_0,actual,preds
id,Unnamed: 1_level_1,Unnamed: 2_level_1
m1,1.0,0.400000
m2,1.0,0.500000
m4,0.0,0.375000
m5,1.0,0.444444
m6,0.0,0.500000
...,...,...
m994,1.0,0.500000
m996,1.0,0.600000
m997,1.0,0.600000
m998,1.0,0.714286


## Relabeling the Predictions
Rounding the predicted values

In [122]:
comb['preds'] = comb['preds'].apply(round)

In [123]:
comb

Unnamed: 0_level_0,actual,preds
id,Unnamed: 1_level_1,Unnamed: 2_level_1
m1,1.0,0
m2,1.0,0
m4,0.0,0
m5,1.0,0
m6,0.0,0
...,...,...
m994,1.0,0
m996,1.0,1
m997,1.0,1
m998,1.0,1


## Evaluation Metrics

In [126]:
from sklearn.metrics import accuracy_score, f1_score

acc = accuracy_score(comb['actual'], comb['preds'])

f1 = f1_score(comb['actual'], comb['preds'])

print(f"F1 Score: {f1}\nAccuracy Score: {acc}")

F1 Score: 0.5629139072847682
Accuracy Score: 0.6281690140845071


__Since the underlying dataset is random, it is hard to tell if this evaluation is truly accurate.__