In [1]:
# Importing the libraries
import ast

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

In [2]:
# Mount Google Drive at /content/drive so files stored there can be read by later cells.
# NOTE: google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Load the dating-app dataset CSV from the mounted Drive folder into a DataFrame.
data = pd.read_csv("/content/drive/MyDrive/dating_recommendation/dating_app_dataset.csv")

In [4]:
# Preview the first rows to check the available columns and value formats.
data.head()

Unnamed: 0,User ID,Age,Gender,Height,Interests,Looking For,Children,Education Level,Occupation,Swiping History,Frequency of Usage
0,1,30,Male,5.240385,"['Sports', 'Cooking', 'Hiking', 'Music', 'Movi...",Casual Dating,No,High School,Student,96,Weekly
1,2,27,Female,4.937625,"['Sports', 'Reading']",Friendship,Yes,Master's Degree,Artist,96,Monthly
2,3,29,Female,5.806296,['Sports'],Casual Dating,No,Bachelor's Degree,Social Media Influencer,64,Daily
3,4,29,Female,5.101402,['Reading'],Marriage,No,Ph.D.,Doctor,67,Daily
4,5,32,Male,5.98667,"['Sports', 'Hiking', 'Music', 'Movies', 'Readi...",Long-term Relationship,Yes,Ph.D.,Engineer,93,Monthly


EDA

In [5]:
# Histogram of user ages, coloured by gender
fig = px.histogram(
    data,
    x="Age",
    color="Gender",
    nbins=20,
    title="Age Distribution by Gender",
).update_layout(xaxis_title="Age", yaxis_title="Count")  # update_layout returns the same figure
fig.show()

In [7]:
# Bar chart of education level, coloured by gender.
# education_order fixes the left-to-right order of the categories on the x axis.
education_order = [
    "High School",
    "Bachelor's Degree",
    "Master's Degree",
    "Ph.D.",
]
fig = px.bar(
    data,
    x="Education Level",
    color="Gender",
    category_orders={"Education Level": education_order},
    title="Education Level Distribution by Gender",
)
fig.update_layout(
    xaxis_title="Education Level",
    yaxis_title="Count",
)
fig.show()

In [10]:
# Bar chart of how often users open the app (all users together; no gender split is applied here)
fig = px.bar(
    data,
    x="Frequency of Usage",
    title="Frequency of App Usage Distribution",
)
fig.update_layout(
    xaxis_title="Frequency of Usage",
    yaxis_title="Count",
)
fig.show()

In [11]:
# Split the dataset into one DataFrame per gender using boolean row masks
male_profiles = data.loc[data['Gender'].eq('Male')]
female_profiles = data.loc[data['Gender'].eq('Female')]


Calculating the match score for dating recommendation

In [12]:
def _parse_interests(raw_interests):
  """Return a profile's interests as a set.

  The dataset stores interests as the text of a Python list literal
  (e.g. "['Sports', 'Reading']"). The text is parsed with
  ``ast.literal_eval``, which only accepts literals and therefore cannot
  execute code embedded in the CSV (unlike ``eval``). A value that is
  already a list/tuple/set is accepted unchanged.
  """
  if isinstance(raw_interests, str):
    raw_interests = ast.literal_eval(raw_interests)
  return set(raw_interests)


def calculate_match_score(profile1, profile2):
  """Compute a compatibility score between two user profiles.

  Each profile is any mapping-like object (dict, pandas Series/row) that
  provides the keys 'Interests', 'Age', 'Swiping History' and 'Looking For'.

  The score is the sum of four components:
    * shared interests: 1 point per interest present in both profiles
    * age closeness: ``10 - |age difference|``, floored at 0
    * swiping history: the smaller of the two 'Swiping History' values / 100
    * relationship goal: 1 point if both 'Looking For' values are equal

  Returns:
    The total score as a number (higher means a better match).

  Raises:
    ValueError, SyntaxError: if an 'Interests' string is not a valid
      Python literal.
  """
  #shared interest score(1 point per shared interest)
  interest1 = _parse_interests(profile1['Interests'])
  interest2 = _parse_interests(profile2['Interests'])
  shared_interests_score = len(interest1 & interest2)

  #age difference score(higher the age diff lower the score, never negative)
  age_difference_score = max(0, 10 - abs(profile1['Age'] - profile2['Age']))

  #swiping history score(limited by the less active of the two users)
  swiping_history_score = min(profile1['Swiping History'], profile2['Swiping History']) / 100

  #relationship type score(1 point for matching types)
  relationship_type_score = 1 if profile1['Looking For'] == profile2['Looking For'] else 0

  #Total match score
  return (shared_interests_score + age_difference_score
          + swiping_history_score + relationship_type_score)


In [14]:
#Calculating match score between two profiles
# Sanity check: score the first male profile against the first female profile
# and print the result together with both user IDs.
profile1 = male_profiles.iloc[0]
profile2 = female_profiles.iloc[0]
match_score = calculate_match_score(profile1,profile2)
print(f"Match score between User{profile1['User ID']} and User{profile2['User ID']}: {match_score}")

Match score between User1 and User2: 9.96


In [15]:
# Recommending profiles

def recommend_profiles(male_profiles, female_profiles):
  """Pair every male profile with its highest-scoring female profile.

  Every (male, female) combination is scored with calculate_match_score.
  For each male profile the first female profile reaching the highest
  score is kept (ties do not replace an earlier candidate).

  Returns:
    A list with one tuple per male profile:
    (male_profile, best_female_profile, best_score). If no female profile
    scores above -1 (e.g. female_profiles is empty) the tuple holds
    (male_profile, None, -1).
  """
  results = []

  for _, man in male_profiles.iterrows():
    # -1 is the starting threshold a candidate has to beat
    top_candidate, top_score = None, -1

    for _, woman in female_profiles.iterrows():
      candidate_score = calculate_match_score(man, woman)

      # strict comparison: an equal score keeps the earlier candidate
      if candidate_score > top_score:
        top_candidate, top_score = woman, candidate_score

    results.append((man, top_candidate, top_score))

  return results


#Generate recommendations: one (male profile, best female match, score) tuple per male profile
recommendations = recommend_profiles(male_profiles, female_profiles)

#Sort recommendations in place by match score (tuple index 2), highest first
recommendations.sort(key=lambda x: x[2],reverse=True)

#Display the ten highest-scoring recommendations with user IDs, ages and interests
for idx, (male_profile, female_profile, match_score) in enumerate(recommendations[:10]):
  print(f"Recommendation{idx+1}:")
  print(f"Male profile(User {male_profile['User ID']}): Age {male_profile['Age']}, Interests {male_profile['Interests']}")
  print(f"Female Profile (User {female_profile['User ID']}): Age {female_profile['Age']}, Interests {female_profile['Interests']}")
  print(f"Match Score: {match_score}")
  print()

Recommendation1:
Male profile(User 36): Age 19, Interests ['Movies', 'Cooking', 'Hiking', 'Reading', 'Sports', 'Travel', 'Music']
Female Profile (User 451): Age 19, Interests ['Reading', 'Music', 'Cooking', 'Hiking', 'Travel', 'Sports', 'Movies']
Match Score: 18.79

Recommendation2:
Male profile(User 274): Age 29, Interests ['Reading', 'Movies', 'Travel', 'Music', 'Hiking', 'Cooking', 'Sports']
Female Profile (User 300): Age 29, Interests ['Cooking', 'Reading', 'Music', 'Hiking', 'Travel', 'Sports', 'Movies']
Match Score: 18.73

Recommendation3:
Male profile(User 456): Age 29, Interests ['Cooking', 'Hiking', 'Sports', 'Travel', 'Music', 'Movies', 'Reading']
Female Profile (User 65): Age 29, Interests ['Travel', 'Movies', 'Reading', 'Sports', 'Music', 'Cooking', 'Hiking']
Match Score: 18.69

Recommendation4:
Male profile(User 147): Age 34, Interests ['Reading', 'Travel', 'Movies', 'Hiking', 'Cooking', 'Music', 'Sports']
Female Profile (User 287): Age 34, Interests ['Reading', 'Hiking', 