In [1]:
import pandas as pd
import numpy as np
import re

In [3]:
data = pd.read_csv('Cleaned_User_Matching_Dataset.csv')

# Convert durations into floats
def parse_duration(s):
    if pd.isna(s):
        return np.nan
    s = str(s).lower().strip()
    match = re.match(r"(\d+)", s)
    if not match:
        return np.nan
    number = int(match.group(1))
    if 'week' in s:
        return number * 7
    elif 'month' in s:
        return number * 30
    elif 'day' in s:
        return number
    else:
        return number

for col in ['project_deadline', 'availability']:
    if col in data.columns:
        data[col] = data[col].apply(parse_duration).fillna(0)

# Split into user types and set index by user_id
founders_df = data[data['user_type'] == 'Founder'].set_index('user_id')
providers_df = data[data['user_type'] == 'Service Provider'].set_index('user_id')
mentors_df = data[data['user_type'] == 'Mentor'].set_index('user_id')

In [5]:
#Defining dictionaries for lookups to evaluate weights of the pairings

startup_industry = ['SaaS', 'FinTech', 'HealthTech', 'AgriTech', 'E-Commerce', 'EdTech']
industry_preference = ['E-Commerce', 'FinTech', 'HealthTech', 'AgriTech', 'SaaS', 'EdTech']
tech_requirement = ['AWS', 'React', 'GST Filing', 'Python', 'Copywriting', 'SEO', 'Figma', 'Node.js']
core_skill = ['GST Filing', 'SEO', 'Node.js', 'Python', 'React', 'Figma', 'AWS', 'Copywriting']
project_need = [
    'Digital Marketing', 'Pitch Deck Design', 'Fundraising Support',
    'UI/UX Revamp', 'MVP Development', 'Compliance & Legal'
]
preferred_project_type = [
    'UI/UX Revamp', 'MVP Development', 'Pitch Deck Design',
    'Compliance & Legal', 'Fundraising Support', 'Digital Marketing'
]

# Startup industry
startup_industry_scores = {}
for i, si in enumerate(startup_industry):
    for j, ip in enumerate(industry_preference):
        if si == ip:
            score = 1.0
        elif abs(i - j) == 1:
            score = 0.7
        else:
            score = 0.2
        startup_industry_scores[(si, ip)] = score

# Tech skill
tech_skill_scores = {}
for t1 in tech_requirement:
    for t2 in core_skill:
        if t1 == t2:
            score = 1.0
        elif {t1, t2} & {'Python', 'Node.js', 'React', 'Figma', 'AWS'} and t1 != t2:
            score = 0.7
        elif {t1, t2} & {'Copywriting', 'SEO'} and t1 != t2:
            score = 0.5
        elif 'GST' in t1 or 'GST' in t2:
            score = 0.3
        else:
            score = 0.1
        tech_skill_scores[(t1, t2)] = score

# Project need (strict mathcing)
project_need_scores = {}
for pn in project_need:
    for ppt in preferred_project_type:
        score = 1.0 if pn == ppt else 0.0
        project_need_scores[(pn, ppt)] = score

In [6]:
#Scoring Functions

def get_startup_industry_score(si, pref):
    return startup_industry_scores.get((si, pref), 0.0)

def get_tech_skill_score(t1, t2):
    return tech_skill_scores.get((t1, t2), 0.0)

def get_project_need_score(pn, ppt):
    return project_need_scores.get((pn, ppt), 0.0)

def get_deadline_availability_score(req, avail):
    try:
        req = float(req)
        avail = float(avail)
    except:
        return 0.0
    if req == 0:
        return 0.0
    if avail == req:
        return 1.0
    elif avail > req:
        overstay_ratio = (avail - req) / req
        return max(0.5, 1 / (1 + overstay_ratio))
    else:
        return avail / req

# Weights of each pairings
WEIGHTS = {
    'industry': 0.25,
    'project_need': 0.25,
    'tech_skill': 0.30,
    'deadline_availability': 0.20,
}

def calculate_match_score(userA, userB):
    ind_score = get_startup_industry_score(userA.get('startup_industry',''), userB.get('industry_preference',''))
    proj_score = get_project_need_score(userA.get('project_need',''), userB.get('preferred_project_type',''))
    tech_score = get_tech_skill_score(userA.get('tech_requirement',''), userB.get('core_skill',''))
    deadline_score = get_deadline_availability_score(userA.get('project_deadline',0), userB.get('availability',0))
    total = (
        WEIGHTS['industry'] * ind_score +
        WEIGHTS['project_need'] * proj_score +
        WEIGHTS['tech_skill'] * tech_score +
        WEIGHTS['deadline_availability'] * deadline_score
    )
    return round(total, 4)

In [7]:
#Find Matches for a User

def find_matches_for_user(user_id, user_type, founders_df, providers_df, mentors_df, top_k=5):
    user_type = user_type.lower()
    if user_type == 'founder':
        user_row = founders_df.loc[user_id]
        match_dict = {
            'mentors': mentors_df,
            'service_providers': providers_df
        }
    elif user_type == 'provider':
        user_row = providers_df.loc[user_id]
        match_dict = {
            'mentors': mentors_df,
            'founders': founders_df
        }
    elif user_type == 'mentor':
        user_row = mentors_df.loc[user_id]
        match_dict = {
            'founders': founders_df,
            'service_providers': providers_df
        }
    else:
        raise ValueError(f"Invalid user_type '{user_type}'. Must be one of 'founder', 'provider', 'mentor'.")

    results = {}
    for target_type, df in match_dict.items():
        matches = []
        for idx, match_row in df.iterrows():
            score = calculate_match_score(user_row, match_row)
            matches.append((idx, score))
        matches_sorted = sorted(matches, key=lambda x: x[1], reverse=True)[:top_k]
        results[target_type] = matches_sorted
    return results

In [14]:
#Display Top K Matches as Tables

def display_top_k_full_tables(user_id, user_type, founders_df, providers_df, mentors_df, top_k=5):
    matches_dict = find_matches_for_user(user_id, user_type, founders_df, providers_df, mentors_df, top_k)
    if user_type.lower() == 'founder':
        input_df = founders_df.loc[[user_id]]
    elif user_type.lower() == 'provider':
        input_df = providers_df.loc[[user_id]]
    elif user_type.lower() == 'mentor':
        input_df = mentors_df.loc[[user_id]]
    else:
        raise ValueError("Invalid user_type")
    
    print("\nInput User Data:")
    display(input_df)
    
    for matched_type, matches in matches_dict.items():
        print(f"\nTop {top_k} matches from {matched_type}:")
        matched_ids = [mid for mid, score in matches]
        if not matched_ids:
            print("  No matches found.")
        else:
            if matched_type == 'founders':
                rows = founders_df.loc[matched_ids].copy()
            elif matched_type == 'service_providers':
                rows = providers_df.loc[matched_ids].copy()
            elif matched_type == 'mentors':
                rows = mentors_df.loc[matched_ids].copy()
            else:
                continue
            scores = [score for mid, score in matches]
            rows['matching_score'] = scores
            display(rows)

In [15]:
user_id = data.loc[50]['user_id']
user_type = data.loc[50]['user_type']

In [16]:
display_top_k_full_tables('F001', 'founder', founders_df, providers_df, mentors_df, top_k=5)


Input User Data:


Unnamed: 0_level_0,user_type,startup_stage,startup_industry,project_need,tech_requirement,project_deadline,expertise_area,industry_preference,preferred_project_type,core_skill,availability
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
F001,Founder,Ideation,SaaS,Digital Marketing,AWS,30.0,,,,,0.0



Top 5 matches from mentors:


Unnamed: 0_level_0,user_type,startup_stage,startup_industry,project_need,tech_requirement,project_deadline,expertise_area,industry_preference,preferred_project_type,core_skill,availability,matching_score
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
S029,Mentor,,,,,0.0,Growth Marketing,FinTech,Digital Marketing,GST Filing,7.0,0.6817
S011,Mentor,,,,,0.0,Full Stack Development,SaaS,MVP Development,GST Filing,30.0,0.66
S021,Mentor,,,,,0.0,Startup Funding,SaaS,Compliance & Legal,AWS,14.0,0.6433
S009,Mentor,,,,,0.0,Startup Funding,SaaS,UI/UX Revamp,AWS,7.0,0.5967
S039,Mentor,,,,,0.0,Legal Advisor,FinTech,UI/UX Revamp,Node.js,30.0,0.585



Top 5 matches from service_providers:


Unnamed: 0_level_0,user_type,startup_stage,startup_industry,project_need,tech_requirement,project_deadline,expertise_area,industry_preference,preferred_project_type,core_skill,availability,matching_score
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
S048,Service Provider,,,,,0.0,Full Stack Development,SaaS,Digital Marketing,Python,60.0,0.81
S047,Service Provider,,,,,0.0,Full Stack Development,SaaS,Digital Marketing,Python,7.0,0.7567
S008,Service Provider,,,,,0.0,Legal Advisor,SaaS,Fundraising Support,AWS,30.0,0.75
S015,Service Provider,,,,,0.0,Full Stack Development,SaaS,Pitch Deck Design,AWS,30.0,0.75
S034,Service Provider,,,,,0.0,Design Expert,E-Commerce,Digital Marketing,AWS,14.0,0.6933
