In [2]:
import io

import pandas as pd
from google.colab import files

# Prompt for the workbook. Colab hands back a mapping of file name -> file bytes
# for everything that was uploaded.
uploaded = files.upload()

file_name = 'ConnectU_Extended_Dataset.xlsx'

# Colab renames a re-uploaded file on disk (e.g. "... (1).xlsx"), so reading the
# hard-coded path can silently load an older copy. Parse the bytes that were just
# uploaded instead; only fall back to the path when nothing was uploaded.
if uploaded:
    uploaded_name = file_name if file_name in uploaded else next(iter(uploaded))
    data = pd.read_excel(io.BytesIO(uploaded[uploaded_name]))
else:
    data = pd.read_excel(file_name)

data.head()


Saving ConnectU_Extended_Dataset.xlsx to ConnectU_Extended_Dataset (1).xlsx


Unnamed: 0,ID,Age Group,Major,Degree Level,Connection Type,Availability Day,Availability Time,On/Off Campus,Communication,Languages,...,Years of Experience (Related),Interest in Networking Events,Participation Preference,Preferred Communication Frequency,Interest in Multidisciplinary Projects,Preferred Meeting Length,N number,Top Match 1,Top Match 2,Top Match 3
0,1,18-24,Biology,Undergraduate,Social,Thursday,Morning,Off Campus,Messaging Apps,French,...,4-5,Moderate,Flexible,Daily,High,2 hours+,N39077026,N34085912,N19384057,N90487216
1,2,18-24,Psychology,Undergraduate,Academic,Friday,Morning,On Campus,In-Person,English,...,4-5,High,Flexible,Daily,Low,30 mins,N84035600,N59037281,N39206285,N94056712
2,3,18-24,Biology,PhD,Study Group,Friday,Morning,On Campus,Messaging Apps,"English, Spanish",...,4-5,Low,Active,Daily,Low,1 hour,N64106861,N83045127,N39206285,N57203849
3,4,18-24,Computer Science,Undergraduate,Social,Wednesday,Morning,Off Campus,Messaging Apps,Spanish,...,0-1,Low,Passive,Daily,Moderate,1 hour,N68104106,N47583949,N92037514,N83041592
4,5,18-24,Computer Science,Undergraduate,Social,Friday,Morning,Off Campus,Email,Spanish,...,4-5,High,Passive,Weekly,High,1 hour,N47583949,N83041592,N68104106,N47208139


In [3]:

# Points added to a pair's compatibility score when both students hold the same
# value for the named column. The order of the pairs is kept as-is because
# other cells iterate over this mapping.
weights = dict([
    ("Age Group", 10),
    ("Major", 25),
    ("Degree Level", 20),
    ("Connection Type", 15),
    ("Availability Day", 10),
    ("Availability Time", 10),
    ("On/Off Campus", 5),
    ("Preferred Learning Style", 10),
    ("Group Size", 10),
    ("Top Hobby", 5),
    ("Preferred Collaboration Tool", 5),
    ("Languages", 10),
    ("Team Role Preference", 10),
    ("Career Field Interest", 15),
    ("Preferred Study Location", 5),
])

def calculate_compatibility(student1, student2, attribute_weights=None):
    """Score two students by summing the weights of the attributes they share.

    Args:
        student1: Record indexable by attribute name (e.g. a DataFrame row or dict).
        student2: Second record, indexable by the same attribute names.
        attribute_weights: Optional mapping of attribute name -> points awarded
            when both records hold an equal value for that attribute. When
            omitted, the notebook-level ``weights`` table is used.

    Returns:
        The sum of the weights of every attribute whose two values compare equal.

    Raises:
        KeyError: If a weighted attribute is missing from either record.
    """
    if attribute_weights is None:
        attribute_weights = weights

    score = 0
    for attribute, weight in attribute_weights.items():
        # Plain equality: a cell only counts when the whole value is identical.
        if student1[attribute] == student2[attribute]:
            score += weight
    return score


In [4]:

def generate_recommendations(user_id, dataset, top_n=5):
    """Rank every other user in ``dataset`` by compatibility with ``user_id``.

    Args:
        user_id: Value of the "ID" column identifying the target user.
        dataset: DataFrame with an "ID" column plus the columns that
            ``calculate_compatibility`` compares.
        top_n: Maximum number of matches to return (default 5).

    Returns:
        A list of ``(matched_id, score)`` tuples, highest score first. Ties keep
        the order in which the users appear in ``dataset``.

    Raises:
        IndexError: If no row in ``dataset`` has the requested ``user_id``.
    """
    target_rows = dataset[dataset["ID"] == user_id]
    if target_rows.empty:
        # Same exception type that .iloc[0] would raise, but with a usable message.
        raise IndexError(f"No user with ID {user_id!r} found in dataset.")
    target_user = target_rows.iloc[0]

    recommendations = [
        (user["ID"], calculate_compatibility(target_user, user))
        for _, user in dataset.iterrows()
        if user["ID"] != user_id
    ]

    # list.sort is stable even with reverse=True, so equal scores keep dataset order.
    recommendations.sort(key=lambda pair: pair[1], reverse=True)
    return recommendations[:top_n]


In [5]:

# Rank the other users against one example user and show the result.
# `user_id` and `top_matches` are reused by the CSV-export cell that follows.
user_id = 1
top_matches = generate_recommendations(user_id, data)

print("Top Matches for User ID", user_id, ":", top_matches)


Top Matches for User ID 1 : [(20, 95), (40, 85), (93, 85), (4, 80), (21, 80)]


In [6]:

# Turn the (id, score) tuples in `top_matches` into a two-column table and
# write it to a CSV whose name embeds the current `user_id`.
recommendations_df = pd.DataFrame(top_matches, columns=["User ID", "Compatibility Score"])
recommendations_df.to_csv(f"top_matches_user_{user_id}.csv", index=False)

print(f"Top matches for user {user_id} saved to CSV file.")


Top matches for user 1 saved to CSV file.


In [7]:

def generate_all_recommendations(dataset):
    """Score every ordered pair of distinct users in ``dataset``.

    Args:
        dataset: DataFrame with an "ID" column plus the columns that
            ``calculate_compatibility`` compares.

    Returns:
        DataFrame with one row per (target, other) pair and the columns
        "Target User ID", "Matched User ID" and "Compatibility Score".
    """
    pair_records = []

    for user_id in dataset["ID"]:
        # First row carrying this ID is the reference record for the comparison.
        target_user = dataset[dataset["ID"] == user_id].iloc[0]

        pair_records.extend(
            {
                "Target User ID": user_id,
                "Matched User ID": candidate["ID"],
                "Compatibility Score": calculate_compatibility(target_user, candidate),
            }
            for _, candidate in dataset.iterrows()
            if candidate["ID"] != user_id  # never pair a user with themselves
        )

    return pd.DataFrame(pair_records)


# Build the full pairwise score table for the loaded dataset.
all_recommendations_df = generate_all_recommendations(data)


# Persist the table without the DataFrame index column.
all_recommendations_df.to_csv("all_user_recommendations.csv", index=False)

print("All user recommendations saved to 'all_user_recommendations.csv'.")


All user recommendations saved to 'all_user_recommendations.csv'.


In [8]:
def generate_top_matches(dataset, top_n=3):
    """Collect the ``top_n`` best-scoring partners for every user.

    Args:
        dataset: DataFrame with an "ID" column plus the columns that
            ``calculate_compatibility`` compares.
        top_n: Number of matches to keep per user (default 3).

    Returns:
        DataFrame with columns "Target User ID", "Matched User ID" and
        "Compatibility Score"; up to ``top_n`` rows per user, best score first.
    """
    rows = []

    for user_id in dataset["ID"]:
        target_user = dataset[dataset["ID"] == user_id].iloc[0]

        # (other id, score) for everyone except the target user.
        candidates = [
            (other["ID"], calculate_compatibility(target_user, other))
            for _, other in dataset.iterrows()
            if other["ID"] != user_id
        ]
        # Stable descending sort: equal scores stay in dataset order.
        candidates.sort(key=lambda pair: pair[1], reverse=True)

        for matched_id, score in candidates[:top_n]:
            rows.append({
                "Target User ID": user_id,
                "Matched User ID": matched_id,
                "Compatibility Score": score,
            })

    return pd.DataFrame(rows)

# Compute each user's three best matches; `top_3_matches_df` is reused by the
# later export and rounding cells.
top_3_matches_df = generate_top_matches(data, top_n=3)

# Persist the table without the DataFrame index column.
top_3_matches_df.to_csv("top_3_matches_all_users.csv", index=False)

print("Top 3 matches for all users saved to 'top_3_matches_all_users.csv'.")


Top 3 matches for all users saved to 'top_3_matches_all_users.csv'.


In [9]:
# Highest score a pair can reach: every weighted attribute matches.
total_possible_score = sum(weights.values())
print("Total Possible Compatibility Score:", total_possible_score)


Total Possible Compatibility Score: 165


In [10]:
# Rescale each weight to its percentage share of the total, so the values sum to ~100.
normalized_weights = {key: (value / total_possible_score) * 100 for key, value in weights.items()}


In [19]:

# Same two expressions as the earlier total/normalisation cells, recomputed here
# from `weights` before the function below is redefined to use them.
total_possible_score = sum(weights.values())
# Each weight expressed as a percentage share of the total.
normalized_weights = {key: (value / total_possible_score) * 100 for key, value in weights.items()}

def calculate_compatibility(student1, student2, attribute_weights=None):
    """Score two students on a percentage scale using the normalised weights.

    Args:
        student1: Record indexable by attribute name (e.g. a DataFrame row or dict).
        student2: Second record, indexable by the same attribute names.
        attribute_weights: Optional mapping of attribute name -> points awarded
            when both records hold an equal value for that attribute. When
            omitted, the notebook-level ``normalized_weights`` table is used.

    Returns:
        The sum of the weights of every attribute whose two values compare equal.

    Raises:
        KeyError: If a weighted attribute is missing from either record.
    """
    if attribute_weights is None:
        attribute_weights = normalized_weights

    # Accumulate in mapping order with plain += so float results do not depend
    # on the summation strategy of a helper.
    score = 0
    for attribute, weight in attribute_weights.items():
        if student1[attribute] == student2[attribute]:
            score += weight
    return score


In [22]:
def generate_top_matches(dataset, top_n=3):
    """Collect the ``top_n`` best-scoring partners for every user as nested records.

    Args:
        dataset: DataFrame with an "ID" column plus the columns that
            ``calculate_compatibility`` compares.
        top_n: Number of matches to keep per user (default 3).

    Returns:
        A list with one dict per user: ``{"User ID": ..., "Top Matches": [...]}``,
        where each entry of "Top Matches" is a dict with the keys
        "Matched User ID" and "Compatibility Score", best score first.
    """
    results = []

    for user_id in dataset["ID"]:
        target_user = dataset[dataset["ID"] == user_id].iloc[0]

        # Score everyone except the target user.
        scored = [
            {
                "Matched User ID": row["ID"],
                "Compatibility Score": calculate_compatibility(target_user, row),
            }
            for _, row in dataset.iterrows()
            if row["ID"] != user_id
        ]
        # Stable descending sort: equal scores stay in dataset order.
        scored.sort(key=lambda match: match["Compatibility Score"], reverse=True)

        results.append({"User ID": user_id, "Top Matches": scored[:top_n]})

    return results


In [23]:

# Recompute the top-3 table before exporting. Without this step the frame built
# earlier from the raw weights is written under the "normalized" file name.
top_matches_normalized = generate_top_matches(data, top_n=3)

if isinstance(top_matches_normalized, pd.DataFrame):
    # The DataFrame-returning definition of generate_top_matches is active.
    top_3_matches_df = top_matches_normalized
else:
    # The list-returning definition is active: flatten to one row per match,
    # using the same column names as the earlier top-3 table.
    top_3_matches_df = pd.DataFrame(
        [
            {
                "Target User ID": entry["User ID"],
                "Matched User ID": match["Matched User ID"],
                "Compatibility Score": match["Compatibility Score"],
            }
            for entry in top_matches_normalized
            for match in entry["Top Matches"]
        ],
        columns=["Target User ID", "Matched User ID", "Compatibility Score"],
    )

top_3_matches_df.to_csv("top_3_matches_all_users_normalized.csv", index=False)

print("Top 3 matches for all users saved to 'top_3_matches_all_users_normalized.csv'.")


Top 3 matches for all users saved to 'top_3_matches_all_users_normalized.csv'.


In [24]:
def calculate_compatibility(student1, student2, attribute_weights=None):
    """Score two students on a percentage scale, rounded to a whole number.

    Args:
        student1: Record indexable by attribute name (e.g. a DataFrame row or dict).
        student2: Second record, indexable by the same attribute names.
        attribute_weights: Optional mapping of attribute name -> points awarded
            when both records hold an equal value for that attribute. When
            omitted, the notebook-level ``normalized_weights`` table is used.

    Returns:
        The summed weight of all equal attributes, passed through the built-in
        ``round`` (so exact halves go to the nearest even integer).

    Raises:
        KeyError: If a weighted attribute is missing from either record.
    """
    if attribute_weights is None:
        attribute_weights = normalized_weights

    score = 0
    for attribute, weight in attribute_weights.items():
        if student1[attribute] == student2[attribute]:
            score += weight
    return round(score)


In [25]:
# Overwrite the score column in place with whole-number integer scores.
# NOTE(review): the next cell starts with this same statement — confirm both are needed.
top_3_matches_df["Compatibility Score"] = top_3_matches_df["Compatibility Score"].round(0).astype(int)


In [26]:
# Round the score column to whole numbers (in place) and store it as integers.
top_3_matches_df["Compatibility Score"] = top_3_matches_df["Compatibility Score"].round(0).astype(int)

# Persist the rounded table without the DataFrame index column.
top_3_matches_df.to_csv("top_3_matches_all_users_rounded.csv", index=False)

print("Updated file with rounded compatibility scores saved as 'top_3_matches_all_users_rounded.csv'.")


Updated file with rounded compatibility scores saved as 'top_3_matches_all_users_rounded.csv'.
