In [1]:
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity

## Expanded Mentor Data

In [3]:
# Mentor catalogue: one row per mentor. The two list-valued columns
# (preferred_subjects, target_colleges) are multi-label fields; the two
# string columns (prep_level, learning_style) are single-valued categories.
mentors = pd.DataFrame(
    [
        # (name, preferred_subjects, target_colleges, prep_level, learning_style)
        ('Mentor A', ['Legal Reasoning', 'Logical Reasoning'], ['NLSIU', 'NLU Delhi'], 'Advanced', 'Visual'),
        ('Mentor B', ['English', 'GK'], ['NLU Jodhpur'], 'Intermediate', 'Auditory'),
        ('Mentor C', ['Legal Reasoning', 'English'], ['NLU Bhopal'], 'Beginner', 'Reading/Writing'),
        ('Mentor D', ['Logical Reasoning', 'GK'], ['NLU Kolkata'], 'Advanced', 'Kinesthetic'),
        ('Mentor E', ['Legal Reasoning', 'GK'], ['NLU Odisha', 'NLU Delhi'], 'Intermediate', 'Visual'),
        ('Mentor F', ['English', 'Logical Reasoning'], ['NALSAR', 'NLU Ranchi'], 'Beginner', 'Auditory'),
        ('Mentor G', ['GK', 'English'], ['NLU Patna', 'NLU Kochi'], 'Advanced', 'Reading/Writing'),
        ('Mentor H', ['Legal Reasoning', 'Logical Reasoning', 'GK'], ['NLU Delhi', 'NLU Jodhpur'], 'Advanced', 'Kinesthetic'),
        ('Mentor I', ['Logical Reasoning', 'GK', 'English'], ['NLU Bhopal'], 'Intermediate', 'Visual'),
        ('Mentor J', ['Legal Reasoning', 'English'], ['NLU Mumbai', 'NLSIU'], 'Beginner', 'Reading/Writing'),
    ],
    columns=['name', 'preferred_subjects', 'target_colleges', 'prep_level', 'learning_style'],
)


## Mock Aspirant

In [4]:
# Mock aspirant profile: the same four profile fields a mentor record has
# (no 'name'), used below as the query for the recommender.
aspirant = dict(
    preferred_subjects=['Legal Reasoning', 'English'],
    target_colleges=['NLU Bhopal'],
    prep_level='Beginner',
    learning_style='Reading/Writing',
)

## Preprocessing Function

In [6]:
def _as_label_list(value):
    """Coerce one multi-label cell to a plain list of labels."""
    # A key missing from the aspirant dict surfaces as None/NaN after the
    # dict -> DataFrame round trip; treat it as "no labels".
    if value is None or (isinstance(value, float) and value != value):
        return []
    # A bare string would otherwise be iterated character by character.
    if isinstance(value, str):
        return [value]
    return list(value)


def preprocess_profiles(profiles, aspirant):
    """Stack mentors and the aspirant into one frame and one-hot encode them.

    Parameters
    ----------
    profiles : pandas.DataFrame
        Mentor table with the columns 'preferred_subjects' and
        'target_colleges' (iterables of labels) and 'prep_level' and
        'learning_style' (single labels). It is copied, not modified.
    aspirant : dict
        Aspirant profile using the same four keys.

    Returns
    -------
    combined : pandas.DataFrame
        The mentor rows followed by the aspirant as the LAST row, with a
        fresh 0..n index, an 'is_aspirant' flag (0 for mentors, 1 for the
        aspirant) and the aspirant's 'name' set to 'Aspirant'.
    features : pandas.DataFrame
        Integer 0/1 indicator matrix, row-aligned with ``combined``: subject
        columns, then college columns, then 'prep_*' and 'style_*' columns.
    """
    df = profiles.copy()
    df['is_aspirant'] = 0

    aspirant_df = pd.DataFrame([aspirant])
    aspirant_df['name'] = 'Aspirant'
    aspirant_df['is_aspirant'] = 1

    # Fit the encoders on mentors + aspirant together so both share one
    # vocabulary and therefore one column layout.
    combined = pd.concat([df, aspirant_df], ignore_index=True)

    # Multi-hot encode the list-valued fields.
    mlb_subjects = MultiLabelBinarizer()
    mlb_colleges = MultiLabelBinarizer()

    subjects_encoded = mlb_subjects.fit_transform(
        combined['preferred_subjects'].map(_as_label_list)
    )
    colleges_encoded = mlb_colleges.fit_transform(
        combined['target_colleges'].map(_as_label_list)
    )

    # Reuse combined's index so the column-wise concat below aligns row by row.
    subjects_df = pd.DataFrame(
        subjects_encoded, columns=mlb_subjects.classes_, index=combined.index
    )
    colleges_df = pd.DataFrame(
        colleges_encoded, columns=mlb_colleges.classes_, index=combined.index
    )

    # One-hot encode the single-valued categorical fields. dtype=int keeps the
    # whole matrix numeric: pandas >= 2.0 defaults to bool here, which mixed
    # with the int columns above turns a single row into an object array.
    prep_levels = pd.get_dummies(combined['prep_level'], prefix='prep', dtype=int)
    learning_styles = pd.get_dummies(combined['learning_style'], prefix='style', dtype=int)

    features = pd.concat([subjects_df, colleges_df, prep_levels, learning_styles], axis=1)

    return combined, features


## Recommendation Function

In [7]:
def recommend_mentors(mentors_df, aspirant_profile, top_n=3):
    """Rank mentors by cosine similarity to an aspirant's encoded profile.

    Parameters
    ----------
    mentors_df : pandas.DataFrame
        Mentor table; must contain 'name' plus the profile columns that
        ``preprocess_profiles`` encodes. It is not modified.
    aspirant_profile : dict
        Aspirant profile with the same profile keys.
    top_n : int or None, default 3
        Number of best matches to return; ``None`` returns the full ranking.

    Returns
    -------
    pandas.DataFrame
        Columns 'name' and 'similarity', sorted by similarity descending and
        keeping the original index labels of ``mentors_df``. Mentors with
        equal scores keep their input order. Empty if ``mentors_df`` is empty.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.
    """
    if top_n is not None and top_n < 0:
        raise ValueError("top_n must be non-negative or None")

    # cosine_similarity rejects a matrix with zero rows, so answer the
    # "no mentors" case directly instead of crashing.
    if len(mentors_df) == 0:
        return pd.DataFrame({
            'name': pd.Series(dtype=object),
            'similarity': pd.Series(dtype=float),
        })

    _, features = preprocess_profiles(mentors_df, aspirant_profile)

    # Cast once to a float matrix so a bool/int column mix cannot degrade a
    # row into an object array.
    feature_matrix = features.to_numpy(dtype=float)

    # preprocess_profiles appends the aspirant as the final row.
    aspirant_vector = feature_matrix[-1:]
    mentor_vectors = feature_matrix[:-1]

    similarities = cosine_similarity(aspirant_vector, mentor_vectors)[0]

    ranked = mentors_df.copy()
    ranked['similarity'] = similarities

    # Exact ties are common with one-hot features; a stable sort makes the
    # cut-off deterministic (tied mentors stay in input order).
    ranked = ranked.sort_values(by='similarity', ascending=False, kind='mergesort')
    if top_n is not None:
        ranked = ranked.head(top_n)
    return ranked[['name', 'similarity']]


In [8]:
# Rank the mentor table against the mock aspirant and print the best matches.
recommendations = recommend_mentors(mentors, aspirant)
print("Top 3 Mentor Recommendations:\n", recommendations, sep="\n")

Top 3 Mentor Recommendations:

       name  similarity
2  Mentor C    1.000000
9  Mentor J    0.730297
5  Mentor F    0.365148


Over time, the system can be improved in three ways:

- **User feedback loop:** Let aspirants rate their mentor matches (e.g., 1–5 stars), and use collaborative filtering on those ratings to refine future recommendations.

- **Interaction tracking:** Monitor chats or sessions, and apply reinforcement learning to boost mentors whose matches lead to good outcomes.

- **Weighted similarity:** Learn per-feature weights from user feedback (e.g., a subject match may matter more than a prep-level match) instead of weighting every feature equally.