In [13]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler
import random
import ast

In [14]:
np.random.seed(42)
tf.random.set_seed(42)
random.seed(42)

aspirants_df = pd.read_csv('aspirants.csv')
mentors_df = pd.read_csv('mentors.csv')
interactions_df = pd.read_csv('interactions.csv')

def process_features(aspirants_df, mentors_df):
    def parse_list(x):
        try:
            return ast.literal_eval(x) if isinstance(x, str) else x
        except (ValueError, SyntaxError):
            return []

    aspirants_df['Preferred_Subjects_List'] = aspirants_df['Preferred Subjects'].apply(parse_list)
    all_subjects = []
    for subjects in aspirants_df['Preferred_Subjects_List']:
        all_subjects.extend(subjects)
    unique_subjects = list(set(all_subjects))
    for subject in unique_subjects:
        aspirants_df[f'Pref_{subject}'] = aspirants_df['Preferred_Subjects_List'].apply(
            lambda x: 1 if subject in x else 0)
    aspirants_df['Target_Colleges_List'] = aspirants_df['Target Colleges'].apply(parse_list)
    all_colleges = []
    for colleges in aspirants_df['Target_Colleges_List']:
        all_colleges.extend(colleges)
    unique_colleges = list(set(all_colleges))
    for college in unique_colleges:
        aspirants_df[f'Target_{college}'] = aspirants_df['Target_Colleges_List'].apply(
            lambda x: 1 if college in x else 0)
    prep_level_mapping = {'Beginner': 0, 'Intermediate': 1, 'Advanced': 2}
    aspirants_df['Prep_Level_Encoded'] = aspirants_df['Current Preparation Level'].map(prep_level_mapping)
    learning_styles = pd.get_dummies(aspirants_df['Learning Style'], prefix='Learning')
    aspirants_df = pd.concat([aspirants_df, learning_styles], axis=1)
    def extract_strengths_weaknesses(sw_dict):
        try:
            sw = ast.literal_eval(sw_dict) if isinstance(sw_dict, str) else sw_dict
            if not sw:
                return {}
            return sw
        except (ValueError, SyntaxError):
            return {}
    aspirants_df['SW_Dict'] = aspirants_df['Subject-wise Strengths and Weaknesses'].apply(extract_strengths_weaknesses)
    strength_map = {'Strong': 1.0, 'Average': 0.5, 'Weak': 0.0}
    for subject in unique_subjects:
        aspirants_df[f'Strength_{subject}'] = aspirants_df['SW_Dict'].apply(
            lambda x: strength_map.get(x.get(subject, 'Weak'), 0.0))
    mentors_df['Specialization_List'] = mentors_df['Specialization'].apply(parse_list)
    for subject in unique_subjects:
        mentors_df[f'Spec_{subject}'] = mentors_df['Specialization_List'].apply(
            lambda x: 1 if subject in x else 0)
    college_dummies = pd.get_dummies(mentors_df['College Attended'], prefix='College')
    mentors_df = pd.concat([mentors_df, college_dummies], axis=1)
    teaching_dummies = pd.get_dummies(mentors_df['Teaching Style'], prefix='Teaching')
    mentors_df = pd.concat([mentors_df, teaching_dummies], axis=1)
    mentors_df['Mentorship_Style_List'] = mentors_df['Mentorship Style'].apply(parse_list)
    all_styles = []
    for styles in mentors_df['Mentorship_Style_List']:
        all_styles.extend(styles)
    unique_styles = list(set(all_styles))
    for style in unique_styles:
        mentors_df[f'Style_{style}'] = mentors_df['Mentorship_Style_List'].apply(
            lambda x: 1 if style in x else 0)
    scaler = StandardScaler()
    numerical_features = ['Years of Experience', 'Past Success Rate', 'Rating', 'Number of Aspirants helped']
    mentors_df[numerical_features] = scaler.fit_transform(mentors_df[numerical_features])
    aspirant_drop_cols = ['Preferred Subjects', 'Target Colleges', 'Current Preparation Level',
                         'Learning Style', 'Subject-wise Strengths and Weaknesses',
                         'Preferred_Subjects_List', 'Target_Colleges_List', 'SW_Dict']
    aspirants_processed = aspirants_df.drop(aspirant_drop_cols, axis=1)
    mentor_drop_cols = ['Specialization', 'College Attended', 'Teaching Style',
                       'Mentorship Style', 'Specialization_List', 'Mentorship_Style_List']
    mentors_processed = mentors_df.drop(mentor_drop_cols, axis=1)
    return aspirants_processed, mentors_processed

aspirants_processed, mentors_processed = process_features(aspirants_df.copy(), mentors_df.copy())

In [18]:
aspirants_processed

Unnamed: 0,Aspirant ID,Location,Past Mock Test Scores,Time Commitment,Essay/Personal Statement,Pref_General Knowledge,Pref_English Comprehension,Pref_Quantitative Techniques,Pref_Legal Reasoning,Pref_Logical Reasoning,...,Prep_Level_Encoded,Learning_Auditory,Learning_Kinesthetic,Learning_Reading/Writing,Learning_Visual,Strength_General Knowledge,Strength_English Comprehension,Strength_Quantitative Techniques,Strength_Legal Reasoning,Strength_Logical Reasoning
0,1,Delhi,"[78, 61, 126, 122, 125]",28,Looking to improve my Legal Reasoning skills,1,0,0,1,0,...,1,False,False,False,True,0.0,0.0,0.0,0.0,1.0
1,2,Kolkata,"[96, 73, 82]",20,Looking to improve my General Knowledge skills,0,0,1,0,0,...,1,False,True,False,False,0.0,0.0,0.0,0.0,0.0
2,3,Bangalore,"[106, 70, 90, 106]",17,Looking to improve my English Comprehension sk...,1,1,0,0,0,...,2,False,False,False,True,1.0,0.0,0.0,0.0,0.0
3,4,Kolkata,"[101, 80, 130, 140, 75, 139]",16,Looking to improve my Legal Reasoning skills,0,1,0,0,0,...,0,False,False,True,False,0.0,1.0,0.0,0.0,0.5
4,5,Delhi,"[60, 60, 92, 126]",26,Looking to improve my Logical Reasoning skills,1,0,0,1,1,...,1,False,False,True,False,0.0,0.0,0.0,0.0,0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,96,Delhi,"[54, 140, 126, 141, 105]",20,Looking to improve my Legal Reasoning skills,0,1,1,0,0,...,2,False,True,False,False,0.0,0.0,0.0,1.0,0.0
96,97,Bangalore,"[144, 141, 66, 102, 51]",21,Looking to improve my Quantitative Techniques ...,0,0,0,0,1,...,2,False,False,True,False,0.0,1.0,0.0,0.0,0.5
97,98,Mumbai,"[120, 105, 55, 142, 98, 139]",29,Looking to improve my Quantitative Techniques ...,0,0,1,0,0,...,0,False,False,False,True,0.5,0.0,0.0,0.5,0.0
98,99,Kolkata,"[137, 53, 68, 75, 105]",10,Looking to improve my General Knowledge skills,0,1,0,0,0,...,0,False,True,False,False,0.5,0.0,0.0,0.0,0.5


In [19]:
mentors_processed

Unnamed: 0,Mentor ID,Years of Experience,Past Success Rate,Rating,Availability,Number of Aspirants helped,Spec_General Knowledge,Spec_English Comprehension,Spec_Quantitative Techniques,Spec_Legal Reasoning,...,College_NLIU Bhopal,College_NLSIU Bangalore,College_NUJS Kolkata,Teaching_Interactive,Teaching_Lecture-based,Teaching_Problem-solving,Style_Personalized Guidance,Style_Concept Clarity,Style_Motivation,Style_Test Strategy
0,101,-0.239115,-0.702999,-0.349482,Weekdays,1.630224,0,0,0,1,...,False,False,True,True,False,False,1,0,0,0
1,102,1.468852,-0.524309,0.305796,Weekends,-1.709155,0,0,1,0,...,False,False,False,False,False,True,0,1,0,1
2,103,1.468852,-0.849795,1.1795,Anytime,0.94717,0,1,0,0,...,True,False,False,False,False,True,1,0,0,0
3,104,1.127259,1.474688,0.961074,Weekends,-0.191255,0,1,0,0,...,False,False,False,False,True,False,0,0,0,1
4,105,0.444072,-1.463238,0.742648,Weekends,0.4918,0,0,0,0,...,False,False,False,False,False,True,0,0,1,0
5,106,1.468852,0.736771,1.397926,Anytime,1.09896,0,0,0,1,...,False,False,False,True,False,False,0,0,0,1
6,107,1.127259,0.949339,-1.441612,Weekdays,0.567695,0,1,0,0,...,False,False,False,True,False,False,0,1,0,0
7,108,-0.922302,-0.353537,-1.441612,Weekends,-1.709155,1,0,0,0,...,False,False,False,False,False,True,0,1,0,0
8,109,-0.239115,-1.152349,-0.786334,Weekends,1.25075,0,0,1,1,...,False,False,True,False,False,True,0,0,0,1
9,110,0.102478,0.952374,-0.786334,Weekdays,-1.48147,0,1,1,0,...,False,True,False,False,False,True,1,0,0,0


In [17]:
aspirants_processed.to_csv('aspirants_processed.csv', index=False)
mentors_processed.to_csv('mentors_processed.csv', index=False)