In [10]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from streamlit_gsheets import GSheetsConnection
import streamlit as st


In [21]:
conn = st.connection("gsheets", type=GSheetsConnection)

# Read the first ten columns of the "Students" worksheet and discard rows
# in which every cell is empty.
dataset = conn.read(worksheet="Students", usecols=list(range(10)), ttl=5)
dataset = dataset.dropna(how="all")

# Build one text document per student from the free-text columns.
# A blank cell is read as NaN, and NaN + str stays NaN, which
# TfidfVectorizer refuses to process; replace missing values with empty
# strings first so a single blank cell cannot break the whole fit.
text_columns = dataset[['Languages Known', 'Soft Skills', 'Hard Skills', 'Co-Curricular']].fillna('').astype(str)
text_features = (
    text_columns['Languages Known'] + ' '
    + text_columns['Soft Skills'] + ' '
    + text_columns['Hard Skills'] + ' '
    + text_columns['Co-Curricular']
)

# Fit TF-IDF on the student documents. Row i of the resulting matrix
# corresponds to positional row i of `dataset`.
vectorizer = TfidfVectorizer(min_df=1, stop_words='english', lowercase=True)
text_feature_vectors = vectorizer.fit_transform(text_features)

def find(language, soft_skills, hard_skills, ex_curricular, min_cpi, min_gdpi, num_students, technical, aptitude):
    """Return the students whose text profile best matches the request.

    The four text arguments are joined into one document, projected into
    the TF-IDF space fitted on the module-level ``dataset``, and compared
    by cosine similarity with every student's document. Only students
    that satisfy all four numeric thresholds are eligible.

    Args:
        language: Text merged into the query document.
        soft_skills: Text merged into the query document.
        hard_skills: Text merged into the query document.
        ex_curricular: Text merged into the query document.
        min_cpi: Lower bound (inclusive) for the 'CPI' column.
        min_gdpi: Lower bound (inclusive) for the 'GDPI' column.
        num_students: Maximum number of students to return.
        technical: Lower bound (inclusive) for the 'Technical' column.
        aptitude: Lower bound (inclusive) for the 'Aptitude' column.

    Returns:
        A list of ``[Student ID, Student Name]`` pairs ordered from most
        to least similar. It holds at most ``num_students`` entries and
        is empty when ``num_students`` is not positive or no student
        meets the thresholds.
    """
    # A non-positive count must yield nothing. Without this guard the
    # slice ``[-0:]`` below would select *every* eligible student, and a
    # negative count would slice from the wrong end.
    if num_students <= 0:
        return []

    # Combine input features into a single query document
    candidate_text = f"{language} {soft_skills} {hard_skills} {ex_curricular}"
    # cosine_similarity accepts sparse input, so no dense copy is needed
    candidate_text_vector = vectorizer.transform([candidate_text])

    # Similarity of the query against every student: shape (1, n_students)
    similarity_scores = cosine_similarity(candidate_text_vector, text_feature_vectors)

    # Positional indices of students meeting all numeric thresholds
    meets_thresholds = (
        (dataset['CPI'] >= min_cpi)
        & (dataset['GDPI'] >= min_gdpi)
        & (dataset['Technical'] >= technical)
        & (dataset['Aptitude'] >= aptitude)
    )
    valid_indices = np.where(meets_thresholds)[0]

    # Rank the eligible students: ascending argsort, keep the last
    # `num_students` entries, then reverse to get best-first order
    valid_similarity_scores = similarity_scores[0, valid_indices]
    ranked = np.argsort(valid_similarity_scores)[-num_students:][::-1]
    top_indices = valid_indices[ranked]

    # Positional lookup keeps rows aligned with the TF-IDF matrix
    similar_students = dataset.iloc[top_indices][['Student ID', 'Student Name']].values.tolist()

    return similar_students

# Smoke test: request the five best matches for a sample profile and list them
similar_students = find(
    language="JAVA, C++",
    soft_skills=" Leadership Skills",
    hard_skills="Web Development",
    ex_curricular="Leetcode, Codeforces",
    min_cpi=9.0,
    min_gdpi=12.5,
    num_students=5,
    technical=15,
    aptitude=16,
)
for student in similar_students:
    # Each entry is a [Student ID, Student Name] pair
    student_id, student_name = student[0], student[1]
    print(f"{student_id} - {student_name}")




2024-04-25 18:42:03.281 No runtime found, using MemoryCacheStorageManager


201550022.0 - Vihaan Kumar
201550013.0 - Nakul Patel
201550052.0 - Aaliyah Gupta
201550040.0 - Veer Singh
201550019.0 - Samaira Jain
