In [31]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the student records and replace every missing value with an empty
# string, so the text columns below can be concatenated directly.
df = pd.read_csv("Comprehensive_Student_Dataset.csv").fillna('')

# Build one space-separated text field per student from the three
# activity-related columns, joined left to right.
_feature_columns = ['Extracurricular Activities', 'Social Interests', 'Desired Activities']
_combined_text = df[_feature_columns[0]]
for _column in _feature_columns[1:]:
    _combined_text = _combined_text + ' ' + df[_column]
df['activity_features'] = _combined_text

# Turn the combined text into TF-IDF vectors, ignoring English stop words.
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(df['activity_features'])

# Pairwise cosine similarity between all students; with a single argument the
# matrix is compared against itself, so entry [i, j] relates row i to row j.
cosine_sim = cosine_similarity(tfidf_matrix)

def recommend_activities(student_name, cosine_sim=cosine_sim, df=df, top_n=5):
    """
    Recommend activities for a student from the most similar other students.

    Args:
        student_name: Value to match against the ``Name`` column of ``df``.
            If several rows match, the first one is used.
        cosine_sim: Square pairwise similarity matrix whose row/column order
            follows the row order of ``df``.
        df: Student table with ``Name`` and ``Desired Activities`` columns.
        top_n: Maximum number of similar students to draw from (default 5).

    Returns:
        A list holding the ``Desired Activities`` value of each selected
        student, most similar first. Values are returned exactly as they are
        stored in the column.

    Raises:
        IndexError: If no row has ``Name`` equal to ``student_name``.
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Locate the student by row *position*, not index label: the similarity
    # matrix and the .iloc lookup below are both positional.
    positions = (df['Name'] == student_name).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"no student named {student_name!r} in the dataset")
    idx = int(positions[0])

    # Remove the student explicitly instead of assuming they sort first:
    # another row with a tied score (e.g. identical features) can sort ahead
    # of the student, which would otherwise return the student as their own
    # neighbour.
    candidates = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # Stable sort, highest similarity first; ties keep their row order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    student_indices = [position for position, _ in candidates[:top_n]]

    # Collect what the most similar students want to do.
    return df['Desired Activities'].iloc[student_indices].values.tolist()

# Example usage: look up recommendations for a single student and print them.
# The name must match a value in the 'Name' column; recommend_activities
# raises IndexError when no row matches.
student_name = "Student_1"
# Uses the module-level similarity matrix and DataFrame via the default arguments.
recommended_activities = recommend_activities(student_name)
print(f"Recommended activities for {student_name}: {recommended_activities}")


Recommended activities for Student_1: ["['Dance Club', 'Hackathons']", "['Dance Club', 'Hackathons']", "['Hackathons', 'Music Club']", "['Music Club', 'Hackathons']", "['Hackathons']"]
