In [45]:
# Import necessary libraries
import numpy as np
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

import pandas as pd  # Import the pandas library

# Reading study group data from the CSV file
current_directory = os.getcwd()
print("Current working directory:", current_directory)
study_groups_df = pd.read_csv('Study_Group_Data.csv')

# Converting the DataFrame to a list of dictionaries
study_groups = study_groups_df.to_dict(orient='records')

# Downloading NLTK stopwords data
nltk.download('stopwords')
nltk.download('punkt')

# User interaction: Collect user's name, major, classes, study style, availability, and location
name = input("What is your name? ")
major = input("Enter your major: ")
user_classes = input("Enter the computer science classes you want to find a study group for: ")
user_style = input("List the learning styles you prefer from visual, audio, reading/writing, kinesthetic: ")
user_availability = input("Enter your availability (input mornings, afternoon, or evening): ")
user_day_availability = input("Enter the days you're available (input weekday, weekend, or every day): ")

# Processing the user's description using NLTK
# Tokenizing the description into words
words = word_tokenize(user_style)

# Removing stopwords and punctuation
stop_words = set(stopwords.words('english'))
keywords = [word for word in words if word.lower() not in stop_words and word.isalnum()]

# Calculating TF-IDF vectors for user's classes and description
tfidf_vectorizer = TfidfVectorizer()
user_matrix = tfidf_vectorizer.fit_transform([user_classes, user_style])
classes_matrix = tfidf_vectorizer.transform([group['class'] for group in study_groups])
style_matrix = tfidf_vectorizer.transform([group['style'] for group in study_groups])

# Calculating cosine similarity between the user's classes and study group classes
cosine_similarities_classes = linear_kernel(user_matrix[:1], classes_matrix).flatten()

# Calculating cosine similarity between the user's description and study group study styles
cosine_similarities_style = linear_kernel(user_matrix[1:], style_matrix).flatten()

# Normalizing the individual scores to be in the range [0, 100]
max_class_score = 1.0  # Maximum class score
max_style_score = 1.0  # Maximum style score

class_score_normalized = (cosine_similarities_classes / max_class_score) * 100
style_score_normalized = (cosine_similarities_style / max_style_score) * 100

# Defining custom weights for each factor
class_weight = 0.35  
style_weight = 0.18  
availability_weight = 0.235  
day_availability_weight = 0.235  

# Calculating combined compatibility scores based on classes, availability, and study style
compatibility_scores = []

# Converting user's availability into a list of available times
user_availability_list = user_availability.lower().split(", ")
user_day_availability_list = user_day_availability.lower().split(", ")

# Filter study groups with the same classes as the user
matching_study_groups = [group for group in study_groups if user_classes.lower() in group['class'].lower()]

for group in matching_study_groups:
    # Extracting availability information
    group_availability = group['availability'].lower().split(", ")
    group_day_availability = group['day availability'].lower().split(", ")

    # Calculating the availability score based on how many available times match
    matching_times = set(user_availability_list).intersection(group_availability)
    matching_days = set(user_day_availability_list).intersection(group_day_availability)
    availability_score = (len(matching_times) / len(user_availability_list)) * 100
    day_availability_score = (len(matching_days)/len(user_day_availability_list)) * 100 

    # Calculating the combined compatibility score with custom weights
    combined_score = (
        class_score_normalized[study_groups.index(group)] * class_weight
        + style_score_normalized[study_groups.index(group)] * style_weight
        + availability_score * availability_weight
        + day_availability_score * day_availability_weight
    )

    compatibility_scores.append((group['name '], combined_score))

# Sorting the matching study groups by combined compatibility score in descending order
compatibility_scores.sort(key=lambda x: x[1], reverse=True)

# Displaying matching study groups
print("Matching to the Top 3 Study Groups:")
for group, score in compatibility_scores[:3]:
    # Extract group's class
    group_class = next((item['class'] for item in matching_study_groups if item['name '] == group), None)
    group_availability = next((item['availability'] for item in matching_study_groups if item['name '] == group), None)
    group_day_availability = next((item['day availability'] for item in matching_study_groups if item['name '] == group), None)
    group_style = next((item['style'] for item in matching_study_groups if item['name '] == group), None)

    # Finding the biggest setback
    print(f"Group: {group}")
    print(f"Compatibility: {score:.2f}%")
    print(f"Class: {group_class}")
    print(f"Availability: {group_availability}")
    print(f"Day Availability: {group_day_availability}")
    print(f"Learning Style: {group_style}\n")

# Displaying user information
print("\nName:", name)
print("Major:", major)
print("User Information:")
print("Classes:", user_classes)
print("Description:", user_style)
print("Availability:", user_availability)
print("Day Availability:", user_day_availability)



[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/queensona/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/queensona/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Current working directory: /Users/queensona


What is your name?  Nare
Enter your major:  CS
Enter the computer science classes you want to find a study group for:  CS 103
List the learning styles you prefer from visual, audio, reading/writing, kinesthetic:  visual
Enter your availability (input mornings, afternoon, or evening):  mornings
Enter the days you're available (input weekday, weekend, or every day):  weekend


Matching to the Top 3 Study Groups:
Group: Study Group 13
Compatibility: 82.00%
Class: CS 103
Availability: mornings
Day Availability: weekend
Learning Style: reading/writing

Group: Study Group 16
Compatibility: 76.50%
Class: CS 103
Availability: mornings
Day Availability: every day
Learning Style: visual

Group: Study Group 10
Compatibility: 58.50%
Class: CS 103
Availability: mornings
Day Availability: weekday
Learning Style: reading/writing


Name: Nare
Major: CS
User Information:
Classes: CS 103
Description: visual
Availability: mornings
Day Availability: weekend
