In [1]:
import re
import sqlite3

import numpy as np
import pandas as pd

# Open the local SQLite database. The connection is not closed in this cell.
conn = sqlite3.connect('smartAggie.sqlite')

# Join every professor to the courses they teach (matched on professor name and on
# department == subject name), keeping the course fields and the rating fields.
# The adjacent literals concatenate into a single SQL statement.
course_prof_query = (
    'SELECT C."Course Code", C.Quarter, C."Course Description", C."Course Units", '
    'P."Prof Quality Rating", P."Level of Difficulty", P.Department, '
    'P."Would Take Again", P."Total Ratings" '
    'FROM main.Professors P '
    'INNER JOIN main.Courses C '
    'on P.Name = C.ProfName and P.Department = C."Subject Name"'
)
course_prof_df = pd.read_sql_query(course_prof_query, con=conn)

In [2]:
# data preprocessing
# Rescales the rating columns and derives each course's student level, using
# whole-column operations so the derived columns exist even when the frame is empty.
# NOTE: this cell rewrites course_prof_df's columns, so re-running it without
# re-running the load cell rescales the values a second time.
course_prof_df.reset_index(drop=True, inplace=True)

# Divide the two rating columns by 5 (presumably a 0-5 scale -- TODO confirm).
course_prof_df['Prof Quality Rating'] = course_prof_df['Prof Quality Rating'] / 5.0
course_prof_df['Level of Difficulty'] = course_prof_df['Level of Difficulty'] / 5.0

# -1 is replaced by 0 before converting the percentage to a fraction
# (presumably -1 marks "no data" -- TODO confirm).
would_take_again = course_prof_df['Would Take Again']
course_prof_df['Would Take Again'] = would_take_again.where(would_take_again != -1, 0) / 100.0


def _student_level(course_code):
    """Classify a course code such as 'ECS 032A' by the first digit of its number.

    Returns 'Undergraduate' when the first digit is 0 or 1, otherwise 'Graduate'.
    Raises IndexError when the code does not contain '<subject> <number>'.
    NOTE(review): looking only at the first digit assumes zero-padded three-digit
    course numbers (e.g. '032', not '32') -- confirm against the catalogue.
    """
    leading_digit = int(re.findall(r'\w+ (\d)\d*[A-Z]*', course_code)[0])
    return 'Undergraduate' if leading_digit < 2 else 'Graduate'


course_prof_df['Student Level'] = course_prof_df['Course Code'].map(_student_level)

# Express each row's rating count as its share of the department's total count.
course_prof_df['Total Ratings'] = course_prof_df.groupby('Department')['Total Ratings'].transform(lambda col: col/col.sum())

In [3]:
# user input
from resume_course_similarity_scorer import get_course_similarity_course_desc
user_major = 'Aerospace Science & Engineering '
user_education_level = 'Undergraduate'
# (course code, letter grade) pairs for courses the user has already taken
user_courses = [
    ('ECS 032A', 'A'),
    ('ECS 050', 'A-'),
    ('ECS 140A', 'B'),
    ('ECS 154A', 'B-'),
    ('ECS 170', 'A'),
]
user_resume = 'sample resume.pdf'

# Candidate courses: rows in the user's department at the user's education level.
# The explicit copy keeps x_input independent of course_prof_df.
in_user_major = course_prof_df['Department'] == user_major
at_user_level = course_prof_df['Student Level'] == user_education_level
x_input = course_prof_df.loc[in_user_major & at_user_level].copy(deep=True)

# Score every candidate's description against the resume, in row order.
x_input['Similarity Score'] = [
    get_course_similarity_course_desc(user_resume, description)
    for description in x_input['Course Description']
]

Spacy model is loading...
['orbital mechanics', 'satellite orbits', 'orbits multistage', 'mechanics satellite', 'optimizations interplanetary', 'satellites trajectory', 'interplanetary trajectories', 'orbits', 'multistage rockets', 'trajectory optimizations']
course description 'Orbital Mechanics' similarity with resume experience: 10.175276290557166
Spacy model is loading...
['space vehicle', 'spacecraft reverse', 'project spacecraft', 'systems engineering', 'spacecraft', 'vehicle design', 'space travel', 'engineering design', 'reverse engineering', 'principles systems']
course description 'Space Vehicle Design' similarity with resume experience: 11.623119829389628
Spacy model is loading...
['computational aerodynamics', 'aerodynamics development', 'airfoil theory', 'aerodynamics', 'wing analysis', 'body airfoil', 'theory viscous', 'viscous effects', 'airfoil', 'fluid motion']
course description 'Theoretical & Computational Aerodynamics' similarity with resume experience: 12.077303981

In [9]:
from sklearn.cluster import KMeans

# Cluster the candidate courses on their rating features plus resume similarity.
# NOTE(review): 'Similarity Score' is not rescaled in this notebook, while the
# other columns are divided down in preprocessing; if its range is much larger it
# will dominate the Euclidean distance -- consider scaling it.
cluster_features = ['Prof Quality Rating', 'Level of Difficulty', 'Would Take Again', 'Total Ratings', 'Similarity Score']
inp = x_input[cluster_features].values

# - n_clusters is capped by the sample count: KMeans raises when asked for more
#   clusters than there are rows.
# - random_state pins the centroid initialisation so cluster ids (and therefore
#   the recommendations derived from them) are reproducible across runs.
# - n_init is set explicitly because its default differs between sklearn versions.
km = KMeans(n_clusters=min(3, len(inp)), n_init=10, random_state=42)
km.fit(inp)
labels = km.labels_
x_input['Cluster'] = labels

In [10]:
# Build one feature row per course the user has taken and assign each to a cluster.
# output_labels always has one entry per user course, in the same order as
# user_courses. A course with no catalogue row gets the sentinel -1. This keeps
# grades and labels aligned when they are paired up later; -1 never equals a
# KMeans label, so an `isin` test against cluster ids simply ignores it.
feature_columns = ['Prof Quality Rating', 'Level of Difficulty', 'Would Take Again', 'Total Ratings', 'Similarity Score']
user_course_list = [x[0] for x in user_courses]

feature_rows = []
matched_positions = []
unmatched_courses = []
for position, course in enumerate(user_course_list):
    matches = course_prof_df[course_prof_df['Course Code'] == course]
    if matches.empty:
        unmatched_courses.append(course)
        continue
    # Use the first catalogue row for the course when several rows share the code.
    rows = matches.iloc[0]
    feature_rows.append([rows['Prof Quality Rating'], rows['Level of Difficulty'], rows['Would Take Again'], rows['Total Ratings'], get_course_similarity_course_desc(user_resume, rows['Course Description'])])
    matched_positions.append(position)

if unmatched_courses:
    print('No catalogue entry found for:', unmatched_courses)

# Assemble the frame once instead of growing it row by row.
x_output = pd.DataFrame(feature_rows, columns=feature_columns)

output_labels = np.full(len(user_course_list), -1, dtype=int)
if matched_positions:
    # Predict on a plain float array: the model was fitted on `.values`.
    output_labels[matched_positions] = km.predict(x_output.to_numpy(dtype=float))

Spacy model is loading...
['programming introduction', 'python aimed', 'solving python', 'introduction programming', 'python', 'programming', 'programming problem', 'introduction', 'problem solving', 'major students']
course description 'Introduction to Programming' similarity with resume experience: 14.229190158016468
Spacy model is loading...
['assembly languages', 'architectures programming', 'programming assembly', 'hardware architectures', 'devices programming', 'software producing', 'machine introduction', 'abstract machine', 'programming', 'machine dependent']
course description 'Computer Organization & Machine-Dependent Programming' similarity with resume experience: 15.801565322965729
Spacy model is loading...
['programming language', 'programming languages', 'introduction programming', 'definition programming', 'imperative programming', 'languages introduction', 'level programming', 'programming', 'object orientedness', 'languages syntactic']
course description 'Programming L



In [24]:
# Letter grades grouped into performance tiers.
grade_map = {
    'Good': ['A+', 'A', 'A-'],
    'Fair': ['B+', 'B', 'B-'],
    'Barely passing': ['C+', 'C', 'C-']
}

# zip() stops at the shorter sequence, so a length mismatch would silently pair
# grades with the wrong cluster labels. Fail loudly instead.
if len(output_labels) != len(user_courses):
    raise ValueError(
        'output_labels has %d entries but user_courses has %d; '
        'grades cannot be matched to cluster labels'
        % (len(output_labels), len(user_courses))
    )

# Collect the clusters of courses the user did well in ('Good') or acceptably in
# ('Fair'). Grades in the 'Barely passing' tier, or in no tier, contribute nothing.
# The same cluster can appear in both collections.
good_course_clusters = set()
fair_course_clusters = set()
for (course, grade), label in zip(user_courses, output_labels):
    if grade in grade_map['Good']:
        good_course_clusters.add(label)
    elif grade in grade_map['Fair']:
        fair_course_clusters.add(label)

# Deduplicated lists in a stable order.
good_course_clusters = sorted(good_course_clusters)
fair_course_clusters = sorted(fair_course_clusters)

In [25]:
# Candidate courses whose cluster matches one of the user's 'Good' / 'Fair'
# clusters, grouped by the quarter in which they are offered.
in_good_cluster = x_input['Cluster'].isin(good_course_clusters)
in_fair_cluster = x_input['Cluster'].isin(fair_course_clusters)
good_courses = x_input.loc[in_good_cluster].groupby('Quarter')
fair_courses = x_input.loc[in_fair_cluster].groupby('Quarter')

# Print each tier's heading followed by one line per quarter listing course codes.
for heading, courses_by_quarter in (('Good Courses', good_courses), ('Fair Courses', fair_courses)):
    print(heading)
    for quarter, course in courses_by_quarter:
        print(quarter, course['Course Code'].values)

Good Courses
202403 ['EAE 143B']
202410 ['EAE 001' 'EAE 133']
Fair Courses
202403 ['EAE 143B']
202410 ['EAE 001' 'EAE 133']


In [46]:
# Grade points on a 4.0 scale for each letter grade. D and F grades are included
# so that a transcript containing them does not raise KeyError below.
grade_credits = {
    'A+': 4, 'A': 4, 'A-': 3.7, 'B+': 3.3, 'B': 3.0, 'B-': 2.7, 'C+': 2.3, 'C': 2.0, 'C-': 1.7,
    'D+': 1.3, 'D': 1.0, 'D-': 0.7, 'F': 0.0
}

# Grade points assumed for a future course in each recommendation tier:
# an A for 'Good' clusters, a B for 'Fair' clusters.
tier_grade_points = {'Good': grade_credits['A'], 'Fair': grade_credits['B']}

# Unweighted mean of the user's grade points; 0.0 when no courses are listed.
current_gpa = sum([grade_credits[grade] for _, grade in user_courses])/len(user_courses) if user_courses else 0.0

# quarter -> {tier -> count}. Iterating a groupby yields one (quarter, rows) pair
# per quarter, so each count is 1 for a quarter that has any course in that tier.
# NOTE(review): if the count is meant to be the number of courses offered that
# quarter, it should be len(course) -- confirm the intended model.
quarter_course = {}
for tier, courses_by_quarter in (('Good', good_courses), ('Fair', fair_courses)):
    for quarter, course in courses_by_quarter:
        tier_counts = quarter_course.setdefault(quarter, {})
        tier_counts[tier] = tier_counts.get(tier, 0) + 1

# NOTE(review): 202401 is listed after 202410; confirm whether the third term
# should be a 2025 code. The lookup below also assumes the 'Quarter' group keys
# compare equal to these integers -- verify the column's dtype.
quarters = [202403, 202410, 202401]
predicted_grades = [current_gpa]
no_of_courses = len(user_courses)
for quarter in quarters:
    gpa = predicted_grades[-1]
    # .get() avoids KeyError for quarters missing entirely or missing one tier.
    tier_counts = quarter_course.get(quarter, {})
    if tier_counts.get('Good', 0) != 0:
        tier = 'Good'
    elif tier_counts.get('Fair', 0) != 0:
        tier = 'Fair'
    else:
        # Nothing recommended this quarter: the GPA carries over unchanged.
        predicted_grades.append(gpa)
        continue
    added_courses = tier_counts[tier]
    # Fold the new courses into the running GPA as a count-weighted mean.
    predicted_grades.append((gpa*no_of_courses + added_courses*tier_grade_points[tier])/(no_of_courses + added_courses))
    no_of_courses += added_courses

In [47]:
import plotly.express as px

# Plot the projected GPA: the current GPA first, then one point per quarter.
quarters = [str(q) for q in quarters]
timeline_ticks = ['Current GPA'] + quarters
fig = px.line(x=timeline_ticks, y=predicted_grades, title='GPA Timeline', labels={'x': 'Quarter', 'y': 'Predicted GPA'})
# Replace the raw term codes on the x axis with readable term names. The second
# term is labelled 'Fall 2024' so the labels read chronologically.
fig.update_layout(xaxis=dict(tickmode='array', tickvals=timeline_ticks, ticktext=['Current GPA', 'Spring 2024', 'Fall 2024', 'Winter 2025']))
fig.show()