# Recommender System for Coaching and Academic Advising
#### Goal of the system is to consider user interests and feedback to recommend specific courses based on their level.

In [1]:
#AUTHOR: Aubry, Nanae & Ruchti, Kilian
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import math
import os.path
from os import path

In [2]:
class color:
   BLUE = '\033[94m'
   GREEN = '\033[92m'
   BOLD = '\033[1m'
   UNDERLINE = '\033[4m'
   END = '\033[0m'

## User enter name and topic choice

In [6]:
user_name = input(color.BOLD + color.UNDERLINE + "What is your name?" + color.END + " ")

What is your name? Bill


In [7]:
print("Topics:" + color.BOLD +  color.GREEN +  " \n Musical Instrument \n Business Finance" + color.END)
choice = input(color.BOLD + color.UNDERLINE + "What do you want to learn about?" + color.END + " ")

Topics:[1m[92m 
 Musical Instrument 
 Business Finance[0m
What do you want to learn about? Business


#### Read dataset of courses corresponding to the main topic user chose

In [8]:
topic = ""
main_topic_choice = choice.lower()

#Select csv to open based on user input
if 'music' in main_topic_choice:
    all_courses = pd.read_csv('data/music_courses.csv', header = 0, sep=',')
    topic = "music"
elif 'business' or 'finance' in main_topic_choice:
    all_courses = pd.read_csv('data/business_courses.csv', header = 0, sep=',')
    topic = "business"

all_courses.set_index(["course_title"], inplace=True)

#### Filter through courses to only show rows that are associated with specific subject

In [9]:
#Filter dataframe to find courses containing a keyword
def subjects_filter(subject_keywords):
    rc = all_courses.copy()
    filtered_resources = rc.loc[rc['specific_subject'].str.contains(subject_keywords)]
    return filtered_resources

## Ask user what courses they would like and apply filter

In [36]:
if topic == 'music':
    print("Subjects:" + color.BOLD +  color.GREEN +  " \n Drums \n Guitar \n Piano \n Chords \n Harmonica \n Songs \n etc..." + color.END)
elif topic == 'business':
    print("Subjects:" + color.BOLD +  color.GREEN +  " \n Investment \n Trading \n Cryptocurrency \n Stock Market \n Analysis \n Accounting \n etc..." + color.END)

user_filter = input(color.BOLD + color.UNDERLINE + "What topics would you like to see?" + color.END + " ")

Subjects:[1m[92m 
 Investment 
 Trading 
 Cryptocurrency 
 Stock Market 
 Analysis 
 Accounting 
 etc...[0m
What topics would you like to see? Accounting


In [37]:
specific_subject = subjects_filter(user_filter)

#### Seperate the filtered data by level of difficulty

In [38]:
def levels_filter(filtered_courses):
    beginner = filtered_courses[filtered_courses["level"].isin({'Beginner Level'})]
    intermediate = filtered_courses[filtered_courses["level"].isin({'Intermediate Level'})]
    expert = filtered_courses[filtered_courses["level"].isin({'Expert Level'})]
   
    list_levels = list()
    list_levels.append(beginner)
    list_levels.append(intermediate)
    list_levels.append(expert)
    
    return list_levels

#### Read course_attributes dataset to create similarity matrix

In [39]:
if topic == "music":
    topic_attributes = pd.read_csv('data/music_attributes.csv', header=0, index_col="course_title")
if topic == "business":
    topic_attributes = pd.read_csv('data/business_attributes.csv', header=0, index_col="course_title")

#### Calculate Cosine Similarity Accross the Courses

##### We are calculating the similarity by considering their subjects

In [40]:
def similarity_matrix(courses_attributes):
    name_courses= list(courses_attributes.index)
    sim = pd.DataFrame(cosine_similarity(courses_attributes), columns=name_courses, index=name_courses)
    return sim

##### Calculating similarity by taking into account what user already rated

In [41]:
def user_pref_sim_matrix(topic_attr, user_pref):
    
    #Sort by index preferences of users
    #Create dataframe copy with only course id and ratings
    user = user_preferences.sort_values(by = ['id'])

    #Create new dataframe with the course_attributes and user_preferences appended
    #Add 0 for indexes that have not been rated
    df = pd.DataFrame()
    df = df.append(topic_attributes)
    df['Num_Attr'] = df.drop('id', axis=1).sum(axis=1)
    df = df.merge(user, on=['id', 'course_title'], how='left').fillna(0)

    user_rating = df[['rating']].copy() 

    #Get number columns to use in operations
    column_values = len(df.columns)

    #Create weighted matrix
    weighted_matrix = pd.DataFrame(df.iloc[:,2:column_values - 3].values/pd.DataFrame([math.sqrt(i) for i in df.iloc[:,column_values - 2].values]).values)

    #Calculate like scores for user
    user_likes_score = pd.DataFrame((weighted_matrix.values*user_rating.values).sum(axis=0)).T

    #Calculate predictions for user
    pred_user=(df.iloc[:,2:column_values - 3].values*user_likes_score.values).sum(axis=1)
    df["pred_user"]=pred_user

    #Display top 5 predictions
    predictions = df.loc[df['rating'] == 0].nlargest(5, 'pred_user')[['course_title', 'pred_user']]
    
    return predictions

#### Show courses to user, ordered by level

In [42]:
list_courses = levels_filter(specific_subject)
merged_list = pd.concat(list_courses)
merged_list

Unnamed: 0_level_0,id,url,level,main_subject,specific_subject
course_title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Introduction to Accounting : Mastering Financial Statements,22,https://www.udemy.com/introduction-to-accounti...,Beginner Level,Business Finance,Accounting
Accounting & Financial Statement Analysis: Complete Training,43,https://www.udemy.com/accounting-fsa-a-solid-f...,Beginner Level,Business Finance,Accounting
Learn Accounting. Understand Business.,54,https://www.udemy.com/learn-accounting-underst...,Beginner Level,Business Finance,Accounting
Introduction to Accounting: The Language of Business,64,https://www.udemy.com/learnaccountingforfree/,Beginner Level,Business Finance,Accounting
Basic Excel for Basic Bookkeeping and Accounting,65,https://www.udemy.com/basic-excel-for-basic-bo...,Beginner Level,Business Finance,Accounting
Accounting for Beginners : Learn Basics in under 1 Hour,79,https://www.udemy.com/accounting-for-beginners...,Beginner Level,Business Finance,Accounting
Accounting: Get Hired Without Work Experience,98,https://www.udemy.com/accounting-get-hired-wit...,Beginner Level,Business Finance,Accounting
Shark Accounting - Building a Business by the Numbers!,103,https://www.udemy.com/shark-accounting/,Beginner Level,Business Finance,Accounting
Accounting in 60 Minutes - A Brief Introduction,105,https://www.udemy.com/accounting-in-60-minutes...,Beginner Level,Business Finance,Accounting
Fundamentals of Accounting,106,https://www.udemy.com/fundamentals-of-accounting/,Beginner Level,Business Finance,Accounting


## Choose a course and display link

In [52]:
course = input(color.BOLD + color.UNDERLINE + "What course do you want to watch?" + color.END + " ")

What course do you want to watch? Legal Accounting


In [53]:
extract_url = all_courses.loc[[course], ["url"]].values[0]
course_url = extract_url[0]
print(color.BOLD + color.BLUE + color.UNDERLINE + course_url + color.END)

[1m[94m[4mhttps://www.udemy.com/solicitors-accounts/[0m


## Ask if user liked course to store rating

In [61]:
user_rating = input(color.BOLD + color.UNDERLINE + "Did you like the video? (yes/no):" + color.END + " ")

Did you like the video? (yes/no): -5


In [62]:
if user_rating == "yes":
    like = 1
else:
    like = -1

#### Add course and rating to user's file. If new user, create new csv

##### Find course id to store

In [63]:
extract_id = all_courses.loc[[course], ["id"]].values[0]
course_id = extract_id[0]

In [64]:
def update_rating(row):
    df = pd.read_csv('data/users/' + user_name + "_" + topic +".csv") 
    df = df[~df['id'].isin([row[0]])]
    updated_csv = df.to_csv('data/users/' + user_name + "_" + topic +".csv", index=False)
    return updated_csv

In [65]:
import csv

file_name = user_name + "_" + topic +".csv"

if path.exists('data/users/' + file_name) == True:
    with open('data/users/' + file_name, 'r') as file:
        csv_reader = csv.reader(file)
        rated = False
        for row in csv_reader:
            if course in row:
                print("You already rated this course, we'll update your rating!")
                updated_csv = update_rating(row)
                break
    
    with open('data/users/' + file_name, 'a') as filea:
        writer = csv.writer(filea)
        writer.writerow([course_id, course, like])
            
else:
    with open('data/users/' + file_name, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["id","course_title", "rating"])
        writer.writerow([course_id, course, like])
file.close()

You already rated this course, we'll update your rating!


##### Open file with user ratings

In [66]:
#Open csv of user preferences if user exists for topic    
file = 'data/users/' + user_name + "_" + topic +".csv"
if path.exists(file) == True:
    user_preferences = pd.read_csv(file)
else:
    user_preferences = pd.DataFrame()

#### Recommend next courses based on user ratings

In [67]:
print(color.BOLD + "Courses recommended based on what you've rated so far: \n" + color.END)
top_pred = user_pref_sim_matrix(topic_attributes, user_preferences)
top_pred

[1mCourses recommended based on what you've rated so far: 
[0m


Unnamed: 0,course_title,pred_user
279,How to Trade Forex like a Hedge Fund: Long FX ...,7.360066
116,Analysis of Company Financial Statements,6.913484
166,Best Practices in Corporate Budgeting,6.604083
99,Forex Trading with Fixed Risk through Options ...,6.446311
155,Technical Analysis - A practical approach for ...,6.336134


#### Show most similar courses to course user chose

In [68]:
print(color.BOLD + "Courses recommended based course you just took: \n" + color.END)

similarity = similarity_matrix(topic_attributes)
corr_top5=pd.DataFrame(similarity.loc[course].sort_values(ascending=False)[1:6])
corr_top5

[1mCourses recommended based course you just took: 
[0m


Unnamed: 0,Legal Accounting
Accounting Simplified,0.99999
Accounting 102: Guide to Business Accounting,0.999984
How To Earn Some Bitcoin Every Time You Use A Search Engine,0.999974
Learn Financial Accounting Different,0.999973
Accounting Books of Accounts (College Level),0.999971
