# Recommendation System  
**Libraries**

In [17]:
import pandas as pd
import numpy as np
import neattext.functions as nfx


from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

#avoid  warnings
import warnings
warnings.filterwarnings('ignore')

## Read file

In [2]:
data = pd.read_csv('udemy_courses.csv')

In [4]:
data.head(1)

Unnamed: 0,course_id,course_title,url,is_paid,price,num_subscribers,num_reviews,num_lectures,level,content_duration,published_timestamp,subject
0,1070968,Ultimate Investment Banking Course,https://www.udemy.com/ultimate-investment-bank...,True,200,2147,23,51,All Levels,1.5,2017-01-18T20:58:58Z,Business Finance


In [5]:
#check null values
data.isnull().sum()

course_id              0
course_title           0
url                    0
is_paid                0
price                  0
num_subscribers        0
num_reviews            0
num_lectures           0
level                  0
content_duration       0
published_timestamp    0
subject                0
dtype: int64

In [6]:
#check duplicates
data.duplicated().any()

True

In [7]:
#drop duplicates
data = data.drop_duplicates()
data.shape

(3672, 12)

### Popularity-Based recommendation system

In [12]:
#function 

def popularity_based_recommendation(df,top_n=5):
    #add new column popularity score 
    data['popularity_score'] = 0.6 * data['num_subscribers'] + 0.4 * data['num_reviews'] 
    
    #sort courses by popularity score
    df_sorted = data.sort_values(by = 'popularity_score', ascending = False)
    
    #Return the recomemednded courses
    recommened_courses = df_sorted[['course_title','popularity_score' ]].head(top_n)
    
    return recommened_courses

In [13]:
popularity_based_recommendation(data)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['popularity_score'] = 0.6 * data['num_subscribers'] + 0.4 * data['num_reviews']


Unnamed: 0,course_title,popularity_score
2827,Learn HTML5 Programming From Scratch,164805.4
3032,Coding for Entrepreneurs Basic,96729.0
3230,The Web Developer Bootcamp,83928.4
3232,The Complete Web Developer Course 2.0,77672.0
2783,Build Your First Website in 1 Week with HTML5 ...,74544.2


### Conetent-Based Recommedation System

#### Clean Data

In [14]:
data['course_title'] = data['course_title'].apply(nfx.remove_stopwords)
data['course_title'] = data['course_title'].apply(nfx.remove_special_characters)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['course_title'] = data['course_title'].apply(nfx.remove_stopwords)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['course_title'] = data['course_title'].apply(nfx.remove_special_characters)


In [15]:
data.sample(5)

Unnamed: 0,course_id,course_title,url,is_paid,price,num_subscribers,num_reviews,num_lectures,level,content_duration,published_timestamp,subject,popularity_score
1535,368958,Advanced Graphic DesignPhotoshop CC 2014 Photo...,https://www.udemy.com/advanced-graphic-design-...,True,20,314,11,19,All Levels,1.5,2014-12-19T05:55:57Z,Graphic Design,192.8
2045,917432,Professional Bass Masterclass,https://www.udemy.com/the-professional-bass-ma...,True,200,1449,136,88,All Levels,7.0,2016-10-24T18:22:57Z,Musical Instruments,923.8
2358,594852,Getting Started Power Chords,https://www.udemy.com/getting-started-with-pow...,True,20,6,0,27,Beginner Level,1.0,2015-12-17T20:36:08Z,Musical Instruments,3.6
1547,288850,Logo design pay illustrator day,https://www.udemy.com/designing-logos-with-ill...,True,50,22,1,20,All Levels,4.0,2014-09-01T08:23:29Z,Graphic Design,13.6
2243,459207,Master Bassoon Intermediate Instruction Simple,https://www.udemy.com/intermediate-bassoon/,True,50,177,3,102,All Levels,7.5,2015-04-15T17:12:37Z,Musical Instruments,107.4


In [16]:
#add new colum
data['title_subject'] = data['course_title'] + '' + data['subject']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['title_subject'] = data['course_title'] + '' + data['subject']


### Vectorizer 

In [20]:
cv = CountVectorizer(max_features=3000)
vectors = cv.fit_transform(data['title_subject']).toarray()

In [22]:
vectors[0]

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [23]:
len(cv.get_feature_names())

3000

In [24]:
#calculate the similarity score
similarity = cosine_similarity(vectors)

In [25]:
#sorted values and print the first 5
sorted(list(enumerate(similarity[0])), reverse =True, key = lambda x:x[1])[1:6]

[(417, 0.6),
 (39, 0.5477225575051662),
 (657, 0.5477225575051662),
 (1066, 0.5477225575051662),
 (227, 0.50709255283711)]

In [27]:
#Function to get the similarity score and print the title of the recommendations
def recommend(course):
    #featch the index
    course_index = data[data['course_title']==course].index[0]
    distances = similarity[course_index]
    courses_list = sorted(list(enumerate(distances)), reverse =True, key = lambda x:x[1])[1:6]
    for i in courses_list:
        print(data.iloc[i[0]]['course_title'])

In [28]:
recommend('know HTML Learn HTML Basics')

Kids Coding  Beginners CSS
WordPress Beginners  Tutorial  Novice KnowHow
WordPress Beginners 2017  Google AdSense Implementation
WordPress Development Beginners
Explore JavaScript Beginners Guide Coding JavaScript


In [30]:
recommend('Professional Bass Masterclass')

Rock Drumset Grooves Level 1
Worship Piano Beginner Pro
Piano Willie Piano Chords Vol 1
Easy Piano Kids  Complete Beginner Piano Course
Playing Piano Popular Pieces Vol II


### Save

In [32]:
import pickle

In [34]:
pickle.dump(data,open('course_dict.pkl','wb'))

In [35]:
pickle.dump(similarity,open('similarity.pkl','wb'))

## GUI

In [39]:
import tkinter as tk
from tkinter import ttk, messagebox
import pandas as pd

# Assume 'data', 'similarity' are defined elsewhere

#Define the popularity-based recomendation funtion
def popularity_based_recommendation(df,top_n=5):
    #add new column popularity score 
    data['popularity_score'] = 0.6 * data['num_subscribers'] + 0.4 * data['num_reviews'] 
    
    #sort courses by popularity score
    df_sorted = data.sort_values(by = 'popularity_score', ascending = False)
    
    #Return the recomemednded courses
    recommened_courses = df_sorted[['course_title','popularity_score' ]].head(top_n)
    
    return recommened_courses

#Define the  recomendation funtion
def recommend(course):
    try:
        #featch the index
        course_index = data[data['course_title']==course].index[0]
        distances = similarity[course_index]
        courses_list = sorted(list(enumerate(distances)), reverse =True, key = lambda x:x[1])[1:6]
        recommended_courses = [data.iloc[i[0]]['course_title'] for i in courses_list]
        return recommended_courses
    except IndexError:
        messagebox.showerror("Error", f"Course '{course}' not found.")
        
# Event handler for the "Recommend" button
def recommend_button_click():
    course_title = course_var.get()
    recommended_courses = recommend(course_title)
    if recommended_courses:
        popularity_label.pack_forget()
        result_label.config(text="Recommended Courses:\n" + '\n'.join(recommended_courses))
        
#Create the main applicatin window
root = tk.Tk()
root.title('Course Recommender')
root.geometry('400x300')

#change font and color
font_style = ('Arial',12)
label_color = 'blue'
heading_color = 'red'
button_color = 'green'
result_label_color = 'black'

#create and place GUI elements
label = tk.Label(root, text="Select Course:", font=font_style, fg=label_color)
label.pack(pady=10)

course_titles = data['course_title'].to_list()
course_var = tk.StringVar(value =course_titles[0] )
course_dropdown = ttk.Combobox(root,textvariable=course_var, values=course_titles, width=40, font=font_style )
course_dropdown.pack(pady =5)

popularity_recommendations = popularity_based_recommendation(data, top_n=5)
popularity_label = tk.Label(root, text="Popularity-based Recommendations:\n" + popularity_recommendations.to_string(index=False),
                             font=font_style, fg=label_color)
popularity_label.pack()

recommend_button = tk.Button(root, text="Recommend", command=recommend_button_click, width=20, font=font_style, fg=button_color)
recommend_button.pack(pady=10)


result_label = tk.Label(root, text="", wraplength=350, font=font_style, fg=result_label_color)
result_label.pack()



root.mainloop()