# Content Based Recommendation System

**Libraries**

In [1]:
import pandas as pd
import numpy as np
#avoid Warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
df = pd.read_csv('data.csv')

#drop duplicates 
df = df.drop_duplicates()
df.head(2)

Unnamed: 0,course_id,course_title,url,is_paid,price,num_subscribers,num_reviews,num_lectures,level,content_duration,published_timestamp,subject
0,1070968,Ultimate Investment Banking Course,https://www.udemy.com/ultimate-investment-bank...,True,200,2147,23,51,All Levels,1.5,2017-01-18T20:58:58Z,Business Finance
1,1113822,Complete GST Course & Certification - Grow You...,https://www.udemy.com/goods-and-services-tax/,True,75,2792,923,274,All Levels,39.0,2017-03-09T16:34:20Z,Business Finance


### Add New Column

In [4]:
#concatenar la columa titulo y subject
df['title_subject'] = df['course_title'] + ' ' +  df['subject']
df[['course_title', 'subject', 'title_subject']].head()

Unnamed: 0,course_title,subject,title_subject
0,Ultimate Investment Banking Course,Business Finance,Ultimate Investment Banking Course Business Fi...
1,Complete GST Course & Certification - Grow You...,Business Finance,Complete GST Course & Certification - Grow You...
2,Financial Modeling for Business Analysts and C...,Business Finance,Financial Modeling for Business Analysts and C...
3,Beginner to Pro - Financial Analysis in Excel ...,Business Finance,Beginner to Pro - Financial Analysis in Excel ...
4,How To Maximize Your Profits Trading Options,Business Finance,How To Maximize Your Profits Trading Options B...


## Content Based Recomendation System

In [7]:
df.head(1)['title_subject']

0    Ultimate Investment Banking Course Business Fi...
Name: title_subject, dtype: object

In [9]:
#setup
from sklearn.feature_extraction.text import TfidfVectorizer
tdf = TfidfVectorizer(min_df=3, max_features=None,
                     strip_accents='unicode', analyzer='word', token_pattern='\w{1,}',
                     ngram_range=(1,3), stop_words='english')

In [10]:
#fitting
df_matrix = tdf.fit_transform(df['title_subject'])

In [11]:
df_matrix.shape

(3672, 2869)

#### Compute the sigmoid kernel

In [12]:
from sklearn.metrics.pairwise import sigmoid_kernel 

In [13]:
sig = sigmoid_kernel(df_matrix, df_matrix)

#### Reverse mapping of indeces

In [15]:
indices = pd.Series(df.index, index = df['course_title'])
indices.head(2)

course_title
Ultimate Investment Banking Course                             0
Complete GST Course & Certification - Grow Your CA Practice    1
dtype: int64

In [17]:
sig[1]

array([0.76160718, 0.7617405 , 0.7616024 , ..., 0.76159416, 0.76159416,
       0.76159416])

In [22]:
list(enumerate(sig[indices['Ultimate Investment Banking Course']]))[0:6]

[(0, 0.7617405006296453),
 (1, 0.7616071791511033),
 (2, 0.7616038515743866),
 (3, 0.761600244221251),
 (4, 0.7616014227588085),
 (5, 0.7615998086325286)]

In [26]:
#order  list of recomendation
sorted(list(enumerate(sig[indices['Ultimate Investment Banking Course']])), key = lambda x:x[1], reverse =True)[1:6]

[(417, 0.7616904321676709),
 (240, 0.7616896553090964),
 (137, 0.7616699226779012),
 (722, 0.7616665587188381),
 (39, 0.7616647674980528)]

In [46]:
#functioon  get the inces and sorting 
def give_rec(title, sig = sig):
    #get the index of the original recod
    idx = indices[title]
    #get the similarity score
    sig_scores = list(enumerate(sig[idx]))
    #sort 
    sig_scores = sorted(sig_scores, key = lambda x:x[1], reverse = True)
    #scores the top 10 recommended
    sig_scores = sig_scores[1:10]
    #Book indices
    book_indices = [i[0] for i in sig_scores]
    #Top 10 similarity
    # Create a DataFrame with the top 10 courses
    df_recommendations = pd.DataFrame({'Top 10 Recommended Courses': df['course_title'].iloc[book_indices]})
    return df_recommendations

In [47]:
give_rec('Ultimate Investment Banking Course')

Unnamed: 0,Top 10 Recommended Courses
417,The Investment Banking Recruitment Series
240,Advanced Accounting for Investment Banking
137,"Intro to Investment Banking, M&A, IPO, Modelin..."
722,Investment Banking Operations : Securities Tra...
39,The Complete Investment Banking Course 2017
162,Trading for everyone - The Independent Investo...
997,Certificate in MYOB AccountRight Premier 2017 ...
657,Financial Accounting - The Ultimate Beginner C...
435,Simple Linear Regression Analysis ( A Complete...
