In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# !pip install scikit-learn

In [3]:
# Load course data (assuming data is available as a CSV file)
data = pd.read_csv('youtube_api.csv')  # Replace with your data source
data.head(10) # Check the first few entries to understand the structure

Unnamed: 0,title,description,publishedAt,channelTitle,videoId
0,Organic Chemistry | Reaction Mechanism 01 | Ty...,For PDF Notes and best Assignments visit @ ht...,2019-01-18T14:34:13Z,Physics Wallah - Alakh Pandey,yMgv_yvL8o8
1,Organic Chemistry | Reaction Mechanism 02 | Fr...,For PDF Notes and best Assignments visit @ ht...,2019-01-18T14:34:13Z,Physics Wallah - Alakh Pandey,ByfT-458Eoc
2,Reaction Mechanism 03 | Free Radical Substitut...,For PDF Notes and best Assignments visit http:...,2019-01-18T14:34:13Z,Physics Wallah - Alakh Pandey,iXvpYAxpvy0
3,Reaction Mechanism 04 || Free Radical Substitu...,For PDF Notes and best Assignments visit http:...,2019-01-18T14:34:13Z,Physics Wallah - Alakh Pandey,ghbjFdbn7Bs
4,Reaction Mechanism 05 | Electrophilic Substitu...,For PDF Notes and best Assignments visit http:...,2019-01-19T18:36:23Z,Physics Wallah - Alakh Pandey,FaqaUXfbVVg
5,Reaction Mechanism 06 | Electrophilic Substitu...,For PDF Notes and best Assignments visit http:...,2019-01-20T18:59:57Z,Physics Wallah - Alakh Pandey,DpEz-viSqIk
6,Reaction Mechanism 07| Electrophilic Substitut...,For PDF Notes and best Assignments visit http:...,2019-01-28T09:08:47Z,Physics Wallah - Alakh Pandey,DbLRZuwozuY
7,Reaction Mechanism 08 | Nucleophilic Substitut...,For PDF Notes and best Assignments visit http:...,2019-01-29T10:46:12Z,Physics Wallah - Alakh Pandey,Qwqemb3a1oc
8,Reaction Mechanism 09 | Nucleophilic Substitut...,For PDF Notes and best Assignments visit http:...,2019-01-30T12:49:50Z,Physics Wallah - Alakh Pandey,zzaGc5J4J_E
9,Reaction Mechanism 10 | Nucleophilic Substitut...,For PDF Notes and best Assignments visit http:...,2019-01-31T17:34:54Z,Physics Wallah - Alakh Pandey,AkxlE-GJPHE


In [4]:
# Step 1: Preprocessing
# Convert descriptions to lowercase and handle NaN values
data['description'] = data['description'].fillna('').str.lower()

In [5]:
# Step 2: TF-IDF Vectorization
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(data['description'])

In [6]:
# Step 3: Compute Cosine Similarity Matrix
cosine_sim_matrix = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [None]:
# Step 4: Building a Recommendation Function
def get_recommendations(course_index, cosine_sim_matrix, data):
    # Get similarity scores for the input course
    similarity_scores = list(enumerate(cosine_sim_matrix[course_index]))
    # Sort the courses based on similarity scores
    sorted_courses = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    # Get top recommendations (excluding itself)
    top_recommendations = sorted_courses[1:10]  # Top 10
    # Display recommendations
    recommendations = [data['title'].iloc[i[0]] for i in top_recommendations]
    return recommendations

In [10]:
# Test the recommender
sample_index = 30  # Index of the course you want to find recommendations for
recommendations = get_recommendations(sample_index, cosine_sim_matrix, data)
print(f"Recommended courses for '{data['title'].iloc[sample_index]}':\n")

Recommended courses for 'Fetching Data From an API | Day 17 | 100 Days of Machine Learning':



In [11]:
recommendations

['Fetching data using Web Scraping | Day 18 | 100 Days of Machine Learning',
 'Working with JSON/SQL | Day 16 | 100 Days of Machine Learning',
 'Outlier Detection using the Percentile Method | Winsorization Technique',
 'Column Transformer in Machine Learning | How to use ColumnTransformer in Sklearn',
 'Curse of Dimensionality',
 'Pandas Profiling | Day 22 | 100 Days of Machine Learning',
 'One Hot Encoding | Handling Categorical Data | Day 27 | 100 Days of Machine Learning',
 'Outlier Detection and Removal using the IQR Method | Handing Outliers Part 3',
 'Handling Date and Time Variables | Day 34 | 100 Days of Machine Learning']