<a href="https://colab.research.google.com/github/sugunayaparala/Hotel-booking-_Analysis/blob/main/course_recomendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U scikit-learn
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity



In [None]:
# Load the two course catalogues from CSV files under /content.
udemy_data = pd.read_csv("/content/Udemy.csv")
coursera_data = pd.read_csv("/content/Coursera.csv")

In [None]:
# Give both catalogues the same column names: 'title' for the course name and
# 'difficulty' for the level. Udemy already calls its name column 'title'.
udemy_data = udemy_data.rename(columns={'level': 'difficulty'})
coursera_data = coursera_data.rename(
    columns={'course': 'title', 'level': 'difficulty'}
)

In [None]:
# Tag every row with the platform it came from, then stack both catalogues
# into a single frame; ignore_index=True renumbers the rows from 0.
udemy_data['platform'] = 'udemy'
coursera_data['platform'] = 'coursera'
combined_data = pd.concat([udemy_data, coursera_data], ignore_index=True)
combined_data.shape

(27395, 13)

In [None]:
# Replace missing titles/descriptions with empty strings before concatenating.
for text_column in ('title', 'description'):
    combined_data[text_column] = combined_data[text_column].fillna('')

# One text document per course: title, description and platform, space-separated.
combined_data['course_text'] = (
    combined_data['title']
    + " " + combined_data['description']
    + " " + combined_data['platform']
)

In [None]:
# Vectorise the course text with TF-IDF, keeping at most 1000 features drawn
# from unigrams and bigrams, with English stop words removed.
tfidf = TfidfVectorizer(
    ngram_range=(1, 2),
    max_features=1000,
    stop_words='english',
)
tfidf_matrix = tfidf.fit_transform(combined_data['course_text'])

In [None]:
from scipy.sparse import csr_matrix

# Make sure the TF-IDF matrix is in CSR form before slicing it by row range.
tfidf_matrix = csr_matrix(tfidf_matrix)

# Fill the full course-by-course similarity matrix one tile at a time.
batch_size = 1000
num_courses = tfidf_matrix.shape[0]
# Dense float64 buffer: 8 * num_courses**2 bytes.
course_similarity = np.zeros((num_courses, num_courses))

batch_starts = range(0, num_courses, batch_size)
for row_start in batch_starts:
    row_block = slice(row_start, row_start + batch_size)
    for col_start in batch_starts:
        col_block = slice(col_start, col_start + batch_size)
        course_similarity[row_block, col_block] = cosine_similarity(
            tfidf_matrix[row_block], tfidf_matrix[col_block]
        )

In [None]:
# Function to recommend courses
def recommend_similar_courses(course_title, top_n=10):
    """Return the courses most similar to the one titled `course_title`.

    The title is looked up in the global `combined_data` frame (the first exact
    match is used) and every other row is ranked by its score in the matching
    row of the global `course_similarity` matrix.

    Args:
        course_title: Exact course title to look up.
        top_n: Maximum number of recommendations to return; values below 0
            are treated as 0.

    Returns:
        A list of `(title, platform, similarity)` tuples ordered from most to
        least similar, or a message string when no course has that title.
    """
    # Work with row positions so the lookup matches the similarity matrix rows
    # even if the frame's index labels are not 0..n-1.
    title_matches = np.flatnonzero((combined_data['title'] == course_title).to_numpy())
    if len(title_matches) == 0:
        return f"No course found with title: {course_title}"

    course_index = title_matches[0]
    scores = np.asarray(course_similarity[course_index])

    # Stable descending sort: tied scores keep their original row order.
    ranked = np.argsort(-scores, kind='stable')
    # Remove the query course by position instead of assuming it sorts first;
    # a tied row (e.g. a duplicate listing) could otherwise take its place and
    # the query course would be recommended to itself.
    ranked = ranked[ranked != course_index][:max(top_n, 0)]

    return [
        (combined_data.iloc[idx]['title'], combined_data.iloc[idx]['platform'], scores[idx])
        for idx in ranked
    ]

In [None]:
# Example: the five courses most similar to one known title.
recomendercourses=recommend_similar_courses("Cyber Security Fundamentals",top_n=5)
print(recomendercourses)

[('Introduction to Cyber Security', 'coursera', np.float64(0.8803660591386718)), ('Cyber Security: Phishing', 'udemy', np.float64(0.8748389187035832)), ('Master of Science in Cyber Security', 'coursera', np.float64(0.8418127237846377)), ('ICS/SCADA Cyber Security', 'udemy', np.float64(0.8251088793453807)), ('The Complete Cyber Security Awareness Training for Employees', 'udemy', np.float64(0.8155770059642355))]


In [None]:
import numpy as np

def precision_at_k(recommended_courses, relevant_courses, k=10):
    """Fraction of the first `k` recommendations that are relevant.

    The hit count is divided by `k` itself, not by the number of items
    actually recommended; returns 0 when `k` is 0.
    """
    top_k = recommended_courses[:k]
    hit_count = 0
    for course in top_k:
        if course in relevant_courses:
            hit_count += 1
    if not k:
        return 0
    return hit_count / k

def recall_at_k(recommended_courses, relevant_courses, k=10):
    """Share of the relevant courses found in the first `k` recommendations.

    Returns 0 when `relevant_courses` is empty.
    """
    matched = [course for course in recommended_courses[:k] if course in relevant_courses]
    if not relevant_courses:
        return 0
    return len(matched) / len(relevant_courses)

def average_precision(recommended_courses, relevant_courses):
    """Average precision of a ranked recommendation list.

    Sums the precision at each rank where a relevant course appears and
    divides by the number of relevant courses; returns 0 when
    `relevant_courses` is empty.
    """
    running_hits = 0
    precision_total = 0
    for rank, course in enumerate(recommended_courses, start=1):
        if course not in relevant_courses:
            continue
        running_hits += 1
        precision_total += running_hits / rank
    if relevant_courses:
        return precision_total / len(relevant_courses)
    return 0


def ndcg_at_k(recommended_courses, relevant_courses, k=10):
    """Normalised discounted cumulative gain of the first `k` recommendations.

    Relevance is binary: a recommendation scores 1 when it appears in
    `relevant_courses` and 0 otherwise.

    Args:
        recommended_courses: Ranked list of recommended items, best first.
        relevant_courses: Collection of items considered relevant.
        k: Cut-off rank.

    Returns:
        DCG of the top `k` recommendations divided by the DCG of an ideal
        ranking, or 0 when there are no relevant courses (ideal DCG is 0).
    """
    def dcg(scores):
        # Gain at 0-based position idx is discounted by log2(idx + 2).
        return sum((rel / np.log2(idx + 2)) for idx, rel in enumerate(scores))

    recommended_k = recommended_courses[:k]
    relevance_scores = [1 if course in relevant_courses else 0 for course in recommended_k]

    actual_dcg = dcg(relevance_scores)
    # The ideal ranking lists relevant courses first, as many as fit within k.
    # It is derived from the ground truth, not from the recommended list, so
    # missing relevant courses lowers the score.
    ideal_dcg = dcg([1] * min(k, len(relevant_courses)))

    return actual_dcg / ideal_dcg if ideal_dcg > 0 else 0





In [None]:
def mean_average_precision(recommendations, ground_truth):
    """Mean of `average_precision` over every query in `recommendations`.

    Both arguments map a query key to a list of items. A query missing from
    `ground_truth` is scored against an empty relevant list. Returns 0 when
    `recommendations` is empty.
    """
    if not recommendations:
        return 0
    per_query = [
        average_precision(recommended, ground_truth.get(query, []))
        for query, recommended in recommendations.items()
    ]
    return sum(per_query) / len(per_query)


def mean_reciprocal_rank(recommendations, ground_truth):
    """Mean over queries of 1 / rank of the first relevant recommendation.

    A query with no relevant recommendation contributes 0. Returns 0 when
    `recommendations` is empty.
    """
    if not recommendations:
        return 0
    reciprocal_ranks = []
    for query, recommended in recommendations.items():
        relevant = ground_truth.get(query, [])
        first_hit_rank = next(
            (rank for rank, course in enumerate(recommended, start=1) if course in relevant),
            None,
        )
        reciprocal_ranks.append(1 / first_hit_rank if first_hit_rank else 0)
    return sum(reciprocal_ranks) / len(reciprocal_ranks)


# Placeholder data so the metric calls below can run on a fresh kernel.
# TODO: replace with real recommender output and labelled relevant courses.
recommendations = {
    "Course A": ["Course B", "Course C", "Course D"],
    "Course E": ["Course F", "Course A", "Course G"],
}
ground_truth = {
    "Course A": ["Course B", "Course D"],
    "Course E": ["Course A"],
}

print("Precision@3:", precision_at_k(recommendations["Course A"], ground_truth["Course A"], k=3))
print("Recall@3:", recall_at_k(recommendations["Course A"], ground_truth["Course A"], k=3))
print("MAP:", mean_average_precision(recommendations, ground_truth))
print("NDCG@3:", ndcg_at_k(recommendations["Course A"], ground_truth["Course A"], k=3))
print("MRR:", mean_reciprocal_rank(recommendations, ground_truth))

In [None]:
# Check the recommender on a sequence of course titles.
sequence_of_titles = ["Cyber Security Fundamentals"]
sequential_recommendations = []

for title in sequence_of_titles:
    recommendations = recommend_similar_courses(title, top_n=5)
    sequential_recommendations.append((title, recommendations))

# Print the recommendations collected for each title.
for course_title, recommendations in sequential_recommendations:
    print(f"Sequential Recommendations for '{course_title}':")
    if isinstance(recommendations, str):
        # recommend_similar_courses returns a message string for an unknown
        # title; print it rather than trying to unpack it as result tuples.
        print(f"  {recommendations}")
    else:
        for recommended_title, platform, similarity_score in recommendations:
            print(f"  - {recommended_title} (Platform: {platform}, Similarity Score: {similarity_score:.4f})")
    print()

Sequential Recommendations for 'Cyber Security Fundamentals':
  - Introduction to Cyber Security (Platform: coursera, Similarity Score: 0.8804)
  - Cyber Security: Phishing (Platform: udemy, Similarity Score: 0.8748)
  - Master of Science in Cyber Security (Platform: coursera, Similarity Score: 0.8418)
  - ICS/SCADA Cyber Security (Platform: udemy, Similarity Score: 0.8251)
  - The Complete Cyber Security Awareness Training for Employees (Platform: udemy, Similarity Score: 0.8156)

