In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [3]:
# Read the podcast catalogue from its CSV export into a DataFrame
PODCASTS_CSV = '../Data/podcasts_data.csv'
data = pd.read_csv(PODCASTS_CSV)

# Preview the first rows to sanity-check the loaded columns
data.head()

Unnamed: 0,Genre,Podcast Name,Description,Publisher,Total Episodes,Spotify URL,Cover Image URL
0,arts and entertainment,Easy Stories in English,"Learning a language is hard, but Easy Stories ...","Ariel Goodbody, Polyglot English Teacher & Gla...",216,https://open.spotify.com/show/23zdIqNUb0riR51w...,https://i.scdn.co/image/ab6765630000ba8a767693...
1,arts and entertainment,Podcast Buku Kutu,"EPISODE BARU SETIAP SENIN, RABU, dan JUMAT -- ...",Aditya Hadi - PODLUCK,162,https://open.spotify.com/show/3w5zKrbQ6kgB0RKI...,https://i.scdn.co/image/ab6765630000ba8a04fa1a...
2,arts and entertainment,Underwood and Flinch and Other Audiobooks by M...,Underwood and Flinch is a three-time Parsec aw...,Mike Bennett,244,https://open.spotify.com/show/3VwIE3bG0zpTCNzR...,https://i.scdn.co/image/ab6765630000ba8a4e7b42...
3,arts and entertainment,Podcast Resensi Buku,Kumpulan resensi beragam buku berbagai genre d...,Podcast Resensi Buku - PODLUCK,264,https://open.spotify.com/show/6woLsDl6CSntzeWU...,https://i.scdn.co/image/ab6765630000ba8a1e97ef...
4,arts and entertainment,SupremeMasterTV,Supreme Master Television is an international ...,SupremeMasterTV,500,https://open.spotify.com/show/5bCgERRINgZWhauS...,https://i.scdn.co/image/ab6765630000ba8a7899e5...


In [4]:
# Feature engineering: build one text field per podcast by joining 'Genre'
# and 'Podcast Name' with a space.
# Missing values are replaced with empty strings first: concatenating a
# string with NaN yields NaN, and TfidfVectorizer raises a ValueError when
# it meets a NaN document.
data['features'] = (
    data['Genre'].fillna('') + ' ' + data['Podcast Name'].fillna('')
)



In [5]:
# Create a TF-IDF vectorizer using scikit-learn's default settings
tfidf_vectorizer = TfidfVectorizer()

# Learn the vocabulary and IDF weights from the combined text column and
# encode every podcast as one row of the resulting TF-IDF matrix
tfidf_matrix = tfidf_vectorizer.fit_transform(raw_documents=data['features'])

In [6]:
# Pairwise similarity of every podcast against every other podcast.
# With a single argument, linear_kernel compares the matrix with itself.
# The vectorizer was built with its defaults (L2-normalised rows), so these
# dot products are cosine similarities.
cosine_sim = linear_kernel(tfidf_matrix)

In [7]:
# Columns included in every recommendation result (matched or empty)
_RECOMMENDATION_COLUMNS = [
    'Genre', 'Podcast Name', 'Description', 'Publisher',
    'Total Episodes', 'Spotify URL', 'Cover Image URL',
]


# Function to get recommendations for a given genre
def get_recommendations_by_genre(genre, n=10):
    """Recommend the podcasts most similar, on average, to a genre.

    Every podcast in the module-level ``data`` frame is scored by its mean
    similarity (taken from the module-level ``cosine_sim`` matrix) to the
    podcasts whose 'Genre' equals ``genre``. The highest-scoring podcasts
    are returned, best first. Results are not restricted to the requested
    genre: any podcast can appear if its mean score is high enough.

    Parameters
    ----------
    genre : str
        Genre to look up. Matching ignores case and surrounding whitespace.
    n : int or None, default 10
        Maximum number of rows to return. ``None`` returns every podcast in
        ranked order; zero or a negative value returns no rows.

    Returns
    -------
    pandas.DataFrame
        Rows of ``data`` limited to the columns 'Genre', 'Podcast Name',
        'Description', 'Publisher', 'Total Episodes', 'Spotify URL' and
        'Cover Image URL'. An empty frame with those columns is returned
        when no podcast has the requested genre.
    """
    wanted = genre.strip().lower()

    # Boolean mask by row *position*. cosine_sim is addressed positionally,
    # so using the frame's index labels would select the wrong rows (or
    # fail) whenever `data` does not carry a default 0..N-1 index.
    genre_mask = (
        data['Genre'].str.strip().str.lower().eq(wanted)
        .fillna(False)
        .to_numpy(dtype=bool)
    )

    # Unknown genre: nothing to average over
    if not genre_mask.any():
        return pd.DataFrame(columns=_RECOMMENDATION_COLUMNS)

    # Mean similarity of the genre's podcasts to every podcast in the frame
    mean_scores = cosine_sim[genre_mask].mean(axis=0)

    # Descending order; the stable sort keeps tied podcasts in their
    # original row order
    ranked_positions = (-mean_scores).argsort(kind='stable')

    # Clamp negative n so it cannot act as "drop the last |n| rows"
    limit = None if n is None else max(n, 0)
    top_positions = ranked_positions[:limit]

    # Return the top recommended podcasts with the presentation columns
    return data.iloc[top_positions][_RECOMMENDATION_COLUMNS]

In [8]:
# Demo: fetch the default number of recommendations for one genre and print
# a heading line followed by the resulting table
genre = 'horror'
recommendations = get_recommendations_by_genre(genre)
print(f"Recommendations for genre '{genre}':", recommendations, sep='\n')

Recommendations for genre 'horror':
       Genre                         Podcast Name  \
8939  horror                         Horror A. F.   
8901  horror                       Horror Podcast   
8697  horror               Horror Stories Podcast   
8982  horror                      The Horror Pod.   
8706  horror                 Horror House Podcast   
8898  horror                        What's Horror   
8825  horror              World Of Horror Podcast   
8840  horror  Horror Mania: A Horror Film Podcast   
8828  horror                 The Beauty of Horror   
9001  horror                      The Horror Hour   

                                            Description             Publisher  \
8939  We are horror enjoyers! Join us every episode ...           Ana Forrest   
8901  Welcome to Horror Podcast, where I bring you t...       Debalik Ganguly   
8697  Short stories of horror, mystery, death and re...       Robert Crandall   
8982  Horror, horror and more horror! We cover discu... 