In [5]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load the podcast catalogue. The code below reads its 'Genre' and
# 'Podcast Name' columns to build the text features.
data = pd.read_csv('podcasts_data.csv')

# Build one text document per podcast from its genre and its name.
# Missing values are replaced with empty strings first: NaN + str stays NaN,
# and TfidfVectorizer rejects NaN documents with a ValueError.
data['features'] = (
    data['Genre'].fillna('').astype(str)
    + ' '
    + data['Podcast Name'].fillna('').astype(str)
)

# Learn the vocabulary and turn every document into a TF-IDF row vector.
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(data['features'])

# Pairwise similarity between all podcasts (row i / column j are positions in
# `data`). TfidfVectorizer L2-normalises each row by default, so the plain dot
# product computed by linear_kernel is the cosine similarity.
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Function to get recommendations for a given genre
def get_recommendations_by_genre(genre, n=10):
    """Return the podcasts most similar, on average, to those of a genre.

    Reads the module-level ``data`` frame and ``cosine_sim`` matrix. Every
    podcast in ``data`` is scored by its mean similarity to the podcasts whose
    'Genre' equals ``genre`` (compared case-insensitively), so the ranking
    covers the whole catalogue and is not restricted to that genre.

    Args:
        genre: Genre name to look up.
        n: Maximum number of rows to return (used as a slice bound).

    Returns:
        A DataFrame with the columns 'Genre', 'Podcast Name', 'Description',
        'Publisher', 'Total Episodes', 'Spotify URL' and 'Cover Image URL',
        ordered from highest to lowest mean similarity; podcasts with equal
        scores keep their relative order in ``data``. If no podcast has the
        requested genre, an empty DataFrame with those columns is returned.
    """
    columns = [
        'Genre',
        'Podcast Name',
        'Description',
        'Publisher',
        'Total Episodes',
        'Spotify URL',
        'Cover Image URL',
    ]

    # Positional boolean mask of the podcasts in the requested genre. Rows
    # with a missing genre never match.
    genre_mask = (data['Genre'].str.lower() == genre.lower()).to_numpy(
        dtype=bool, na_value=False
    )
    if not genre_mask.any():
        return pd.DataFrame(columns=columns)

    # cosine_sim is addressed by row position, not by index label, so select
    # with the mask rather than with data's index values; this stays correct
    # even when data does not carry a default 0..N-1 index.
    mean_scores = cosine_sim[genre_mask].mean(axis=0)

    # Stable descending sort of the positions by their mean score.
    ranked_positions = sorted(
        range(len(mean_scores)),
        key=lambda position: mean_scores[position],
        reverse=True,
    )
    top_positions = ranked_positions[:n]

    return data.iloc[top_positions][columns]

# Demo: look up recommendations for one sample genre, then print a header
# line followed by the resulting table.
genre = 'books'
recommendations = get_recommendations_by_genre(genre)
print(f"Recommendations for genre '{genre}':", recommendations, sep='\n')



Recommendations for genre 'books':
      Genre                 Podcast Name  \
1542  books                    B-O-O-K-D   
1177  books  Books, books and more books   
1116  books                  Audio Books   
1106  books                   Books Talk   
1174  books              Books and Crime   
1047  books             B.S. About Books   
1099  books             Beyond the Books   
1791  books                      Stories   
1078  books             Books and Beyond   
1124  books             Books and Beyond   

                                            Description           Publisher  \
1542        Solo thoughts on books of different genres!                 Omi   
1177  Zoe describes a book per episode. She gives a ...                 Zoë   
1116  This podcast will make you know the content of...  Rupesh Darimisetti   
1106  Welcome to our podcast, where we provide insig...          Books Talk   
1174  A podcast where we talk about books in the cri...  Emma Grace Michael   
1047  