In [13]:
import json
from collections import defaultdict
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load JSON data
with open('data.json', 'r') as f:
    data = json.load(f)

users = data['users']
videos = data['videos']

# Parse video metadata into a dictionary for quick access
video_metadata = {video['video_id']: video for video in videos}

# Convert watch history to sets for quick lookup
watch_history = {user['user_id']: set(user['watch_history']) for user in users}

# Function to calculate similarity between videos based on tags and category
def calculate_similarity(video1, video2):
    tags1 = ' '.join(video1['tags'])
    tags2 = ' '.join(video2['tags'])
    category1 = video1['category']
    category2 = video2['category']
    
    text_data = [tags1, tags2, category1, category2]
    
    vectorizer = CountVectorizer().fit_transform(text_data)
    vectors = vectorizer.toarray()
    
    return cosine_similarity([vectors[0]], [vectors[1]])[0][0]

# Function to get recommendations for a given user ID
def get_recommendations(user_id, n=5):
    user_history = watch_history[user_id]
    candidate_videos = [video['video_id'] for video in videos if video['video_id'] not in user_history]
    
    # Calculate similarity scores
    similarity_scores = defaultdict(float)
    for video_id in candidate_videos:
        video = video_metadata[video_id]
        for watched_video_id in user_history:
            watched_video = video_metadata[watched_video_id]
            similarity_scores[video_id] += calculate_similarity(video, watched_video)
    
    # Rank videos based on similarity scores
    ranked_videos = sorted(candidate_videos, key=lambda x: similarity_scores[x], reverse=True)
    
    # Return top-N recommended video IDs
    return ranked_videos[:n]

# Example usage:
if __name__ == '__main__':
    recommendations = get_recommendations(user_id=1, n=5)
    print(recommendations)


[122, 123, 121, 134, 149]


#### steps to building a basic recommendation system for the video streaming platform based on the given JSON data

### 1. Data Parsing

In [None]:
#First, we need to parse the JSON data into appropriate data structures. 
#The JSON provided has user information (user_id, name, watch_history) and video metadata (video_id, title, category, tags, duration).



### 2. Similarity calculation

In [None]:
# Uses CountVectorizer from sklearn to convert tags and category into numerical vectors, then calculates cosine similarity between video pairs.

#implemented a function to calculate similarity between videos based on their metadata (category and tags). 
#We can apply techniques like cosine similarity or Jaccard similarity, where:
#Cosine similarity measures the cosine of the angle between two vectors, computed from category and tags.
#Jaccard similarity measures the intersection over union of sets (tags).

### 3. Recommendation algorithm

In [None]:
#Calculate the similarity score between the videos in the user's watch history and all other videos.
#Rank the videos based on similarity scores.
#Recommend the top-N videos that the user has not watched yet.

### 4. Rank videos based on similarity score

In [None]:
#For a given user, identifies videos they haven't watched, calculates similarity scores against watched videos, and ranks candidates based on these scores.
#Implement a function that returns the top N recommended video IDs for a given user ID.

### 5. Return Top N recommendations

In [None]:
#Returns the top N recommended video IDs based on the calculated scores.
#Finally, the function returns the top n video IDs from ranked_videos, which are the videos recommended to the user.

In [None]:
#The above code function displays 5 recommended video ID's