<a href="https://colab.research.google.com/github/shilpathota/RecommendationSystem/blob/main/RecommendationSystem.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ✅ STEP 1: Install Google API Client (only needed once per session)
!pip install --quiet google-api-python-client

In [2]:
# ✅ STEP 2: Import libraries
import pandas as pd
from googleapiclient.discovery import build
import getpass

In [3]:
# ✅ STEP 3: Securely input API key
# getpass reads the key without echoing it, so it does not end up in the cell output.
API_KEY = getpass.getpass('🔑 Enter your YouTube API key: ')
# Service name and version handed to googleapiclient's build() when the client is created.
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"


🔑 Enter your YouTube API key: ··········


In [4]:
# ✅ STEP 4: Initialize API client
# Creates the module-level `youtube` client that search_youtube_videos() and
# get_video_metadata() use for their requests.
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=API_KEY)


In [5]:
# ✅ STEP 5: Search YouTube videos by topic (age-specific query)
def search_youtube_videos(query="learning ABC", max_results=10):
    """Search YouTube for videos matching ``query`` and return their video IDs.

    Args:
        query: Free-text search string sent as the ``q`` parameter.
        max_results: Desired number of results. The search endpoint only
            accepts values from 0 to 50, so larger values are clamped to 50
            and non-positive values return an empty list without a request.

    Returns:
        A list of video ID strings, in the order the API returned them.
    """
    max_results = min(int(max_results), 50)
    if max_results <= 0:
        # Nothing requested: avoid spending API quota on an empty search.
        return []

    response = youtube.search().list(
        q=query,
        part="snippet",
        maxResults=max_results,
        type="video"
    ).execute()

    # Skip any result that does not carry a videoId instead of raising KeyError.
    return [
        item['id']['videoId']
        for item in response.get('items', [])
        if 'videoId' in item.get('id', {})
    ]


In [6]:
# ✅ STEP 6: Get metadata for a list of video IDs
def get_video_metadata(video_ids):
    """Fetch snippet and statistics metadata for the given video IDs.

    Args:
        video_ids: Iterable of YouTube video ID strings. An empty iterable
            returns an empty list without contacting the API.

    Returns:
        A list of dicts, one per video the API returned, with the keys
        ``video_id``, ``title``, ``description``, ``channel_title``, ``tags``,
        ``published_at``, ``view_count``, ``like_count`` and ``comment_count``.
        Count values are passed through exactly as the API delivers them;
        a count the API omits is reported as ``0``.
    """
    video_ids = list(video_ids)
    video_data = []

    # Send the IDs in batches: videos.list takes a comma-separated ID list and
    # rejects requests that carry more than 50 IDs.
    batch_size = 50
    for start in range(0, len(video_ids), batch_size):
        batch = video_ids[start:start + batch_size]
        response = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=",".join(batch)
        ).execute()

        for item in response.get('items', []):
            snippet = item['snippet']
            # Tolerate a missing statistics part instead of raising KeyError.
            stats = item.get('statistics', {})
            video_data.append({
                'video_id': item['id'],
                'title': snippet['title'],
                'description': snippet.get('description', ''),
                'channel_title': snippet['channelTitle'],
                'tags': snippet.get('tags', []),
                'published_at': snippet['publishedAt'],
                'view_count': stats.get('viewCount', 0),
                'like_count': stats.get('likeCount', 0),
                'comment_count': stats.get('commentCount', 0),
            })
    return video_data

In [7]:
# ✅ STEP 7: Run the Search + Fetch
query = "nursery rhymes for toddlers"
# Ask for up to 15 matching video IDs, then fetch the metadata records for them.
video_ids = search_youtube_videos(query, max_results=15)
video_metadata = get_video_metadata(video_ids)


In [8]:
# ✅ STEP 8: Save to CSV or show dataframe
df = pd.DataFrame(video_metadata)
# index=False keeps the DataFrame's row index out of the saved file.
df.to_csv("youtube_kids_data.csv", index=False)
# Last expression of the cell, so the notebook renders the first five rows.
df.head()

Unnamed: 0,video_id,title,description,channel_title,tags,published_at,view_count,like_count,comment_count
0,5gZOYKHXwyQ,Nursery Rhymes for Kids | Songs Compilation - ...,Itsy Bitsy Spider and many more children songs...,Twinkle Little Songs - Nursery Rhymes,"[nursery rhymes, songs for kids, children song...",2019-07-26T16:15:17Z,65434463,128248,0
1,e_04ZrNroTo,Wheels on the Bus | @CoComelon Nursery Rhymes ...,Bounce along in the bus all over town with thi...,Cocomelon - Nursery Rhymes,"[preschool, toddler, children songs, abckidtv,...",2018-05-24T07:00:02Z,7484233263,19636714,0
2,LrAtBtQnvCE,Five Little Ducks + More | Kids Songs and Nurs...,Get the Super Simple App! ► http://bit.ly/TheS...,Super Simple Songs - Kids Songs,"[Five Little Ducks, children's songs, Nursery ...",2016-10-08T15:30:00Z,152710588,195580,0
3,0n_J2z-ILXo,Humpty Dumpty | + More Kids Songs | Super Simp...,Get the Super Simple App! ► http://bit.ly/TheS...,Super Simple Songs - Kids Songs,"[kids songs, humpty dumpty, humpty dumpty kids...",2017-11-04T15:30:00Z,110299548,158691,0
4,QA48wTGbU7A,Head Shoulders Knees & Toes | @CoComelon Nurse...,"Dance along with this classic nursery rhyme, a...",Cocomelon - Nursery Rhymes,"[kids videos, abckidtv, kindergarten, kid song...",2017-10-03T18:17:08Z,1037607401,1910743,0


In [10]:
import base64
import json
import requests

# 🔐 Securely input GitHub token
github_token = getpass.getpass('🔐 Enter your GitHub token: ')

# 🔗 GitHub repo details
GITHUB_USERNAME = "shilpathota"
REPO_NAME = "RecommendationSystem"
FILE_PATH = "data/youtube_kids_data.csv"  # path inside the repo
COMMIT_MESSAGE = "Add latest YouTube kids metadata"
BRANCH = "main"  # branch that is both inspected and committed to
REQUEST_TIMEOUT = 30  # seconds; without it requests can wait indefinitely

# 📤 Read the CSV content (the contents API expects the file bytes base64-encoded)
with open("youtube_kids_data.csv", "rb") as file:
    content = file.read()
encoded_content = base64.b64encode(content).decode("utf-8")

# 🧠 GitHub API URL
url = f"https://api.github.com/repos/{GITHUB_USERNAME}/{REPO_NAME}/contents/{FILE_PATH}"
headers = {"Authorization": f"token {github_token}"}

# Look the file up on the branch being written. Creating and updating both use
# PUT; an update must additionally carry the existing blob's SHA. Without `ref`
# the lookup would read the repository's default branch, which may not be BRANCH.
response = requests.get(url, headers=headers, params={"ref": BRANCH}, timeout=REQUEST_TIMEOUT)
if response.status_code == 200:
    sha = response.json()['sha']  # Needed for update
else:
    sha = None

# 📤 Upload the file (create or update)
data = {
    "message": COMMIT_MESSAGE,
    "content": encoded_content,
    "branch": BRANCH
}
if sha:
    data["sha"] = sha

response = requests.put(url, headers=headers, json=data, timeout=REQUEST_TIMEOUT)

if response.status_code in [200, 201]:
    print("✅ File pushed to GitHub!")
else:
    # The error body is not guaranteed to be JSON (e.g. a proxy error page),
    # so fall back to the raw text rather than raising while reporting.
    try:
        error_detail = response.json()
    except ValueError:
        error_detail = response.text
    print("❌ Failed to push:", response.status_code, error_detail)


🔐 Enter your GitHub token: ··········
✅ File pushed to GitHub!


In [11]:
import pandas as pd

# Age-query mapping: each record ties a search phrase to the inclusive age band
# it targets and to a content category.
mapping_data = [
    dict(query=phrase, age_min=youngest, age_max=oldest, category=topic)
    for phrase, youngest, oldest, topic in [
        ("nursery rhymes for toddlers", 3, 5, "Music"),
        ("learning ABC for preschoolers", 3, 5, "Education"),
        ("colors and shapes for kids", 3, 5, "Education"),
        ("science for kids age 6 to 8", 6, 8, "Science"),
        ("math games for kids", 6, 8, "Math"),
        ("bedtime stories for kids", 6, 8, "Storytelling"),
        ("coding for kids age 9", 9, 12, "Programming"),
        ("solar system for kids", 9, 12, "Space/Science"),
        ("how to draw for kids", 9, 12, "Arts & Crafts"),
        ("STEM activities for kids", 9, 12, "STEM"),
    ]
]

# Build the table once from the records and persist it without the row index.
mapping_df = pd.DataFrame(mapping_data)
mapping_df.to_csv("age_query_mapping.csv", index=False)

print("✅ Mapping CSV created successfully!")
mapping_df


✅ Mapping CSV created successfully!


Unnamed: 0,query,age_min,age_max,category
0,nursery rhymes for toddlers,3,5,Music
1,learning ABC for preschoolers,3,5,Education
2,colors and shapes for kids,3,5,Education
3,science for kids age 6 to 8,6,8,Science
4,math games for kids,6,8,Math
5,bedtime stories for kids,6,8,Storytelling
6,coding for kids age 9,9,12,Programming
7,solar system for kids,9,12,Space/Science
8,how to draw for kids,9,12,Arts & Crafts
9,STEM activities for kids,9,12,STEM


In [12]:
import base64
import json
import requests
import getpass

# 🔐 Enter your GitHub token
github_token = getpass.getpass('🔐 Enter your GitHub token: ')

# GitHub repository info
GITHUB_USERNAME = "shilpathota"  # 👈 change this
REPO_NAME = "RecommendationSystem"            # 👈 change this
FILE_PATH = "data/age_query_mapping.csv"  # 👈 path inside repo
COMMIT_MESSAGE = "Add age-query mapping CSV"
BRANCH = "main"  # branch that is both inspected and committed to
REQUEST_TIMEOUT = 30  # seconds; without it requests can wait indefinitely

# Read CSV file and encode (the contents API expects the file bytes base64-encoded)
with open("age_query_mapping.csv", "rb") as file:
    content = file.read()
encoded_content = base64.b64encode(content).decode("utf-8")

# Check if file already exists on the target branch to get its SHA. Without
# `ref` the lookup would read the repository's default branch, which may not
# be the branch the commit is written to.
url = f"https://api.github.com/repos/{GITHUB_USERNAME}/{REPO_NAME}/contents/{FILE_PATH}"
headers = {"Authorization": f"token {github_token}"}
response = requests.get(url, headers=headers, params={"ref": BRANCH}, timeout=REQUEST_TIMEOUT)

sha = response.json()['sha'] if response.status_code == 200 else None

# Prepare PUT request (the same verb creates or updates; updates also need the SHA)
data = {
    "message": COMMIT_MESSAGE,
    "content": encoded_content,
    "branch": BRANCH
}
if sha:
    data["sha"] = sha

response = requests.put(url, headers=headers, json=data, timeout=REQUEST_TIMEOUT)

# Status output
if response.status_code in [200, 201]:
    print("✅ age_query_mapping.csv pushed to GitHub!")
else:
    # The error body is not guaranteed to be JSON (e.g. a proxy error page),
    # so fall back to the raw text rather than raising while reporting.
    try:
        error_detail = response.json()
    except ValueError:
        error_detail = response.text
    print("❌ Failed to push:", response.status_code, error_detail)


🔐 Enter your GitHub token: ··········
✅ age_query_mapping.csv pushed to GitHub!


In [13]:
import pandas as pd

# Load the age/query mapping from the CSV file into the module-level `mapping_df`,
# which get_queries_for_age() filters by age.
mapping_df = pd.read_csv("age_query_mapping.csv")


In [14]:
#Get queries for user age
import random

def get_queries_for_age(age, n_queries=2):
    """Randomly pick search queries whose age band contains ``age``.

    Args:
        age: Age to look up; a row matches when ``age_min <= age <= age_max``.
        n_queries: Maximum number of queries to return.

    Returns:
        A list of at most ``n_queries`` distinct query strings drawn from the
        matching rows of ``mapping_df``; empty when no row matches.
    """
    old_enough = mapping_df['age_min'].le(age)
    young_enough = mapping_df['age_max'].ge(age)
    candidates = mapping_df.loc[old_enough & young_enough, 'query'].tolist()
    # Never ask random.sample for more items than there are candidates.
    sample_size = min(n_queries, len(candidates))
    return random.sample(candidates, k=sample_size)


In [15]:
#Fetch YouTube Videos for Each Query
def get_videos_for_age(age):
    """Collect video metadata for the search queries selected for ``age``.

    Each query returned by ``get_queries_for_age(age)`` is searched (10 results
    per query) and the metadata of the hits is gathered into a single list.
    A video that is returned for more than one query is kept only once, so it
    cannot appear twice among the recommendations.

    Args:
        age: Age used to select the search queries.

    Returns:
        A list of metadata dicts as produced by ``get_video_metadata``, in
        first-seen order; empty when no query matches or nothing is found.
    """
    queries = get_queries_for_age(age)
    all_video_data = []
    seen_ids = set()

    for q in queries:
        print(f"🔎 Searching for: {q}")
        ids = search_youtube_videos(q, max_results=10)
        if not ids:
            # No hits for this query: skip the metadata request entirely.
            continue

        for video in get_video_metadata(ids):
            video_id = video.get('video_id')
            if video_id is not None:
                if video_id in seen_ids:
                    continue  # already collected via an earlier query
                seen_ids.add(video_id)
            all_video_data.append(video)

    return all_video_data


In [16]:
#Recommend Top N Videos
def recommend_for_age(age, top_n=5):
    """Return the ``top_n`` most-viewed videos found for ``age``.

    Args:
        age: Age passed to ``get_videos_for_age`` to collect candidate videos.
        top_n: Maximum number of rows to return.

    Returns:
        A DataFrame with the columns ``title``, ``channel_title``,
        ``view_count`` and ``published_at``, sorted by ``view_count`` in
        descending order. When no videos are found (for example, an age that
        no mapping row covers) an empty DataFrame with those columns is
        returned instead of raising ``KeyError``.
    """
    columns = ['title', 'channel_title', 'view_count', 'published_at']

    videos = get_videos_for_age(age)
    if not videos:
        # An empty record list yields a frame without a 'view_count' column,
        # so the conversion below would fail; return an empty result instead.
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(videos)

    # Example: Sort by view_count
    # Convert to int so the ordering is numeric rather than lexicographic.
    df['view_count'] = df['view_count'].astype(int)
    recommended = df.sort_values(by="view_count", ascending=False).head(top_n)

    return recommended[columns]


In [17]:
# Prompt for the age; int() raises ValueError if the entry is not a whole number.
user_age = int(input("Enter child’s age: "))
# Uses recommend_for_age's default of the top 5 videos.
recommendations = recommend_for_age(user_age)

print("🎉 Top Recommendations:")
print(recommendations)


Enter child’s age: 6
🔎 Searching for: bedtime stories for kids
🔎 Searching for: math games for kids
🎉 Top Recommendations:
                                                title  \
12  "Math Whiz!" Addition Song  /// Danny Go! Kids...   
10  Addition and Subtraction with Dinosaurs - Math...   
6   3 Little Pigs | Bedtime Stories for Kids in En...   
1   The Lion, The Mouse and The Sleepy Bear | Bedt...   
11              Maths Kids By RV AppStudios [English]   

                                        channel_title  view_count  \
12                                          Danny Go!    39170899   
10                          Smile and Learn - English    12056865   
6                    Fairy Tales and Stories for Kids    11267503   
1                    Fairy Tales and Stories for Kids     7762140   
11  Toddler Learning Videos For Kids - Lucas & Fri...     7207558   

            published_at  
12  2022-10-10T11:00:33Z  
10  2020-06-25T15:30:03Z  
6   2020-05-16T11:00:00Z  
1   2021-03