1. Process the database to find the different age classifications

In [2]:
import json
from collections import defaultdict
import pandas as pd

# Load the JSON file (replace with your own file path).
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default text encoding.
with open('detailed_reviews_with_ratings.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Tally how many entries fall under each age rating
def process_age_ranges(data):
    """Count entries per age rating and return the tally as a DataFrame.

    Every entry's 'age_rating' string is assumed to have the form 'age X+':
    the 'age ' prefix and the '+' sign are removed and the rest is parsed
    as an integer.

    Returns a DataFrame with the columns "Age" and "Count", holding one row
    per distinct age in the order the ages are first encountered in ``data``.
    """
    counts = {}

    for record in data:
        digits = record['age_rating'].replace('age ', '').replace('+', '')
        age = int(digits)
        counts[age] = counts.get(age, 0) + 1

    # A list of (age, count) pairs maps directly onto the two named columns
    rows = list(counts.items())
    return pd.DataFrame(rows, columns=["Age", "Count"])

# Process the detailed reviews JSON data to extract age classifications
# (one row per distinct age together with the number of entries rated at that age)
age_classifications_df = process_age_ranges(data)

# Display the DataFrame as plain text; rows are not sorted by age
print(age_classifications_df)


    Age  Count
0     6    185
1     5    197
2     8    211
3     4    208
4     3    270
5     2     54
6     7    287
7     9     59
8    12      6
9    10     24
10   13      4
11   11      8
12   14      3
13   15      3


In [5]:
import json
import csv

# Load the JSON file (replace 'detailed_reviews_with_ratings.json' with the actual file path).
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default text encoding.
with open('detailed_reviews_with_ratings.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Function to classify age ratings into the specified ranges and check if it's from the last decade
def classify_age_and_decade(age_rating, release_year, current_year=2024):
    """Bucket an age rating and flag whether the release is from the last decade.

    Args:
        age_rating: Rating string in the form 'age X+' (e.g. 'age 6+').
        release_year: Release year as an int or a numeric string.
        current_year: Reference year for the "last decade" window. Defaults to
            2024, the value that used to be hard-coded here, so existing
            two-argument calls return exactly what they did before.

    Returns:
        A tuple ``(age_range, is_last_decade)``. ``age_range`` is one of
        "2-5", "5-8", "8-12" or "Outside"; ``is_last_decade`` is "Yes" or "No".

    Raises:
        ValueError: If the age or the release year is a string that cannot be
            parsed as an integer.
    """
    age = int(age_rating.replace('age ', '').replace('+', ''))

    # Buckets are upper-inclusive: 5 -> "2-5", 8 -> "5-8", 12 -> "8-12";
    # anything below 2 or above 12 is reported as "Outside".
    if 2 <= age <= 5:
        age_range = "2-5"
    elif 5 < age <= 8:
        age_range = "5-8"
    elif 8 < age <= 12:
        age_range = "8-12"
    else:
        age_range = "Outside"

    # The window is inclusive of the year exactly ten years before current_year
    is_last_decade = "Yes" if int(release_year) >= (current_year - 10) else "No"

    return age_range, is_last_decade

# Function to create CSV from the JSON data with the required fields
def create_csv_from_json(data, output_file):
    """Write one CSV row per entry with its classified age range and decade flag.

    Args:
        data: Iterable of dicts, each providing the keys 'title', 'age_rating'
            and 'release_year'.
        output_file: Path of the CSV file to create; opened in 'w' mode, so an
            existing file is overwritten.

    Raises:
        KeyError: If an entry lacks one of the required keys.
    """
    fieldnames = ['Title', 'Age Rating', 'Classified Age Range', 'Release Year', 'From Last Decade']

    # newline='' leaves line endings to the csv module. UTF-8 is pinned so that
    # titles containing non-ASCII characters are written the same way on every
    # platform rather than depending on the locale's default encoding.
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

        writer.writeheader()

        for entry in data:
            title = entry['title']
            age_rating = entry['age_rating']
            release_year = entry['release_year']

            # Classify the age range and determine if it's from the last decade
            age_range, is_last_decade = classify_age_and_decade(age_rating, release_year)

            # The original rating string and year are written alongside the derived columns
            writer.writerow({
                'Title': title,
                'Age Rating': age_rating,
                'Classified Age Range': age_range,
                'Release Year': release_year,
                'From Last Decade': is_last_decade
            })

# Example usage: the relative path is resolved against the current working directory
output_file = 'classified_shows.csv'
create_csv_from_json(data, output_file)

print(f"CSV file saved as {output_file}")


CSV file saved as classified_shows.csv


In [6]:
import random
import json

# Load the JSON file (replace 'detailed_reviews_with_ratings.json' with your file path).
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default text encoding.
with open('detailed_reviews_with_ratings.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Function to classify age ratings into the specified ranges and check if it's from the last decade
def classify_age_and_filter(age_rating, release_year, current_year=2024):
    """Bucket an age rating and report whether the release is from the last decade.

    Args:
        age_rating: Rating string in the form 'age X+' (e.g. 'age 6+').
        release_year: Release year as an int or a numeric string.
        current_year: Reference year for the "last decade" window. Defaults to
            2024, the value that used to be hard-coded here, so existing
            two-argument calls return exactly what they did before.

    Returns:
        A tuple ``(age_range, is_last_decade)``. ``age_range`` is one of
        "2-5", "5-8", "8-12" or "Outside"; ``is_last_decade`` is a bool.

    Raises:
        ValueError: If the age or the release year is a string that cannot be
            parsed as an integer.
    """
    age = int(age_rating.replace('age ', '').replace('+', ''))

    # Buckets are upper-inclusive: 5 -> "2-5", 8 -> "5-8", 12 -> "8-12";
    # anything below 2 or above 12 is reported as "Outside".
    if 2 <= age <= 5:
        age_range = "2-5"
    elif 5 < age <= 8:
        age_range = "5-8"
    elif 8 < age <= 12:
        age_range = "8-12"
    else:
        age_range = "Outside"

    # The window is inclusive of the year exactly ten years before current_year
    is_last_decade = int(release_year) >= (current_year - 10)

    return age_range, is_last_decade

# Group the last-decade shows by age range; one empty bucket per range up front
filtered_data = {label: [] for label in ("2-5", "5-8", "8-12")}

for entry in data:
    age_rating, release_year = entry['age_rating'], entry['release_year']
    age_range, is_last_decade = classify_age_and_filter(age_rating, release_year)

    # Skip anything older than the last decade or outside the defined age ranges
    if not is_last_decade or age_range == "Outside":
        continue

    filtered_data[age_range].append(entry)

# Function to get a random sample from each category
def get_sample(data, sample_size=5, seed=None):
    """Pick up to ``sample_size`` random shows from every category.

    Args:
        data: Mapping of category label -> list of shows.
        sample_size: Number of shows to draw per category.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used so the same input always produces the same sample. When None
            (the default), the module-level ``random`` generator is used,
            exactly as before.

    Returns:
        A dict with the same keys as ``data``. A category holding at least
        ``sample_size`` shows maps to a random selection of that size; a
        smaller category maps to a copy of all of its shows in their
        original order.
    """
    rng = random if seed is None else random.Random(seed)
    sample = {}

    for age_range, shows in data.items():
        if len(shows) >= sample_size:
            sample[age_range] = rng.sample(shows, sample_size)
        else:
            # Return a copy rather than the caller's own list, so editing the
            # sample can never modify the source data behind it
            sample[age_range] = list(shows)

    return sample

# Get a sample of 5 shows per age range (a range with fewer than 5 shows contributes all of them)
sample_data = get_sample(filtered_data, sample_size=5)

# Display the sample data (names, age ratings, release years)
for age_range, shows in sample_data.items():
    print(f"Sample for age range {age_range}:")
    for show in shows:
        print(f"- {show['title']} (Age Rating: {show['age_rating']}, Release Year: {show['release_year']})")
    # "\n" plus print's own trailing newline leaves two blank lines between groups
    print("\n")


Sample for age range 2-5:
- Doug Unplugs (Age Rating: age 4+, Release Year: 2020)
- Top Wing (Age Rating: age 4+, Release Year: 2017)
- Creative Galaxy: Arty's Holiday Masterpiece (Age Rating: age 4+, Release Year: 2018)
- Daniel Tiger's Neighborhood: Won't You Sing Along With Me? (Age Rating: age 3+, Release Year: 2020)
- The Fox-Badger Family (Age Rating: age 4+, Release Year: 2018)


Sample for age range 5-8:
- Care Bears: Unlock the Magic (Age Rating: age 7+, Release Year: 2019)
- The Epic Tale of Captain Underpants in Space (Age Rating: age 6+, Release Year: 2020)
- Marvel Funko (Age Rating: age 6+, Release Year: 2016)
- Gremlins: Secrets of the Mogwai (Age Rating: age 8+, Release Year: 2023)
- Lego Jurassic World: Legend of Isla Nublar (Age Rating: age 7+, Release Year: 2019)


Sample for age range 8-12:
- Maya and the Three (Age Rating: age 9+, Release Year: 2021)
- I Heart Arlo (Age Rating: age 9+, Release Year: 2021)
- Clarence (Age Rating: age 9+, Release Year: 2014)
- Steven