In [0]:
%sql
WITH ranked_categories AS (
    SELECT 
        g.country,
        p.category,
        COUNT(p.category) AS category_count,
        ROW_NUMBER() OVER (PARTITION BY g.country ORDER BY COUNT(p.category) DESC) AS rank
    FROM
        geo_table g
    JOIN
        pin_table p ON g.ind = p.ind
    GROUP BY
        g.country, p.category
)
SELECT
    country,
    category,
    category_count
FROM
    ranked_categories
WHERE
    rank = 1
ORDER BY
    country;


country,category,category_count
Afghanistan,education,6
Albania,beauty,6
Algeria,quotes,11
American Samoa,tattoos,5
Andorra,tattoos,5
Angola,diy-and-crafts,3
Anguilla,christmas,2
Antarctica (the territory South of 60 deg S),tattoos,2
Antigua and Barbuda,christmas,5
Argentina,tattoos,5


In [0]:
%sql
WITH ranked_categories AS (
    SELECT
        YEAR(g.timestamp) AS post_year,
        p.category,
        COUNT(p.category) AS category_count,
        ROW_NUMBER() OVER (PARTITION BY YEAR(g.timestamp) ORDER BY COUNT(p.category) DESC) AS rank
    FROM  
        pin_table p
    JOIN
        geo_table g ON p.ind = g.ind
    WHERE 
        YEAR(g.timestamp) BETWEEN 2018 AND 2022
    GROUP BY
        p.category, YEAR(g.timestamp)
)
SELECT
    post_year,
    category,
    category_count
FROM
    ranked_categories
WHERE
    rank = 1
ORDER BY
    post_year;


post_year,category,category_count
2018,education,12
2019,travel,12
2020,finance,12
2021,tattoos,11
2022,christmas,14


In [0]:
%sql
-- step one
WITH ranked_followers AS (
    SELECT
        g.country,
        p.poster_name,
        p.follower_count,
        ROW_NUMBER() OVER (PARTITION BY g.country ORDER BY p.follower_count DESC) AS rank
    FROM
        geo_table g
    JOIN 
        pin_table p ON g.ind = p.ind
)
SELECT
    country,
    poster_name,
    follower_count
FROM 
    ranked_followers
WHERE 
    rank = 1
ORDER BY
    follower_count DESC




country,poster_name,follower_count
American Samoa,Mamas Uncut,8000000
Angola,Tastemade,8000000
Azerbaijan,Style Me Pretty,6000000
Burkina Faso,Behance,6000000
Albania,The Minds Journal,5000000
Bouvet Island (Bouvetoya),POPSUGAR,5000000
Bangladesh,Better Homes and Gardens,4000000
Afghanistan,9GAG,3000000
Botswana,OkChicas,3000000
Christmas Island,Instructables,3000000


In [0]:
%sql
-- step two

SELECT
    g.country,
    p.follower_count
FROM
    geo_table g
JOIN
    pin_table p ON g.ind = p.ind
ORDER BY  
    p.follower_count DESC
LIMIT 
    1


country,follower_count
American Samoa,8000000


In [0]:
%sql
-- step two alternative method
WITH ranked_followers AS (
    SELECT
        g.country,
        p.poster_name,
        p.follower_count,
        ROW_NUMBER() OVER (PARTITION BY g.country ORDER BY p.follower_count DESC) AS rank
    FROM
        geo_table g
    JOIN 
        pin_table p ON g.ind = p.ind
)
SELECT
    country,
    follower_count
FROM 
    ranked_followers
WHERE 
    rank = 1
ORDER BY
    follower_count DESC
LIMIT
    1

country,follower_count
American Samoa,8000000


In [0]:
%sql
WITH age_ranges AS (
    SELECT
        CASE
            WHEN age >= 18 AND age <= 24 THEN '18-24'
            WHEN age >= 25 AND age <= 35 THEN '25-35'
            WHEN age >= 36 AND age <= 49 THEN '36-49'
            WHEN age > 50 THEN '50+'
        END AS age_group,
        p.category
    FROM user_table u
    JOIN pin_table p ON u.ind = p.ind
),
category_counts AS (
    SELECT
        age_group,
        category,
        COUNT(*) AS category_count
    FROM age_ranges
    GROUP BY age_group, category
),
ranked_categories AS (
    SELECT
        age_group,
        category,
        category_count,
        ROW_NUMBER() OVER (PARTITION BY age_group ORDER BY category_count DESC) AS rank
    FROM category_counts
)
SELECT
    age_group,
    category,
    category_count
FROM
    ranked_categories
WHERE
    rank = 1
ORDER BY
    age_group;


age_group,category,category_count
,christmas,2
18-24,tattoos,27
25-35,art,14
36-49,quotes,12
50+,beauty,5


In [0]:
%sql
WITH age_ranges AS (
    SELECT
        CASE
            WHEN age >= 18 AND age <= 24 THEN '18-24'
            WHEN age >= 25 AND age <= 35 THEN '25-35'
            WHEN age >= 36 AND age <= 49 THEN '36-49'
            WHEN age > 50 THEN '50+'
        END AS age_group,
        p.follower_count
    FROM user_table u
    JOIN pin_table p ON u.ind = p.ind
),
ranked_follower_count AS (
    SELECT
        age_group,
        follower_count,
        ROW_NUMBER() OVER (PARTITION BY age_group ORDER BY follower_count) AS row_num,
        COUNT(*) OVER (PARTITION BY age_group) AS total_rows
    FROM age_ranges
),
median_follower_calculator AS (
    SELECT
        age_group,
        CASE
            WHEN total_rows % 2 = 1 THEN -- Odd number of rows
                MAX(CASE WHEN row_num = (total_rows + 1) / 2 THEN follower_count END)
            ELSE -- Even number of rows
                AVG(CASE WHEN row_num IN (total_rows / 2, total_rows / 2 + 1) THEN follower_count END)
        END AS median_follower_count
    FROM
        ranked_follower_count
    GROUP BY
        age_group,
        total_rows
)
SELECT
    age_group,
    median_follower_count
FROM
    median_follower_calculator
ORDER BY
    age_group;  


age_group,median_follower_count
,3500.0
18-24,108000.0
25-35,27000.0
36-49,6000.0
50+,1000.0


In [0]:
%sql

SELECT
    YEAR(date_joined) AS post_year,
    COUNT(YEAR(date_joined)) AS number_users_joined
FROM
    user_table 
GROUP BY
    post_year

post_year,number_users_joined
2015,184
2016,235
2017,81


In [0]:
%sql

WITH new_users AS (
    SELECT
        YEAR(date_joined) AS post_year,
        COUNT(YEAR(date_joined)) AS number_users_joined,
        ind
    FROM
        user_table 
    GROUP BY
        post_year,
        ind
), ranked_follower_count AS (
    SELECT
        n.post_year,
        p.follower_count,
        ROW_NUMBER() OVER (PARTITION BY n.post_year ORDER BY p.follower_count) AS row_num,
        COUNT(*) OVER (PARTITION BY n.post_year) AS total_rows
    FROM 
        new_users n
    JOIN
        pin_table p ON n.ind = p.ind
), median_follower_calculator AS (
    SELECT
        post_year,
        CASE
            WHEN total_rows % 2 = 1 THEN -- Odd number of rows
                MAX(CASE WHEN row_num = (total_rows + 1) / 2 THEN follower_count END)
            ELSE -- Even number of rows
                AVG(CASE WHEN row_num IN (total_rows / 2, total_rows / 2 + 1) THEN follower_count END)
        END AS median_follower_count
    FROM
        ranked_follower_count
    GROUP BY
        post_year,
        total_rows
)
SELECT
    post_year,
    median_follower_count
FROM
    median_follower_calculator



post_year,median_follower_count
2015,94500.0
2016,19000.0
2017,3000.0


In [0]:
%sql
WITH new_users AS (
    SELECT
        YEAR(date_joined) AS post_year,
        ind,
        age
    FROM
        user_table
), 
age_ranges AS (
    SELECT
        CASE
            WHEN age >= 18 AND age <= 24 THEN '18-24'
            WHEN age >= 25 AND age <= 35 THEN '25-35'
            WHEN age >= 36 AND age <= 49 THEN '36-49'
            WHEN age > 50 THEN '50+'
        END AS age_group,
        p.follower_count,
        n.post_year
    FROM 
        new_users n
    JOIN 
        pin_table p ON n.ind = p.ind
),
ranked_follower_count AS (
    SELECT
        age_group,
        follower_count,
        ROW_NUMBER() OVER (PARTITION BY post_year, age_group ORDER BY follower_count) AS row_num,
        COUNT(*) OVER (PARTITION BY post_year, age_group) AS total_rows,
        post_year
    FROM 
        age_ranges
),
median_follower_calculator AS (
    SELECT
        post_year,
        age_group,
        CASE
            WHEN total_rows % 2 = 1 THEN -- Odd number of rows
                MAX(CASE WHEN row_num = (total_rows + 1) / 2 THEN follower_count END)
            ELSE -- Even number of rows
                AVG(CASE WHEN row_num IN (total_rows / 2, total_rows / 2 + 1) THEN follower_count END)
        END AS median_follower_count
    FROM 
        ranked_follower_count
    GROUP BY 
        post_year,
        age_group,
        total_rows
)
SELECT
    age_group,
    post_year,
    median_follower_count
FROM 
    median_follower_calculator
ORDER BY 
    post_year, 
    age_group;


age_group,post_year,median_follower_count
,2015,5500.0
18-24,2015,292000.0
25-35,2015,42000.0
36-49,2015,23000.0
50+,2015,14000.0
,2016,630.0
18-24,2016,46000.0
25-35,2016,27000.0
36-49,2016,7000.0
50+,2016,1000.0
