SQL Exercise: Average View Time Per Video, Ranked by Category

In [1]:
import sqlite3
import pandas as pd

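SCHEMA DESCRIPTION:

- `video_views` (`viewer_id`, `video_id`, `view_time`, `view_date`): one row per recorded view of a video.
- `videos` (`video_id`, `category`): the category each video belongs to.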

In [2]:
# SQL script that defines both tables and seeds them with the exercise's
# sample rows (10 views spread over 5 videos in 4 categories).
SETUP_SQL = """
CREATE TABLE video_views (
  viewer_id INT,
  video_id INT,
  view_time INT,
  view_date TEXT
);

CREATE TABLE videos (
  video_id INT,
  category TEXT
);

INSERT INTO video_views VALUES
(1, 101, 120, '2024-01-01'),
(2, 101, 150, '2024-01-01'),
(3, 102, 300, '2024-01-01'),
(4, 103, 200, '2024-01-01'),
(5, 104, 50,  '2024-01-01'),
(6, 102, 250, '2024-01-02'),
(7, 104, 80,  '2024-01-02'),
(8, 105, 100, '2024-01-02'),
(9, 102, 200, '2024-01-03'),
(10, 104, 300,'2024-01-03');

INSERT INTO videos VALUES
(101, 'Music'),
(102, 'Education'),
(103, 'Gaming'),
(104, 'Music'),
(105, 'News');
"""

# The database lives only in memory: re-running this cell starts from a
# brand-new, empty database, so the CREATE TABLE statements never collide.
conn = sqlite3.connect(":memory:")
cursor = conn.cursor()

# Run the whole script in one call (schema first, then the inserts).
cursor.executescript(SETUP_SQL)


<sqlite3.Cursor at 0x1227550c0>

VISUALIZING TABLES

In [6]:
# Plain "dump everything" queries, one per table.
# query_table2 is only defined here; the following cell executes it.
query_table2 = """
select * from videos
"""
query_table1 = """
select * from video_views
"""

# Load every row of video_views into a DataFrame and show it as the cell output.
res_table1 = pd.read_sql_query(sql=query_table1, con=conn)
res_table1

Unnamed: 0,viewer_id,video_id,view_time,view_date
0,1,101,120,2024-01-01
1,2,101,150,2024-01-01
2,3,102,300,2024-01-01
3,4,103,200,2024-01-01
4,5,104,50,2024-01-01
5,6,102,250,2024-01-02
6,7,104,80,2024-01-02
7,8,105,100,2024-01-02
8,9,102,200,2024-01-03
9,10,104,300,2024-01-03


In [7]:
# Load every row of videos (video_id -> category) and show it as the cell output.
res_table2 = pd.read_sql_query(sql=query_table2, con=conn)
res_table2

Unnamed: 0,video_id,category
0,101,Music
1,102,Education
2,103,Gaming
3,104,Music
4,105,News


SOLUTION 

Strategy: <br>
<br>
For each category, calculate:

- The total view time
- The number of unique videos in that category
- The average view time per video (total view time divided by the number of unique videos)

Then rank the categories by that average (highest to lowest). <br>

In [18]:
# Per-category metrics, highest average first:
#   total_view_time - sum of view_time over all views of the category's videos
#   num_videos      - number of distinct videos of the category that appear in video_views
#   avg_view_time   - total_view_time / num_videos
# Notes:
# - The inner join means a video with no row in video_views does not count
#   towards num_videos.
# - cast(... as float) forces real division; dividing two integers in SQLite
#   would truncate the result.
# - v.category is a secondary sort key so that categories with the same
#   average always come back in the same order (SQLite leaves the relative
#   order of rows that tie on the ORDER BY expressions undefined).
query_sol = """
select v.category, sum(vv.view_time) as total_view_time, count(distinct vv.video_id) as num_videos, 
cast(sum(vv.view_time) as float)/ count(distinct vv.video_id) as avg_view_time
from video_views vv
join videos v on v.video_id = vv.video_id
group by v.category 
order by avg_view_time desc, v.category
"""

res_query_sol = pd.read_sql_query(query_sol, conn)
res_query_sol

Unnamed: 0,category,total_view_time,num_videos,avg_view_time
0,Education,750,1,750.0
1,Music,700,2,350.0
2,Gaming,200,1,200.0
3,News,100,1,100.0
