In [20]:
import duckdb
import pandas

In [71]:
df_games = duckdb.query("SELECT * FROM 'games.csv'")

In [72]:
df_games.columns

['column00',
 'Title',
 'Release Date',
 'Team',
 'Rating',
 'Times Listed',
 'Number of Reviews',
 'Genres',
 'Summary',
 'Reviews',
 'Plays',
 'Playing',
 'Backlogs',
 'Wishlist']

In [73]:
df_games_processed =  duckdb.query("""SELECT *,
                 RIGHT(\"Release Date\",4) as Year,
                 CASE WHEN contains(Plays, 'K') THEN
                    CAST(replace(Plays, 'K', '') as FLOAT)*1000
                    ELSE
                    CAST(Plays AS FLOAT)
                    END as Plays_Float
                 FROM df_games""")

In [76]:
duckdb.query("""    SELECT TITLE, SUM(Plays_Float) as total_plays FROM 
                    df_games_processed
                  
                    group by title
                    order by total_plays desc
                    LIMIT 10
                  """)

┌─────────────────────────────────────────┬─────────────┐
│                  Title                  │ total_plays │
│                 varchar                 │   double    │
├─────────────────────────────────────────┼─────────────┤
│ Minecraft                               │    110000.0 │
│ The Legend of Zelda: Breath of the Wild │     90000.0 │
│ Grand Theft Auto V                      │     90000.0 │
│ Doom                                    │     89300.0 │
│ Portal 2                                │     87000.0 │
│ Undertale                               │     84000.0 │
│ Portal                                  │     84000.0 │
│ Among Us                                │     75000.0 │
│ Super Mario Odyssey                     │     75000.0 │
│ Super Smash Bros. Ultimate              │     75000.0 │
├─────────────────────────────────────────┴─────────────┤
│ 10 rows                                     2 columns │
└───────────────────────────────────────────────────────┘

In [107]:
duckdb.query("""    SELECT DISTINCT Year, 
                  first(Title)  OVER(partition by Year order by SUM(Plays_Float) desc), 
                  max(SUM(Plays_Float)) OVER(partition by Year order by SUM(Plays_Float) desc) as total_plays
                  FROM 
                    df_games_processed  
                    where trim(year) != 'TBD' and year <= '2023'         
                    group by Year, title                               
                    order by Year desc
                  """)

┌─────────┬────────────────────────────────────────────────────────────────────────────┬─────────────┐
│  Year   │ first("Title") OVER (PARTITION BY "Year" ORDER BY sum("Plays_Float") DESC) │ total_plays │
│ varchar │                                  varchar                                   │   double    │
├─────────┼────────────────────────────────────────────────────────────────────────────┼─────────────┤
│ 2023    │ Hi-Fi Rush                                                                 │      9000.0 │
│ 2022    │ Elden Ring                                                                 │     51000.0 │
│ 2021    │ Resident Evil Village                                                      │     29700.0 │
│ 2020    │ Genshin Impact                                                             │     42000.0 │
│ 2019    │ Hades                                                                      │     63000.0 │
│ 2018    │ Among Us                                                     