In [2]:
import sqlite3
import pandas as pd
connection = sqlite3.connect('../tables_db/sales.db')

In [6]:
query = r'''
    SELECT * FROM sales
'''
pd.read_sql_query(query, connection).head()

Unnamed: 0,year,month,quarter,plan,price,quantity,revenue
0,2019,1,1,silver,60,200,12000
1,2019,1,1,gold,240,50,12000
2,2019,1,1,platinum,600,10,6000
3,2019,2,1,silver,60,660,39600
4,2019,2,1,gold,240,60,14400


In [7]:
# Допустим, мы хотим посмотреть, как выручка за каждый год соотносится с выручкой за два года.
query = r'''
    SELECT year, SUM(revenue) AS revenue
    FROM sales
    GROUP BY year
    ORDER BY year
'''
pd.read_sql_query(query, connection)

Unnamed: 0,year,revenue
0,2019,722460
1,2020,1244940


In [2]:
# Теперь с помощью оконных функций считаем общую выручку на агрегированных результатах
query = r'''
    WITH query_1 AS (
    SELECT year, SUM(revenue) AS revenue
    FROM sales
    GROUP BY year
    ORDER BY year
    )
    SELECT year, revenue,
        SUM(revenue) OVER () AS total
    FROM query_1
    ORDER BY year
'''
pd.read_sql_query(query, connection)

Unnamed: 0,year,revenue,total
0,2019,722460,1967400
1,2020,1244940,1967400


In [4]:
# В простых случаях можно совместить оба запроса в один
query = r'''
    SELECT year, 
        SUM(revenue) AS revenue,
        SUM(SUM(revenue)) OVER () AS total
    FROM sales
    GROUP BY year
    ORDER BY year
'''
pd.read_sql_query(query, connection)

Unnamed: 0,year,revenue,total
0,2019,722460,1967400
1,2020,1244940,1967400


In [12]:
# Сначала окна, затем отсев

# Допустим, мы хотим посмотреть, как соотносится месячная выручка по тарифам silver и gold.

query = r'''
    WITH query_1 AS (
    SELECT month, revenue, plan,
        LEAD(CASE WHEN plan = 'silver' THEN revenue END) OVER w AS silver,
        (CASE WHEN plan = 'gold' THEN revenue END) AS gold
        /*(CASE WHEN plan = 'silver' THEN revenue END) AS silver,
        LAG(CASE WHEN plan = 'gold' THEN revenue END) OVER w AS gold */
    FROM sales
    WHERE year = '2020' AND plan IN ('silver', 'gold')
    WINDOW w AS (ORDER BY month, plan)
    ORDER BY month, plan
    ) 
    SELECT month, silver, gold
    FROM query_1
    WHERE silver NOT NULL
    ORDER BY month
'''
# query = r'''
#     SELECT month,
#         SUM(CASE WHEN plan = 'silver' THEN revenue ELSE 0 END) AS silver,
#         SUM(CASE WHEN plan = 'gold' THEN revenue ELSE 0 END) AS gold
#     FROM sales
#     WHERE year = '2020' AND plan IN ('silver', 'gold')
#     GROUP BY month
#     ORDER BY month
# '''
pd.read_sql_query(query, connection)

Unnamed: 0,month,silver,gold
0,1,27000,14400
1,2,61200,29040
2,3,42000,31200
3,4,42000,43200
4,5,39000,34560
5,6,52800,44880
6,7,46800,40320
7,8,33000,28800
8,9,54000,28800
9,10,57000,36000


In [13]:
# query = r'''
#     WITH query_1 AS (
#     SELECT month, revenue, plan,
#         LEAD(CASE WHEN plan = 'silver' THEN revenue END) OVER w AS silver,
#         (CASE WHEN plan = 'gold' THEN revenue END) AS gold
#         /*(CASE WHEN plan = 'silver' THEN revenue END) AS silver,
#         LAG(CASE WHEN plan = 'gold' THEN revenue END) OVER w AS gold */
#     FROM sales
#     WHERE year = '2020' AND plan IN ('silver', 'gold')
#     WINDOW w AS (ORDER BY month, plan)
#     ORDER BY month, plan
#     ) 
#     SELECT month, silver, gold
#     FROM query_1
#     WHERE silver NOT NULL
#     ORDER BY month
# '''
query = r'''
    SELECT month,
        SUM(CASE WHEN plan = 'silver' THEN revenue ELSE 0 END) AS silver,
        SUM(CASE WHEN plan = 'gold' THEN revenue ELSE 0 END) AS gold
    FROM sales
    WHERE year = '2020' AND plan IN ('silver', 'gold')
    GROUP BY month
    ORDER BY month
'''
pd.read_sql_query(query, connection)

Unnamed: 0,month,silver,gold
0,1,27000,14400
1,2,61200,29040
2,3,42000,31200
3,4,42000,43200
4,5,39000,34560
5,6,52800,44880
6,7,46800,40320
7,8,33000,28800
8,9,54000,28800
9,10,57000,36000


In [18]:
# Выручка по тарифу gold

# Есть таблица продаж sales. Посчитайте выручку для тарифа gold по месяцам 2020 года.

# Для каждого месяца дополнительно укажите:

#     выручку за предыдущий месяц (prev);
#     процент, который составляет выручка текущего месяца от prev (perc).

# Процент округлите до целого.
query = r'''
    SELECT year, month, revenue,
        lag(revenue) OVER w AS prev,
        ROUND(revenue * 100 / lag(revenue) OVER w) AS perc
    FROM sales
    WHERE year = '2020' AND plan = 'gold'
    WINDOW w AS (
        ORDER BY month
    )
    ORDER BY year, month
'''
pd.read_sql_query(query, connection)

Unnamed: 0,year,month,revenue,prev,perc
0,2020,1,14400,,
1,2020,2,29040,14400.0,201.0
2,2020,3,31200,29040.0,107.0
3,2020,4,43200,31200.0,138.0
4,2020,5,34560,43200.0,80.0
5,2020,6,44880,34560.0,129.0
6,2020,7,40320,44880.0,89.0
7,2020,8,28800,40320.0,71.0
8,2020,9,28800,28800.0,100.0
9,2020,10,36000,28800.0,125.0


In [20]:
# Выручка по тарифам за 1 квартал

# Есть таблица продаж sales. Посчитайте выручку нарастающим 
# итогом по каждому тарифному плану за первые три месяца 2020 года.
query = r'''
    SELECT plan, year, month, revenue,
        SUM(revenue) OVER w AS total
    FROM sales
    WHERE year = '2020' AND month < 4
    WINDOW w AS (
        PARTITION BY plan
        ORDER BY month
        ROWS BETWEEN unbounded preceding AND current row
    )
    ORDER BY plan
'''
pd.read_sql_query(query, connection)

Unnamed: 0,plan,year,month,revenue,total
0,gold,2020,1,14400,14400
1,gold,2020,2,29040,43440
2,gold,2020,3,31200,74640
3,platinum,2020,1,7200,7200
4,platinum,2020,2,13200,20400
5,platinum,2020,3,16800,37200
6,silver,2020,1,27000,27000
7,silver,2020,2,61200,88200
8,silver,2020,3,42000,130200


In [22]:
# Скользящее среднее по тарифу platinum

# Есть таблица продаж sales. Посчитайте скользящую среднюю выручку за 3 месяца 
# (предыдущий, текущий, следующий) для тарифа platinum в 2020 году.

# Округлите среднюю выручку до целого.
query = r'''
    SELECT year, month, revenue,
        ROUND(AVG(revenue) OVER w) AS avg3m
    FROM sales
    WHERE year = '2020' AND plan = 'platinum'
    WINDOW w AS (
        ORDER BY month
        ROWS BETWEEN 1 preceding AND 1 following
    )
    ORDER BY month
'''
pd.read_sql_query(query, connection)

Unnamed: 0,year,month,revenue,avg3m
0,2020,1,7200,10200.0
1,2020,2,13200,12400.0
2,2020,3,16800,18400.0
3,2020,4,25200,22000.0
4,2020,5,24000,27200.0
5,2020,6,32400,28400.0
6,2020,7,28800,24800.0
7,2020,8,13200,18600.0
8,2020,9,13800,15000.0
9,2020,10,18000,22600.0


In [27]:
# Сравнение с декабрем

# Есть таблица продаж sales. Посчитайте выручку по месяцам для тарифа silver.

# Для каждого месяца дополнительно укажите:

#     выручку за декабрь этого же года (december);
#     процент, который составляет выручка текущего месяца от december (perc).

# Процент округлите до целого.
query = r'''
    SELECT year, month, revenue,
        LAST_VALUE(revenue) OVER w AS december,
        ROUND(revenue * 100 / LAST_VALUE(revenue) OVER w) AS perc
    FROM sales
    WHERE plan = 'silver'
    WINDOW w AS (
        PARTITION BY year
        ORDER BY month
        ROWS BETWEEN unbounded preceding AND unbounded following
    )
    ORDER BY year, month
'''
pd.read_sql_query(query, connection)

Unnamed: 0,year,month,revenue,december,perc
0,2019,1,12000,26400,45.0
1,2019,2,39600,26400,150.0
2,2019,3,24000,26400,90.0
3,2019,4,18000,26400,68.0
4,2019,5,26400,26400,100.0
5,2019,6,32400,26400,122.0
6,2019,7,26400,26400,100.0
7,2019,8,26400,26400,100.0
8,2019,9,15000,26400,56.0
9,2019,10,25200,26400,95.0


In [32]:
# Вклад тарифов

# Есть таблица продаж sales. Посчитайте, 
# какой вклад (в процентах) внес каждый из тарифов в общую выручку за год.

# Процент округлите до целого.
query = r'''
    WITH query_1 AS (
    SELECT year, plan,
        SUM(revenue) AS revenue
    FROM sales
    GROUP BY year, plan
    ORDER BY year, plan
    )
    SELECT year, plan, revenue,
        SUM(revenue) OVER w AS total,
        ROUND(revenue * 100 / SUM(revenue) OVER w) AS perc
    FROM query_1
    WINDOW w AS (
        PARTITION BY year
        ROWS BETWEEN unbounded preceding AND unbounded following
    )
    ORDER BY year, plan
'''
pd.read_sql_query(query, connection)

Unnamed: 0,year,plan,revenue,total,perc
0,2019,gold,252960,722460,35.0
1,2019,platinum,168000,722460,23.0
2,2019,silver,301500,722460,41.0
3,2020,gold,411840,1244940,33.0
4,2020,platinum,249600,1244940,20.0
5,2020,silver,583500,1244940,46.0


In [41]:
query = r'''
    WITH q_1 AS (
    SELECT year, plan,
        SUM(revenue) OVER w AS revenue,
        SUM(revenue) OVER (PARTITION BY year) AS total,
        ROUND(SUM(revenue) OVER w * 100 / SUM(revenue) OVER (PARTITION BY year)) AS perc
    FROM sales
    WINDOW w AS (
        PARTITION BY year, plan
    )
    ORDER BY year, plan
    )
    SELECT year, plan, revenue, total, perc
    FROM q_1
    GROUP BY year, plan, revenue, total, perc
    ORDER BY year, plan
'''
pd.read_sql_query(query, connection)

Unnamed: 0,year,plan,revenue,total,perc
0,2019,gold,252960,722460,35.0
1,2019,platinum,168000,722460,23.0
2,2019,silver,301500,722460,41.0
3,2020,gold,411840,1244940,33.0
4,2020,platinum,249600,1244940,20.0
5,2020,silver,583500,1244940,46.0


In [3]:
# Высокая, средняя и низкая выручка

# Есть таблица продаж sales. Разбейте месяцы 2020 года на три группы по выручке:

#     tile = 1 — высокая,
#     tile = 2 — средняя,
#     tile = 3 — низкая.
query = r'''
    WITH group_plans AS (
    SELECT year, month, 
        SUM(revenue) AS revenue
    FROM sales
    WHERE year = '2020'
    GROUP BY month
    )
    SELECT year, month, revenue,
        NTILE(3) OVER w AS tile
    FROM group_plans
    WINDOW w AS (ORDER BY revenue DESC)
    ORDER BY revenue DESC
'''
pd.read_sql_query(query, connection)

Unnamed: 0,year,month,revenue,tile
0,2020,11,150540,1
1,2020,6,130080,1
2,2020,7,115920,1
3,2020,12,115800,1
4,2020,10,111000,2
5,2020,4,110400,2
6,2020,2,103440,2
7,2020,5,97560,2
8,2020,9,96600,3
9,2020,3,90000,3


In [46]:
# 2020 vs 2019

# Есть таблица продаж sales. Посчитайте выручку по кварталам 2020 года.

# Для каждого квартала дополнительно укажите:

#     выручку за аналогичный квартал 2019 года (prev);
#     процент, который составляет выручка текущего квартала от prev (perc).

# Процент округлите до целого.
query = r'''
    WITH stl AS (
    SELECT year, month, 
        (CASE WHEN year = '2020' THEN SUM(revenue) END) AS revenue,
        LAG(CASE WHEN year = '2019' THEN SUM(revenue) END, 12) OVER () AS prev,
        NTILE(4) OVER w AS quarter
    FROM sales
    GROUP BY year, month
    WINDOW w AS (
        PARTITION BY year
        ORDER BY month
    )
    ORDER BY year, month
    )
    SELECT year, quarter, 
        SUM(revenue) AS revenue,
        SUM(prev) AS prev,
        ROUND(SUM(revenue) * 100 / SUM(prev)) AS perc
    FROM stl
    WHERE year = '2020'
    GROUP BY quarter
    ORDER BY quarter
'''
pd.read_sql_query(query, connection)

Unnamed: 0,year,quarter,revenue,prev,perc
0,2020,1,242040,155040,156.0
1,2020,2,338040,162600,207.0
2,2020,3,287520,204120,140.0
3,2020,4,377340,200700,188.0


In [71]:
# Рейтинг месяцев по продажам

# Есть таблица продаж sales. Составьте рейтинг месяцев 2020 года 
# с точки зрения количества продаж (quantity) по каждому из тарифов. 
# Чем больше подписок тарифа P было продано в месяц M, тем выше место M в рейтинге по тарифу P:
query = r'''
    WITH stl AS (
    SELECT year, month, plan,
        (CASE WHEN plan = 'silver' THEN RANK() OVER w END) AS silver,
        (CASE WHEN plan = 'gold' THEN RANK() OVER w END) AS gold,
        (CASE WHEN plan = 'platinum' THEN RANK() OVER w END) AS platinum
    FROM sales
    WHERE year = '2020'
    GROUP BY year, month, plan
    WINDOW w AS (
        PARTITION BY plan
        ORDER BY SUM(quantity) DESC
    )
    ORDER BY month, plan
    ), stl_2 AS (
    SELECT year, month, plan,
        LEAD(silver, 2) OVER (ORDER BY month, plan) AS silver, 
        gold, 
        LEAD(platinum, 1) OVER (ORDER BY month, plan) AS platinum 
    FROM stl
    ORDER BY month, plan
    )
    SELECT year, month, silver, gold, platinum
    FROM stl_2
    WHERE plan = 'gold'
    ORDER BY month
'''
pd.read_sql_query(query, connection)

Unnamed: 0,year,month,silver,gold,platinum
0,2020,1,12,12,12
1,2020,2,3,8,10
2,2020,3,8,7,8
3,2020,4,8,3,4
4,2020,5,10,6,5
5,2020,6,6,2,2
6,2020,7,7,4,3
7,2020,8,11,9,10
8,2020,9,5,9,9
9,2020,10,4,5,7
