# Cерия тестовых заданий на SQL

Схема описывает товары в магазине.

Основная таблица – `Goods`
- `id` – идентификатор товара
- `name` – имя товара
- `supplier_id` – идентификатор поставщика

Поставщики (таблица `Suppliers`)
- `id` – id поставщика
- `name` – имя поставщика
- `manufacturer_id` – id производителя

Производители (таблица `Manufacturer`)
- `id` – id производителя
- `name` – имя производителя
- `INN` – ИНН производителя
- `location` – локация производителя

Количество товара (таблица `Quantity`)
- `goods_id` – id товара
- `value` – количество товара

Цена товара (таблица `Prices`)
- `id` – id цены
- `goods_id` – id товара
- `value` – значение цены товара
- `discount` – скидка на товар

Требуется найти:
1. Найти самый дорогой товар. Вывести имя товара и его цену
2. Найти товары с нулевым остатком. Вывести имя товара и его цену
3. Найти производителя с самой большой средней ценой за товары. Вывести имя производителя и среднюю стоимость
4. Найти все товары производителей из Москвы. Вывести имена товаров, их цены и имена производителей

In [None]:
import pandas as pd
import numpy as np

In [None]:
def select(query):
  return pd.read_sql(query, con=engine)

## Создание таблиц

In [None]:
from sqlalchemy import create_engine

engine = create_engine(f'postgresql://yvsnplal:t1YniOCU8IWgFrR-hWug_Qk-A9Y3CH1V@mouse.db.elephantsql.com/yvsnplal')
# postgres://yvsnplal:t1YniOCU8IWgFrR-hWug_Qk-A9Y3CH1V@mouse.db.elephantsql.com/yvsnplal

In [None]:
create_queries = [
'''DROP TABLE IF EXISTS Goods;
		
CREATE TABLE Goods (
  id INTEGER,
  name VARCHAR(50),
  supplier_id INTEGER,
  PRIMARY KEY (id)
);''',

'''DROP TABLE IF EXISTS Prices;
		
CREATE TABLE Prices (
  id INTEGER  ,
  goods_id INTEGER ,
  value INTEGER ,
  discount INTEGER ,
  PRIMARY KEY (id)
);''',

'''DROP TABLE IF EXISTS Suppliers;
		
CREATE TABLE Suppliers (
  id INTEGER ,
  name VARCHAR(50) ,
  manufacturer_id INTEGER ,
  PRIMARY KEY (id)
);''',

'''DROP TABLE IF EXISTS Manufacturer;
		
CREATE TABLE Manufacturer (
  id INTEGER ,
  name VARCHAR(50) ,
  INN INTEGER ,
  location VARCHAR(50) ,
  PRIMARY KEY (id)
);''',

'''DROP TABLE IF EXISTS Quantity;
		
CREATE TABLE Quantity (
  goods_id INTEGER ,
  value INTEGER ,
  PRIMARY KEY (goods_id)
);'''
]

In [None]:
with engine.connect() as conn:
    for query in create_queries:
        conn.execute(query)

In [None]:
insert_queries = '''INSERT INTO Goods(id,name,supplier_id) VALUES (1, 'Goods_1', '9');
INSERT INTO Goods(id,name,supplier_id) VALUES (2, 'Goods_2', '8');
INSERT INTO Goods(id,name,supplier_id) VALUES (3, 'Goods_3', '10');
INSERT INTO Goods(id,name,supplier_id) VALUES (4, 'Goods_4', '3');
INSERT INTO Goods(id,name,supplier_id) VALUES (5, 'Goods_5', '3');
INSERT INTO Goods(id,name,supplier_id) VALUES (6, 'Goods_6', '5');
INSERT INTO Goods(id,name,supplier_id) VALUES (7, 'Goods_7', '4');
INSERT INTO Goods(id,name,supplier_id) VALUES (8, 'Goods_8', '2');
INSERT INTO Goods(id,name,supplier_id) VALUES (9, 'Goods_9', '7');
INSERT INTO Goods(id,name,supplier_id) VALUES (10, 'Goods_10', '6');
INSERT INTO Goods(id,name,supplier_id) VALUES (11, 'Goods_11', '1');
INSERT INTO Goods(id,name,supplier_id) VALUES (12, 'Goods_12', '5');
INSERT INTO Goods(id,name,supplier_id) VALUES (13, 'Goods_13', '1');
INSERT INTO Goods(id,name,supplier_id) VALUES (14, 'Goods_14', '10');
INSERT INTO Goods(id,name,supplier_id) VALUES (15, 'Goods_15', '8');
INSERT INTO Goods(id,name,supplier_id) VALUES (16, 'Goods_16', '1');
INSERT INTO Goods(id,name,supplier_id) VALUES (17, 'Goods_17', '4');
INSERT INTO Goods(id,name,supplier_id) VALUES (18, 'Goods_18', '5');
INSERT INTO Goods(id,name,supplier_id) VALUES (19, 'Goods_19', '4');
INSERT INTO Goods(id,name,supplier_id) VALUES (20, 'Goods_20', '9');
INSERT INTO Prices(id,goods_id,value,discount) VALUES (1, '1', '990', 3);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (2, '2', '1960', 5);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (3, '3', '2970', 7);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (4, '4', '3920', 10);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (5, '5', '4950', 3);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (6, '6', '5880', 5);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (7, '7', '6930', 7);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (8, '8', '7840', 10);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (9, '9', '8910', 14);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (10, '10', '9800', 12);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (11, '11', '10890', 5);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (12, '12', '11760', 2);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (13, '13', '12870', 3);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (14, '14', '13720', 4);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (15, '15', '14850', 5);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (16, '16', '15680', 9);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (17, '17', '16830', 8);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (19, '19', '18620', 6);
INSERT INTO Prices(id,goods_id,value,discount) VALUES (20, '20', '19800', 10);
INSERT INTO Quantity(goods_id,value) VALUES (1, '3');
INSERT INTO Quantity(goods_id,value) VALUES (2, '4');
INSERT INTO Quantity(goods_id,value) VALUES (3, '7');
INSERT INTO Quantity(goods_id,value) VALUES (4, '9');
INSERT INTO Quantity(goods_id,value) VALUES (5, '0');
INSERT INTO Quantity(goods_id,value) VALUES (6, '10');
INSERT INTO Quantity(goods_id,value) VALUES (7, '12');
INSERT INTO Quantity(goods_id,value) VALUES (8, '25');
INSERT INTO Quantity(goods_id,value) VALUES (9, '1');
INSERT INTO Quantity(goods_id,value) VALUES (10, '25');
INSERT INTO Quantity(goods_id,value) VALUES (11, '6');
INSERT INTO Quantity(goods_id,value) VALUES (12, '18');
INSERT INTO Quantity(goods_id,value) VALUES (13, '19');
INSERT INTO Quantity(goods_id,value) VALUES (16, '5');
INSERT INTO Quantity(goods_id,value) VALUES (17, '3');
INSERT INTO Quantity(goods_id,value) VALUES (19, '5');
INSERT INTO Quantity(goods_id,value) VALUES (20, '3');
INSERT INTO Manufacturer(id,name,INN,location) VALUES (1, 'Manufacturer_1', '1251925131', 'Moscow');
INSERT INTO Manufacturer(id,name,INN,location) VALUES (2, 'Manufacturer_2', '1251925141', 'Kazan');
INSERT INTO Manufacturer(id,name,INN,location) VALUES (3, 'Manufacturer_3', '1251925151', 'Vladivostok');
INSERT INTO Manufacturer(id,name,INN,location) VALUES (4, 'Manufacturer_4', '1251925161', 'Saratov');
INSERT INTO Manufacturer(id,name,INN,location) VALUES (5, 'Manufacturer_5', '1251925171', 'Moscow');
INSERT INTO Manufacturer(id,name,INN,location) VALUES (6, 'Manufacturer_6', '1251925181', 'Kazan');
INSERT INTO Manufacturer(id,name,INN,location) VALUES (7, 'Manufacturer_7', '1251925191', 'Vladivostok');
INSERT INTO Manufacturer(id,name,INN,location) VALUES (8, 'Manufacturer_8', '1251925201', 'Saratov');
INSERT INTO Manufacturer(id,name,INN,location) VALUES (9, 'Manufacturer_9', '1251925211', 'Moscow');
INSERT INTO Manufacturer(id,name,INN,location) VALUES (10, 'Manufacturer_10', '1251925221', 'Kazan');
INSERT INTO Manufacturer(id,name,INN,location) VALUES (11, 'Manufacturer_11', '1251925231', 'Vladivostok');
INSERT INTO Manufacturer(id,name,INN,location) VALUES (12, 'Manufacturer_12', '1251925241', 'Saratov');
INSERT INTO Suppliers(id,name,manufacturer_id) VALUES (1, 'Supplier_1', '1');
INSERT INTO Suppliers(id,name,manufacturer_id) VALUES (2, 'Supplier_2', '2');
INSERT INTO Suppliers(id,name,manufacturer_id) VALUES (3, 'Supplier_3', '3');
INSERT INTO Suppliers(id,name,manufacturer_id) VALUES (4, 'Supplier_4', '4');
INSERT INTO Suppliers(id,name,manufacturer_id) VALUES (5, 'Supplier_5', '5');
INSERT INTO Suppliers(id,name,manufacturer_id) VALUES (6, 'Supplier_6', '8');
INSERT INTO Suppliers(id,name,manufacturer_id) VALUES (7, 'Supplier_7', '9');
INSERT INTO Suppliers(id,name,manufacturer_id) VALUES (8, 'Supplier_8', '10');
INSERT INTO Suppliers(id,name,manufacturer_id) VALUES (9, 'Supplier_9', '12');
INSERT INTO Suppliers(id,name,manufacturer_id) VALUES (10, 'Supplier_10', '7');
INSERT INTO Suppliers(id,name,manufacturer_id) VALUES (11, 'Supplier_11', '7');'''.split('\n')

In [None]:
with engine.connect() as conn:
    for query in insert_queries:
        conn.execute(query)

**1. Найти самый дорогой товар. Вывести имя товара и его цену**

In [None]:
query = '''
SELECT g.name, p.value AS price
FROM Goods AS g
JOIN Prices AS p
  ON g.id = p.goods_id
ORDER BY 2 DESC
LIMIT 1;
'''
select(query)

Unnamed: 0,name,price
0,Goods_20,19800


**2. Найти товары с нулевым остатком. Вывести имя товара и его цену**

In [None]:
query = '''
SELECT g.name, p.value AS price
FROM Goods AS g
LEFT JOIN Quantity AS q
ON g.id = q.goods_id
JOIN Prices AS p
  ON g.id = p.goods_id
WHERE COALESCE(q.value, 0) = 0;
'''
select(query)

Unnamed: 0,name,price
0,Goods_5,4950
1,Goods_14,13720
2,Goods_15,14850


**3. Найти производителя с самой большой средней ценой за товары. Вывести имя производителя и среднюю стоимость**



In [None]:
query = '''
SELECT m.name AS manufacturer, AVG(p.value) AS avg_price
FROM Goods AS g
JOIN Prices AS p
  ON g.id = p.goods_id
JOIN Manufacturer AS m
  ON g.supplier_id = m.id
GROUP BY 1
ORDER BY 2 DESC
LIMIT 1;
'''
select(query)

Unnamed: 0,manufacturer,avg_price
0,Manufacturer_4,14126.666667


**4. Найти все товары производителей из Москвы. Вывести имена товаров, их цены и имена производителей**

In [None]:
query = '''
SELECT
  g.name
  , p.value AS price
  , m.name AS manufacturer
FROM Goods AS g
JOIN Prices AS p
  ON g.id = p.goods_id
JOIN Manufacturer AS m
  ON g.supplier_id = m.id
WHERE m.location = 'Moscow'
'''
select(query)

Unnamed: 0,name,price,manufacturer
0,Goods_1,990,Manufacturer_9
1,Goods_6,5880,Manufacturer_5
2,Goods_11,10890,Manufacturer_1
3,Goods_12,11760,Manufacturer_5
4,Goods_13,12870,Manufacturer_1
5,Goods_16,15680,Manufacturer_1
6,Goods_20,19800,Manufacturer_9


## NORTHWIND

1. Найдите цену наиболее дорогого товара в каждой категории, заказанного за лето 1996 года. Вывести название категории и цену.
2. Найдите средний чек повторного заказа.
3. Вывести название компаний (покупателей), которые заказывали более 20% товаров со скидками? (англ. discount).
4. Найдите 3 самых дорогих групп товаров по категориям, которые заказывали в течение 1996 года. Вывести название категории и общую стоимость заказанных товаров.
5. Найдите самый популярный город, в который отправляли заказы
6. Вывести первые 3 заказа каждого покупателя. Нужно ID покупателя, ID заказа, его дату и порядковый номер. Если у покупателя меньше 3 заказов, то для него выводить ничего не требуется.

В качестве БД используется демонстрационная база https://github.com/pthom/northwind_psql

<img src="https://github.com/pthom/northwind_psql/raw/master/ER.png">

In [None]:
orders = select('SELECT * FROM orders;')
orders.head()

Unnamed: 0,order_id,customer_id,employee_id,order_date,required_date,shipped_date,ship_via,freight,ship_name,ship_address,ship_city,ship_region,ship_postal_code,ship_country
0,10248,VINET,5,1996-07-04,1996-08-01,1996-07-16,3,32.38,Vins et alcools Chevalier,59 rue de l'Abbaye,Reims,,51100,France
1,10249,TOMSP,6,1996-07-05,1996-08-16,1996-07-10,1,11.61,Toms Spezialitäten,Luisenstr. 48,Münster,,44087,Germany
2,10250,HANAR,4,1996-07-08,1996-08-05,1996-07-12,2,65.83,Hanari Carnes,"Rua do Paço, 67",Rio de Janeiro,RJ,05454-876,Brazil
3,10251,VICTE,3,1996-07-08,1996-08-05,1996-07-15,1,41.34,Victuailles en stock,"2, rue du Commerce",Lyon,,69004,France
4,10252,SUPRD,4,1996-07-09,1996-08-06,1996-07-11,2,51.3,Suprêmes délices,"Boulevard Tirou, 255",Charleroi,,B-6000,Belgium


In [None]:
order_details = select('SELECT * FROM order_details;')
order_details.head()

Unnamed: 0,order_id,product_id,unit_price,quantity,discount
0,10248,11,14.0,12,0.0
1,10248,42,9.8,10,0.0
2,10248,72,34.8,5,0.0
3,10249,14,18.6,9,0.0
4,10249,51,42.4,40,0.0


In [None]:
categories = select('SELECT * FROM categories;')
categories.head()

Unnamed: 0,category_id,category_name,description,picture
0,1,Beverages,"Soft drinks, coffees, teas, beers, and ales",[]
1,2,Condiments,"Sweet and savory sauces, relishes, spreads, an...",[]
2,3,Confections,"Desserts, candies, and sweet breads",[]
3,4,Dairy Products,Cheeses,[]
4,5,Grains/Cereals,"Breads, crackers, pasta, and cereal",[]


In [None]:
customers = select('SELECT * FROM customers;')
customers.head()

Unnamed: 0,customer_id,company_name,contact_name,contact_title,address,city,region,postal_code,country,phone,fax
0,ALFKI,Alfreds Futterkiste,Maria Anders,Sales Representative,Obere Str. 57,Berlin,,12209,Germany,030-0074321,030-0076545
1,ANATR,Ana Trujillo Emparedados y helados,Ana Trujillo,Owner,Avda. de la Constitución 2222,México D.F.,,05021,Mexico,(5) 555-4729,(5) 555-3745
2,ANTON,Antonio Moreno Taquería,Antonio Moreno,Owner,Mataderos 2312,México D.F.,,05023,Mexico,(5) 555-3932,
3,AROUT,Around the Horn,Thomas Hardy,Sales Representative,120 Hanover Sq.,London,,WA1 1DP,UK,(171) 555-7788,(171) 555-6750
4,BERGS,Berglunds snabbköp,Christina Berglund,Order Administrator,Berguvsvägen 8,Luleå,,S-958 22,Sweden,0921-12 34 65,0921-12 34 67


In [None]:
products = select('SELECT * FROM products;')
products.head()

Unnamed: 0,product_id,product_name,supplier_id,category_id,quantity_per_unit,unit_price,units_in_stock,units_on_order,reorder_level,discontinued
0,1,Chai,8,1,10 boxes x 30 bags,18.0,39,0,10,1
1,2,Chang,1,1,24 - 12 oz bottles,19.0,17,40,25,1
2,3,Aniseed Syrup,1,2,12 - 550 ml bottles,10.0,13,70,25,0
3,4,Chef Anton's Cajun Seasoning,2,2,48 - 6 oz jars,22.0,53,0,0,0
4,5,Chef Anton's Gumbo Mix,2,2,36 boxes,21.35,0,0,0,1


__Цена наиболее дорогого товара в каждой категории, заказанного за лето 1996 года.__

In [None]:
query = '''
  SELECT
    DISTINCT cat.category_name
    , MAX(od.unit_price) OVER(PARTITION BY cat.category_id) max_price
  FROM orders AS o
  JOIN order_details AS od
    ON od.order_id = o.order_id
  JOIN products AS pr
    ON od.product_id = pr.product_id
  JOIN categories AS cat
    ON pr.category_id = cat.category_id
  WHERE order_date >= '1996-06-01'
    AND order_date <= '1996-08-31';
'''
select(query)

Unnamed: 0,category_name,max_price
3,Grains/Cereals,30.4
7,Condiments,35.1
6,Beverages,36.8
1,Produce,42.4
0,Dairy Products,44.0
2,Seafood,50.0
5,Confections,64.8
4,Meat/Poultry,99.0


In [None]:
orders['order_date'] = pd.to_datetime(orders['order_date'])
orders_summer_1996 = orders[orders['order_date'].between('1996-06-01',
                                                         '1996-08-31')]
(
    orders_summer_1996[['order_id']]
      .merge(order_details[
          ['order_id', 'product_id', 'unit_price']
      ], on='order_id')
      .merge(products[['product_id', 'category_id']], on='product_id')
      .merge(categories[['category_id',	'category_name']], on='category_id')
      .groupby('category_name')['unit_price'].max()
      .to_frame('max_price').reset_index()
)

Unnamed: 0,category_name,max_price
0,Beverages,36.8
1,Condiments,35.1
2,Confections,64.8
3,Dairy Products,44.0
4,Grains/Cereals,30.4
5,Meat/Poultry,99.0
6,Produce,42.4
7,Seafood,50.0


**Средний чек повторного заказа**

In [None]:
query = '''
  WITH orders_by_customer AS (
    SELECT
      order_id
      , ROW_NUMBER() OVER(PARTITION BY customer_id ORDER BY order_date) AS rn
    FROM orders
  ),
  sum_by_rep_order AS (
    SELECT
      oc.order_id
      , SUM((od.unit_price * od.quantity) - od.discount) AS total_by_rep_chek
    FROM orders_by_customer AS oc
    JOIN order_details AS od
      ON od.order_id = oc.order_id
    WHERE rn = 2
    GROUP BY 1
  )
  
  SELECT AVG(total_by_rep_chek) AS avg_rep_check
  FROM sum_by_rep_order;
'''
select(query)

Unnamed: 0,avg_rep_check
0,1194.24773


In [None]:
query = '''
  SELECT
    AVG(total_by_rep_chek) AS avg_rep_check
  FROM (  
    SELECT
      o.rep_id
      , SUM((od.unit_price * od.quantity) - od.discount) AS total_by_rep_chek
    FROM (
      SELECT
        DISTINCT NTH_VALUE(order_id, 2) OVER(PARTITION BY customer_id
                                           ORDER BY order_date) AS rep_id
      FROM orders
    ) AS o
    JOIN order_details AS od
      ON od.order_id = o.rep_id
      AND o.rep_id IS NOT NULL
    GROUP BY o.rep_id
  ) AS rep;
'''
select(query)

Unnamed: 0,avg_rep_check
0,1194.24773


In [None]:
def nth_value(x, n=1):
    try:
        return x.sort_values('order_date')['order_id'].iloc[n]
    except:
        return np.nan

(
    orders.groupby('customer_id')
          .apply(nth_value).dropna().astype(int).to_frame('order_id')
          .merge(order_details, on='order_id')
          .assign(total=lambda x: (x['unit_price'] * x['quantity']) - x['discount'])
          .groupby('order_id')['total'].sum().mean().round(5)
)

1194.24773

**Компании, которые заказывали более 20% товаров со скидками**

In [None]:
# Вариант 1
query = '''
SELECT company_name FROM (
    SELECT
      DISTINCT cu.company_name
      , SUM(CASE WHEN od.discount > 0 THEN 1 ELSE 0 END) OVER w::NUMERIC
                                              / COUNT(*) OVER w discount_ratio
    FROM orders o
    JOIN order_details od
      ON od.order_id = o.order_id
    JOIN customers cu
      ON cu.customer_id = o.customer_id
    WINDOW w AS (PARTITION BY cu.company_name)
) t
WHERE discount_ratio >= 0.2
'''
select(query)

Unnamed: 0,company_name
0,LINO-Delicateses
1,Ottilies Käseladen
2,Suprêmes délices
3,La maison d'Asie
4,Antonio Moreno Taquería
5,QUICK-Stop
6,Furia Bacalhau e Frutos do Mar
7,Que Delícia
8,Vaffeljernet
9,Tradição Hipermercados


In [None]:
# Вариант 2

query = '''
  SELECT 
    company_name
  FROM (
    SELECT
      cu.company_name
      , PERCENTILE_CONT(0.8) WITHIN GROUP(ORDER BY od.discount) perc_80
    FROM orders o
    JOIN order_details od
      ON od.order_id = o.order_id
    JOIN customers cu
      ON cu.customer_id = o.customer_id
    GROUP BY cu.company_name 
  ) AS per
  WHERE perc_80 > 0;
'''
select(query)

Unnamed: 0,company_name
0,Alfreds Futterkiste
1,Antonio Moreno Taquería
2,Around the Horn
3,Berglunds snabbköp
4,Blondesddsl père et fils
5,Bólido Comidas preparadas
6,Bon app'
7,Bottom-Dollar Markets
8,Chop-suey Chinese
9,Die Wandernde Kuh


In [None]:
# Имплементация на python

comp = (
    orders[['order_id', 'customer_id']]
    .merge(order_details[['order_id', 'discount']], on='order_id')
    .merge(customers[['customer_id', 'company_name']], on='customer_id')
    .groupby('company_name', as_index=False)['discount'].quantile(0.8)
)

comp[comp['discount'] > 0]['company_name']

0                Alfreds Futterkiste
2            Antonio Moreno Taquería
3                    Around the Horn
5                 Berglunds snabbköp
7           Blondesddsl père et fils
8                           Bon app'
9              Bottom-Dollar Markets
10         Bólido Comidas preparadas
13                 Chop-suey Chinese
16                 Die Wandernde Kuh
20                      Ernst Handel
21                Familia Arquibaldo
23                    Folk och fä HB
26                    Frankenversand
27    Furia Bacalhau e Frutos do Mar
30               Godos Cocina Típica
31               Gourmet Lanchonetes
32           Great Lakes Food Market
33                  HILARION-Abastos
34                     Hanari Carnes
36      Hungry Owl All-Night Grocers
38                   Königlich Essen
39                 LILA-Supermercado
40                  LINO-Delicateses
42                  La maison d'Asie
45               Lehmanns Marktstand
46                 Let's Stop N Shop
4

**3 самых дорогих групп товаров по категориям, которые заказывали в течение 1996 года**

In [None]:
query = '''
  SELECT
    DISTINCT category_name, rank
  FROM (
    SELECT
      cat.category_name
      , pr.product_name
      , od.unit_price
      , DENSE_RANK() OVER(ORDER BY od.unit_price DESC) AS rank
    FROM orders AS o
    JOIN order_details AS od
      ON od.order_id = o.order_id
    JOIN products pr
      ON pr.product_id = od.product_id
    JOIN categories cat
      ON cat.category_id = pr.category_id
    WHERE order_date >= '1996-01-01'
      AND order_date <= '1996-12-31'
  ) price_rank
  WHERE rank <= 3
  ORDER BY rank;
'''
select(query)

Unnamed: 0,category_name,rank
0,Beverages,1
1,Meat/Poultry,2
2,Confections,3


In [None]:
orders['order_date'] = pd.to_datetime(orders['order_date'])
orders_1996 = orders[orders['order_date'].between('1996-01-01', '1996-12-31')]
(
    orders_1996[['order_id']]
      .merge(order_details[
          ['order_id', 'product_id', 'unit_price']
      ], on='order_id')
      .merge(products[['product_id', 'category_id']], on='product_id')
      .merge(categories[['category_id',	'category_name']], on='category_id')
      .groupby('category_name')['unit_price'].max().nlargest(3)
)

category_name
Beverages       210.8
Meat/Poultry     99.0
Confections      64.8
Name: unit_price, dtype: float64

**Самый популярный город, в который отправляли заказы**

In [None]:
query = '''
  SELECT ship_city
  FROM (
    SELECT
      o.ship_country
      , o.ship_city
      , COUNT(*) delivery_counts
    FROM orders o
    GROUP BY 1, 2
    ORDER BY 3 DESC
    ) dc
  LIMIT 1;
'''
select(query)

Unnamed: 0,ship_city
0,Rio de Janeiro


In [None]:
orders.groupby(['ship_city'])['order_id'].count().nlargest(1)

ship_city
Rio de Janeiro    34
Name: order_id, dtype: int64

**Первые 3 заказа каждого покупателя**

In [None]:
query = '''
  SELECT
    customer_id
    , order_id
    , order_date
    , rn AS order_num
  FROM (
    SELECT
      order_id
      , customer_id
      , order_date
      , ROW_NUMBER() OVER(PARTITION BY customer_id ORDER BY order_date) rn
      , COUNT(*) OVER(PARTITION BY customer_id) order_counts
    FROM orders
  ) o
  WHERE rn <= 3
    AND order_counts >= 3;
'''
select(query)

Unnamed: 0,customer_id,order_id,order_date,order_num
0,ALFKI,10643,1997-08-25,1
1,ALFKI,10692,1997-10-03,2
2,ALFKI,10702,1997-10-13,3
3,ANATR,10308,1996-09-18,1
4,ANATR,10625,1997-08-08,2
...,...,...,...,...
253,WILMK,10673,1997-09-18,2
254,WILMK,10695,1997-10-07,3
255,WOLZA,10374,1996-12-05,1
256,WOLZA,10611,1997-07-25,2


In [None]:
orders['order_num'] = orders.sort_values(['order_date']) \
                            .groupby(['customer_id']) \
                            .cumcount() + 1

In [None]:
(
    orders.groupby(['customer_id'])[
        ['customer_id', 'order_id', 'order_date', 'order_num']
    ].filter(lambda x: len(x) >= 3)
    .groupby('customer_id', group_keys=False)
    .apply(lambda x: x.sort_values('order_date')[:3])
)

Unnamed: 0,customer_id,order_id,order_date,order_num
395,ALFKI,10643,1997-08-25,1
444,ALFKI,10692,1997-10-03,2
454,ALFKI,10702,1997-10-13,3
60,ANATR,10308,1996-09-18,1
377,ANATR,10625,1997-08-08,2
...,...,...,...,...
425,WILMK,10673,1997-09-18,2
447,WILMK,10695,1997-10-07,3
126,WOLZA,10374,1996-12-05,1
363,WOLZA,10611,1997-07-25,2
