In [1]:
import pandas as pd
import numpy as np

#### Подключение к БД и заливка данных

In [2]:
import sqlalchemy
import pyodbc
import warnings
# NOTE(review): this silences every warning for the rest of the session
# (presumably to hide pandas' notice about raw DBAPI connections passed to
# read_sql) — consider a narrower filter.
warnings.filterwarnings('ignore')

In [3]:
# Shared ODBC connection used by every cell below. It goes through the
# 'TestDB' DSN with Trusted_Connection=yes, so no credentials are embedded here.
conn = pyodbc.connect('DSN=TestDB;Trusted_Connection=yes;')

In [4]:
def select(sql):
  """Run `sql` over the notebook-wide `conn` and return the rows as a DataFrame."""
  result = pd.read_sql(sql, conn)
  return result

# 6. Джойны

In [5]:
# Demo users: three people, ids 1..3.
users = pd.DataFrame(
    {
        'id': [1, 2, 3],
        'name': ['gleb', 'jon snow', 'tyrion'],
    }
)

In [6]:
# Demo items: user 1 owns one item, user 3 owns two, user 2 owns none.
items = pd.DataFrame(
    {
        'user_id': [1, 3, 3],
        'item_name': ['hleb', 'gold', 'wine'],
        'value': [5, 100, 20],
    }
)

In [7]:
# Rebuild the demo `users` table from the `users` DataFrame, then display it.
sql = '''
drop table if exists users;
CREATE TABLE users (
    id        INTEGER,
    name      VARCHAR(max)
);
'''
cur = conn.cursor()
try:
    # Drop-and-create makes the cell safe to re-run.
    cur.execute(sql)
    conn.commit()

    # Plain Python tuples in the same column order as the INSERT statement.
    user_rows = list(users[['id', 'name']].itertuples(index=False, name=None))
    if user_rows:  # executemany needs at least one parameter row
        cur.executemany(
            '''INSERT INTO users([id],[name]) values (?,?)''',
            user_rows,
        )
    conn.commit()
except Exception:
    # Do not leave a half-finished transaction on the shared connection.
    conn.rollback()
    raise
finally:
    # Release the cursor even when the DDL or an insert fails.
    cur.close()

sql = '''select t.* from users t'''
select(sql)

Unnamed: 0,id,name
0,1,gleb
1,2,jon snow
2,3,tyrion


In [8]:
# Rebuild the demo `items` table from the `items` DataFrame, then display it.
sql = '''
drop table if exists items;
CREATE TABLE items (
    user_id        INTEGER,
    item_name      VARCHAR(max), 
    value          MONEY
);
'''
cur = conn.cursor()
try:
    # Drop-and-create makes the cell safe to re-run.
    cur.execute(sql)
    conn.commit()

    # Plain Python tuples in the same column order as the INSERT statement.
    item_rows = list(
        items[['user_id', 'item_name', 'value']].itertuples(index=False, name=None)
    )
    if item_rows:  # executemany needs at least one parameter row
        cur.executemany(
            '''INSERT INTO items([user_id],[item_name],[value]) values (?,?,?)''',
            item_rows,
        )
    conn.commit()
except Exception:
    # Do not leave a half-finished transaction on the shared connection.
    conn.rollback()
    raise
finally:
    # Release the cursor even when the DDL or an insert fails.
    cur.close()

sql = '''select t.* from items t'''
select(sql)

Unnamed: 0,user_id,item_name,value
0,1,hleb,5.0
1,3,gold,100.0
2,3,wine,20.0


## 2. Лефт и иннер джойн

In [9]:
# LEFT JOIN: every row of `users` is kept; item columns are NULL for users
# that have no matching row in `items`.
sql = '''select 
t.*, i.item_name, i.value, i.user_id 
from users t
left join items i on t.id = i.user_id
'''

In [10]:
# Show the LEFT JOIN result: jon snow has no items, so his item columns are empty.
select(sql)

Unnamed: 0,id,name,item_name,value,user_id
0,1,gleb,hleb,5.0,1.0
1,2,jon snow,,,
2,3,tyrion,gold,100.0,3.0
3,3,tyrion,wine,20.0,3.0


In [11]:
# Same LEFT JOIN, but the WHERE condition on the right-hand table removes the
# NULL-padded rows, so only users that own an item remain.
sql = '''select 
t.*, i.item_name, i.value, i.user_id 
from users t
left join items i on t.id = i.user_id
where i.item_name is not null
'''

In [12]:
# Only the users with items are left after the NOT NULL filter.
select(sql)

Unnamed: 0,id,name,item_name,value,user_id
0,1,gleb,hleb,5.0,1
1,3,tyrion,gold,100.0,3
2,3,tyrion,wine,20.0,3


In [13]:
# Plain JOIN (inner join): returns only users that have at least one item.
sql = '''select 
t.*, i.item_name 
from users t
join items i on t.id = i.user_id
'''

In [14]:
# Same three user/item pairs as the filtered LEFT JOIN.
select(sql)

Unnamed: 0,id,name,item_name
0,1,gleb,hleb
1,3,tyrion,gold
2,3,tyrion,wine


## 3. Агрегируй перед джойном!

In [15]:
# Demo users again, now with a per-user `victory` counter (total 13).
users = pd.DataFrame(
    {
        'id': [1, 2, 3],
        'name': ['gleb', 'jon snow', 'tyrion'],
        'victory': [2, 10, 1],
    }
)

In [16]:
# Rebuild `users` with the extra `victory` column, then display it.
sql = '''
drop table if exists users;
CREATE TABLE users (
    id        INTEGER,
    name      VARCHAR(max),
    victory   INTEGER
);
'''
cur = conn.cursor()
try:
    # Drop-and-create makes the cell safe to re-run.
    cur.execute(sql)
    conn.commit()

    # Plain Python tuples in the same column order as the INSERT statement.
    user_rows = list(
        users[['id', 'name', 'victory']].itertuples(index=False, name=None)
    )
    if user_rows:  # executemany needs at least one parameter row
        cur.executemany(
            '''INSERT INTO users([id],[name],[victory]) values (?,?,?)''',
            user_rows,
        )
    conn.commit()
except Exception:
    # Do not leave a half-finished transaction on the shared connection.
    conn.rollback()
    raise
finally:
    # Release the cursor even when the DDL or an insert fails.
    cur.close()

sql = '''select t.* from users t'''
select(sql)

Unnamed: 0,id,name,victory
0,1,gleb,2
1,2,jon snow,10
2,3,tyrion,1


In [17]:
# Join `users` (now carrying `victory`) to `items` without aggregating first:
# a user with several items is repeated once per item.
sql = '''select t.*, 
i.item_name, i.value, i.user_id 
from users t
left join items i on t.id = i.user_id
'''

In [18]:
# Keep the joined rows in `t` so the next cell can total `victory` over them.
t = select(sql)
t

Unnamed: 0,id,name,victory,item_name,value,user_id
0,1,gleb,2,hleb,5.0,1.0
1,2,jon snow,10,,,
2,3,tyrion,1,gold,100.0,3.0
3,3,tyrion,1,wine,20.0,3.0


In [19]:
# Control sum on the joined frame: 14, because tyrion's row (victory = 1)
# appears twice, once per item.
t['victory'].sum()

14

In [20]:
# Control sum taken straight from `users`. The aggregate has no alias, so the
# result column comes back without a name.
sql = '''select sum(t.victory) from users t'''

In [21]:
# Total taken directly from `users`: 13 (the total over the joined rows was 14).
select(sql)

Unnamed: 0,Unnamed: 1
0,13


#### После джойнов:
1. Проверяй контрольную сумму
2. Проверяй дубликаты

In [22]:
# Inner-join variant of the same un-aggregated join.
sql = '''select t.*, i.item_name, i.value, i.user_id 
from users t
join items i on t.id = i.user_id
'''

In [23]:
# tyrion still appears twice; jon snow is gone because he has no items.
select(sql)

Unnamed: 0,id,name,victory,item_name,value,user_id
0,1,gleb,2,hleb,5.0,1
1,3,tyrion,1,gold,100.0,3
2,3,tyrion,1,wine,20.0,3


#### Как правильно:

In [24]:
# Join and aggregate in one query: GROUP BY collapses the result to one row per
# user. count(i.item_name) counts only non-NULL item names, and coalesce turns
# the NULL sum of item-less users into 0.
sql = '''select 
t.id, t.name, t.victory,

count(i.item_name) as item_cnt,
coalesce(sum(i.value),0) as value_sum

from users t
left join items i on t.id = i.user_id
group by t.id, t.name, t.victory
'''

In [25]:
# One row per user; jon snow shows item_cnt 0 and value_sum 0.
select(sql)

Unnamed: 0,id,name,victory,item_cnt,value_sum
0,1,gleb,2,1,5.0
1,2,jon snow,10,0,0.0
2,3,tyrion,1,2,120.0


#### Надо перед джойном сгруппировать items:

In [26]:
# Pre-aggregate `items` to one row per user_id (item count and total value).
sql = '''select t.user_id, 
count(t.item_name) as item_cnt,
sum(value) as value_sum from items t
group by t.user_id'''

In [27]:
# Only user_ids present in `items` (1 and 3) appear.
select(sql)

Unnamed: 0,user_id,item_cnt,value_sum
0,1,1,5.0
1,3,2,120.0


In [28]:
# Aggregate `items` in a CTE first, then LEFT JOIN that one-row-per-user result
# to `users`, so no `users` row is duplicated. coalesce supplies 0 for users
# that are missing from items_agg.
sql = '''with 
items_agg as (
    select t.user_id, 
    count(t.item_name) as item_cnt,
    sum(value) as value_sum 
    from items t
    group by t.user_id
)
select t.id, t.name, t.victory,

coalesce(i.item_cnt,0) as item_cnt,
coalesce(i.value_sum,0) as value_sum

from users t

left join items_agg i on t.id = i.user_id
'''

In [29]:
# Three rows, one per user: the same figures as the join + GROUP BY query.
select(sql)

Unnamed: 0,id,name,victory,item_cnt,value_sum
0,1,gleb,2,1,5.0
1,2,jon snow,10,0,0.0
2,3,tyrion,1,2,120.0


## 4. Как не надо делать джойны

#### всегда надо писать псевдонимы:

In [30]:
# Same CTE + LEFT JOIN, but item_cnt / value_sum in the outer SELECT are written
# without the `i.` prefix.
# NOTE(review): this still runs because the names are unambiguous here;
# presumably kept as the "do not do this" example for this section.
sql = '''with 
items_agg as (
    select t.user_id, 
    count(t.item_name) as item_cnt,
    sum(value) as value_sum 
    from items t
    group by t.user_id
)
select t.id, t.name, t.victory,

coalesce(item_cnt,0) as item_cnt,
coalesce(value_sum,0) as value_sum

from users t

left join items_agg i on t.id = i.user_id
'''

In [31]:
# The result is identical to the query that prefixes the columns with `i.`.
select(sql)

Unnamed: 0,id,name,victory,item_cnt,value_sum
0,1,gleb,2,1,5.0
1,2,jon snow,10,0,0.0
2,3,tyrion,1,2,120.0


## 5. Никогда не используй right join!

In [32]:
# Back to the two-column users frame (no `victory`) for the right/full join demos.
users = pd.DataFrame(
    {
        'id': [1, 2, 3],
        'name': ['gleb', 'jon snow', 'tyrion'],
    }
)

In [33]:
# Items now include 'sword', owned by user_id 4, which has no row in `users`.
items = pd.DataFrame(
    {
        'user_id': [1, 3, 3, 4],
        'item_name': ['hleb', 'gold', 'wine', 'sword'],
        'value': [5, 100, 20, 50],
    }
)

In [34]:
# Rebuild the two-column `users` table from the `users` DataFrame, then display it.
sql = '''
drop table if exists users;
CREATE TABLE users (
    id        INTEGER,
    name      VARCHAR(max)
);
'''
cur = conn.cursor()
try:
    # Drop-and-create makes the cell safe to re-run.
    cur.execute(sql)
    conn.commit()

    # Plain Python tuples in the same column order as the INSERT statement.
    user_rows = list(users[['id', 'name']].itertuples(index=False, name=None))
    if user_rows:  # executemany needs at least one parameter row
        cur.executemany(
            '''INSERT INTO users([id],[name]) values (?,?)''',
            user_rows,
        )
    conn.commit()
except Exception:
    # Do not leave a half-finished transaction on the shared connection.
    conn.rollback()
    raise
finally:
    # Release the cursor even when the DDL or an insert fails.
    cur.close()

sql = '''select t.* from users t'''
select(sql)

Unnamed: 0,id,name
0,1,gleb
1,2,jon snow
2,3,tyrion


In [35]:
# Rebuild the `items` table from the four-row `items` DataFrame, then display it.
sql = '''
drop table if exists items;
CREATE TABLE items (
    user_id        INTEGER,
    item_name      VARCHAR(max), 
    value          MONEY
);
'''
cur = conn.cursor()
try:
    # Drop-and-create makes the cell safe to re-run.
    cur.execute(sql)
    conn.commit()

    # Plain Python tuples in the same column order as the INSERT statement.
    item_rows = list(
        items[['user_id', 'item_name', 'value']].itertuples(index=False, name=None)
    )
    if item_rows:  # executemany needs at least one parameter row
        cur.executemany(
            '''INSERT INTO items([user_id],[item_name],[value]) values (?,?,?)''',
            item_rows,
        )
    conn.commit()
except Exception:
    # Do not leave a half-finished transaction on the shared connection.
    conn.rollback()
    raise
finally:
    # Release the cursor even when the DDL or an insert fails.
    cur.close()

sql = '''select t.* from items t'''
select(sql)

Unnamed: 0,user_id,item_name,value
0,1,hleb,5.0
1,3,gold,100.0
2,3,wine,20.0
3,4,sword,50.0


In [36]:
# users LEFT JOIN items: every user is kept; items whose user_id has no
# matching user are not returned.
sql = '''select t.*, i.* 
from users t
left join items i on t.id = i.user_id
'''

In [37]:
# All three users are listed; the sword owned by user_id 4 does not appear.
select(sql)

Unnamed: 0,id,name,user_id,item_name,value
0,1,gleb,1.0,hleb,5.0
1,2,jon snow,,,
2,3,tyrion,3.0,gold,100.0
3,3,tyrion,3.0,wine,20.0


In [38]:
# Sides swapped: items LEFT JOIN users keeps every item, including the one whose
# user_id has no row in `users`.
sql = '''select t.*, u.* 
from items t 
left join users u on t.user_id = u.id
'''

In [39]:
# All four items are listed; sword has empty user columns because user 4 is not in `users`.
select(sql)

Unnamed: 0,user_id,item_name,value,id,name
0,1,hleb,5.0,1.0,gleb
1,3,gold,100.0,3.0,tyrion
2,3,wine,20.0,3.0,tyrion
3,4,sword,50.0,,


In [40]:
# RIGHT JOIN keeps every row of `items` (the right-hand table) — the same rows
# that `items left join users` returns, with the user columns listed first.
sql = '''select t.*, i.* 
from users t
right join items i on t.id = i.user_id'''

In [41]:
# Same four rows as `items left join users`, only with the user columns first.
select(sql)

Unnamed: 0,id,name,user_id,item_name,value
0,1.0,gleb,1,hleb,5.0
1,3.0,tyrion,3,gold,100.0
2,3.0,tyrion,3,wine,20.0
3,,,4,sword,50.0


## 6. Full join

In [42]:
# FULL JOIN keeps unmatched rows from both `users` and `items`.
sql = '''select t.*, i.* 
from users t
full join items i on t.id = i.user_id'''

In [43]:
# Five rows: both jon snow (no items) and sword (no user) are kept.
select(sql)

Unnamed: 0,id,name,user_id,item_name,value
0,1.0,gleb,1.0,hleb,5.0
1,2.0,jon snow,,,
2,3.0,tyrion,3.0,gold,100.0
3,3.0,tyrion,3.0,wine,20.0
4,,,4.0,sword,50.0


Если вдруг не можешь вспомнить, как делать full join (да и вообще что-либо), — всегда гугли.  
https://stackoverflow.com/questions/1923259/full-outer-join-with-sqlite

#### имитация full join:

In [44]:
# FULL JOIN emulation for engines that lack it:
#   1) users LEFT JOIN items  -> all users, matched or not;
#   2) items LEFT JOIN users, restricted to items WITHOUT a user
#      (anti-join via `u.id is null`), so matched pairs are not repeated.
# UNION ALL is used instead of UNION: plain UNION removes duplicate rows, which
# would silently collapse genuinely identical joined rows (e.g. a user owning
# two identical items) and make the result differ from a real FULL JOIN.
# ORDER BY only fixes the display order (NULL ids sort first).
sql = '''select t.*, i.* 
from users t
left join items i on t.id = i.user_id
union all
select u.*, t.* 
from items t 
left join users u on t.user_id = u.id
where u.id is null
order by id, item_name
'''

In [45]:
# Same five rows as the native FULL JOIN.
select(sql)

Unnamed: 0,id,name,user_id,item_name,value
0,,,4.0,sword,50.0
1,1.0,gleb,1.0,hleb,5.0
2,2.0,jon snow,,,
3,3.0,tyrion,3.0,gold,100.0
4,3.0,tyrion,3.0,wine,20.0


## 7. Фишки с inner join

#### сопоставление с "присланным" файлом:

In [46]:
# Peek at the first five rows of german_credit.
# NOTE(review): german_credit is not created anywhere in this notebook, so it
# must already exist in the target database.
sql = '''select top(5) * from german_credit t '''

In [47]:
# Display the sample rows.
select(sql)

Unnamed: 0,age,sex,job,housing,saving_accounts,checking_account,credit_amount,duration,purpose,default,contract_dt,client_id
0,33,male,2,own,,,3074,9,radio/TV,0,2008-06-29 18:52:00,210
1,43,male,1,own,little,little,1344,12,car,0,2007-05-20 18:30:19,929
2,52,male,2,own,quite rich,,936,9,education,0,2008-04-27 08:23:07,200
3,35,female,3,own,little,,1393,11,car,0,2007-05-06 10:58:22,45
4,28,male,2,own,little,,776,12,radio/TV,0,2007-07-21 13:22:14,358


In [48]:
# Stand-in for a client list received as a file: two client ids with a payload value.
clients = pd.DataFrame(
    {
        'client_id': [200, 45],
        'data': [1.0, 2.0],
    }
)

In [49]:
# Load the received client list into `clients_task_name`, then display it.
sql = '''
drop table if exists clients_task_name;
CREATE TABLE clients_task_name (
    client_id        int,
    data             int
);
'''
cur = conn.cursor()
try:
    # Drop-and-create makes the cell safe to re-run.
    cur.execute(sql)
    conn.commit()

    # Plain Python tuples in the same column order as the INSERT statement.
    client_rows = list(
        clients[['client_id', 'data']].itertuples(index=False, name=None)
    )
    if client_rows:  # executemany needs at least one parameter row
        cur.executemany(
            '''INSERT INTO clients_task_name([client_id],[data]) values (?,?)''',
            client_rows,
        )
    conn.commit()
except Exception:
    # Do not leave a half-finished transaction on the shared connection.
    conn.rollback()
    raise
finally:
    # Release the cursor even when the DDL or an insert fails.
    cur.close()

sql = '''select t.* from clients_task_name t'''
select(sql)

Unnamed: 0,client_id,data
0,200,1
1,45,2


In [50]:
# Inner join used as a filter: keep only german_credit rows whose client_id is in
# the uploaded list and attach that list's `data` column.
sql = '''select t.*, ctn.data 
from german_credit t 
join clients_task_name ctn on t.client_id = ctn.client_id
'''

In [51]:
# Only clients 200 and 45 come back, each with its `data` value.
select(sql)

Unnamed: 0,age,sex,job,housing,saving_accounts,checking_account,credit_amount,duration,purpose,default,contract_dt,client_id,data
0,52,male,2,own,quite rich,,936,9,education,0,2008-04-27 08:23:07,200,1
1,35,female,3,own,little,,1393,11,car,0,2007-05-06 10:58:22,45,2


#### генерация заготовок под отчёт:

In [52]:
# Build a small inline table of user ids with UNION ALL; no physical table is needed.
sql = '''select 1 as user_id
union all
select 2 as user_id
union all
select 3 as user_id'''

In [53]:
# Three rows: user_id 1, 2 and 3.
select(sql)

Unnamed: 0,user_id
0,1
1,2
2,3


In [54]:
# Inline table of month start dates; the dd.mm.yyyy strings are converted to
# dates with convert style 104.
sql = '''
select convert(date, '01.03.2021', 104) as month
union all
select convert(date, '01.04.2021', 104) as month
'''

In [55]:
# Two rows: 2021-03-01 and 2021-04-01.
select(sql)

Unnamed: 0,month
0,2021-03-01
1,2021-04-01


In [56]:
# Report skeleton: `join ... on 1=1` pairs every inline user with every inline
# month (a cartesian product), giving one row per user per month.
# Inside this query the CTE called `users` is what `from users t` reads, not the
# physical `users` table created earlier (the output has only user_id and month).
sql = '''with 
users as (
    select 1 as user_id
    union all
    select 2 as user_id
    union all
    select 3 as user_id
),
month as (
    select convert(date, '01.03.2021', 104) as month
    union all
    select convert(date, '01.04.2021', 104) as month
)
select * from users t
join month m on 1=1
'''

In [57]:
# 3 users x 2 months = 6 rows.
select(sql)

Unnamed: 0,user_id,month
0,1,2021-03-01
1,2,2021-03-01
2,3,2021-03-01
3,1,2021-04-01
4,2,2021-04-01
5,3,2021-04-01


----------------

In [58]:
# Release the ODBC connection opened at the top of the notebook.
conn.close()