In [1]:
import pandas as pd
import numpy as np

#### Подключение к БД и заливка данных

In [2]:
import sqlalchemy
import pyodbc
import warnings
# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): presumably meant to hide the warning pandas emits when read_sql
# is given a raw DBAPI connection — confirm, and consider a narrower filter.
warnings.filterwarnings('ignore')

In [3]:
# Open the single pyodbc connection shared by all later cells, via the ODBC DSN "TestDB".
# NOTE(review): Trusted_Connection=yes presumably selects integrated (Windows) auth — confirm.
conn = pyodbc.connect('DSN=TestDB;Trusted_Connection=yes;')

In [4]:
def select(sql, params=None, con=None):
    """Run a SQL query and return its result set as a pandas DataFrame.

    Parameters
    ----------
    sql : str
        Query text to execute.
    params : sequence or mapping, optional
        Bind values for placeholders in ``sql``; forwarded unchanged to
        ``pandas.read_sql``. Placeholder syntax depends on the driver.
    con : DBAPI connection, optional
        Connection to run the query on. When omitted, the notebook-level
        ``conn`` is used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        The rows returned by the query.
    """
    # The global is looked up only when no explicit connection is passed, so
    # the helper can also be used against another database.
    connection = conn if con is None else con
    return pd.read_sql(sql, connection, params=params)

# 5. Подзапросы

## 1. Простой подзапрос

In [5]:
# Toy data for the duplicate-search examples: ids 1 and 2 occur twice, id 3 once.
t = pd.DataFrame(
    {
        'id': [1, 1, 2, 2, 3],
        'name': list('abcde'),
    }
)

In [6]:
# Recreate the demo table from scratch so that re-running this cell starts clean.
create_sql = '''
drop table if exists dupl_test;
CREATE TABLE dupl_test (
    id        INTEGER,
    name      VARCHAR(max)
);
'''
insert_sql = '''INSERT INTO dupl_test(
                    [id],[name]
                    ) 
                    values (?,?)
    '''

cur = conn.cursor()
try:
    cur.execute(create_sql)
    conn.commit()

    # Plain (id, name) tuples in frame order; one executemany call replaces
    # the per-row iterrows() loop.
    rows = list(t[['id', 'name']].itertuples(index=False, name=None))
    if rows:  # skip the call entirely when the frame is empty
        cur.executemany(insert_sql, rows)
    conn.commit()
finally:
    # Release the cursor even if one of the statements above fails.
    cur.close()

# Read the table back to confirm what was loaded.
sql = '''select * from dupl_test t'''
select(sql)

Unnamed: 0,id,name
0,1,a
1,1,b
2,2,c
3,2,d
4,3,e


### Дубликаты Id:

In [7]:
# Ids that occur more than once: group by id and keep only groups with more than one row.
sql = '''select 
t.id 
from dupl_test t
group by t.id
having count(1) > 1
'''

In [8]:
select(sql)

Unnamed: 0,id
0,1
1,2


In [9]:
# Full rows for the duplicated ids: the grouped query is used as an IN-subquery.
# The alias `t` is reused inside the subquery, where it refers to the inner dupl_test.
sql = '''select * 
from dupl_test t
where t.id in (
    select t.id 
    from dupl_test t
    group by t.id
    having count(1) > 1
)
'''

In [10]:
select(sql)

Unnamed: 0,id,name
0,1,a
1,1,b
2,2,c
3,2,d


#### с созданием промежуточной таблицы:

In [11]:
# Materialise the duplicated ids into a helper table `dupls` (SELECT ... INTO),
# dropping any previous copy first so the cell can be re-run.
sql = '''
drop table if exists dupls;

select t.id 
into dupls
from dupl_test t
group by t.id
having count(1) > 1
'''
cur = conn.cursor()
try:
    cur.execute(sql)
    conn.commit()
finally:
    # Close the cursor even when the batch fails, so it is not left open on `conn`.
    cur.close()

# Read the new table back to check its contents.
sql = '''select * from dupls t'''
select(sql)

Unnamed: 0,id
0,1
1,2


In [12]:
# Rows of dupl_test whose id is listed in the `dupls` table.
sql = '''select * 
from dupl_test t
where t.id in (
    select id from dupls
)
'''

In [13]:
select(sql)

Unnamed: 0,id,name
0,1,a
1,1,b
2,2,c
3,2,d


### having в подзапросах:

In [14]:
# Row count per id, filtered on the aggregate with HAVING.
sql = '''select t.id, 
count(1) as cnt
from dupl_test t
group by t.id
having count(1) > 1'''

In [15]:
select(sql)

Unnamed: 0,id,cnt
0,1,2
1,2,2


In [16]:
# Alternative to HAVING: aggregate inside a derived table, then filter its
# `cnt` column with an ordinary WHERE in the outer query.
sql = '''select * from (
    select t.id, 
    count(1) as cnt 
    from dupl_test t
    group by t.id
) t
where t.cnt > 1
'''

In [17]:
select(sql)

Unnamed: 0,id,cnt
0,1,2
1,2,2


## 2. CTE (with)

In [18]:
# Two levels of nested derived tables: count rows per id, keep cnt > 1,
# then keep id = 1. The alias `t` is reused at every level.
sql = '''select * from (
    select * from (
        select t.id,
        count(1) as cnt 
        from dupl_test t
        group by t.id
    ) t
    where t.cnt > 1
) t
where t.id = 1
'''

In [19]:
select(sql)

Unnamed: 0,id,cnt
0,1,2


In [20]:
# The same steps written as CTEs: `id_cnt` counts rows per id, `id_cnt_2`
# keeps cnt > 1, and the final select keeps id = 1.
sql = '''with 
id_cnt as (
    select t.id,
    count(1) as cnt 
    from dupl_test t
    group by t.id
),
id_cnt_2 as (
    select * 
    from id_cnt t
    where t.cnt > 1
)
select * from id_cnt_2 t
where t.id = 1
'''

In [21]:
select(sql)

Unnamed: 0,id,cnt
0,1,2


#### закрепим понимание:

In [22]:
# Reusable query text: maps each `purpose` string to a category using LIKE
# patterns. CASE branches are checked in order, and anything that matches no
# pattern gets the 'другое' label.
cat = '''select t.purpose,

case when t.purpose like '%свадьб%' then 'свадьба'
when t.purpose like '%машин%' or t.purpose like '%авто%' then 'машина'
when t.purpose like '%недвиж%' then 'недвижимость'
else 'другое' end as purpose_cat

from purpose t
'''

In [23]:
print(cat)

select t.purpose,

case when t.purpose like '%свадьб%' then 'свадьба'
when t.purpose like '%машин%' or t.purpose like '%авто%' then 'машина'
when t.purpose like '%недвиж%' then 'недвижимость'
else 'другое' end as purpose_cat

from purpose t



In [24]:
# Embed the `cat` query text as a derived table via an f-string and count
# rows per category. The interpolated text is the notebook's own constant.
sql = f'''select 
t.purpose_cat,
count(1)
from ({cat}) t
group by t.purpose_cat
'''

In [25]:
print(sql)

select 
t.purpose_cat,
count(1)
from (select t.purpose,

case when t.purpose like '%свадьб%' then 'свадьба'
when t.purpose like '%машин%' or t.purpose like '%авто%' then 'машина'
when t.purpose like '%недвиж%' then 'недвижимость'
else 'другое' end as purpose_cat

from purpose t
) t
group by t.purpose_cat



In [26]:
select(sql)

Unnamed: 0,purpose_cat,Unnamed: 2
0,другое,1
1,машина,6
2,недвижимость,2
3,свадьба,5


In [27]:
# Count rows per category, with the categorisation query declared as a CTE
# named `categories` instead of being interpolated as a derived table.
sql = '''with 
categories as (
    select t.purpose,

    case when t.purpose like '%свадьб%' then 'свадьба'
    when t.purpose like '%машин%' or t.purpose like '%авто%' then 'машина'
    when t.purpose like '%недвиж%' then 'недвижимость'

    else 'другое' end as purpose_cat

    from purpose t
)
select t.purpose_cat,
count(1) 
from categories t
group by t.purpose_cat
'''

In [28]:
select(sql)

Unnamed: 0,purpose_cat,Unnamed: 2
0,другое,1
1,машина,6
2,недвижимость,2
3,свадьба,5


## 3. Когда лучше создать таблицу, а не использовать подзапрос

In [29]:
# Persist the categorised purposes as a real table `categories` (SELECT ... INTO),
# dropping any earlier copy first so the cell can be re-run.
sql = '''
drop table if exists categories;

select t.purpose,

case when t.purpose like '%свадьб%' then 'свадьба'
when t.purpose like '%машин%' or t.purpose like '%авто%' then 'машина'
when t.purpose like '%недвиж%' then 'недвижимость'

else 'другое' end as purpose_cat

into categories

from purpose t
'''
cur = conn.cursor()
try:
    cur.execute(sql)
    conn.commit()
finally:
    # Close the cursor even when the batch fails, so it is not left open on `conn`.
    cur.close()

# Read the new table back to check its contents.
sql = '''select * from categories t'''
select(sql)

Unnamed: 0,purpose,purpose_cat
0,машина,машина
1,машина,машина
2,машина,машина
3,на машину,машина
4,на покупку машины,машина
5,автомобиль,машина
6,на возвращение 2007,другое
7,на свадьбу,свадьба
8,свадьба,свадьба
9,свадьба,свадьба


In [30]:
# Rows per category, read from the `categories` table.
sql = '''select 
t.purpose_cat,
count(1)
from categories t
group by t.purpose_cat'''

In [31]:
select(sql)

Unnamed: 0,purpose_cat,Unnamed: 2
0,другое,1
1,машина,6
2,недвижимость,2
3,свадьба,5


In [32]:
# Raw purpose values that ended up in the 'другое' category, most frequent first.
sql = '''select 
t.purpose, 
count(1) 

from categories t

where t.purpose_cat = 'другое'

group by t.purpose
order by count(1) desc
'''

In [33]:
select(sql)

Unnamed: 0,purpose,Unnamed: 2
0,на возвращение 2007,1


#### Запрос берёт временную таблицу (`with categories`), а не таблицу `categories` из БД:

In [34]:
# Declares a CTE that is also named `categories`; it yields the constant 1
# (column `p`) for every row of `purpose`. The final select reads `categories`.
sql = '''with 
categories as (
select 1 as p
from purpose t
)
select * from categories t
'''

In [35]:
select(sql)

Unnamed: 0,p
0,1
1,1
2,1
3,1
4,1
5,1
6,1
7,1
8,1
9,1


----------------

In [36]:
conn.close()