In [1]:
import pandas as pd
import numpy as np

#### Подключение к БД и заливка данных

In [2]:
import sqlalchemy
import pyodbc
import warnings
# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): this is a blanket filter — presumably added to hide the pandas
# warning about passing a raw DBAPI connection to read_sql; confirm, and
# consider narrowing it to that one warning category.
warnings.filterwarnings('ignore')

In [3]:
# Open the shared database connection through the ODBC data source "TestDB".
# Trusted_Connection=yes means no user name or password appears in the notebook.
conn = pyodbc.connect(
    "DSN=TestDB;Trusted_Connection=yes;"
)

In [4]:
def select(sql, params=None, con=None):
    """Run a SQL query and return its result set as a DataFrame.

    Parameters
    ----------
    sql : str
        Text of the query to execute.
    params : sequence or mapping, optional
        Values bound to the placeholders in ``sql``. Forwarded unchanged to
        ``pandas.read_sql``; the placeholder syntax is the one the underlying
        driver expects. Prefer this over formatting values into the SQL text.
    con : connection, optional
        Connection to run the query on. When omitted, the notebook-level
        ``conn`` is used, which is the original behaviour.

    Returns
    -------
    pandas.DataFrame
        One row per record returned by the query.
    """
    # Look up the global only when no explicit connection was supplied, so the
    # helper also works (and can be tested) without the notebook's `conn`.
    connection = conn if con is None else con
    return pd.read_sql(sql, connection, params=params)

In [5]:
# Preview the source table: fetch every column and row of german_credit and
# display the resulting DataFrame as the cell output.
sql = "select * from german_credit t"
select(sql)

Unnamed: 0,age,sex,job,housing,saving_accounts,checking_account,credit_amount,duration,purpose,default,contract_dt,client_id
0,33,male,2,own,,,3074,9,radio/TV,0,2008-06-29 18:52:00,210
1,43,male,1,own,little,little,1344,12,car,0,2007-05-20 18:30:19,929
2,52,male,2,own,quite rich,,936,9,education,0,2008-04-27 08:23:07,200
3,35,female,3,own,little,,1393,11,car,0,2007-05-06 10:58:22,45
4,28,male,2,own,little,,776,12,radio/TV,0,2007-07-21 13:22:14,358
...,...,...,...,...,...,...,...,...,...,...,...,...
995,65,male,2,free,little,little,2600,18,radio/TV,1,2007-12-16 20:17:19,624
996,30,male,3,own,little,moderate,4455,36,business,1,2007-07-12 14:08:58,181
997,33,male,2,own,little,moderate,6403,24,radio/TV,0,2008-04-08 03:24:26,730
998,29,female,2,own,,,5003,21,car,1,2007-11-29 15:51:45,557


# 3. Select

## 1. Псевдонимы

In [6]:
# Alias demo: the computed column is renamed with AS, and the table itself
# gets the alias `t`.
sql = (
    "SELECT t.age * 3 AS age_mult3,\n"
    "       t.housing\n"
    "FROM german_credit AS t\n"
)

In [7]:
# Run the alias query held in `sql`; the returned DataFrame is the cell output.
select(sql)

Unnamed: 0,age_mult3,housing
0,99,own
1,129,own
2,156,own
3,105,own
4,84,own
...,...,...
995,195,free
996,90,own
997,99,own
998,87,own


## 2. Базовые операции со столбцами

In [8]:
# Basic column arithmetic: all original columns plus four derived ones.
sql = (
    "select t.*, \n"
    "t.age * 3 as age_mult3,\n"
    "t.age + t.credit_amount as age_plus_amount,\n"
    # The `* 1.0` factor is what yields the fractional age_div_amount values
    # seen in the output.
    "t.age * 1.0 / t.credit_amount as age_div_amount,\n"
    "t.age as age_2\n"
    "from german_credit t\n"
)

In [9]:
# Execute the column-arithmetic query and display the original columns
# together with the derived ones.
select(sql)

Unnamed: 0,age,sex,job,housing,saving_accounts,checking_account,credit_amount,duration,purpose,default,contract_dt,client_id,age_mult3,age_plus_amount,age_div_amount,age_2
0,33,male,2,own,,,3074,9,radio/TV,0,2008-06-29 18:52:00,210,99,3107,0.010735,33
1,43,male,1,own,little,little,1344,12,car,0,2007-05-20 18:30:19,929,129,1387,0.031994,43
2,52,male,2,own,quite rich,,936,9,education,0,2008-04-27 08:23:07,200,156,988,0.055556,52
3,35,female,3,own,little,,1393,11,car,0,2007-05-06 10:58:22,45,105,1428,0.025126,35
4,28,male,2,own,little,,776,12,radio/TV,0,2007-07-21 13:22:14,358,84,804,0.036082,28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,65,male,2,free,little,little,2600,18,radio/TV,1,2007-12-16 20:17:19,624,195,2665,0.025000,65
996,30,male,3,own,little,moderate,4455,36,business,1,2007-07-12 14:08:58,181,90,4485,0.006734,30
997,33,male,2,own,little,moderate,6403,24,radio/TV,0,2008-04-08 03:24:26,730,99,6436,0.005154,33
998,29,female,2,own,,,5003,21,car,1,2007-11-29 15:51:45,557,87,5032,0.005797,29


## 3. Where

In [10]:
# Count the contracts signed during 2007.
# contract_dt carries a time of day, so the range is half-open
# (>= 1 Jan 2007 and < 1 Jan 2008). A BETWEEN whose upper bound is the date
# 31.12.2007 stops at midnight and would drop contracts signed later that day.
# Style 104 tells Convert to read the literals as dd.mm.yyyy.
sql = '''select count(1) from german_credit t
where t.contract_dt >= Convert(Date, '01.01.2007', 104)
  and t.contract_dt < Convert(Date, '01.01.2008', 104)
'''

In [11]:
# Execute the count query in `sql` and show its single-row result.
select(sql)

Unnamed: 0,Unnamed: 1
0,573


In [12]:
# Contracts signed during 2007 whose purpose is a car or repairs, newest first;
# ties on the timestamp are ordered by ascending credit amount.
# contract_dt carries a time of day, so the range is half-open
# (>= 1 Jan 2007 and < 1 Jan 2008). A BETWEEN whose upper bound is the date
# 31.12.2007 stops at midnight and would drop contracts signed later that day.
sql = '''select * from german_credit t
where t.contract_dt >= Convert(Date, '01.01.2007', 104)
  and t.contract_dt < Convert(Date, '01.01.2008', 104)
  and t.purpose in ('car', 'repairs')
order by t.contract_dt desc, t.credit_amount
'''

In [13]:
# Execute the filtered and ordered query in `sql` and display the matching rows.
select(sql)

Unnamed: 0,age,sex,job,housing,saving_accounts,checking_account,credit_amount,duration,purpose,default,contract_dt,client_id
0,36,male,3,rent,,moderate,7057,20,car,0,2007-12-29 16:10:08,99
1,30,male,2,own,little,moderate,639,12,repairs,1,2007-12-28 17:09:43,127
2,25,male,2,rent,moderate,moderate,1264,15,car,1,2007-12-28 08:38:58,979
3,48,male,2,own,little,,2134,9,car,0,2007-12-24 16:28:30,20
4,67,female,2,own,little,moderate,3872,18,repairs,0,2007-12-24 12:25:28,779
...,...,...,...,...,...,...,...,...,...,...,...,...
192,55,male,2,own,rich,,1413,12,car,0,2007-05-05 06:35:11,209
193,55,female,0,free,little,little,1190,18,repairs,1,2007-05-05 00:14:17,429
194,47,male,3,own,little,moderate,1209,6,car,1,2007-05-03 10:29:01,485
195,36,male,2,own,little,moderate,884,18,car,1,2007-05-02 06:22:11,184


## 5. Case when

### Доля клиентов с размером кредита > 1000:

In [14]:
# Denominator for the share calculation: total number of rows in german_credit.
sql = "select count(*) from german_credit t"
select(sql)

Unnamed: 0,Unnamed: 1
0,1000


In [15]:
# Numerator for the share calculation: rows whose credit amount exceeds 1000.
sql = (
    "select count(*) from german_credit t\n"
    "where t.credit_amount > 1000\n"
)

In [16]:
# Execute the filtered count query in `sql` and show its single-row result.
select(sql)

Unnamed: 0,Unnamed: 1
0,884


In [17]:
# Share of credits above 1000, computed by hand: 884 and 1000 are copied from
# the outputs of the two count(*) queries above, so they go stale if the table
# changes.
884/1000

0.884

In [18]:
# Two ways to build the same 0/1 flag for "credit amount above 1000":
# a CASE WHEN expression and the shorter iif() form.
sql = (
    "select t.credit_amount,\n"
    "case when t.credit_amount > 1000 then 1 else 0 end as greater_1000_flag,\n"
    "iif(t.credit_amount > 1000,1,0) as greater_1000_flag2\n"
    "from german_credit t\n"
)

In [19]:
# Execute the flag query in `sql` and display the amount next to both flag columns.
select(sql)

Unnamed: 0,credit_amount,greater_1000_flag,greater_1000_flag2
0,3074,1,1
1,1344,1,1
2,936,0,0
3,1393,1,1
4,776,0,0
...,...,...,...
995,2600,1,1
996,4455,1,1
997,6403,1,1
998,5003,1,1


In [20]:
# Share of credits above 1000 in a single query: average a 1.0/0 flag over all
# rows. The 1.0 literal keeps the average fractional (the output is 0.884).
sql = (
    "select \n"
    "avg(case when t.credit_amount > 1000 then 1.0 else 0 end) as greater_1000_frac\n"
    "from german_credit t\n"
)

In [21]:
# Execute the averaging query in `sql` and show the resulting fraction.
select(sql)

Unnamed: 0,greater_1000_frac
0,0.884


## 7. Создание таблицы

In [22]:
# Materialise the credits above 1000 into their own table.
# The batch first drops any previous copy, so the cell can be re-run; note that
# this is destructive for an existing greater_1000_credit table.
sql = '''
drop table if exists greater_1000_credit;

select *
into greater_1000_credit
from german_credit t
where t.credit_amount > 1000
'''
cur = conn.cursor()
try:
    cur.execute(sql)
    conn.commit()
except Exception:
    # Do not leave a failed batch pending on the shared connection.
    conn.rollback()
    raise
finally:
    # Release the cursor even when execute() or commit() raised.
    cur.close()

In [23]:
# Read back the newly created table to check its contents.
sql = "select * from greater_1000_credit t"

In [24]:
# Execute the query in `sql` and display the rows of greater_1000_credit.
select(sql)

Unnamed: 0,age,sex,job,housing,saving_accounts,checking_account,credit_amount,duration,purpose,default,contract_dt,client_id
0,33,male,2,own,,,3074,9,radio/TV,0,2008-06-29 18:52:00,210
1,43,male,1,own,little,little,1344,12,car,0,2007-05-20 18:30:19,929
2,35,female,3,own,little,,1393,11,car,0,2007-05-06 10:58:22,45
3,27,female,2,own,little,moderate,1295,18,furniture/equipment,0,2008-06-18 04:10:05,86
4,26,male,2,own,little,little,4370,42,radio/TV,1,2007-11-29 00:20:44,639
...,...,...,...,...,...,...,...,...,...,...,...,...
879,65,male,2,free,little,little,2600,18,radio/TV,1,2007-12-16 20:17:19,624
880,30,male,3,own,little,moderate,4455,36,business,1,2007-07-12 14:08:58,181
881,33,male,2,own,little,moderate,6403,24,radio/TV,0,2008-04-08 03:24:26,730
882,29,female,2,own,,,5003,21,car,1,2007-11-29 15:51:45,557


----------------

In [25]:
# Close the shared database connection. Any select() call made after this
# cell needs the connection cell to be re-run first.
conn.close()