In [1]:
import pandas as pd
import numpy as np

#### Подключение к бд и заливка данных

In [2]:
import sqlalchemy
import pyodbc
import warnings
warnings.filterwarnings('ignore')

In [3]:
conn = pyodbc.connect('DSN=TestDB;Trusted_Connection=yes;')

In [4]:
def select(sql):
  return pd.read_sql(sql,conn)

In [5]:
sql = '''select * from client_transactions t'''
select(sql)

Unnamed: 0,dt,client_id,amount
0,2008-04-06 11:54:47,950,161.38
1,2007-07-28 00:00:19,418,35.34
2,2008-03-14 20:43:54,131,146.50
3,2007-12-18 13:03:24,353,119.21
4,2007-11-09 05:18:30,849,105.24
...,...,...,...
4270,2007-08-18 04:05:05,185,10063.07
4271,2007-06-04 15:23:32,375,156.91
4272,2007-12-06 21:34:06,418,10053.82
4273,2008-04-19 17:30:07,409,10050.35


# 6. Джойны (Продолжение)

## 8. Ежемесячный отчет (практический пример)

#### прислали транзакции по клиентам:

In [6]:
sql = '''select top(5) * from client_transactions t'''
select(sql)

Unnamed: 0,dt,client_id,amount
0,2008-04-06 11:54:47,950,161.38
1,2007-07-28 00:00:19,418,35.34
2,2008-03-14 20:43:54,131,146.5
3,2007-12-18 13:03:24,353,119.21
4,2007-11-09 05:18:30,849,105.24


In [7]:
sql = '''select count(*) from client_transactions t'''
select(sql)

Unnamed: 0,Unnamed: 1
0,4275


сгруппируем:

In [8]:
sql = '''select 
year(t.dt) as year,  month(t.dt) as month,
count(1) as transaction_cnt,
sum(t.amount) as amount_sum

 from client_transactions t
group by year(t.dt), month(t.dt)
order by year(t.dt), month(t.dt)
'''

In [9]:
select(sql)

Unnamed: 0,year,month,transaction_cnt,amount_sum
0,2007,5,338,450912.77
1,2007,6,379,551664.83
2,2007,7,304,494134.5
3,2007,8,255,426903.23
4,2007,10,332,634846.49
5,2007,11,389,500420.98
6,2007,12,364,561449.89
7,2008,1,413,630137.22
8,2008,2,228,337043.47
9,2008,3,309,425599.09


нет сентября...

#### надо сгенерить заготовку, чтобы были все месяцы:

<a href="https://stackovergo.com/ru/q/3063246/how-to-generate-a-range-of-dates-in-sql-server">
    Как создать диапазон дат в SQL Server</a>

In [10]:
sql = '''
Declare   @FromDate   Date,
          @ToDate     Date
select @FromDate = min(t.dt) from client_transactions t
select @ToDate = max(t.dt) from client_transactions t;  

WITH n AS 
(
  SELECT TOP (DATEDIFF(DAY, @FromDate, @ToDate) + 1) 
    n = ROW_NUMBER() OVER (ORDER BY [object_id])
  FROM sys.all_objects
),
p as
(
SELECT DATEADD(DAY, n-1, @FromDate) as dt
FROM n
)
select year(dt) year, month(dt) month from p
'''

In [11]:
select(sql)

Unnamed: 0,year,month
0,2007,5
1,2007,5
2,2007,5
3,2007,5
4,2007,5
...,...,...
421,2008,6
422,2008,6
423,2008,6
424,2008,6


In [12]:
sql = '''
Declare   @FromDate   Date,
          @ToDate     Date
select @FromDate = min(t.dt) from client_transactions t
select @ToDate = max(t.dt) from client_transactions t;  

WITH n AS 
(
  SELECT TOP (DATEDIFF(DAY, @FromDate, @ToDate) + 1) 
    n = ROW_NUMBER() OVER (ORDER BY [object_id])
  FROM sys.all_objects
),
p as
(
SELECT DATEADD(DAY, n-1, @FromDate) as dt
FROM n
),
ym as(
select year(dt) year, month(dt) month from p
group by year(dt), month(dt)
),
tr as(
select 
year(t.dt) as year,  month(t.dt) as month,
count(1) as transaction_cnt,
sum(t.amount) as amount_sum
from client_transactions t
group by year(t.dt), month(t.dt)
--order by year(t.dt), month(t.dt)
)

--select * from ym

--/*
select ym.year, ym.month,
coalesce(tr.transaction_cnt,0) as transaction_cnt,
coalesce(tr.amount_sum,0) as amount_sum
from ym
left join tr on tr.year = ym.year and tr.month = ym.month
--*/
'''

In [13]:
select(sql)

Unnamed: 0,year,month,transaction_cnt,amount_sum
0,2007,5,338,450912.77
1,2007,6,379,551664.83
2,2007,7,304,494134.5
3,2007,8,255,426903.23
4,2007,9,0,0.0
5,2007,10,332,634846.49
6,2007,11,389,500420.98
7,2007,12,364,561449.89
8,2008,1,413,630137.22
9,2008,2,228,337043.47


## 9. Ежемесячный отчет на пользователя (практический пример)

In [14]:
sql = '''select * from german_credit t'''
select(sql)

Unnamed: 0,age,sex,job,housing,saving_accounts,checking_account,credit_amount,duration,purpose,default,contract_dt,client_id
0,33,male,2,own,,,3074,9,radio/TV,0,2008-06-29 18:52:00,210
1,43,male,1,own,little,little,1344,12,car,0,2007-05-20 18:30:19,929
2,52,male,2,own,quite rich,,936,9,education,0,2008-04-27 08:23:07,200
3,35,female,3,own,little,,1393,11,car,0,2007-05-06 10:58:22,45
4,28,male,2,own,little,,776,12,radio/TV,0,2007-07-21 13:22:14,358
...,...,...,...,...,...,...,...,...,...,...,...,...
995,65,male,2,free,little,little,2600,18,radio/TV,1,2007-12-16 20:17:19,624
996,30,male,3,own,little,moderate,4455,36,business,1,2007-07-12 14:08:58,181
997,33,male,2,own,little,moderate,6403,24,radio/TV,0,2008-04-08 03:24:26,730
998,29,female,2,own,,,5003,21,car,1,2007-11-29 15:51:45,557


In [15]:
sql = '''select distinct t.client_id from german_credit t'''

In [16]:
select(sql)

Unnamed: 0,client_id
0,0
1,1
2,2
3,3
4,4
...,...
995,995
996,996
997,997
998,998


In [17]:
sql = '''
Declare   @FromDate   Date,
          @ToDate     Date
select @FromDate = min(t.dt) from client_transactions t
select @ToDate = max(t.dt) from client_transactions t;  

WITH n AS 
(
  SELECT TOP (DATEDIFF(DAY, @FromDate, @ToDate) + 1) 
    n = ROW_NUMBER() OVER (ORDER BY [object_id])
  FROM sys.all_objects
),
p as
(
SELECT DATEADD(DAY, n-1, @FromDate) as dt
FROM n
),

--список дат
dates as(
select year(dt) year, month(dt) month from p
group by year(dt), month(dt)
),

--клиенты
clients as (
select distinct t.client_id from german_credit t
),

--привязка каждого клиента к дате
clients_month as
(SELECT t.year, t.month, c.client_id FROM dates t
join clients c on 1=1),

--реестр транзакций (из файла)
trans_month as(
select 
year(t.dt) as year,  month(t.dt) as month,
t.client_id,
count(1) as transaction_cnt,
sum(t.amount) as amount_sum
from client_transactions t
group by year(t.dt), month(t.dt), t.client_id
)

--/*
,client_trans_month as (

select t.client_id, t.year, t.month,
tm.transaction_cnt,
tm.amount_sum,
1 as [user],
case when tm.transaction_cnt > 0 then 1 else 0 end as active
from clients_month t
left join trans_month tm on t.client_id = tm.client_id
    and t.year = tm.year and t.month = tm.month
)
--*/

/*
select * from client_trans_month
where client_id=900
order by client_id, year, month
--*/
--/*
select t.year, t.month, sum(t.[user]) as user_cnt, sum(t.amount_sum) as amount_sum , 
sum(t.active) as active_cnt from client_trans_month t
group by t.year, t.month
order by t.year, t.month
--*/
'''

In [18]:
select(sql)

Unnamed: 0,year,month,user_cnt,amount_sum,active_cnt
0,2007,5,1000,450912.77,288
1,2007,6,1000,551664.83,297
2,2007,7,1000,494134.5,259
3,2007,8,1000,426903.23,222
4,2007,9,1000,,0
5,2007,10,1000,634846.49,283
6,2007,11,1000,500420.98,323
7,2007,12,1000,561449.89,287
8,2008,1,1000,630137.22,325
9,2008,2,1000,337043.47,204


#### проверим:

In [19]:
t = select(sql)

In [20]:
t['amount_sum'].sum()

6548980.619999999

In [21]:
sql = '''select sum(t.amount) from client_transactions t'''

In [22]:
select(sql)

Unnamed: 0,Unnamed: 1
0,6548980.62


## 11. Джойн таблицы самой на себя (нарастающий итог)

In [23]:
t = pd.DataFrame({'dt':pd.to_datetime(['2021-04-01','2021-04-02','2021-04-03'],format='%Y-%m-%d'),
                  'revenue':[1,2,3]})

In [24]:
cur = conn.cursor()
sql = '''
drop table if exists revenue;
CREATE TABLE revenue (
    dt        datetime,
    revenue   int
);
'''
cur.execute(sql)
conn.commit()
for index,row in t.iterrows():
    cur.execute('''INSERT INTO revenue(
                    [dt],[revenue]
                    ) 
                    values (?,?)
    ''', 
                    row['dt'], 
                    row['revenue']
               )
conn.commit()
cur.close()
sql = '''select * from revenue t'''
select(sql)

Unnamed: 0,dt,revenue
0,2021-04-01,1
1,2021-04-02,2
2,2021-04-03,3


In [25]:
sql = '''select t.dt,t.revenue, 
sum(r.revenue) as cumsum
from revenue t
join revenue r on r.dt <= t.dt 
group by t.dt, t.revenue
'''

In [26]:
select(sql)

Unnamed: 0,dt,revenue,cumsum
0,2021-04-01,1,1
1,2021-04-02,2,3
2,2021-04-03,3,6


----------------

In [27]:
conn.close()