In [1]:
import pandas as pd
import numpy as np

## Подключение к БД и заливка данных

<a href="https://pythonru.com/biblioteki/ustanovka-i-podklyuchenie-sqlalchemy-k-baze-dannyh">Установка и подключение SQLAlchemy к базе данных: mysql, postgresql, sqlite3 и oracle</a>

In [2]:
import sqlalchemy

In [3]:
# Display the installed SQLAlchemy version for reference; the connection
# created later in this notebook goes through pyodbc directly.
sqlalchemy.__version__

'1.4.39'

In [4]:
import pyodbc

In [5]:
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): presumably added to hide the pandas warning about reading SQL
# through a raw DBAPI connection; a blanket 'ignore' also hides unrelated
# deprecation and data warnings, so consider narrowing the filter.
warnings.filterwarnings('ignore')

In [6]:
# Open the shared database connection through the ODBC data source named
# "TestDB". No credentials are stored in the notebook: the connection string
# asks for a trusted connection (presumably Windows integrated auth; verify
# against the DSN configuration).
conn = pyodbc.connect('DSN=TestDB;Trusted_Connection=yes;')

In [7]:
def select(sql, params=None):
    """Run a SQL query on the notebook's shared connection and return the result.

    Parameters
    ----------
    sql : str
        Query text. It may contain placeholders in the style of the
        underlying driver (``?`` for the connection used in this notebook).
    params : sequence or mapping, optional
        Values bound to the placeholders in ``sql``. Defaults to ``None``
        (no parameters), which keeps the original one-argument call working.

    Returns
    -------
    pandas.DataFrame
        The rows produced by the query.
    """
    # Binding values through ``params`` avoids building SQL by string
    # concatenation when a query depends on Python values.
    return pd.read_sql(sql, conn, params=params)

In [8]:
# Recreate a small demo table and read it back to check the connection.
#
# Every statement is executed on its own. When several statements are sent as
# one batch, an error raised by a later statement may only surface once the
# remaining result sets are consumed, so a failing INSERT could go unnoticed.
employee_rows = [(1, 100), (2, 200), (3, 300)]

cur = conn.cursor()
try:
    cur.execute('drop table if exists Employee')
    cur.execute('create table Employee(Id int, Salary int)')
    # Parameterised insert: one statement, one tuple of values per row.
    cur.executemany('insert into Employee(Id, Salary) values (?, ?)', employee_rows)
    conn.commit()
except Exception:
    # Do not leave a half-finished transaction open on the shared connection.
    conn.rollback()
    raise
finally:
    cur.close()

sql = '''select * from Employee t'''
select(sql)

Unnamed: 0,Id,Salary
0,1,100
1,2,200
2,3,300


### Создание, подключение и заливка данных

In [9]:
# Load the credit dataset from a CSV file located relative to the notebook.
df = pd.read_csv('../data/german_credit_augmented.csv')

In [10]:
# Parse contract_dt into datetimes using an explicit
# 'YYYY-MM-DD HH:MM:SS' format.
df['contract_dt'] = pd.to_datetime(df['contract_dt'],format='%Y-%m-%d %H:%M:%S')

In [11]:
# Inspect the column dtypes (contract_dt is expected to be datetime64[ns]
# after the conversion above).
df.dtypes

age                          int64
sex                         object
job                          int64
housing                     object
saving_accounts             object
checking_account            object
credit_amount                int64
duration                     int64
purpose                     object
default                      int64
contract_dt         datetime64[ns]
client_id                    int64
dtype: object

In [12]:
# Replace NaN with None before the rows are passed to the database driver;
# presumably so that missing values are stored as SQL NULL - verify against
# the inserted data.
df = df.replace({np.nan:None})

<a href="https://vc.ru/dev/245799-chto-vybrat-text-ili-varchar-max">
Что выбрать, text или varchar (MAX)?</a>

<a href="https://learn.microsoft.com/ru-ru/sql/machine-learning/data-exploration/python-dataframe-sql-server?view=azuresqldb-current">
Вставка кадра данных Python в таблицу SQL</a><br>
<a href="https://learn.microsoft.com/ru-ru/sql/machine-learning/python/python-libraries-and-data-types?source=recommendations&view=sql-server-ver16">
Сопоставления типов данных между Python и SQL Server</a>

In [13]:
# Recreate the german_credit table and load the first rows of `df` into it.

# The columns are listed once so that the INSERT column list, the placeholders
# and the DataFrame selection cannot drift apart.
german_credit_columns = [
    'age', 'sex', 'job', 'housing', 'saving_accounts',
    'checking_account', 'credit_amount', 'duration', 'purpose', 'default',
    'contract_dt', 'client_id',
]
max_rows = 1000  # only the first `max_rows` rows of df are loaded

create_sql = '''
CREATE TABLE german_credit (
    age              INTEGER,
    sex              VARCHAR(max),
    job              INTEGER,
    housing          VARCHAR(max),
    saving_accounts  VARCHAR(max),
    checking_account VARCHAR(max),
    credit_amount    INTEGER,
    duration         INTEGER,
    purpose          VARCHAR(max),
    [default]        INTEGER,
    contract_dt      DATETIME,
    client_id        INTEGER
);
'''

# Column names are bracket-quoted because `default` is a reserved word.
insert_sql = 'INSERT INTO german_credit ({columns}) VALUES ({placeholders})'.format(
    columns=','.join('[{}]'.format(name) for name in german_credit_columns),
    placeholders=','.join('?' for _ in german_credit_columns),
)

# itertuples(index=False, name=None) yields one plain tuple per row in column
# order, without building a Series per row the way iterrows does.
rows = list(
    df.head(max_rows)[german_credit_columns].itertuples(index=False, name=None)
)

cur = conn.cursor()
try:
    # DDL statements are executed one at a time so that a failure in either
    # of them is raised immediately.
    cur.execute('drop table if exists german_credit;')
    cur.execute(create_sql)
    conn.commit()

    # executemany rejects an empty parameter list, hence the guard.
    # NOTE(review): fast_executemany is intentionally left off; it is reported
    # to behave poorly with VARCHAR(max) columns - confirm before enabling.
    if rows:
        cur.executemany(insert_sql, rows)
    conn.commit()
except Exception:
    # Do not leave a half-finished transaction open on the shared connection.
    conn.rollback()
    raise
finally:
    cur.close()

sql = '''select * from german_credit t'''
select(sql)

Unnamed: 0,age,sex,job,housing,saving_accounts,checking_account,credit_amount,duration,purpose,default,contract_dt,client_id
0,33,male,2,own,,,3074,9,radio/TV,0,2008-06-29 18:52:00,210
1,43,male,1,own,little,little,1344,12,car,0,2007-05-20 18:30:19,929
2,52,male,2,own,quite rich,,936,9,education,0,2008-04-27 08:23:07,200
3,35,female,3,own,little,,1393,11,car,0,2007-05-06 10:58:22,45
4,28,male,2,own,little,,776,12,radio/TV,0,2007-07-21 13:22:14,358
...,...,...,...,...,...,...,...,...,...,...,...,...
995,65,male,2,free,little,little,2600,18,radio/TV,1,2007-12-16 20:17:19,624
996,30,male,3,own,little,moderate,4455,36,business,1,2007-07-12 14:08:58,181
997,33,male,2,own,little,moderate,6403,24,radio/TV,0,2008-04-08 03:24:26,730
998,29,female,2,own,,,5003,21,car,1,2007-11-29 15:51:45,557


In [14]:
# Load the transactions CSV, recreate client_transactions and insert all rows.
transactions = pd.read_csv('../data/german_credit_augmented_transactions.csv')
transactions['dt'] = pd.to_datetime(transactions['dt'],format='%Y-%m-%d %H:%M:%S')
# None instead of NaN before the values are handed to the database driver.
transactions = transactions.replace({np.nan:None})

# The columns are listed once so that the INSERT column list, the placeholders
# and the DataFrame selection cannot drift apart.
transaction_columns = ['dt', 'client_id', 'amount']

create_sql = '''
CREATE TABLE client_transactions (
    dt               datetime,
    client_id        int,
    amount           decimal(19,4)
);
'''

insert_sql = 'INSERT INTO client_transactions ({columns}) VALUES ({placeholders})'.format(
    columns=','.join('[{}]'.format(name) for name in transaction_columns),
    placeholders=','.join('?' for _ in transaction_columns),
)

# itertuples(index=False, name=None) yields one plain tuple per row in column
# order, without building a Series per row the way iterrows does.
rows = list(transactions[transaction_columns].itertuples(index=False, name=None))

cur = conn.cursor()
try:
    # DDL statements are executed one at a time so that a failure in either
    # of them is raised immediately.
    cur.execute('drop table if exists client_transactions;')
    cur.execute(create_sql)
    conn.commit()

    # executemany rejects an empty parameter list, hence the guard.
    if rows:
        cur.executemany(insert_sql, rows)
    conn.commit()
except Exception:
    # Do not leave a half-finished transaction open on the shared connection.
    conn.rollback()
    raise
finally:
    cur.close()

sql = '''select * from client_transactions t'''
select(sql)

Unnamed: 0,dt,client_id,amount
0,2008-04-06 11:54:47,950,161.38
1,2007-07-28 00:00:19,418,35.34
2,2008-03-14 20:43:54,131,146.50
3,2007-12-18 13:03:24,353,119.21
4,2007-11-09 05:18:30,849,105.24
...,...,...,...
4270,2007-08-18 04:05:05,185,10063.07
4271,2007-06-04 15:23:32,375,156.91
4272,2007-12-06 21:34:06,418,10053.82
4273,2008-04-19 17:30:07,409,10050.35


In [15]:
# Close the shared database connection; `select` and any cursor created from
# `conn` cannot be used after this cell runs.
conn.close()