In [1]:
from sqlalchemy.exc import ProgrammingError

## engine
engine持有和数据库的连接信息，是后面所有操作的基础。

我安装postgresql,建立一个测试用的用户和数据库，名称都用了'test',请根据自己情况修改。

In [12]:
from sqlalchemy import create_engine

def create_db_engine(dbtype, user, password, dbname, port):
    """Create a SQLAlchemy engine for a database served on localhost.

    Args:
        dbtype: Dialect name placed in the scheme part of the URL,
            e.g. 'postgresql'.
        user: Database user name.
        password: Password for ``user``. It is inserted into the URL as-is,
            so characters that are special in URLs must already be escaped
            by the caller.
        dbname: Name of the database to connect to.
        port: Port the server listens on (string or int).

    Returns:
        Whatever ``create_engine`` returns for the assembled URL.
    """
    # The scheme used to be hard-coded to 'postgresql', which silently
    # ignored ``dbtype``. Fields are now named explicitly instead of
    # relying on ``**locals()``.
    url = '{dbtype}://{user}:{password}@localhost:{port}/{dbname}'.format(
        dbtype=dbtype,
        user=user,
        password=password,
        port=port,
        dbname=dbname,
    )

    return create_engine(url)


engine = create_db_engine('postgresql','test','test','test','5432')

## schema操作

### 新建

注意如果建已经存在的schema,会出现ProgrammingError的错误，可以用try语句处理下

In [20]:
from sqlalchemy.schema import CreateSchema
def create_schema(engine, schema_name):
    """Create ``schema_name``, tolerating the case where it already exists.

    Args:
        engine: Object whose ``execute`` method runs the DDL statement.
        schema_name: Name of the schema to create.

    Returns:
        The ``CreateSchema`` element that was executed.

    Raises:
        ProgrammingError: If execution fails with a message that does not
            contain 'already exists'.
    """
    # Build the statement before the try block so ``sql`` is always bound.
    sql = CreateSchema(schema_name)
    try:
        engine.execute(sql)
    except ProgrammingError as e:
        if 'already exists' not in str(e):
            # The old ``finally: return sql`` discarded this re-raise (and
            # every other in-flight exception); now it really propagates.
            raise
        print('schema already exists')
    return sql

### 删除

同样, 如果删除已经不存在的schema,会报ProgrammingError,需要用try语句处理下。

In [21]:
from sqlalchemy.schema import DropSchema
def drop_schema(engine, schema_name, cascade = False):
    """Drop ``schema_name``; print a notice instead of failing if it is absent.

    Args:
        engine: Object whose ``execute`` method runs the DDL statement.
        schema_name: Name of the schema to drop.
        cascade: Forwarded to ``DropSchema`` as its ``cascade`` argument.

    Raises:
        ProgrammingError: If execution fails with a message that does not
            contain 'does not exist'.
    """
    statement = DropSchema(schema_name, cascade=cascade)
    try:
        engine.execute(statement)
    except ProgrammingError as e:
        # Only the "schema is missing" case is tolerated.
        if 'does not exist' not in str(e):
            raise e
        print("schema doesn't exists")
            
drop_schema(engine,'test', cascade=True)            

## table操作

### 新建

In [24]:
from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey, Sequence
from sqlalchemy.schema import CreateTable

table_name = 'users'
schema = 'test'
# put mandatory parameters in [] and optional parameters in {}
column_configs = [
    (['id',Integer],{'primary_key':True}),
    (['name',String(20)],{}),
    (['fullname',String(20)],{})
]

def create_table(engine,table_name,column_configs, schema = schema):
    """Create a table from a list of column specs, tolerating re-creation.

    Args:
        engine: Object whose ``execute`` method runs the DDL statement.
        table_name: Name of the table to create.
        column_configs: Iterable of ``(args, kwargs)`` pairs; each pair is
            expanded into ``Column(*args, **kwargs)``.
        schema: Schema to create the table in. The default is the value the
            module-level ``schema`` variable had when this function was
            defined.

    Raises:
        ProgrammingError: If execution fails with a message that does not
            contain 'already exists'.
    """
    try:
        column_objects = []
        for positional, options in column_configs:
            column_objects.append(Column(*positional, **options))

        table = Table(table_name, MetaData(), *column_objects, schema=schema)
        ddl = CreateTable(table)
        engine.execute(ddl)
    except ProgrammingError as e:
        # Only the "table is already there" case is tolerated.
        if 'already exists' not in str(e):
            raise e
        print('Table already exists')
            
create_schema(engine, 'test')    
create_table(engine,table_name,column_configs, schema = 'test')

schema already exists
Table already exists


In [25]:
create_table(engine,table_name,column_configs, schema = 'test')

Table already exists


sqlalchemy默认会将第一个Integer类型且被标记成primary key的Column设置成autoincrement。不过如果按照[文档](http://docs.sqlalchemy.org/en/rel_1_1/core/tutorial.html#define-and-create-tables)里的要求，改成适用更多数据库的
```
Column('id', Integer, Sequence('user_id_seq'), primary_key=True)
```
后,会失去这一特性。暂未找到解决方案。

注意，不同的数据库对于configs的要求会不同。例如，上面例子中的configs是Oracle的要求，如果是postgresql，可以简化成　
```
column_configs = [
    (['id',Integer],{'primary_key':True}),
    (['name',String],{}),
    (['fullname',String],{})
]
```


### 删除

In [26]:
from sqlalchemy.schema import DropTable

table_name = 'users'
schema = 'test'

def drop_table(engine,table_name,schema = schema):
    """Drop a table; print a notice instead of failing if it is absent.

    Args:
        engine: Object whose ``execute`` method runs the DDL statement.
        table_name: Name of the table to drop.
        schema: Schema containing the table. The default is the value the
            module-level ``schema`` variable had when this function was
            defined.

    Raises:
        ProgrammingError: If execution fails with a message that does not
            contain 'does not exist'.
    """
    try:
        metadata = MetaData()

        # A column-less Table is enough to name the table for DropTable.
        table = Table(
            table_name,
            metadata,
            schema = schema
        )
        engine.execute(DropTable(table))
    except ProgrammingError as e:
        if "does not exist" in str(e):
            print("Table doesn't exist")
        else:
            # Any other ProgrammingError used to be silently swallowed here;
            # re-raise it, as drop_schema and create_table already do.
            raise
    
drop_table(engine,table_name,schema = 'test')

## 获取表信息
需要配合select等方法使用，后面会介绍。由于之前被删除了，先新建表。

In [27]:
column_configs = [
    (['id',Integer],{'primary_key':True}),
    (['name',String(20)],{}),
    (['fullname',String(20)],{})
]

create_table(engine,'users',column_configs, schema = 'test')   

In [31]:
def get_table(engine, table_name, schema = None):
    """Return a ``Table`` for an existing table, loading its definition via ``engine``.

    Args:
        engine: Passed to ``Table`` as ``autoload_with``.
        table_name: Name of the table to load.
        schema: Optional schema containing the table.

    Returns:
        A ``Table`` bound to a fresh ``MetaData`` instance.
    """
    reflect_options = {'autoload': True, 'autoload_with': engine}
    return Table(table_name, MetaData(), schema=schema, **reflect_options)

t = get_table(engine, table_name, schema='test')

t.columns.keys()

['id', 'name', 'fullname']

## 插入数据

### 构造insert object

In [40]:
ins = t.insert().values(name='jack', fullname='Jack Jones')

由于不同的sql数据库,对应的sql语句会有区别,上面的函数执行后只是在ins object中记录了待绑定的数据。　如果要预览生成的sql语句，需要传入engine或者指定dialect明确是哪种数据库。

In [38]:
str(ins.compile(engine, compile_kwargs={"literal_binds": True}))

"INSERT INTO test.users (name, fullname) VALUES ('jack', 'Jack Jones') RETURNING test.users.id"

In [39]:
from sqlalchemy.dialects import postgresql
str(ins.compile(dialect = postgresql.dialect(), compile_kwargs={"literal_binds": True}))

"INSERT INTO test.users (id, name, fullname) VALUES (%(id)s, 'jack', 'Jack Jones')"

注意两者还是有一定的差别的, 传入engine后生成的sql是实际被执行的sql语句,更准确。

### 执行insert object

In [41]:
with engine.connect() as conn:
    result = conn.execute(ins)

可以用result.inserted_primary_key很方便地找到插入记录的id

In [42]:
result.inserted_primary_key

[1]

### 检查插入的数据

In [43]:
with engine.connect() as conn:
    print(conn.execute(t.select()).fetchall())

[(1, 'jack', 'Jack Jones')]


如果建表的时候列名用了大写（例如'A'），选择的时候也只能用'A'来选择

### 批量插入数据
例如从DataFrame插入数据

In [45]:
from pandas import DataFrame

In [46]:
df = DataFrame({'name':['Junjie','Xu'],'fullname':['Cai','Zhang']})
df

Unnamed: 0,fullname,name
0,Cai,Junjie
1,Zhang,Xu


转成dict格式后插入

In [47]:
df.to_dict(orient='records')

[{'fullname': 'Cai', 'name': 'Junjie'}, {'fullname': 'Zhang', 'name': 'Xu'}]

In [48]:
ins = t.insert().values(df.to_dict(orient='records'))

with engine.connect() as conn:
    result = conn.execute(ins)    
    
with engine.connect() as conn:
    print(conn.execute(t.select()).fetchall())

[(1, 'jack', 'Jack Jones'), (2, 'Junjie', 'Cai'), (3, 'Xu', 'Zhang')]


注意如果要插入dict list,sqlalchemy会以list中第一条记录的key为准

In [49]:
data = [
    {'name':'Name1'},
    {'name':'Name2','fullname':'FULLNAME2'}
]
ins = t.insert().values(data)

with engine.connect() as conn:
    result = conn.execute(ins)
with engine.connect() as conn:
    print(conn.execute(t.select()).fetchall())

[(1, 'jack', 'Jack Jones'), (2, 'Junjie', 'Cai'), (3, 'Xu', 'Zhang'), (4, 'Name1', None), (5, 'Name2', None)]


如果第一条记录包含了所有的key，后面的记录缺失key的话会报错。下面的例子中第二条记录显式给出了fullname（值为None），因此可以正常插入

In [51]:
data = [
    {'name':'Name3','fullname':'FULLNAME3'},
    {'name':'Name4','fullname':None},    
]
ins = t.insert().values(data)

try:
    with engine.connect() as conn:
        result = conn.execute(ins)
    with engine.connect() as conn:
        print(conn.execute(t.select()).fetchall())
except Exception as e:
    print(e)
    

[(1, 'jack', 'Jack Jones'), (2, 'Junjie', 'Cai'), (3, 'Xu', 'Zhang'), (4, 'Name1', None), (5, 'Name2', None), (6, 'Name3', 'FULLNAME3'), (7, 'Name4', None)]


也可以在execute函数中传入数据

In [52]:
df = DataFrame({'name':['Junjie','Xu'],'fullname':['Cai','Zhang']})
data = df.to_dict(orient='records')

ins = t.insert()
with engine.connect() as conn:
    result = conn.execute(ins, data) 
with engine.connect() as conn:
    print(conn.execute(t.select()).fetchall())

[(1, 'jack', 'Jack Jones'), (2, 'Junjie', 'Cai'), (3, 'Xu', 'Zhang'), (4, 'Name1', None), (5, 'Name2', None), (6, 'Name3', 'FULLNAME3'), (7, 'Name4', None), (8, 'Junjie', 'Cai'), (9, 'Xu', 'Zhang')]


如果插入数据时会使用所有的列，那么可以简化成直接用tuple list插入数据。但是这时就不能利用自动编号的id，而是要在传入数据时指定id。

In [56]:
list(df.to_records())

[(0, 'Cai', 'Junjie'), (1, 'Zhang', 'Xu')]

In [None]:
data = [
    (10,'Cai','Junjie'),
    (11,'Zhang','Xu')
]
ins = t.insert().values(data)

with engine.connect() as conn:
    result = conn.execute(ins)
    
with engine.connect() as conn:
    print(conn.execute(t.select()).fetchall())

但是插入数据时手动指定id的话，自动编号的状态似乎并不会做出相应的调整，而是继续从10开始，与上面手动插入的id冲突，导致报错。因此这种方式适合一次性将整张表导入数据库。

In [58]:
df = DataFrame({'name':['Junjie','Xu'],'fullname':['Cai','Zhang']})
data = df.to_dict(orient='records')

try:
    ins = t.insert().values(data)
    with engine.connect() as conn:
        result = conn.execute(ins)
    with engine.connect() as conn:
        print(conn.execute(t.select()).fetchall())
except Exception as e:
    print(e)

(psycopg2.IntegrityError) duplicate key value violates unique constraint "users_pkey"
DETAIL:  Key (id)=(10) already exists.
 [SQL: 'INSERT INTO test.users (name, fullname) VALUES (%(name_0)s, %(fullname_0)s), (%(name_1)s, %(fullname_1)s)'] [parameters: {'name_1': 'Xu', 'fullname_0': 'Cai', 'name_0': 'Junjie', 'fullname_1': 'Zhang'}]


## select功能

### 基本的select
选择全部的列

两种方式都可以, 可以观察生成的sql语句是什么

In [59]:
users = get_table(engine, table_name, schema='test')

s1 = users.select()

print(s1)

SELECT test.users.id, test.users.name, test.users.fullname 
FROM test.users


In [60]:
from sqlalchemy import select
s2 = select([users])
print(s2)

SELECT test.users.id, test.users.name, test.users.fullname 
FROM test.users


看一下运行效果

In [61]:
with engine.connect() as conn:
    print(conn.execute(s1).fetchall())

[(1, 'jack', 'Jack Jones'), (2, 'Junjie', 'Cai'), (3, 'Xu', 'Zhang'), (4, 'Name1', None), (5, 'Name2', None), (6, 'Name3', 'FULLNAME3'), (7, 'Name4', None), (8, 'Junjie', 'Cai'), (9, 'Xu', 'Zhang'), (10, 'Cai', 'Junjie'), (11, 'Zhang', 'Xu')]


也可以直接在engine中传入object

In [62]:
print(engine.execute(s1).fetchall())

[(1, 'jack', 'Jack Jones'), (2, 'Junjie', 'Cai'), (3, 'Xu', 'Zhang'), (4, 'Name1', None), (5, 'Name2', None), (6, 'Name3', 'FULLNAME3'), (7, 'Name4', None), (8, 'Junjie', 'Cai'), (9, 'Xu', 'Zhang'), (10, 'Cai', 'Junjie'), (11, 'Zhang', 'Xu')]


注意除了传入object,也可以直接传入字符串形式的sql语句

In [65]:
sql = users.select().compile(engine, compile_kwargs={"literal_binds": True})

print(sql)

SELECT test.users.id, test.users.name, test.users.fullname 
FROM test.users


In [66]:
print(engine.execute(sql).fetchall())

[(1, 'jack', 'Jack Jones'), (2, 'Junjie', 'Cai'), (3, 'Xu', 'Zhang'), (4, 'Name1', None), (5, 'Name2', None), (6, 'Name3', 'FULLNAME3'), (7, 'Name4', None), (8, 'Junjie', 'Cai'), (9, 'Xu', 'Zhang'), (10, 'Cai', 'Junjie'), (11, 'Zhang', 'Xu')]


## 选择个别列
可以在select的list中指定

In [69]:
s = select(
    [
        users.c.id,
        users.c.fullname,
    ]
)

print(s.compile(engine, compile_kwargs={"literal_binds": True}))

SELECT test.users.id, test.users.fullname 
FROM test.users


## 改名

In [71]:
s = select(
    [
        users.c.id.label('user_id'),
        users.c.name.label('user_name'),
    ]
)

print(s.compile(engine, compile_kwargs={"literal_binds": True}))

with engine.connect() as conn:
    print(conn.execute(s).fetchall())

SELECT test.users.id AS user_id, test.users.name AS user_name 
FROM test.users
[(1, 'jack'), (2, 'Junjie'), (3, 'Xu'), (4, 'Name1'), (5, 'Name2'), (6, 'Name3'), (7, 'Name4'), (8, 'Junjie'), (9, 'Xu'), (10, 'Cai'), (11, 'Zhang')]


## 添加常数列

In [101]:
from sqlalchemy import literal, text

s = select(
    [
        users.c.id.label('user_id'),
        literal('AAAAAA').label('constant'),
        text('NULL as null_bar')   
    ]
)

print(s.compile(engine, compile_kwargs={"literal_binds": True}))

with engine.connect() as conn:
    print(conn.execute(s).fetchall())

SELECT test.users.id AS user_id, 'AAAAAA' AS constant, NULL as null_bar 
FROM test.users
[(1, 'AAAAAA', None), (2, 'AAAAAA', None)]


## 应用函数
可以使用func.func_name的形式应用函数,使用的时候只需要导入func模块即可

In [74]:
from sqlalchemy import func

s = select(
    [
        users.c.id.label('user_id'),
        func.upper(users.c.name).label('user_name'),
    ]
)

print(s.compile(engine, compile_kwargs={"literal_binds": True}))

with engine.connect() as conn:
    print(conn.execute(s).fetchall())

SELECT test.users.id AS user_id, upper(test.users.name) AS user_name 
FROM test.users
[(1, 'JACK'), (2, 'JUNJIE'), (3, 'XU'), (4, 'NAME1'), (5, 'NAME2'), (6, 'NAME3'), (7, 'NAME4'), (8, 'JUNJIE'), (9, 'XU'), (10, 'CAI'), (11, 'ZHANG')]


注意应用函数的时候，label要放在函数之外使用，否则是无效的，这是一个容易犯的错误。可以看到下面的例子里，name列采用了自动命名。

In [75]:
from sqlalchemy import func

s = select(
    [
        users.c.id.label('user_id'),
        func.upper(users.c.name.label('user_name')),
    ]
)

print(s.compile(engine, compile_kwargs={"literal_binds": True}))


SELECT test.users.id AS user_id, upper(test.users.name) AS upper_1 
FROM test.users


### 添加where条件

初始化数据库状态

In [110]:
def reset_db(engine):
    """Recreate ``test.users`` and ``test.addresses`` with fixed sample rows.

    Both tables are dropped (if present) and rebuilt, then two users and
    four addresses are inserted.

    Args:
        engine: Passed to the table helpers and used to run the inserts.
    """
    # Start from a clean slate: users first, then addresses.
    for stale_table in ('users', 'addresses'):
        drop_table(engine, stale_table, schema = 'test')

    # --- users -----------------------------------------------------------
    users_columns = [
        (['id', Integer], {'primary_key': True}),
        (['name', String(20)], {}),
        (['fullname', String(20)], {}),
    ]
    create_table(engine, 'users', users_columns, schema = 'test')
    users_table = get_table(engine, 'users', schema = 'test')

    # Tuples supply every column positionally, including the explicit id.
    user_rows = [
        (1, 'jack', 'Jack Jones'),
        (2, 'wendy', 'Wendy Williams'),
    ]
    engine.execute(users_table.insert().values(user_rows))

    # --- addresses -------------------------------------------------------
    addresses_columns = [
        (['id', Integer], {'primary_key': True}),
        (['user_id', Integer], {}),
        (['email_address', String], {'nullable': False}),
    ]
    create_table(engine, 'addresses', addresses_columns, schema = 'test')
    addresses_table = get_table(engine, 'addresses', schema = 'test')

    # Dicts omit ``id`` so it is not supplied explicitly here.
    address_rows = [
        {'user_id': 1, 'email_address': 'jack@yahoo.com'},
        {'user_id': 1, 'email_address': 'jack@msn.com'},
        {'user_id': 2, 'email_address': 'www@www.org'},
        {'user_id': 2, 'email_address': 'wendy@aol.com'},
    ]
    engine.execute(addresses_table.insert().values(address_rows))
    
reset_db(engine)

In [111]:
users = get_table(engine, 'users', schema = 'test')

In [112]:
from sqlalchemy import select
reset_db(engine)

users = get_table(engine, 'users', schema = 'test')
addresses = get_table(engine, 'addresses', schema = 'test')

s = select([users,addresses]).where(users.c.id == addresses.c.user_id)

for row in engine.execute(s):
    print(row)

(1, 'jack', 'Jack Jones', 1, 1, 'jack@yahoo.com')
(1, 'jack', 'Jack Jones', 2, 1, 'jack@msn.com')
(2, 'wendy', 'Wendy Williams', 3, 2, 'www@www.org')
(2, 'wendy', 'Wendy Williams', 4, 2, 'wendy@aol.com')


条件表达式本身也是object

In [113]:
users.c.id == addresses.c.user_id

<sqlalchemy.sql.elements.BinaryExpression object at 0x7f5829b9a358>

可以观察转成string以后的效果

In [88]:
str((users.c.id == addresses.c.user_id).compile(engine, compile_kwargs={"literal_binds": True}))

'test.users.id = test.addresses.user_id'

也可以利用其他的条件。这里实际上sqlalchemy将object中的`__eq__`等比较用的特殊函数覆盖掉了，因此==的含义发生了改变

In [89]:
str((users.c.id > addresses.c.user_id).compile(engine, compile_kwargs={"literal_binds": True}))

'test.users.id > test.addresses.user_id'

In [90]:
str((users.c.id != addresses.c.user_id).compile(engine, compile_kwargs={"literal_binds": True}))

'test.users.id != test.addresses.user_id'

In [91]:
str((users.c.id == None).compile(engine, compile_kwargs={"literal_binds": True}))

'test.users.id IS NULL'

但并不是所有的特殊函数都进行了处理，例如想表达in, 是不能用python的in的

In [92]:
(users.c.id in [1,2,3])

False

而是应该用object本身提供的函数in_

In [93]:
str((users.c.id.in_([1,2,3])).compile(engine, compile_kwargs={"literal_binds": True}))

'test.users.id IN (1, 2, 3)'

between

In [94]:
str((users.c.id.between(1,3)).compile(engine, compile_kwargs={"literal_binds": True}))

'test.users.id BETWEEN 1 AND 3'

字符串匹配like

In [95]:
str((users.c.name.like('C%')).compile(engine, compile_kwargs={"literal_binds": True}))

"test.users.name LIKE 'C%'"

### 字符串连接

In [96]:
str((users.c.name+users.c.fullname).compile(engine, compile_kwargs={"literal_binds": True}))

'test.users.name || test.users.fullname'

### 数字相加

In [97]:
str((users.c.id+addresses.c.id).compile(engine, compile_kwargs={"literal_binds": True}))

'test.users.id + test.addresses.id'

### 特殊operator

如果有一些非常规的operator，总是可以用.op和字符串去实现

In [98]:
str((users.c.id.op('special_operator')('foo')).compile(engine, compile_kwargs={"literal_binds": True}))

"test.users.id special_operator 'foo'"

### 自定义operator
[这里](http://docs.sqlalchemy.org/en/rel_1_1/core/custom_types.html#types-operators)，不过暂时懒得看

In [122]:
## 带参数的where条件

target_id = 6

def get_select(target_id):
    """Build a SELECT of ``users.name`` filtered to ``users.id == target_id``.

    Uses the module-level ``users`` table object.

    Args:
        target_id: Value compared against the ``id`` column.

    Returns:
        The select construct; it is not executed here.
    """
    name_only = select([users.c.name])
    return name_only.where(users.c.id == target_id)

print(get_select(target_id).compile(engine, compile_kwargs={"literal_binds": True}))


SELECT test.users.name 
FROM test.users 
WHERE test.users.id = 6


## 逻辑连词

In [105]:
from sqlalchemy.sql import and_, or_, not_

users = get_table(engine,'users', schema = 'test')
addresses = get_table(engine,'addresses', schema = 'test')

s = and_(
    users.c.name.like('j%'),
    users.c.id == addresses.c.user_id,
    or_(
        addresses.c.email_address == 'wendy@aol.com',
        addresses.c.email_address == 'jack@yahoo.com'
    ),
    not_(users.c.id > 5)
)
str((s).compile(engine, compile_kwargs={"literal_binds": True}))

"test.users.name LIKE 'j%' AND test.users.id = test.addresses.user_id AND (test.addresses.email_address = 'wendy@aol.com' OR test.addresses.email_address = 'jack@yahoo.com') AND test.users.id <= 5"

连续多个where连用也可以起到and的效果

In [106]:
s=(select([users])
.where(users.c.name.like('j%'))
.where(users.c.id == addresses.c.user_id)
  )

str((s).compile(engine, compile_kwargs={"literal_binds": True}))

"SELECT test.users.id, test.users.name, test.users.fullname \nFROM test.users, test.addresses \nWHERE test.users.name LIKE 'j%' AND test.users.id = test.addresses.user_id"

也可以用python的&,|,~等逻辑连接符号代替and_(), or_(), not_, 不过如果是要实现比较复杂的条件,这种方法从可读性上并没有什么优势

In [107]:
# Python's &, | and ~ bind more tightly than comparison operators such as
# == and >, so every comparison must sit in its own parentheses. Without
# them, `a & b == c & d` is parsed as `(a & b) == (c & d)`.
s = (
    users.c.name.like('j%') &
    (users.c.id == addresses.c.user_id) &
    (
        (addresses.c.email_address == 'wendy@aol.com') |
        (addresses.c.email_address == 'jack@yahoo.com')
    ) &
    (~(users.c.id > 5))
)
str((s).compile(engine, compile_kwargs={"literal_binds": True}))

"test.users.name LIKE 'j%' AND test.users.id = test.addresses.user_id AND (test.addresses.email_address = 'wendy@aol.com' OR test.addresses.email_address = 'jack@yahoo.com') AND test.users.id <= 5"

## 使用已有的parameterized sql模板
不像sql语句那样可以直接在execute中使用,不过利用text()处理后传入,数据可以通过execute的keyword parameter传入

In [108]:
s = (
"SELECT users.fullname || ', ' || addresses.email_address AS title "
    "FROM users, addresses "
    "WHERE users.id = addresses.user_id "
    "AND users.name BETWEEN :x AND :y "
    "AND (addresses.email_address LIKE :e1 "
         "OR addresses.email_address LIKE :e2)")
try:
    engine.execute(s, x='m', y='z', e1='%@aol.com', e2='%@msn.com').fetchall()
except Exception as e:
    print(e)

(psycopg2.ProgrammingError) syntax error at or near ":"
LINE 1: ...ers.id = addresses.user_id AND users.name BETWEEN :x AND :y ...
                                                             ^
 [SQL: "SELECT users.fullname || ', ' || addresses.email_address AS title FROM users, addresses WHERE users.id = addresses.user_id AND users.name BETWEEN :x AND :y AND (addresses.email_address LIKE :e1 OR addresses.email_address LIKE :e2)"] [parameters: {'y': 'z', 'e1': '%@aol.com', 'x': 'm', 'e2': '%@msn.com'}]


In [109]:
from sqlalchemy.sql import text
sql = text(s)

engine.execute(sql, x='m', y='z', e1='%@aol.com', e2='%@msn.com').fetchall()

[('Wendy Williams, wendy@aol.com',)]

In [123]:
from pandas import DataFrame

In [124]:
df_1 = DataFrame({'A':[1,1,1,2,2]})
df_2 = DataFrame({'A':[1,1,1,2,2]})

df_1.merge(df_2, on = 'A')

Unnamed: 0,A
0,1
1,1
2,1
3,1
4,1
5,1
6,1
7,1
8,1
9,2


In [143]:
import pandas as pd
df_1 = DataFrame({'A':['1','2','3','4'],'B':['b','b','b','b']})
df_2 = DataFrame({'A':['1','2','3','4'],'C':['c','c','c','c']})

pd.concat([df.set_index('A') for df in [df_1, df_2]], axis=1).reset_index()

Unnamed: 0,A,B,C
0,1,b,c
1,2,b,c
2,3,b,c
3,4,b,c
