In [23]:
# import python libs
import pandas as pd
import numpy as np

In [31]:
# define the database connection string
DB_HOST = '86.119.36.94' 
DB_PORT = '5432'
DB_DBNAME = 'bank_db' # or warenkorb_db
DB_USERNAME = 'db_user' 
DB_PASSWORD = 'db_user_pw' 
db_str = 'postgresql://{username}:{password}@{host}:{port}/{dbname}'.format(username=DB_USERNAME,password=DB_PASSWORD,host=DB_HOST,port=DB_PORT,dbname=DB_DBNAME)
print(db_str)

postgresql://db_user:db_user_pw@86.119.36.94:5432/bank_db


In [32]:
# use plain SQL to query database

# load dependency
%load_ext sql
# set the connection string
%sql $db_str

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


'Connected: db_user@bank_db'

In [33]:
# extract info about tables in database bank_db
# %sql SELECT * FROM pg_catalog.pg_tables WHERE schemaname = 'public'
tables = %sql SELECT * FROM information_schema.tables WHERE table_type = 'BASE TABLE' AND table_schema = 'public' ORDER BY table_type, table_name
tables

 * postgresql://db_user:***@86.119.36.94:5432/bank_db
   postgresql://db_user:***@86.119.36.94:5432/warenkorb_db
8 rows affected.


table_catalog,table_schema,table_name,table_type,self_referencing_column_name,reference_generation,user_defined_type_catalog,user_defined_type_schema,user_defined_type_name,is_insertable_into,is_typed,commit_action
bank_db,public,account,BASE TABLE,,,,,,YES,NO,
bank_db,public,card,BASE TABLE,,,,,,YES,NO,
bank_db,public,client,BASE TABLE,,,,,,YES,NO,
bank_db,public,disp,BASE TABLE,,,,,,YES,NO,
bank_db,public,district,BASE TABLE,,,,,,YES,NO,
bank_db,public,loan,BASE TABLE,,,,,,YES,NO,
bank_db,public,orders,BASE TABLE,,,,,,YES,NO,
bank_db,public,trans,BASE TABLE,,,,,,YES,NO,


In [34]:
# extract info about table
table_name_var = tables[0][2]
#print(table_nm)
%sql SELECT * FROM INFORMATION_SCHEMA.COLUMNS where table_name = :table_name_var

 * postgresql://db_user:***@86.119.36.94:5432/bank_db
   postgresql://db_user:***@86.119.36.94:5432/warenkorb_db
4 rows affected.


table_catalog,table_schema,table_name,column_name,ordinal_position,column_default,is_nullable,data_type,character_maximum_length,character_octet_length,numeric_precision,numeric_precision_radix,numeric_scale,datetime_precision,interval_type,interval_precision,character_set_catalog,character_set_schema,character_set_name,collation_catalog,collation_schema,collation_name,domain_catalog,domain_schema,domain_name,udt_catalog,udt_schema,udt_name,scope_catalog,scope_schema,scope_name,maximum_cardinality,dtd_identifier,is_self_referencing,is_identity,identity_generation,identity_start,identity_increment,identity_maximum,identity_minimum,identity_cycle,is_generated,generation_expression,is_updatable
bank_db,public,account,account_id,1,,NO,integer,,,32.0,2.0,0.0,,,,,,,,,,,,,bank_db,pg_catalog,int4,,,,,1,NO,NO,,,,,,NO,NEVER,,YES
bank_db,public,account,district_id,2,,YES,integer,,,32.0,2.0,0.0,,,,,,,,,,,,,bank_db,pg_catalog,int4,,,,,2,NO,NO,,,,,,NO,NEVER,,YES
bank_db,public,account,frequency,3,,YES,text,,1073741824.0,,,,,,,,,,,,,,,,bank_db,pg_catalog,text,,,,,3,NO,NO,,,,,,NO,NEVER,,YES
bank_db,public,account,date,4,,YES,date,,,,,,0.0,,,,,,,,,,,,bank_db,pg_catalog,date,,,,,4,NO,NO,,,,,,NO,NEVER,,YES


In [35]:
# query database's table account
values = %sql SELECT * FROM $table_name_var
# access accounts by index
print('Default selection: '+ str(values[0]))
# access accounts as pandas DataFrame by index
print('DataFrame based selection: '+ str(values.DataFrame().iloc[0]))

 * postgresql://db_user:***@86.119.36.94:5432/bank_db
   postgresql://db_user:***@86.119.36.94:5432/warenkorb_db
4500 rows affected.
Default selection: (576, 55, 'POPLATEK MESICNE', datetime.date(1993, 1, 1))
DataFrame based selection: account_id                  576
district_id                  55
frequency      POPLATEK MESICNE
date                 1993-01-01
Name: 0, dtype: object


In [36]:
# Use Pandas sql and store query results in pandas DataFrame
import pandas.io.sql as sql
import sqlalchemy

# create connection to database
engine = sqlalchemy.create_engine(db_str)
# query database's table account
values = sql.read_sql("SELECT * FROM "+table_name_var, engine)
print('DataFrame based selection: '+ str(values.iloc[0]))
# set index to first column (as in the database)
values.set_index(values.columns[0], inplace=True)
print('DataFrame based selection: '+ str(values.iloc[0]))
# print result
values

DataFrame based selection: account_id                  576
district_id                  55
frequency      POPLATEK MESICNE
date                 1993-01-01
Name: 0, dtype: object
DataFrame based selection: district_id                  55
frequency      POPLATEK MESICNE
date                 1993-01-01
Name: 576, dtype: object


Unnamed: 0_level_0,district_id,frequency,date
account_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
576,55,POPLATEK MESICNE,1993-01-01
3818,74,POPLATEK MESICNE,1993-01-01
704,55,POPLATEK MESICNE,1993-01-01
2378,16,POPLATEK MESICNE,1993-01-01
2632,24,POPLATEK MESICNE,1993-01-02
1972,77,POPLATEK MESICNE,1993-01-02
1539,1,POPLATEK PO OBRATU,1993-01-03
793,47,POPLATEK MESICNE,1993-01-03
2484,74,POPLATEK MESICNE,1993-01-03
1695,76,POPLATEK MESICNE,1993-01-03


In [37]:
# use postgres adapter to query database 

# install postgres dependency
import psycopg2 as pg

# create connection
connection = pg.connect(db_str)
cursor = connection.cursor()

# query database's table account
query = "SELECT * FROM "+ table_name_var
cursor.execute(query)

# print query result
accounts = cursor.fetchall() 
for row in accounts:
    print(row)
    
# close communication with the PostgreSQL database server
cursor.close()
# close the connection
connection.close()

(576, 55, 'POPLATEK MESICNE', datetime.date(1993, 1, 1))
(3818, 74, 'POPLATEK MESICNE', datetime.date(1993, 1, 1))
(704, 55, 'POPLATEK MESICNE', datetime.date(1993, 1, 1))
(2378, 16, 'POPLATEK MESICNE', datetime.date(1993, 1, 1))
(2632, 24, 'POPLATEK MESICNE', datetime.date(1993, 1, 2))
(1972, 77, 'POPLATEK MESICNE', datetime.date(1993, 1, 2))
(1539, 1, 'POPLATEK PO OBRATU', datetime.date(1993, 1, 3))
(793, 47, 'POPLATEK MESICNE', datetime.date(1993, 1, 3))
(2484, 74, 'POPLATEK MESICNE', datetime.date(1993, 1, 3))
(1695, 76, 'POPLATEK MESICNE', datetime.date(1993, 1, 3))
(1726, 48, 'POPLATEK MESICNE', datetime.date(1993, 1, 3))
(2881, 70, 'POPLATEK MESICNE', datetime.date(1993, 1, 4))
(2357, 19, 'POPLATEK MESICNE', datetime.date(1993, 1, 4))
(2177, 62, 'POPLATEK MESICNE', datetime.date(1993, 1, 4))
(485, 6, 'POPLATEK PO OBRATU', datetime.date(1993, 1, 4))
(652, 21, 'POPLATEK MESICNE', datetime.date(1993, 1, 5))
(9635, 70, 'POPLATEK MESICNE', datetime.date(1993, 1, 5))
(1844, 44, 'POPLA

(6075, 19, 'POPLATEK MESICNE', datetime.date(1996, 9, 15))
(1066, 16, 'POPLATEK TYDNE', datetime.date(1996, 9, 15))
(2275, 1, 'POPLATEK MESICNE', datetime.date(1996, 9, 16))
(5305, 47, 'POPLATEK MESICNE', datetime.date(1996, 9, 16))
(1329, 53, 'POPLATEK MESICNE', datetime.date(1996, 9, 16))
(3098, 1, 'POPLATEK MESICNE', datetime.date(1996, 9, 16))
(4349, 74, 'POPLATEK MESICNE', datetime.date(1996, 9, 16))
(2990, 30, 'POPLATEK MESICNE', datetime.date(1996, 9, 16))
(3800, 30, 'POPLATEK MESICNE', datetime.date(1996, 9, 16))
(3191, 72, 'POPLATEK MESICNE', datetime.date(1996, 9, 17))
(2076, 18, 'POPLATEK MESICNE', datetime.date(1996, 9, 17))
(1002, 50, 'POPLATEK MESICNE', datetime.date(1996, 9, 17))
(8381, 14, 'POPLATEK MESICNE', datetime.date(1996, 9, 17))
(1957, 33, 'POPLATEK MESICNE', datetime.date(1996, 9, 18))
(3569, 1, 'POPLATEK MESICNE', datetime.date(1996, 9, 18))
(10789, 75, 'POPLATEK MESICNE', datetime.date(1996, 9, 18))
(831, 44, 'POPLATEK MESICNE', datetime.date(1996, 9, 18))
(6