## Creating tables using Pandas to_sql. Values are inserting at same time.

In [1]:
import sqlalchemy as db
import pandas as pd
import os

## Connecting to database

In [2]:
engine = db.create_engine('postgresql://localhost:5432/sql_test')
connection = engine.connect()
metadata = db.MetaData()

In [3]:
#to find all the tables for the database
engine.table_names()

  engine.table_names()


[]

# Creating Table

### 2.


# Creating tables and nserting values in tables using csv files

In [4]:
#For table titles
path_titles=os.path.join('.','data', 'titles.csv')

with open(path_titles, 'r') as tit:
    df_titles = pd.read_csv(tit)
df_titles.to_sql('titles', con=engine, index=False, if_exists='replace')


#For table departments
path_dept=os.path.join('.','data', 'departments.csv')

with open(path_dept, 'r') as dept:
    df_dept = pd.read_csv(dept)
df_dept.to_sql('departments', con=engine, index=False, if_exists='replace')

#For table employees
path_emp=os.path.join('.','data', 'employees.csv')

with open(path_emp, 'r') as emp:
    df_emp = pd.read_csv(emp)
df_emp.to_sql('employees', con=engine, index=False, if_exists='replace')


#For table dept_emp
path_dept_emp=os.path.join('.','data', 'dept_emp.csv')

with open(path_dept_emp, 'r') as dept_e:
    df_dept_emp = pd.read_csv(dept_e)
df_dept_emp.to_sql('dept_emp', con=engine, index=False, if_exists='replace')

#For table dept_manager
path_dept_manager=os.path.join('.','data', 'dept_manager.csv')

with open(path_dept_manager, 'r') as dept_man:
    df_dept_manager = pd.read_csv(dept_man)
df_dept_manager.to_sql('dept_manager', con=engine, index=False, if_exists='replace')

#For table salaries
path_salaries=os.path.join('.','data', 'salaries.csv')

with open(path_salaries, 'r') as sal:
    df_salaries = pd.read_csv(sal)
df_salaries.to_sql('salaries', con=engine, index=False, if_exists='replace')

In [41]:
#to find all the tables for the database
engine.table_names()

  engine.table_names()


['titles', 'departments', 'employees', 'dept_emp', 'dept_manager', 'salaries']

In [6]:
#getting the tables from database
titles = db.Table('titles', metadata, autoload=True, autoload_with=engine)
departments = db.Table('departments', metadata, autoload=True, autoload_with=engine)
employees = db.Table('employees', metadata, autoload=True, autoload_with=engine)
dept_emp = db.Table('dept_emp', metadata, autoload=True, autoload_with=engine)
dept_manager = db.Table('dept_manager', metadata, autoload=True, autoload_with=engine)
salaries = db.Table('salaries', metadata, autoload=True, autoload_with=engine)

In [7]:
# Print full table metadata for employees table
print(repr(metadata.tables['employees']))

Table('employees', MetaData(), Column('emp_no', BIGINT(), table=<employees>), Column('emp_title_id', TEXT(), table=<employees>), Column('birth_date', TEXT(), table=<employees>), Column('first_name', TEXT(), table=<employees>), Column('last_name', TEXT(), table=<employees>), Column('sex', TEXT(), table=<employees>), Column('hire_date', TEXT(), table=<employees>), schema=None)


In [8]:
# Print the column names for table employees
employees.columns.keys()

['emp_no',
 'emp_title_id',
 'birth_date',
 'first_name',
 'last_name',
 'sex',
 'hire_date']

In [9]:
#can also use c instead of columns
employees.c.keys()

['emp_no',
 'emp_title_id',
 'birth_date',
 'first_name',
 'last_name',
 'sex',
 'hire_date']

# Querying


### 1.

In [10]:
### sql
sql_1= '''SELECT e.emp_no, e.last_name, e.first_name, e.sex, s.salary 
       FROM employees AS e
       JOIN salaries AS s
       ON e.emp_no=s.emp_no'''

In [11]:
#using text
results_1=connection.execute(db.text(sql_1)).fetchall()
df_1 = pd.DataFrame(results_1, columns=(['Emp_No', 'Last_Name', 
                                         "First_Name",'Sex','Salary']))
df_1


Unnamed: 0,Emp_No,Last_Name,First_Name,Sex,Salary
0,13616,Lorho,Perry,F,40000
1,240129,Karnin,Roddy,M,43548
2,43471,Gihr,Mechthild,F,40023
3,444985,Verspoor,Giap,F,71042
4,492723,Sanella,Piyawadee,M,46740
...,...,...,...,...,...
300019,224534,Porenta,Adib,F,46695
300020,49941,Kuzuoka,Navid,M,41371
300021,433619,Valiente,Youssef,F,40000
300022,222402,Klyachko,Jaana,F,40000


In [12]:
#using sql_alchemy
res_1=connection.execute(db.select([employees.c.emp_no,
                                   employees.c.last_name,
                                   employees.c.first_name,
                                   employees.c.sex,
                                   salaries.c.salary])
                         .join(salaries, employees.c.emp_no==salaries.c.emp_no)
                         ).fetchall()
df_alc1 = pd.DataFrame(res_1, columns=(['Emp_No', 'Last_Name', 
                                         "First_Name",'Sex','Salary']))
df_alc1

Unnamed: 0,Emp_No,Last_Name,First_Name,Sex,Salary
0,13616,Lorho,Perry,F,40000
1,240129,Karnin,Roddy,M,43548
2,43471,Gihr,Mechthild,F,40023
3,444985,Verspoor,Giap,F,71042
4,492723,Sanella,Piyawadee,M,46740
...,...,...,...,...,...
300019,224534,Porenta,Adib,F,46695
300020,49941,Kuzuoka,Navid,M,41371
300021,433619,Valiente,Youssef,F,40000
300022,222402,Klyachko,Jaana,F,40000


### 2.

In [39]:
sql_2='''SELECT first_name, last_name, hire_date
        FROM employees
        WHERE hire_date BETWEEN '1/1/1986' AND '12/31/1986' '''


In [40]:
#using text
results_2=connection.execute(db.text(sql_2)).fetchall()
df_2 = pd.DataFrame(results_2, columns=(['first_name', 'last_name', 'hire_date']))
df_2

Unnamed: 0,first_name,last_name,hire_date
0,Xiong,Verhoeff,11/26/1987
1,Abdelkader,Baumann,1/18/1991
2,Eran,Cusworth,11/14/1986
3,Xudong,Samarati,11/13/1985
4,Lihong,Magliocco,10/23/1993
...,...,...,...
91514,Fay,Perz,12/19/1988
91515,Chenyi,Orlowska,12/25/1986
91516,Jagoda,Molberg,1/2/1987
91517,Constantino,Eastman,10/28/1988


In [38]:
#using sqlalchemy
res_2=connection.execute(db.select([employees.c.first_name,
                                   employees.c.last_name,
                                   employees.c.hire_date])
                        .where(employees.c.hire_date.between('1/1/1986','12/31/1986'))
                        ).fetchall()
df_alc2 = pd.DataFrame(res_2, columns=(['first_name', 'last_name', 'hire_date']))
df_alc2

Unnamed: 0,first_name,last_name,hire_date
0,Xiong,Verhoeff,11/26/1987
1,Abdelkader,Baumann,1/18/1991
2,Eran,Cusworth,11/14/1986
3,Xudong,Samarati,11/13/1985
4,Lihong,Magliocco,10/23/1993
...,...,...,...
91514,Fay,Perz,12/19/1988
91515,Chenyi,Orlowska,12/25/1986
91516,Jagoda,Molberg,1/2/1987
91517,Constantino,Eastman,10/28/1988


### 3.

In [16]:
sql_3='''SELECT  d.dept_no, d.dept_name, e.emp_no, e.last_name, e.first_name 
        FROM departments AS d
        JOIN dept_manager AS dm
        ON d.dept_no=dm.dept_no
        JOIN employees AS e
        ON dm.emp_no=e.emp_no'''

In [17]:
#using text
results_3=connection.execute(db.text(sql_3)).fetchall()
df_3 = pd.DataFrame(results_3, columns=(['dept_no', 'dept_name', 
                                         'emp_no', 'last_name', 'first_name']))
df_3

Unnamed: 0,dept_no,dept_name,emp_no,last_name,first_name
0,d004,Production,110303,Wegerle,Krassimir
1,d006,Quality Management,110800,Quadeer,Sanjoy
2,d006,Quality Management,110854,Pesch,Dung
3,d007,Sales,111035,Kaelbling,Przemyslawa
4,d008,Research,111400,Staelin,Arie
5,d009,Customer Service,111692,Butterworth,Tonny
6,d003,Human Resources,110228,Sigstam,Karsten
7,d006,Quality Management,110725,Onuegbe,Peternela
8,d003,Human Resources,110183,Ossenbruggen,Shirish
9,d004,Production,110344,Cools,Rosine


In [18]:
#using sql_alchemy
res_3=connection.execute(db.select([departments.c.dept_no,
                                    departments.c.dept_name,
                                    employees.c.emp_no,
                                   employees.c.last_name,
                                   employees.c.first_name])
                         .join(dept_manager, departments.c.dept_no==dept_manager.c.dept_no)
                         .join(employees, dept_manager.c.emp_no==employees.c.emp_no)
                         ).fetchall()
df_alc3 = pd.DataFrame(res_3, columns=(['dept_no', 'dept_name', 
                                         'emp_no', 'last_name', 'first_name']))
df_alc3

Unnamed: 0,dept_no,dept_name,emp_no,last_name,first_name
0,d004,Production,110303,Wegerle,Krassimir
1,d006,Quality Management,110800,Quadeer,Sanjoy
2,d006,Quality Management,110854,Pesch,Dung
3,d007,Sales,111035,Kaelbling,Przemyslawa
4,d008,Research,111400,Staelin,Arie
5,d009,Customer Service,111692,Butterworth,Tonny
6,d003,Human Resources,110228,Sigstam,Karsten
7,d006,Quality Management,110725,Onuegbe,Peternela
8,d003,Human Resources,110183,Ossenbruggen,Shirish
9,d004,Production,110344,Cools,Rosine


### 4.

In [19]:
sql_4='''SELECT e.emp_no, e.last_name, e.first_name, d.dept_name
        FROM employees AS e
        JOIN dept_emp AS de
        ON e.emp_no=de.emp_no
        JOIN departments AS d
        on de.dept_no=d.dept_no'''

In [20]:
#using text
results_4=connection.execute(db.text(sql_4)).fetchall()
df_4 = pd.DataFrame(results_4, columns=(['emp_no', 'last_name', 
                                         'first_name', 'dept_name']))
df_4

Unnamed: 0,emp_no,last_name,first_name,dept_name
0,10005,Maliniak,Kyoichi,Human Resources
1,10010,Piveteau,Duangkaew,Production
2,10010,Piveteau,Duangkaew,Quality Management
3,10011,Sluis,Mary,Customer Service
4,10013,Terkki,Eberhardt,Human Resources
...,...,...,...,...
331598,499948,Paludetto,Cordelia,Production
331599,499972,Leuchs,Katsuo,Development
331600,499985,Lukaszewicz,Gila,Research
331601,499987,Dusink,Rimli,Sales


In [21]:
#using sql_alchemy
res_4=connection.execute(db.select([employees.c.emp_no,
                                   employees.c.last_name,
                                   employees.c.first_name,
                                   departments.c.dept_name])
                         .join(dept_emp, employees.c.emp_no==dept_emp.c.emp_no)
                         .join(departments, dept_emp.c.dept_no==departments.c.dept_no)
                         ).fetchall()
df_alc4 = pd.DataFrame(res_4, columns=(['emp_no', 'last_name', 
                                         'first_name', 'dept_name']))
df_alc4

Unnamed: 0,emp_no,last_name,first_name,dept_name
0,10005,Maliniak,Kyoichi,Human Resources
1,10010,Piveteau,Duangkaew,Production
2,10010,Piveteau,Duangkaew,Quality Management
3,10011,Sluis,Mary,Customer Service
4,10013,Terkki,Eberhardt,Human Resources
...,...,...,...,...
331598,499948,Paludetto,Cordelia,Production
331599,499972,Leuchs,Katsuo,Development
331600,499985,Lukaszewicz,Gila,Research
331601,499987,Dusink,Rimli,Sales


### 5.

In [22]:
sql_5='''SELECT first_name, last_name, sex
        FROM employees
        WHERE first_name='Hercules' AND last_name LIKE'B%' '''

In [23]:
#using text
results_5=connection.execute(db.text(sql_5)).fetchall()
df_5 = pd.DataFrame(results_5, columns=(['first_name', 'last_name', 'sex']))
df_5

Unnamed: 0,first_name,last_name,sex
0,Hercules,Baer,M
1,Hercules,Biron,F
2,Hercules,Birge,F
3,Hercules,Bodoff,M
4,Hercules,Berstel,F
5,Hercules,Bernatsky,M
6,Hercules,Bail,F
7,Hercules,Benantar,F
8,Hercules,Bernardinello,F
9,Hercules,Basagni,M


In [24]:
#using sqlalchemy 
res_5=connection.execute(db.select([employees.c.first_name,
                                    employees.c.last_name, employees.c.sex])
                        .where(db.and_(employees.c.first_name=='Hercules', 
                                           employees.c.last_name.like('B%')))
                        ).fetchall()
df_alc5 = pd.DataFrame(res_5, columns=(['first_name', 'last_name', 'sex']))
df_alc5

Unnamed: 0,first_name,last_name,sex
0,Hercules,Baer,M
1,Hercules,Biron,F
2,Hercules,Benantar,F
3,Hercules,Birge,F
4,Hercules,Berstel,F
5,Hercules,Bernatsky,M
6,Hercules,Bail,F
7,Hercules,Bodoff,M
8,Hercules,Basagni,M
9,Hercules,Baranowski,M


### 6.

In [25]:
sql_6='''SELECT e.emp_no, e.last_name, e.first_name, e.sex, d.dept_name
        FROM employees AS e
        JOIN dept_emp AS de
        ON e.emp_no=de.emp_no
        JOIN departments AS d
        ON de.dept_no=d.dept_no
        WHERE d.dept_name='Sales' '''

In [26]:
#using text
results_6=connection.execute(db.text(sql_6)).fetchall()
df_6 = pd.DataFrame(results_6, columns=(['emp_no', 'last_name', 
                                         'first_name','sex','dept_name']))
df_6

Unnamed: 0,emp_no,last_name,first_name,sex,dept_name
0,246449,Bultermann,Subbu,F,Sales
1,205246,Demizu,Nevio,F,Sales
2,476443,Asmuth,Ziya,M,Sales
3,424270,Yoshizawa,Kellyn,F,Sales
4,280408,Perl,Elliott,M,Sales
...,...,...,...,...,...
52240,99439,Manders,Mahmut,M,Sales
52241,464231,Eastman,Constantino,M,Sales
52242,76671,Plessier,Ortrud,M,Sales
52243,264920,Samarati,Percy,F,Sales


In [27]:
#using sql_alchemy
res_6=connection.execute(db.select([employees.c.emp_no,
                                   employees.c.last_name,
                                   employees.c.first_name,
                                   employees.c.sex,
                                   departments.c.dept_name])
                         .join(dept_emp, employees.c.emp_no==dept_emp.c.emp_no)
                         .join(departments, dept_emp.c.dept_no==departments.c.dept_no)
                         .where(departments.c.dept_name=='Sales')
                         ).fetchall()
df_alc6 = pd.DataFrame(res_6, columns=(['emp_no', 'last_name', 
                                         'first_name','sex','dept_name']))
df_alc6

Unnamed: 0,emp_no,last_name,first_name,sex,dept_name
0,414537,Businaro,Chikara,M,Sales
1,246449,Bultermann,Subbu,F,Sales
2,205246,Demizu,Nevio,F,Sales
3,476443,Asmuth,Ziya,M,Sales
4,424270,Yoshizawa,Kellyn,F,Sales
...,...,...,...,...,...
52240,268515,Maksimenko,Yishay,F,Sales
52241,99439,Manders,Mahmut,M,Sales
52242,464231,Eastman,Constantino,M,Sales
52243,76671,Plessier,Ortrud,M,Sales


### 7.

In [28]:
sql_7='''SELECT e.emp_no, e.last_name, e.first_name, e.sex, d.dept_name
        FROM employees AS e
        JOIN dept_emp AS de
        ON e.emp_no=de.emp_no
        JOIN departments AS d
        ON de.dept_no=d.dept_no
        WHERE d.dept_name='Sales' OR d.dept_name='Development' '''

In [29]:
#using text
results_7=connection.execute(db.text(sql_7)).fetchall()
df_7 = pd.DataFrame(results_7, columns=(['emp_no', 'last_name', 
                                         'first_name','sex','dept_name']))
df_7

Unnamed: 0,emp_no,last_name,first_name,sex,dept_name
0,414537,Businaro,Chikara,M,Sales
1,13616,Lorho,Perry,F,Development
2,35916,Talmon,Wonhee,F,Development
3,444985,Verspoor,Giap,F,Sales
4,492723,Sanella,Piyawadee,M,Development
...,...,...,...,...,...
137947,424641,Terlouw,Shridhar,M,Development
137948,439921,Etalle,Adil,M,Sales
137949,266082,Cichocki,Randi,M,Development
137950,216934,Krzyzanowski,Nagui,M,Development


In [30]:
#using sql_alchemy
res_7=connection.execute(db.select([employees.c.emp_no,
                                   employees.c.last_name,
                                   employees.c.first_name,
                                   employees.c.sex,
                                   departments.c.dept_name])
                         .join(dept_emp, employees.c.emp_no==dept_emp.c.emp_no)
                         .join(departments, dept_emp.c.dept_no==departments.c.dept_no)
                         .where(db.or_(departments.c.dept_name=='Sales', 
                                      departments.c.dept_name=='Development'))
                         ).fetchall()
df_alc7 = pd.DataFrame(res_7, columns=(['emp_no', 'last_name', 
                                         'first_name','sex','dept_name']))
df_alc7

Unnamed: 0,emp_no,last_name,first_name,sex,dept_name
0,109600,Chvatal,Xuejun,M,Sales
1,98882,Vanwelkenhuysen,Yishay,F,Development
2,439788,Setia,Irene,F,Development
3,207791,Schmittgen,Jahangir,M,Development
4,205714,Ellozy,Anwar,M,Development
...,...,...,...,...,...
137947,200714,Sudbeck,JoAnne,M,Sales
137948,454597,Wuwongse,Reinhard,M,Sales
137949,234456,Haldar,Xiaoshan,M,Development
137950,425320,Ranai,Premsyl,F,Development


### 8.

In [31]:
sql_8='''SELECT last_name, COUNT(last_name) AS count
        FROM employees
        GROUP BY last_name
        ORDER BY count DESC'''

In [32]:
#using text
results_8=connection.execute(db.text(sql_8)).fetchall()
df_8 = pd.DataFrame(results_8, columns=(['last_name','count']))
df_8

Unnamed: 0,last_name,count
0,Baba,226
1,Gelosh,223
2,Coorg,223
3,Sudbeck,222
4,Farris,222
...,...,...
1633,Zykh,148
1634,Guardalben,148
1635,Merro,147
1636,Sadowsky,145


In [33]:
#using sqlalchemy 
res_8=connection.execute(db.select([employees.c.last_name,
                                    db.func.count(employees.c.last_name)])
                        .group_by(employees.c.last_name)
                        .order_by(db.desc(db.func.count(employees.c.last_name)))
                        ).fetchall()
df_alc8 = pd.DataFrame(res_8, columns=(['last_name','count']))
df_alc8

Unnamed: 0,last_name,count
0,Baba,226
1,Coorg,223
2,Gelosh,223
3,Farris,222
4,Sudbeck,222
...,...,...
1633,Georgatos,148
1634,Guardalben,148
1635,Merro,147
1636,Sadowsky,145


In [34]:
print(repr(metadata.tables['salaries']))

Table('salaries', MetaData(), Column('emp_no', BIGINT(), table=<salaries>), Column('salary', BIGINT(), table=<salaries>), schema=None)
