In [1]:
import sqlite3
import pandas as pd

# Connect to the DB, Read data

In [43]:
# Open the SQLite database file and create a cursor for running raw SQL.
# The relative path is resolved against the kernel's working directory.
DB_FILE = "HR.db"
connection = sqlite3.connect(DB_FILE)
cursor = connection.cursor()

In [16]:
# Table names and properties

# List every table registered in the SQLite schema catalog.
cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")

# Fetch the names up front: the same cursor is reused for the PRAGMA calls below.
tables = cursor.fetchall()

for table in tables:
    print(f"Table Name: {table[0]}")

# Check properties of the tables
for table in tables:
    table_name = table[0]
    print(f"\nProperties of table: {table_name}")

    # The name is interpolated into the statement text, so quote it as an
    # identifier (doubling any embedded double quote). Without quoting, a table
    # name containing spaces, punctuation or a keyword breaks the PRAGMA.
    quoted_name = '"' + table_name.replace('"', '""') + '"'
    cursor.execute(f"PRAGMA table_info({quoted_name});")

    # Each row is (cid, name, type, notnull, dflt_value, pk); cid is not printed.
    columns = cursor.fetchall()
    for _cid, name, ctype, notnull, dflt_value, pk in columns:
        print(f"Column Name: {name}, Type: {ctype}, Not Null: {notnull}, Default Value: {dflt_value}, Primary Key: {pk}")

Table Name: EMPLOYEES
Table Name: JOB_HISTORY
Table Name: JOBS
Table Name: DEPARTMENTS

Properties of table: EMPLOYEES
Column Name: EMP_ID, Type: CHAR(9), Not Null: 1, Default Value: None, Primary Key: 1
Column Name: F_NAME, Type: VARCHAR(15), Not Null: 1, Default Value: None, Primary Key: 0
Column Name: L_NAME, Type: VARCHAR(15), Not Null: 1, Default Value: None, Primary Key: 0
Column Name: SSN, Type: CHAR(9), Not Null: 0, Default Value: None, Primary Key: 0
Column Name: B_DATE, Type: DATE, Not Null: 0, Default Value: None, Primary Key: 0
Column Name: SEX, Type: CHAR, Not Null: 0, Default Value: None, Primary Key: 0
Column Name: ADDRESS, Type: VARCHAR(30), Not Null: 0, Default Value: None, Primary Key: 0
Column Name: JOB_ID, Type: CHAR(9), Not Null: 0, Default Value: None, Primary Key: 0
Column Name: SALARY, Type: DECIMAL(10,2), Not Null: 0, Default Value: None, Primary Key: 0
Column Name: MANAGER_ID, Type: CHAR(9), Not Null: 0, Default Value: None, Primary Key: 0
Column Name: DEP_ID,

# Analysis Based on More than One Table

In [8]:
# Information of the employee(s) whose job id is 600.
# The subquery filters on JOBS.JOB_IDENT. JOBS has no JOB_ID column, so a bare
# JOB_ID inside the subquery would silently bind to the outer EMPLOYEES.JOB_ID
# (an accidental correlated subquery) instead of filtering the JOBS table.
query = '''SELECT * FROM EMPLOYEES WHERE JOB_ID IN (SELECT JOB_IDENT FROM JOBS WHERE JOB_IDENT = 600);'''

# Run the query on the open connection and load the result into a DataFrame.
df = pd.read_sql_query(query, connection)
df

Unnamed: 0,EMP_ID,F_NAME,L_NAME,SSN,B_DATE,SEX,ADDRESS,JOB_ID,SALARY,MANAGER_ID,DEP_ID
0,E1006,Nancy,Allen,123411,1978-06-02,F,"111 Green Pl, Elgin,IL",600,90000,30001,2


In [9]:
# Employees whose job title is 'Sr. Designer': the subquery looks up the matching
# JOB_IDENT values in JOBS and the outer query keeps employees with those job ids.
query = (
    "SELECT * FROM EMPLOYEES WHERE JOB_ID IN "
    "(select JOB_IDENT from JOBS where JOB_TITLE = 'Sr. Designer');"
)

df = pd.read_sql_query(sql=query, con=connection)
df

Unnamed: 0,EMP_ID,F_NAME,L_NAME,SSN,B_DATE,SEX,ADDRESS,JOB_ID,SALARY,MANAGER_ID,DEP_ID
0,E1009,Andrea,Jones,123414,1990-09-07,F,"120 Fall Creek, Gary,IL",234,70000,30003,7
1,E1010,Ann,Jacob,123415,1982-03-30,F,"111 Britany Springs,Elgin,IL",220,70000,30002,5


In [13]:
# Job details (title, salary band, job id) for every job held by at least one
# employee earning more than 70000.
query = (
    "SELECT JOB_TITLE, MIN_SALARY, MAX_SALARY, JOB_IDENT FROM JOBS\n"
    "        WHERE JOB_IDENT IN (select JOB_ID from EMPLOYEES where SALARY > 70000);"
)

df = pd.read_sql_query(sql=query, con=connection)
df

Unnamed: 0,JOB_TITLE,MIN_SALARY,MAX_SALARY,JOB_IDENT
0,Sr. Architect,60000,100000,100
1,Sr. Software Developer,60000,80000,200
2,Lead Architect,70000,100000,600


In [17]:
# Every employee row paired with its job row (comma join filtered on the job id).
# The same query written with table aliases:
#   SELECT * FROM EMPLOYEES E, JOBS J WHERE E.JOB_ID = J.JOB_IDENT;
query = "SELECT * FROM EMPLOYEES, JOBS WHERE EMPLOYEES.JOB_ID = JOBS.JOB_IDENT;"

df = pd.read_sql_query(sql=query, con=connection)
df

Unnamed: 0,EMP_ID,F_NAME,L_NAME,SSN,B_DATE,SEX,ADDRESS,JOB_ID,SALARY,MANAGER_ID,DEP_ID,JOB_IDENT,JOB_TITLE,MIN_SALARY,MAX_SALARY
0,E1001,John,Thomas,123456,1976-09-01,M,"5631 Rice, OakPark,IL",100,100000,30001,2,100,Sr. Architect,60000,100000
1,E1002,Alice,James,123457,1972-07-31,F,"980 Berry ln, Elgin,IL",200,80000,30002,5,200,Sr. Software Developer,60000,80000
2,E1003,Steve,Wells,123458,1980-10-08,M,"291 Springs, Gary,IL",300,50000,30002,5,300,Jr.Software Developer,40000,60000
3,E1004,Santosh,Kumar,123459,1985-07-20,M,"511 Aurora Av, Aurora,IL",400,60000,30002,5,400,Jr.Software Developer,40000,60000
4,E1005,Ahmed,Hussain,123410,1981-04-01,M,"216 Oak Tree, Geneva,IL",500,70000,30001,2,500,Jr. Architect,50000,70000
5,E1006,Nancy,Allen,123411,1978-06-02,F,"111 Green Pl, Elgin,IL",600,90000,30001,2,600,Lead Architect,70000,100000
6,E1007,Mary,Thomas,123412,1975-05-05,F,"100 Rose Pl, Gary,IL",650,65000,30003,7,650,Jr. Designer,60000,70000
7,E1008,Bharath,Gupta,123413,1985-06-05,M,"145 Berry Ln, Naperville,IL",660,65000,30003,7,660,Jr. Designer,60000,70000
8,E1009,Andrea,Jones,123414,1990-09-07,F,"120 Fall Creek, Gary,IL",234,70000,30003,7,234,Sr. Designer,70000,90000
9,E1010,Ann,Jacob,123415,1982-03-30,F,"111 Britany Springs,Elgin,IL",220,70000,30002,5,220,Sr. Designer,70000,90000


In [19]:
# Employee id and name from EMPLOYEES together with the job title from JOBS,
# matched on the job id.
# The E./J. prefixes are optional here because each selected column name exists
# in only one of the two tables, but they make the origin of every column explicit.
query = (
    "SELECT E.EMP_ID,E.F_NAME, E.L_NAME, J.JOB_TITLE\n"
    "        FROM EMPLOYEES E, JOBS J\n"
    "        WHERE E.JOB_ID = J.JOB_IDENT;"
)

df = pd.read_sql_query(sql=query, con=connection)
df

Unnamed: 0,EMP_ID,F_NAME,L_NAME,JOB_TITLE
0,E1001,John,Thomas,Sr. Architect
1,E1002,Alice,James,Sr. Software Developer
2,E1003,Steve,Wells,Jr.Software Developer
3,E1004,Santosh,Kumar,Jr.Software Developer
4,E1005,Ahmed,Hussain,Jr. Architect
5,E1006,Nancy,Allen,Lead Architect
6,E1007,Mary,Thomas,Jr. Designer
7,E1008,Bharath,Gupta,Jr. Designer
8,E1009,Andrea,Jones,Sr. Designer
9,E1010,Ann,Jacob,Sr. Designer


In [26]:
# The list of employees whose JOB_TITLE is 'Jr. Designer'.
# Join form that returns the same employees (assuming JOB_IDENT is unique in JOBS;
# E.* keeps only the EMPLOYEES columns):
#   SELECT E.* FROM EMPLOYEES E, JOBS J
#   WHERE E.JOB_ID = J.JOB_IDENT AND J.JOB_TITLE = 'Jr. Designer';
query = (
    "SELECT * FROM EMPLOYEES\n"
    "        WHERE JOB_ID IN (select JOB_IDENT from JOBS where JOB_TITLE = 'Jr. Designer');"
)

df = pd.read_sql_query(sql=query, con=connection)
df

Unnamed: 0,EMP_ID,F_NAME,L_NAME,SSN,B_DATE,SEX,ADDRESS,JOB_ID,SALARY,MANAGER_ID,DEP_ID
0,E1007,Mary,Thomas,123412,1975-05-05,F,"100 Rose Pl, Gary,IL",650,65000,30003,7
1,E1008,Bharath,Gupta,123413,1985-06-05,M,"145 Berry Ln, Naperville,IL",660,65000,30003,7


In [30]:
# Employees from the department whose id is 7.
# The subquery filters on DEPARTMENTS.DEPT_ID_DEP, the column it selects.
# NOTE(review): a bare DEP_ID inside the subquery presumably binds to the outer
# EMPLOYEES.DEP_ID (the DEPARTMENTS schema is not shown here - confirm), which
# would make it an accidental correlated subquery rather than a filter on DEPARTMENTS.
query = '''SELECT * FROM EMPLOYEES
        WHERE DEP_ID IN (SELECT DEPT_ID_DEP FROM DEPARTMENTS WHERE DEPT_ID_DEP = 7);'''

# Run the query on the open connection and load the result into a DataFrame.
df = pd.read_sql_query(query, connection)
df

Unnamed: 0,EMP_ID,F_NAME,L_NAME,SSN,B_DATE,SEX,ADDRESS,JOB_ID,SALARY,MANAGER_ID,DEP_ID
0,E1007,Mary,Thomas,123412,1975-05-05,F,"100 Rose Pl, Gary,IL",650,65000,30003,7
1,E1008,Bharath,Gupta,123413,1985-06-05,M,"145 Berry Ln, Naperville,IL",660,65000,30003,7
2,E1009,Andrea,Jones,123414,1990-09-07,F,"120 Fall Creek, Gary,IL",234,70000,30003,7


In [50]:
# Employee rows joined with their job rows, restricted to employees whose birth
# year is later than 1976.
# strftime('%Y', ...) yields the year as text, so it is cast to an integer
# before the numeric comparison.
query = (
    "SELECT * FROM EMPLOYEES E, JOBS J\n"
    "        WHERE J.JOB_IDENT = E.JOB_ID AND CAST((strftime('%Y', E.B_DATE)) AS INT) > 1976;"
)

df = pd.read_sql_query(sql=query, con=connection)
df

Unnamed: 0,EMP_ID,F_NAME,L_NAME,SSN,B_DATE,SEX,ADDRESS,JOB_ID,SALARY,MANAGER_ID,DEP_ID,JOB_IDENT,JOB_TITLE,MIN_SALARY,MAX_SALARY
0,E1003,Steve,Wells,123458,1980-10-08,M,"291 Springs, Gary,IL",300,50000,30002,5,300,Jr.Software Developer,40000,60000
1,E1004,Santosh,Kumar,123459,1985-07-20,M,"511 Aurora Av, Aurora,IL",400,60000,30002,5,400,Jr.Software Developer,40000,60000
2,E1005,Ahmed,Hussain,123410,1981-04-01,M,"216 Oak Tree, Geneva,IL",500,70000,30001,2,500,Jr. Architect,50000,70000
3,E1006,Nancy,Allen,123411,1978-06-02,F,"111 Green Pl, Elgin,IL",600,90000,30001,2,600,Lead Architect,70000,100000
4,E1008,Bharath,Gupta,123413,1985-06-05,M,"145 Berry Ln, Naperville,IL",660,65000,30003,7,660,Jr. Designer,60000,70000
5,E1009,Andrea,Jones,123414,1990-09-07,F,"120 Fall Creek, Gary,IL",234,70000,30003,7,234,Sr. Designer,70000,90000
6,E1010,Ann,Jacob,123415,1982-03-30,F,"111 Britany Springs,Elgin,IL",220,70000,30002,5,220,Sr. Designer,70000,90000


In [41]:
# Release the database resources: the cursor first, then its connection.
for handle in (cursor, connection):
    handle.close()