## Importing data into the database.

### Import libraries

In [1]:
import os
import sqlite3
import pandas as pd
from sqlite3 import Error
from IPython.display import display

### Read the processed feather format data.

In [2]:
# Read the processed feather file and drop its "index" column in one chained
# expression, then preview the first three rows.
df = pd.read_feather("NYC-proc.feather").drop(columns="index")
df.head(3)

Unnamed: 0,Fiscal Year,Payroll Number,Agency Name,Last Name,First Name,Agency Start Date,Work Location Borough,Title Description,Leave Status as of June 30,Base Salary,...,Regular Gross Paid,OT Hours,Total OT Paid,Total Other Pay,Regular Hours category,Total Paid,OT Hours category,Hourly Pay,Hourly Pay category,Total Pay category
0,2020,17.0,OFFICE OF EMERGENCY MANAGEMENT,BEREZIN,MIKHAIL,08/10/2015,BROOKLYN,EMERGENCY PREPAREDNESS MANAGER,ACTIVE,86005.0,...,84698.21,0.0,0.0,0.0,1500-2000,84698.21,0-500,46.537478,0-100,60000-90000
1,2020,17.0,OFFICE OF EMERGENCY MANAGEMENT,GEAGER,VERONICA,09/12/2016,BROOKLYN,EMERGENCY PREPAREDNESS MANAGER,ACTIVE,86005.0,...,84698.21,0.0,0.0,0.0,1500-2000,84698.21,0-500,46.537478,0-100,60000-90000
2,2020,17.0,OFFICE OF EMERGENCY MANAGEMENT,RAMANI,SHRADDHA,02/22/2016,BROOKLYN,EMERGENCY PREPAREDNESS MANAGER,ACTIVE,86005.0,...,84698.21,0.0,0.0,0.0,1500-2000,84698.21,0-500,46.537478,0-100,60000-90000


### Functions for creating a connection, creating tables, and executing SQL statements.

In [3]:
def create_connection(db_file, delete_db=False):
    """Open a SQLite connection to ``db_file`` with foreign keys enforced.

    Parameters
    ----------
    db_file : str or path-like
        Location of the SQLite database file.
    delete_db : bool, default False
        When True, an existing file at ``db_file`` is removed before
        connecting, so the returned connection starts from an empty database.

    Returns
    -------
    sqlite3.Connection or None
        The open connection, or None when connecting failed. SQLite errors
        are printed instead of being raised.
    """
    # Honour the delete_db flag, which was previously accepted but ignored.
    if delete_db and os.path.isfile(db_file):
        os.remove(db_file)

    conn = None
    try:
        conn = sqlite3.connect(db_file)
        # Foreign-key enforcement is a per-connection setting in SQLite,
        # so it is switched on every time a connection is opened.
        conn.execute("PRAGMA foreign_keys = 1")
    except Error as e:
        print(e)

    return conn

In [4]:
def _quote_identifier(name):
    """Return ``name`` wrapped in double quotes, with embedded quotes doubled."""
    return '"' + str(name).replace('"', '""') + '"'


def create_table(conn, create_table_sql, drop_table_name=None):
    """Execute a CREATE TABLE statement, optionally dropping a table first.

    Parameters
    ----------
    conn : sqlite3.Connection
        Open database connection.
    create_table_sql : str
        The full CREATE TABLE statement to execute.
    drop_table_name : str, optional
        Name of a table to drop (if it exists) before creating. The name may
        be schema-qualified (``schema.table``); each dot-separated part is
        quoted as an identifier, so pass the bare name without quotes.

    Notes
    -----
    SQLite errors from either statement are printed, not raised, so a failed
    drop does not prevent the create from being attempted.
    """
    if drop_table_name:
        # Table names cannot be bound as SQL parameters, so each part is
        # quoted instead of being pasted into the statement verbatim. This
        # keeps reserved words and unusual characters from breaking the DROP.
        quoted_name = ".".join(
            _quote_identifier(part) for part in str(drop_table_name).split(".")
        )
        try:
            conn.cursor().execute("DROP TABLE IF EXISTS %s" % quoted_name)
        except Error as e:
            print(e)

    try:
        conn.cursor().execute(create_table_sql)
    except Error as e:
        print(e)

In [5]:
def execute_sql_statement(sql_statement, conn):
    """Run ``sql_statement`` on ``conn`` and return every result row as a list."""
    cursor = conn.cursor()
    # Cursor.execute returns the cursor itself, so the fetch can be chained.
    return cursor.execute(sql_statement).fetchall()

### Create database connection.

In [6]:
# The database file sits in the current working directory.
PATH = os.getcwd()
db_path = os.path.join(PATH, 'nyc-payroll.db')
conn = create_connection(db_path)

### Create tables.

##### Create table Fiscal Year.

In [7]:
# Single-column table whose primary key is the fiscal year.
sql = (
    "CREATE TABLE FiscalYear ("
    "FiscalYear INTEGER NOT NULL PRIMARY KEY"
    ");"
)
create_table(conn, create_table_sql=sql, drop_table_name='FiscalYear')

##### Create table Agency Name.

In [8]:
# Single-column table whose primary key is the agency name.
sql = (
    "CREATE TABLE AgencyName ("
    "AgencyName TEXT NOT NULL PRIMARY KEY"
    ");"
)
create_table(conn, create_table_sql=sql, drop_table_name='AgencyName')

##### Create table Pay Basis.

In [9]:
# Single-column table whose primary key is the pay basis.
sql = (
    "CREATE TABLE PayBasis ("
    "PayBasis TEXT NOT NULL PRIMARY KEY"
    ");"
)
create_table(conn, create_table_sql=sql, drop_table_name='PayBasis')

##### Create table Pay Roll.

In [10]:
# Payroll numbers, each tied to an agency through a foreign key on AgencyName.
sql = (
    "CREATE TABLE PayRoll ("
    "PayRollNo INTEGER NOT NULL PRIMARY KEY, "
    "AgencyName TEXT NOT NULL, "
    "FOREIGN KEY (AgencyName) REFERENCES AgencyName(AgencyName)"
    ");"
)
create_table(conn, create_table_sql=sql, drop_table_name='PayRoll')

##### Create table Employee. 

In [11]:
# Employee rows reference the PayBasis, PayRoll and FiscalYear tables.
# The FiscalYear foreign key now uses the same spelling as the column
# definition (it was written "Fiscalyear"); SQLite matches identifiers
# case-insensitively, so this is a consistency fix only.
sql = (
    "CREATE TABLE Employee ("
    "EmployeeID INTEGER NOT NULL PRIMARY KEY, "
    "FirstName TEXT NOT NULL, "
    "LastName TEXT NOT NULL, "
    "PayRollNo INTEGER NOT NULL, "
    "FiscalYear INTEGER NOT NULL, "
    "PayBasis TEXT NOT NULL, "
    "RegularGrossPaid FLOAT NOT NULL, "
    "FOREIGN KEY (PayBasis) REFERENCES PayBasis(PayBasis), "
    "FOREIGN KEY (PayRollNo) REFERENCES PayRoll(PayRollNo), "
    "FOREIGN KEY (FiscalYear) REFERENCES FiscalYear(FiscalYear)"
    ");"
)
create_table(conn, sql, drop_table_name='Employee')

##### Create table Designation.

In [12]:
# Title, base salary and work location per employee (foreign key on EmployeeID).
sql = (
    "CREATE TABLE Designation ("
    "EmployeeID INTEGER NOT NULL, "
    "TitleDescription TEXT NOT NULL, "
    "BaseSalary INTEGER NOT NULL, "
    "WorkLocation TEXT NOT NULL, "
    "FOREIGN KEY (EmployeeID) REFERENCES Employee(EmployeeID)"
    ");"
)
create_table(conn, create_table_sql=sql, drop_table_name='Designation')

##### Create table Income.

In [13]:
# Pay and hours figures per employee, plus their bucketed category columns.
sql = (
    "CREATE TABLE Income ("
    "EmployeeID INTEGER NOT NULL, "
    "RegularGrossPaid FLOAT NOT NULL, "
    "RegularHours FLOAT NOT NULL, "
    "OTHours FLOAT NOT NULL, "
    "TotalOTPay FLOAT NOT NULL, "
    "TotalOtherPay FLOAT NOT NULL, "
    "TotalPay FLOAT NOT NULL, "
    "HourlyPay FLOAT NOT NULL, "
    "RegularHoursCategory VARCHAR(255) NOT NULL, "
    "OTHoursCategory VARCHAR(255) NOT NULL, "
    "HourlyPayCategory VARCHAR(255) NOT NULL, "
    "TotalPayCategory VARCHAR(255) NOT NULL, "
    "FOREIGN KEY (EmployeeID) REFERENCES Employee(EmployeeID)"
    ");"
)
create_table(conn, create_table_sql=sql, drop_table_name='Income')

### Display the contents of each database table.

In [25]:
# Query every table and render it as its own frame. The result goes into a
# dedicated variable so the payroll DataFrame held in `df` is not overwritten.
tables = ['FiscalYear','AgencyName','PayBasis','PayRoll','Employee','Designation','Income']
with conn:
    for table_name in tables:
        sql = f"select * from {table_name};"
        table_df = pd.read_sql_query(sql, conn)
        print(f"{table_name} :")
        display(table_df)
        print()

FiscalYear :


Unnamed: 0,FiscalYear



AgencyName :


Unnamed: 0,AgencyName



PayBasis :


Unnamed: 0,PayBasis



PayRoll :


Unnamed: 0,PayRollNo,AgencyName



Employee :


Unnamed: 0,EmployeeID,FirstName,LastName,PayRollNo,FiscalYear,PayBasis,RegularGrossPaid



Designation :


Unnamed: 0,EmployeeID,TitleDescription,BaseSalary,WorkLocation



Income :


Unnamed: 0,EmployeeID,RegularGrossPaid,RegularHours,OTHours,TotalOTPay,TotalOtherPay,TotalPay,HourlyPay,RegularHoursCategory,OTHoursCategory,HourlyPayCategory,TotalPayCategory



