In [1]:
# Standard library
import random
import string
from random import randint
from urllib.parse import quote

# Third-party data handling
import numpy as np
import pandas as pd
from pandas.api.types import is_numeric_dtype
from pandas.api.types import is_string_dtype

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy import create_engine, MetaData
from sqlalchemy import Column, Integer, String, Numeric, Text, Float
from sqlalchemy.ext.declarative import declarative_base

# PostgreSQL info:
import psycopg2
from config import postgres_pswd
PASSWORD = postgres_pswd

In [2]:
# Read the HR help-desk case export into a DataFrame and preview the first rows
df = pd.read_csv(
    'HR_Datasets/human-resources-data-set/HR_helpdesk_info.csv'
)
df.head(n=10)

Unnamed: 0,CaseID,RequestorID,RequestorType,CaseOwner,CaseType,CaseStatus,TierLevel,Priority,DateReceived,SatisfactionScore,DateClosed
0,HRHD000001,1009919990,1 - Intern,XLbl6V9K,Recruitment & Selection,Closed,Tier 2,0 - None,2018-01-01 00:00:00.000000000,1 - Unsatisfied,2018-01-01 00:00:00.000000000
1,HRHD000002,1302053044,2 - Regular,Qwsfd5ND,Benefits & Compensation,Closed,Tier 1,1 - Low,2018-01-01 00:15:46.089460894,1 - Unsatisfied,2018-01-02 00:15:46.089460894
2,HRHD000003,1411071481,2 - Regular,m5kYwx5P,Benefits & Compensation,Closed,Tier 2,0 - None,2018-01-01 00:31:32.178921789,0 - Unknown,2018-01-05 00:31:32.178921789
3,HRHD000004,1306059197,4 - Management,XLbl6V9K,Recruitment & Selection,Closed,Tier 2,0 - None,2018-01-01 00:47:18.268382683,0 - Unknown,2018-01-05 00:47:18.268382683
4,HRHD000005,1407068885,1 - Intern,bRWtYc6,Training & Development,Closed,Tier 1,1 - Low,2018-01-01 01:03:04.357843578,1 - Unsatisfied,2018-01-02 01:03:04.357843578
5,HRHD000006,1301052347,4 - Management,m5kYwx5P,Benefits & Compensation,Closed,Tier 2,3 - High,2018-01-01 01:18:50.447304473,0 - Unknown,2018-01-03 01:18:50.447304473
6,HRHD000007,1406067865,3 - Non-staff,tSjw2hYY,Recruitment & Selection,Closed,Tier 1,3 - High,2018-01-01 01:34:36.536765367,0 - Unknown,2018-01-03 01:34:36.536765367
7,HRHD000008,1011022820,4 - Management,XLbl6V9K,Recruitment & Selection,Closed,Tier 2,0 - None,2018-01-01 01:50:22.626226262,0 - Unknown,2018-01-05 01:50:22.626226262
8,HRHD000009,1001109612,2 - Regular,XLbl6V9K,Recruitment & Selection,Closed,Tier 2,2 - Medium,2018-01-01 02:06:08.715687156,1 - Unsatisfied,2018-01-05 02:06:08.715687156
9,HRHD000010,1101023839,3 - Non-staff,Qwsfd5ND,Benefits & Compensation,Closed,Tier 1,1 - Low,2018-01-01 02:21:54.805148051,1 - Unsatisfied,2018-01-03 02:21:54.805148051


In [3]:
# Summarise each column's dtype and non-null count to spot missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 11 columns):
CaseID               100000 non-null object
RequestorID          100000 non-null int64
RequestorType        100000 non-null object
CaseOwner            100000 non-null object
CaseType             100000 non-null object
CaseStatus           100000 non-null object
TierLevel            100000 non-null object
Priority             100000 non-null object
DateReceived         100000 non-null object
SatisfactionScore    100000 non-null object
DateClosed           100000 non-null object
dtypes: int64(1), object(10)
memory usage: 8.4+ MB


In [4]:
# Create an engine to the Dental Magic HRv10 database.
# The password is percent-encoded first: characters such as '@', ':', '/' or '%'
# would otherwise be read as URL delimiters and corrupt the connection string.
# A password made only of unreserved characters is left unchanged by quote().
engine = create_engine(
    'postgresql://postgres:' + quote(PASSWORD, safe='') + '@localhost:5432/Dental_Magic_HR_v10_db'
)

In [5]:
# Declarative base: the parent class every ORM table model below inherits from
Base = declarative_base()
# Open a connection on the engine; later cells run their statements through `conn`
conn = engine.connect()

In [6]:
# Specify all tables and the data types for each column of each table
class HR_Cases_Data(Base):
    """ORM model for the ``HR_Cases_Data`` table.

    Apart from ``ID``, every column carries the same name as a column of the
    help-desk CSV loaded into ``df``, so the records produced from that frame
    can be inserted without renaming keys.
    """

    __tablename__ = 'HR_Cases_Data'

    # Integer primary key; it is not one of the CSV columns.
    ID = Column(Integer, primary_key=True)
    CaseID = Column(Text)
    RequestorID = Column(Integer)
    RequestorType = Column(Text)
    CaseOwner = Column(Text)
    CaseType = Column(Text)
    CaseStatus = Column(Text)
    TierLevel = Column(Text)
    Priority = Column(Text)
    # Dates are kept as text here (the CSV columns load as object dtype).
    DateReceived = Column(Text)
    SatisfactionScore = Column(Text, nullable=True)
    DateClosed = Column(Text, nullable=True)

    def __repr__(self):
        """Return a short identifying string for the row.

        Uses the attributes this model actually defines (``ID`` and ``CaseID``);
        the previous ``self.id`` / ``self.name`` do not exist on the class and
        raised ``AttributeError`` whenever an instance was displayed.
        """
        return f"id={self.ID}, CaseID={self.CaseID}"
 

In [7]:
# Issue the table creation for every model registered on `Base`
Base.metadata.create_all(bind=engine)
# Build a separate engine-bound MetaData and load the table definitions
# from the database into it
metadata = MetaData(bind=engine)
metadata.reflect()

In [8]:
# Turn the DataFrame into a list with one {column name: value} dict per row,
# then display the first record as a sanity check
data = df.to_dict(orient="records")
data[0]

{'CaseID': 'HRHD000001',
 'CaseOwner': 'XLbl6V9K',
 'CaseStatus': 'Closed',
 'CaseType': 'Recruitment & Selection',
 'DateClosed': '2018-01-01 00:00:00.000000000',
 'DateReceived': '2018-01-01 00:00:00.000000000',
 'Priority': '0 - None',
 'RequestorID': 1009919990,
 'RequestorType': '1 - Intern',
 'SatisfactionScore': '1 - Unsatisfied',
 'TierLevel': 'Tier 2'}

In [9]:
# Obtain the 'HR_Cases_Data' table object via the metadata and keep it as `table`
table = sqlalchemy.Table(
    'HR_Cases_Data',
    metadata,
    autoload=True,
)

In [10]:
# Build the INSERT statement for the table, then execute it once with the
# full list of records so all rows are written in a single call
insert_stmt = table.insert()
conn.execute(insert_stmt, data)

<sqlalchemy.engine.result.ResultProxy at 0x2dcedb96b70>

In [13]:
# Test that the insert works by fetching the first 5 rows of the table.
# - ORDER BY "ID" makes "first 5" well defined; without it the database may
#   return any five rows.
# - The SQL is wrapped in sqlalchemy.text() because passing a plain string to
#   Connection.execute() is deprecated in SQLAlchemy 1.4 and rejected in 2.0.
conn.execute(
    sqlalchemy.text('select * from "HR_Cases_Data" order by "ID" limit 5')
).fetchall()

[(1, 'HRHD000001', 1009919990, '1 - Intern', 'XLbl6V9K', 'Recruitment & Selection', 'Closed', 'Tier 2', '0 - None', '2018-01-01 00:00:00.000000000', '1 - Unsatisfied', '2018-01-01 00:00:00.000000000'),
 (2, 'HRHD000002', 1302053044, '2 - Regular', 'Qwsfd5ND', 'Benefits & Compensation', 'Closed', 'Tier 1', '1 - Low', '2018-01-01 00:15:46.089460894', '1 - Unsatisfied', '2018-01-02 00:15:46.089460894'),
 (3, 'HRHD000003', 1411071481, '2 - Regular', 'm5kYwx5P', 'Benefits & Compensation', 'Closed', 'Tier 2', '0 - None', '2018-01-01 00:31:32.178921789', '0 - Unknown', '2018-01-05 00:31:32.178921789'),
 (4, 'HRHD000004', 1306059197, '4 - Management', 'XLbl6V9K', 'Recruitment & Selection', 'Closed', 'Tier 2', '0 - None', '2018-01-01 00:47:18.268382683', '0 - Unknown', '2018-01-05 00:47:18.268382683'),
 (5, 'HRHD000005', 1407068885, '1 - Intern', 'bRWtYc6', 'Training & Development', 'Closed', 'Tier 1', '1 - Low', '2018-01-01 01:03:04.357843578', '1 - Unsatisfied', '2018-01-02 01:03:04.357843578