<h2><center>Data Engineering</center></h2>

In [1]:
import datetime as dt

import pandas as pd
from sqlalchemy import create_engine, MetaData, Table, Column, Float, Integer, String, extract, distinct, ForeignKey, inspect
from sqlalchemy.engine import reflection
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy.orm import relationship

from config import engine_url

In [2]:
# Check EmployeeSQL/schema.sql

In [3]:
# Import each CSV file into the corresponding SQL table

In [4]:
# Build the SQLAlchemy engine from the URL imported from the local `config` module.
engine = create_engine(engine_url)
# Open a connection right away.
# NOTE(review): `connection` is neither used nor closed in the cells visible here —
# confirm a later cell needs it, otherwise it is a leaked connection.
connection = engine.connect()

In [5]:
# Tables whose definitions are read straight from the database.
reflected_tables = ['employees', 'titles', 'departments', 'salaries']

metadata = MetaData()
metadata.reflect(bind=engine, only=reflected_tables)

# The two link tables are declared by hand instead of being reflected, each with a
# composite primary key over both of its columns and no foreign keys.
# NOTE(review): presumably this is so automap maps them as classes rather than
# treating them as many-to-many association tables — confirm against schema.sql.
Table(
    'dept_emp',
    metadata,
    Column('emp_no', Integer, primary_key=True),
    Column('dept_no', String, primary_key=True),
)

Table(
    'dept_manager',
    metadata,
    Column('dept_no', String, primary_key=True),
    Column('emp_no', Integer, primary_key=True),
)

# prepare() is called without an engine, so the classes are generated from the
# tables already registered in `metadata` above.
Base = automap_base(metadata=metadata)
Base.prepare()

In [4]:
# List the name of every class automap generated, one per line
for mapped_name in Base.classes.keys():
    print(mapped_name)

employees
titles
departments
salaries
dept_emp


In [11]:
# Short aliases for the automapped classes.

# Entity tables
Departments = Base.classes.departments
Titles = Base.classes.titles
Employees = Base.classes.employees
Salaries = Base.classes.salaries

# Link tables (employee <-> department)
Dept_emp = Base.classes.dept_emp
Dept_manager = Base.classes.dept_manager

In [21]:
# Check the columns the database reports for a specific table
table_name = "salaries"
# sqlalchemy.inspect() is the supported way to obtain an Inspector;
# Inspector.from_engine() has been deprecated since SQLAlchemy 1.4.
inspector = inspect(engine)
columns = inspector.get_columns(table_name=table_name)

# Each entry is a dict describing one column; show its name and SQL type.
for column_info in columns:
    print(column_info["name"], column_info["type"])

emp_no INTEGER
salary INTEGER


In [22]:
# Open session
# A single ORM session bound to the engine; the import cells further down add
# their objects to it and commit through it.
session = Session(engine)

<h3>Read all data from csv files</h3>

In [14]:
# Load every CSV into its own DataFrame.
# Note: the preview of salaries.csv shows an extra leading "Unnamed: 0" column,
# so the import cells pick columns by name instead of relying on position.

# Entity data
data_departments = pd.read_csv("./data/departments.csv")
data_titles = pd.read_csv("./data/titles.csv")
data_employees = pd.read_csv("./data/employees.csv")
data_salaries = pd.read_csv("./data/salaries.csv")

# Link data (employee <-> department)
data_dept_emp = pd.read_csv("./data/dept_emp.csv")
data_dept_manager = pd.read_csv("./data/dept_manager.csv")

In [63]:
# Inspect one of them
# Preview the first two rows of the salaries frame to see which columns were read.
data_salaries.head(2)

Unnamed: 0,emp_no,salary
0,10001,60117
1,10002,65828


In [64]:
# Check the column names of the mapped `salaries` table, one per line
for column_name in Salaries.__table__.c.keys():
    print(column_name)

emp_no
salary


<h3>Import each dataset into its corresponding database table</h3>

In [29]:
# Import each department to db
# Build every ORM object before touching the session, so an error on one CSV row
# cannot leave a partial batch pending for a later commit to pick up.
# itertuples() is used instead of iterrows(): it does not build a Series per row.
new_departments = [
    Departments(dept_no=row.dept_no, dept_name=row.dept_name)
    for row in data_departments.itertuples(index=False)
]

try:
    session.add_all(new_departments)
    session.commit()
except Exception:
    # Discard the failed transaction so the shared session stays usable, then
    # surface the original error.
    session.rollback()
    raise

In [27]:
# Import each title to db
# Build every ORM object before touching the session, so an error on one CSV row
# cannot leave a partial batch pending for a later commit to pick up.
# itertuples() is used instead of iterrows(): it does not build a Series per row.
new_titles = [
    Titles(title_id=row.title_id, title=row.title)
    for row in data_titles.itertuples(index=False)
]

try:
    session.add_all(new_titles)
    session.commit()
except Exception:
    # Discard the failed transaction so the shared session stays usable, then
    # surface the original error.
    session.rollback()
    raise

In [39]:
# Import each employee to db
# Build every ORM object before touching the session: strptime() raises ValueError
# on a malformed date, and with row-by-row session.add() that would leave the rows
# processed so far pending for a later commit to pick up.
# itertuples() is used instead of iterrows(): it does not build a Series per row.
new_employees = [
    Employees(
        emp_no=row.emp_no,
        emp_title_id=row.emp_title_id,
        # Dates are parsed with the month/day/4-digit-year format.
        birth_date=dt.datetime.strptime(row.birth_date, '%m/%d/%Y'),
        first_name=row.first_name,
        last_name=row.last_name,
        sex=row.sex,
        hire_date=dt.datetime.strptime(row.hire_date, '%m/%d/%Y'),
    )
    for row in data_employees.itertuples(index=False)
]

try:
    session.add_all(new_employees)
    session.commit()
except Exception:
    # Discard the failed transaction so the shared session stays usable, then
    # surface the original error.
    session.rollback()
    raise

In [43]:
# Import dept_emp to db
# Build every ORM object before touching the session, so an error on one CSV row
# cannot leave a partial batch pending for a later commit to pick up.
# itertuples() is used instead of iterrows(): it does not build a Series per row.
new_dept_emp = [
    Dept_emp(emp_no=row.emp_no, dept_no=row.dept_no)
    for row in data_dept_emp.itertuples(index=False)
]

try:
    session.add_all(new_dept_emp)
    session.commit()
except Exception:
    # Discard the failed transaction so the shared session stays usable, then
    # surface the original error.
    session.rollback()
    raise

In [46]:
# Import dept_manager to db
# Build every ORM object before touching the session, so an error on one CSV row
# cannot leave a partial batch pending for a later commit to pick up.
# itertuples() is used instead of iterrows(): it does not build a Series per row.
new_dept_managers = [
    Dept_manager(dept_no=row.dept_no, emp_no=row.emp_no)
    for row in data_dept_manager.itertuples(index=False)
]

try:
    session.add_all(new_dept_managers)
    session.commit()
except Exception:
    # Discard the failed transaction so the shared session stays usable, then
    # surface the original error.
    session.rollback()
    raise

In [57]:
# Show the dtype pandas assigned to each column of the salaries frame.
# NOTE(review): presumably checked because the salaries import cell casts both
# values with int() before inserting — confirm.
data_salaries.dtypes

emp_no    int64
salary    int64
dtype: object

In [68]:
# Import salaries to db
# Build every ORM object before touching the session, so an error on one CSV row
# cannot leave a partial batch pending for a later commit to pick up.
# itertuples() is used instead of iterrows(): it does not build a Series per row.
# Columns are read by name, so any extra column in the frame is ignored.
new_salaries = [
    # The int() casts are kept so plain Python ints are always handed to the
    # database driver, however pandas boxes the int64 values.
    Salaries(emp_no=int(row.emp_no), salary=int(row.salary))
    for row in data_salaries.itertuples(index=False)
]

try:
    session.add_all(new_salaries)
    session.commit()
except Exception:
    # Discard the failed transaction so the shared session stays usable, then
    # surface the original error.
    session.rollback()
    raise

<h3>Now check all of the data inserted into the database</h3>