In [1]:
import sqlite3
import csv
import pandas as pd
import traceback
import sys
from datetime import datetime
import matplotlib.pyplot as plt

# Challenge 1

In [2]:
# Path of the SQLite database file; the cells below pass it to sqlite3.connect().
db = "challenge.db"

## Creación de Tablas

In [3]:
# Table creation: run the DDL script createTables.sql against the database.

con = sqlite3.connect(db)
cur = con.cursor()

try:
    # The context manager releases the script's file handle even if reading fails.
    with open("createTables.sql") as sql_file:
        sql_as_string = sql_file.read()
    cur.executescript(sql_as_string)
except sqlite3.Error as er:
    # Report the SQLite failure with its traceback instead of aborting the notebook.
    print('SQLite error: %s' % (' '.join(er.args)))
    print("Exception class is: ", er.__class__)
    print('SQLite traceback: ')
    exc_type, exc_value, exc_tb = sys.exc_info()
    print(traceback.format_exception(exc_type, exc_value, exc_tb))
finally:
    # Single close point: runs on success, on SQLite errors and on any other exception.
    con.close()

## Migrar datos desde archivos CSV

In [9]:

def uploadData(db: str, file: str, table_name: str, columnas: list, if_exist: str = 'append') -> None:
    """Load a header-less CSV file into a table of a SQLite database.

    Args:
        db: Path of the SQLite database file.
        file: Path of the CSV file to load (read without a header row).
        table_name: Name of the destination table.
        columnas: Column names to assign to the CSV data, in file order.
        if_exist: Forwarded to ``DataFrame.to_sql(if_exists=...)``;
            defaults to ``'append'``.

    Returns:
        None. SQLite errors raised while writing are printed, not re-raised.
    """
    # Read the CSV and label its columns with the destination column names.
    df = pd.read_csv(file, header=None)
    df.columns = columnas

    # Write the DataFrame to the destination table.
    con = sqlite3.connect(db)
    try:
        df.to_sql(table_name, con, if_exists=if_exist, index=False)
    except sqlite3.Error as er:
        print('SQLite error: %s' % (' '.join(er.args)))
        print("Exception class is: ", er.__class__)
        print('SQLite traceback: ')
        exc_type, exc_value, exc_tb = sys.exc_info()
        print(traceback.format_exception(exc_type, exc_value, exc_tb))
    finally:
        # Always release the connection; previously it was left open on every call.
        con.close()

    return

In [10]:
# Initial load: one (csv path, destination table, column names) entry per source file.
initial_sources = (
    ('data/jobs.csv', 'jobs', ['id', 'job']),
    ('data/departments.csv', 'departments', ['id', 'department']),
    ('data/hired_employees.csv', 'hired_employees',
     ['id', 'name', 'datetime', 'department_id', 'job_id']),
)

for csv_path, target_table, column_names in initial_sources:
    uploadData(db=db, file=csv_path, table_name=target_table, columnas=column_names)


## Testing cargando DataFrames desde la Base

In [6]:
# Sanity check: read each table back into a DataFrame and preview it.
# The names start as None so they exist even if a read below fails.
depts = None
jobs = None
hired_employees = None

con = sqlite3.connect(db)
try:
    depts = pd.read_sql('SELECT * FROM DEPARTMENTS', con)
    jobs = pd.read_sql('SELECT * FROM JOBS', con)
    hired_employees = pd.read_sql('SELECT * FROM HIRED_EMPLOYEES', con)
finally:
    # Close the connection even when one of the queries raises.
    con.close()

print(depts.head())
print(jobs.head())
print(hired_employees.head())

   id                department
0   1        Product Management
1   2                     Sales
2   3  Research and Development
3   4      Business Development
4   5               Engineering
   id                        job
0   1        Marketing Assistant
1   2                   VP Sales
2   3         Biostatistician IV
3   4  Account Representative II
4   5               VP Marketing
   id            name              datetime  department_id  job_id
0   1     Harold Vogt  2021-11-07T02:48:42Z            2.0    96.0
1   2        Ty Hofer  2021-05-30T05:43:46Z            8.0     NaN
2   3     Lyman Hadye  2021-09-01T23:27:38Z            5.0    52.0
3   4   Lotti Crowthe  2021-10-01T13:04:21Z           12.0    71.0
4   5  Gretna Lording  2021-10-10T22:22:17Z            6.0    80.0


In [49]:
# Leftover cleanup: closes whatever connection `con` currently refers to.
# NOTE(review): the preceding cell already calls con.close(), so this looks
# redundant — confirm and consider deleting the cell.
con.close()

## Actualizar datos desde archivos CSV

In [11]:
# Re-import using the same function as the initial load, but with if_exist='replace'.
# With 'replace', pandas drops each existing table and recreates it from the CSV,
# so the table ends up mirroring the file: new rows appear, changed rows take the
# CSV values, and rows missing from the CSV are gone.
reload_sources = (
    ('data/jobs.csv', 'jobs', ['id', 'job']),
    ('data/departments.csv', 'departments', ['id', 'department']),
    ('data/hired_employees.csv', 'hired_employees',
     ['id', 'name', 'datetime', 'department_id', 'job_id']),
)

for csv_path, target_table, column_names in reload_sources:
    uploadData(db=db, file=csv_path, table_name=target_table,
               columnas=column_names, if_exist='replace')