### Load the data

In [94]:
import pandas as pd
from datetime import datetime
import pymysql 

In [268]:
# Read each CSV export (comma is read_csv's default separator) and relabel the
# columns so they match the column names of the MySQL tables created below.
department = pd.read_csv('departments.csv')
department.columns = ['department_name']

shift = pd.read_csv('shifts.csv')
shift.columns = ['from_time', 'length']

employees = pd.read_csv('employees.csv')
# Missing employee types are stored as empty strings rather than NaN.
employees['emptype'] = employees['emptype'].fillna('')

schedule = pd.read_csv('schedule.csv')
schedule.columns = ['date', 'empid', 'dept', 'start_time', 'shift_length']

### Manipulate the data

In [316]:
class DataSqlLoader:
    """Create the project tables in MySQL and load pandas DataFrames into them.

    Attributes:
        db: the open PyMySQL connection.
        c: a cursor on ``db`` used for every statement this class issues.
    """

    def __init__(self, database, host='localhost', user='root', password=''):
        """Connect to a MySQL server and open a cursor.

        Args:
            database: name of the schema to use.
            host: server host name; defaults to the local server.
            user: account name; defaults to ``'root'``.
            password: account password; defaults to the empty string.
        """
        # Keyword arguments keep this working on newer PyMySQL releases, which
        # no longer accept the connection settings positionally.
        self.db = pymysql.connect(host=host,
                                  user=user,
                                  password=password,
                                  database=database)
        # Use the connection owned by this instance, not a notebook global.
        self.c = self.db.cursor()

    def convert_time_format(self, time):
        """Convert a 12-hour clock value such as ``'3PM'`` to ``'HH:MM:SS'``.

        Raises:
            ValueError: if ``time`` does not match the ``%I%p`` format.
        """
        return datetime.strptime(str(time), '%I%p').strftime('%H:%M:%S')

    def convert_date_format(self, time):
        """Convert a ``month/day/year`` date string to ``'YYYY-MM-DD'``.

        Raises:
            ValueError: if ``time`` does not match the ``%m/%d/%Y`` format.
        """
        return datetime.strptime(str(time), '%m/%d/%Y').strftime('%Y-%m-%d')

    def creat_tables(self):
        """Create the department, shift, schedule and employees tables.

        Every statement uses ``create table if not exists``, so calling this
        again when the tables are already present leaves them untouched.
        """
        self.c.execute('''
                create table if not exists department
                (
                  department_id  int auto_increment
                    primary key,
                  department_name varchar(50) not null
                );
                ''')
        self.c.execute('''
            create table if not exists shift
            (
              shift_id int auto_increment
                primary key,
              from_time   time not null,
              length   int  not null
            );
            ''')
        self.c.execute('''
            create table if not exists schedule
            (
              schedule_id  int auto_increment
                primary key,
              date         date        not null,
              empid        varchar(10) not null,
              dept         varchar(50) not null,
              start_time   time        not null,
              shift_length int         not null
            );
            ''')
        self.c.execute('''
            create table if not exists employees
            (
              empid     varchar(10) not null primary key,
              lastname  varchar(20) not null,
              firstname varchar(20) not null,
              emptype   varchar(3)  null,
              cellphone varchar(20) null,
              homephone varchar(20) null,
              ftpt      varchar(2)  not null,
              constraint employee_empid_uindex
              unique (empid)
            );
        ''')

    # Correctly spelled alias; the original name is kept for existing callers.
    create_tables = creat_tables

    @staticmethod
    def _quote_identifier(name):
        """Return ``name`` as a backtick-quoted MySQL identifier."""
        return '`{}`'.format(str(name).replace('`', '``'))

    @staticmethod
    def _to_sql_param(value):
        """Convert one DataFrame cell into a value the driver can escape."""
        # numpy scalars expose .item(), which yields the plain Python value.
        unwrap = getattr(value, 'item', None)
        if callable(unwrap):
            value = unwrap()
        # NaN marks a missing cell in pandas; send it to MySQL as NULL.
        if isinstance(value, float) and value != value:
            return None
        return value

    def insert_into_tables(self, table, table_name):
        """Insert every row of ``table`` into the MySQL table ``table_name``.

        Rows are inserted and committed one at a time. A row that fails is
        reported with ``print`` and skipped so the remaining rows still load.

        Args:
            table: DataFrame whose column labels are the target column names.
            table_name: target table, optionally qualified as ``schema.table``.
        """
        # Identifiers cannot be bound as parameters, so they are quoted here;
        # the values themselves are passed separately and escaped by the driver.
        target = '.'.join(self._quote_identifier(part)
                          for part in str(table_name).split('.'))
        columns = ', '.join(self._quote_identifier(col) for col in table.columns)
        placeholders = ', '.join(['%s'] * len(table.columns))
        query = 'insert into {} ({}) values ({});'.format(
            target, columns, placeholders)

        for row in table.itertuples(index=False, name=None):
            params = tuple(self._to_sql_param(value) for value in row)
            try:
                self.c.execute(query, params)
                self.db.commit()
            except Exception as e:
                # Deliberately best-effort: report the failure and move on.
                print(e)

    def close(self):
        """Close the cursor and then the database connection."""
        try:
            self.c.close()
        finally:
            self.db.close()

In [317]:
# Open the connection to the project database and make sure the tables exist.
dsl = DataSqlLoader('cs431_project')
dsl.creat_tables()

# Rewrite the time and date columns in place as MySQL-compatible strings.
# The columns are overwritten, so re-running this cell on already-converted
# values raises ValueError (the converters expect the original formats).
schedule['start_time'] = [dsl.convert_time_format(value)
                          for value in schedule['start_time']]
shift['from_time'] = [dsl.convert_time_format(value)
                      for value in shift['from_time']]
schedule['date'] = [dsl.convert_date_format(value)
                    for value in schedule['date']]

# Load each frame into the table that carries the same name.
dsl.insert_into_tables(department, 'department')
dsl.insert_into_tables(shift, 'shift')
dsl.insert_into_tables(schedule, 'schedule')
dsl.insert_into_tables(employees, 'employees')

### SQL Query

In [None]:
-- One row per scheduled shift, with the employee's name and cell phone and
-- the department the shift belongs to.
select e.lastname as LAST, e.firstname as FIRST, e.cellphone as CELL,
       s.date as DATE, d.department_name as DEPT,
       date_format(s.start_time, "%I%p") as START, s.shift_length as SHIFT_LENGTH
from schedule as s
join employees as e on e.empid = s.empid
join department as d on d.department_name = s.dept
-- Sort on the raw TIME column: the START alias is the formatted 12-hour text,
-- which would order '03PM' before '07AM'.
order by LAST, DATE, s.start_time, SHIFT_LENGTH asc, DEPT desc;

#### Display results

In [327]:
# Report query: one row per scheduled shift with the employee's name and cell
# phone, limited to the first 50 rows for display.
# The sort uses the raw s.start_time column rather than the START alias,
# because START is the formatted 12-hour text and would order '03PM' before
# '07AM'.
query = '''
select e.lastname as LAST, e.firstname as FIRST, e.cellphone as CELL,
       s.date as DATE, d.department_name as DEPT,
       date_format(s.start_time, "%I%p") as START, s.shift_length as SHIFT_LENGTH
from schedule as s
join employees as e on e.empid = s.empid
join department as d on d.department_name = s.dept
order by LAST, DATE, s.start_time, SHIFT_LENGTH asc, DEPT desc
limit 50;
'''

# Run the query on the loader's open connection and emit the result as a
# LaTeX table.
df = pd.read_sql(query, dsl.db)
print(df.to_latex())

\begin{tabular}{lllllllr}
\toprule
{} &       LAST &    FIRST &            CELL &        DATE &            DEPT & START &  SHIFT\_LENGTH \\
\midrule
0  &       Ault &  Rosendo &  (571)7252-4785 &  2018-09-24 &      CARDIOLOGY &  03PM &             8 \\
1  &       Ault &  Rosendo &  (571)7252-4785 &  2018-09-25 &      PEDIATRICS &  03PM &             8 \\
2  &       Ault &  Rosendo &  (571)7252-4785 &  2018-09-25 &        ONCOLOGY &  03PM &             8 \\
3  &       Ault &  Rosendo &  (571)7252-4785 &  2018-09-26 &      PEDIATRICS &  11PM &             8 \\
4  &       Ault &  Rosendo &  (571)7252-4785 &  2018-09-28 &        ONCOLOGY &  03PM &             8 \\
5  &       Ault &  Rosendo &  (571)7252-4785 &  2018-09-29 &      CARDIOLOGY &  07AM &            12 \\
6  &       Ault &  Rosendo &  (571)7252-4785 &  2018-10-01 &      CARDIOLOGY &  03PM &             8 \\
7  &       Ault &  Rosendo &  (571)7252-4785 &  2018-10-02 &        ONCOLOGY &  03PM &             8 \\
8  &       Ault &  