### Load the data

In [1]:
import pandas as pd
from datetime import datetime
import pymysql 

In [2]:
# Read each source CSV (a comma is already pandas' default separator) and
# rename the columns to the names used by the MySQL tables.
department = pd.read_csv('departments.csv')
department.columns = ['department_name']

shift = pd.read_csv('shifts.csv')
shift.columns = ['from_time', 'length']

schedule = pd.read_csv('schedule.csv')
schedule.columns = ['date', 'empid', 'dept', 'start_time', 'shift_length']

# employees keeps the header from its file; a missing employee type is
# stored as an empty string instead of NaN.
employees = pd.read_csv('employees.csv')
employees['emptype'] = employees['emptype'].fillna('')

### Manipulate the data

In [5]:
class DataSqlLoader:
    """Small helper around a PyMySQL connection for loading the schedule data.

    It opens a connection, creates the project tables, converts the textual
    time/date values from the CSV files into MySQL-compatible strings and
    inserts pandas DataFrames row by row.

    Attributes:
        db: the open PyMySQL connection (also used directly by the report
            cells via ``pd.read_sql``).
        c: a cursor created from ``db``.
    """

    # DDL for every table the notebook uses, executed in this order.
    _TABLE_DDL = (
        '''
        create table if not exists department
        (
          department_id  int auto_increment
            primary key,
          department_name varchar(50) not null
        );
        ''',
        '''
        create table if not exists shift
        (
          shift_id int auto_increment
            primary key,
          from_time   time not null,
          length   int  not null
        );
        ''',
        '''
        create table if not exists schedule
        (
          schedule_id  int auto_increment
            primary key,
          date         date        not null,
          empid        varchar(10) not null,
          dept         varchar(50) not null,
          start_time   time        not null,
          shift_length int         not null
        );
        ''',
        '''
        create table if not exists employees
        (
          empid     varchar(10) not null primary key,
          lastname  varchar(20) not null,
          firstname varchar(20) not null,
          emptype   varchar(3)  null,
          cellphone varchar(20) null,
          homephone varchar(20) null,
          ftpt      varchar(2)  not null,
          constraint employee_empid_uindex
          unique (empid)
        );
        ''',
        '''
        create table if not exists department_managers
        (
            dept_name varchar(20),
            manager varchar(30)
        );
        ''',
    )

    def __init__(self, database, host='localhost', port=3308, user='root',
                 password=''):
        """Connect to a MySQL server and open a cursor.

        Args:
            database: name of the schema to use.
            host: server host name (default: the local server).
            port: server TCP port (default 3308, as used by this project).
            user: MySQL user name.
            password: MySQL password. Prefer passing it in (e.g. from an
                environment variable) over relying on the empty default.
        """
        self.db = pymysql.Connect(
            host=host,
            port=port,
            user=user,
            passwd=password,
            db=database)
        self.c = self.db.cursor()

    def convert_time_format(self, time):
        """Convert a 12-hour clock value such as ``'9AM'`` to ``'09:00:00'``.

        Args:
            time: value whose string form matches ``%I%p`` (hour + AM/PM);
                surrounding whitespace is ignored.

        Returns:
            The time as an ``HH:MM:SS`` string accepted by MySQL ``time``
            columns.

        Raises:
            ValueError: if the value does not match ``%I%p``.
        """
        return datetime.strptime(str(time).strip(), '%I%p').strftime('%H:%M:%S')

    def convert_date_format(self, time):
        """Convert a ``month/day/year`` date to ``YYYY-MM-DD``.

        Args:
            time: value whose string form matches ``%m/%d/%Y``; surrounding
                whitespace is ignored. (The parameter name is kept for
                backward compatibility even though it holds a date.)

        Returns:
            The date as a ``YYYY-MM-DD`` string accepted by MySQL ``date``
            columns.

        Raises:
            ValueError: if the value does not match ``%m/%d/%Y``.
        """
        return datetime.strptime(str(time).strip(), '%m/%d/%Y').strftime('%Y-%m-%d')

    def create_tables(self):
        """Create every project table that does not exist yet."""
        for ddl in self._TABLE_DDL:
            self.c.execute(ddl)

    # Original (misspelled) name, kept so existing callers continue to work.
    creat_tables = create_tables

    @staticmethod
    def _quote_identifier(name):
        """Return ``name`` wrapped in backticks, with embedded backticks doubled."""
        return '`{}`'.format(str(name).replace('`', '``'))

    @staticmethod
    def _to_sql_param(value):
        """Turn a DataFrame cell into a value the driver can bind.

        NumPy scalars are unwrapped to plain Python objects and NaN becomes
        ``None`` so that it is stored as SQL NULL.
        """
        unwrap = getattr(value, 'item', None)
        if callable(unwrap):
            value = unwrap()
        # NaN is the only float that is not equal to itself.
        if isinstance(value, float) and value != value:
            return None
        return value

    def insert_into_tables(self, table, table_name):
        """Insert every row of a DataFrame into a MySQL table.

        The statement uses ``%s`` placeholders and the row values are passed
        to the driver separately, so values containing quotes (``O'Brien``)
        or missing values are handled correctly and cannot alter the SQL.

        Rows are inserted and committed one at a time; a failing row is
        rolled back, reported on stdout and skipped so the remaining rows
        are still loaded.

        Args:
            table: DataFrame whose column names match the target columns.
            table_name: target table, optionally qualified as ``schema.table``.
        """
        columns = [self._quote_identifier(col) for col in table.columns]
        if not columns:
            return

        # The statement text is identical for every row, so build it once.
        target = '.'.join(
            self._quote_identifier(part) for part in str(table_name).split('.'))
        query = 'insert into {} ({}) values ({})'.format(
            target, ', '.join(columns), ', '.join(['%s'] * len(columns)))

        for row in table.itertuples(index=False, name=None):
            params = tuple(self._to_sql_param(value) for value in row)
            try:
                self.c.execute(query, params)
                self.db.commit()
            except Exception as e:
                # Best effort: undo the failed statement and keep loading.
                self.db.rollback()
                print('insert into {} failed for row {}: {}'.format(
                    table_name, params, e))

    def close(self):
        """Close the cursor and then the connection."""
        try:
            self.c.close()
        finally:
            self.db.close()

In [11]:
# Open the connection and make sure all tables exist.
dsl = DataSqlLoader('cs431_project')
dsl.creat_tables()

# Rewrite the textual time and date columns in place as MySQL-compatible
# strings. NOTE: this cell is not re-runnable on the same frames, because the
# converted values no longer match the input formats the converters expect.
schedule['start_time'] = [dsl.convert_time_format(value)
                          for value in schedule['start_time']]
schedule['date'] = [dsl.convert_date_format(value)
                    for value in schedule['date']]
shift['from_time'] = [dsl.convert_time_format(value)
                      for value in shift['from_time']]

# Load every frame into the table of the same name.
dsl.insert_into_tables(department, 'department')
dsl.insert_into_tables(shift, 'shift')
dsl.insert_into_tables(schedule, 'schedule')
dsl.insert_into_tables(employees, 'employees')

### SQL Query

#### 1. GENERATE THE FOLLOWING REPORT LISTING ALL EMPLOYEES WHO ARE “OVERBOOKED” BY BEING SCHEDULED MORE THAN ONCE IN A DAY

In [13]:
# Report 1: every scheduled shift of an employee who has more than one shift
# on the same date, together with the department manager.
#  - subquery `a` attaches employee names and the department manager to each
#    schedule row;
#  - subquery `b` finds the (empid, date) pairs that occur more than once.
#    It selects only the grouped columns: also selecting start_time (which is
#    neither grouped nor aggregated) is rejected by servers running with
#    ONLY_FULL_GROUP_BY, and the column was never used by the outer query.
query = '''
    SELECT a.empid, a.lastname, a.firstname, a.date AS Overbooked_Date,
           date_format(a.start_time, '%I%p') AS sfrom, a.dept_name, a.manager
    FROM
        (SELECT s.empid, e.lastname, e.firstname,
                s.date, s.start_time, d.dept_name, d.manager
         FROM schedule AS s
         INNER JOIN employees AS e ON e.empid = s.empid
         INNER JOIN department_managers AS d ON d.dept_name = s.dept) AS a
    INNER JOIN
        (SELECT empid, date
         FROM schedule
         GROUP BY empid, date
         HAVING count(*) > 1) AS b
    ON a.empid = b.empid AND a.date = b.date
    ORDER BY a.empid, a.date, a.start_time;
    '''

# Run the report on the loader's open connection.
df = pd.read_sql(query, dsl.db)

#### 2. GENERATE A REPORT OF ALL OF THE FULL TIME PEOPLE WHO ARE WORKING MORE THAN 80 HOURS, AND HOW MUCH OVERTIME THEY ARE SCHEDULED FOR.

In [15]:
# Report 2: employees whose ftpt is 'ft' and whose summed shift_length over
# all of their schedule rows exceeds 80. The excess over 80 is returned as
# scheduled_overtime next to the employee's "firstname lastname"; rows are
# ordered by most overtime first, then by name.
# NOTE(review): the sum covers every row in `schedule`, with no date filter;
# this presumably assumes the table holds a single pay period - confirm.
query = '''
        SELECT concat(e.firstname, ' ', e.lastname) as fullname, sum(s.shift_length)-80 as scheduled_overtime
        FROM schedule AS s, employees AS e
        WHERE s.empid=e.empid
        AND e.ftpt = 'ft'
        GROUP BY s.empid
        HAVING scheduled_overtime >0
        ORDER BY scheduled_overtime DESC, fullname ASC;
        '''

# Run the report on the loader's open connection; this rebinds `df`.
df = pd.read_sql(query, dsl.db)