In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re

import matplotlib.font_manager as fm
font_name = fm.FontProperties(fname="C:/Windows/Fonts/malgun.ttf").get_name()
plt.rc("font", family=font_name)
import matplotlib as mpl
mpl.rcParams["axes.unicode_minus"] = False

#SQLite 관련 library import
import sqlite3
from os import mkdir
import csv

## 1. DB 만들기 

In [2]:
def data_read():
    data = pd.read_csv("data/data.csv", encoding='utf-8')
    weather = pd.read_csv("data/weather.csv", encoding='euc-kr')
    return data, weather

In [3]:
def make_database():
    try:
        conn = sqlite3.connect('data/team3.db')
    except sqlite3.OperationalError:
        mkdir('folder_A')
    finally:
        conn = sqlite3.connect('data/team3.db')
        cur = conn.cursor()

    hr = """create table if not exists hr(
        datetime  datetime,
        worker_number   int,
        real_number int,
        biztrip_number  int,
        overtime_number int,
        telecom_number  int    
    )
    """
    cur.execute(hr)
    conn.commit()


    lunch = """create table if not exists lunch(
        datetime  datetime,
        new_lunch   varchar(10),
        lunch_rice varchar(20),
        lunch_soup  varchar(20),
        lunch_main varchar(20),
        lunch_number  int    
    )
    """
    cur.execute(lunch)
    conn.commit()


    dinner = """create table if not exists dinner(
        datetime  datetime,
        new_dinner   varchar(10),
        dinner_rice varchar(20),
        dinner_soup  varchar(20),
        dinner_main varchar(20),
        dinner_number  int    
    )
    """
    cur.execute(dinner)
    conn.commit()


    weather = """create table if not exists weather(
        datetime  datetime,
        temperature   float(32),
        rain float(32),
        wind float(32),
        humidity  float(32),
        discomfort_index    float(32),
        perceived_temperature   float(32)
    )
    """
    cur.execute(weather)
    conn.commit()

    calendar = """create table if not exists calendar(
        datetime  datetime,
        month   int,
        date int,
        weekdays  varchar(16),
        season varchar(16),
        vacation  int    
        )
        """
    cur.execute(calendar)
    conn.commit()


    conn.close()

In [4]:
def transform(data, weather):
    #### 1. 일자를 datetime 형태로 변경
    data['일자'] = pd.to_datetime(data['일자'])
    #### 2. 실질 사내 근무자 수
    data['실질정원수'] = data['본사정원수'] - (data['본사휴가자수'] + data['본사출장자수'] + data['현본사소속재택근무자수'])
    data = data.loc[:, ['일자', '요일', '본사정원수', '실질정원수', '본사휴가자수', '본사출장자수', '본사시간외근무명령서승인건수', '현본사소속재택근무자수', '중식메뉴', '석식메뉴', '중식계', '석식계']]
    #### 3. 월, 일 칼럼과 계절 칼럼, 연휴 칼럼 만들기
    data['Month'] = data['일자'].dt.strftime('%m')
    data['Date'] = data['일자'].dt.strftime('%d')
    season = []

    for index in range(len(data)):
        if data['Month'][index] == '03' or data['Month'][index] == '04' or data['Month'][index] == '05':
            season.append('봄')
        elif data['Month'][index] == '06' or data['Month'][index] == '07' or data['Month'][index] == '08':
            season.append('여름')
        elif data['Month'][index] == '09' or data['Month'][index] == '10' or data['Month'][index] == '11':
            season.append('가을')
        elif data['Month'][index] == '12' or data['Month'][index] == '01' or data['Month'][index] == '02':
            season.append('겨울')
    
    data['Season'] = season



    holiday_gap=[]

    for i in range(len(data)):
        if i == len(data) - 1:
            holiday_gap.append(0)
        elif int((pd.to_datetime(data['일자'][i+1])-pd.to_datetime(data['일자'][i])).days)==1:
            holiday_gap.append(0)
        elif int((pd.to_datetime(data['일자'][i+1])-pd.to_datetime(data['일자'][i])).days)==2:
            holiday_gap.append(1)
        elif int((pd.to_datetime(data['일자'][i+1])-pd.to_datetime(data['일자'][i])).days)==3:
            holiday_gap.append(0)
        else:
            holiday_gap.append(1)
            
    data['연휴'] = holiday_gap



    #### 4. 신메뉴 여부 칼럼 만들기 Y = 신메뉴 / N = 신메뉴 X
    New_lunch = []
    New_dinner = []

    for index in range(len(data)):
        if 'New' in data['중식메뉴'][index]:
            New_lunch.append('Y')
        else:
            New_lunch.append('N')

    for index in range(len(data)):
        if 'New' in data['석식메뉴'][index]:
            New_dinner.append('Y')
        else:
            New_dinner.append('N')
    
    data['New_lunch'] = New_lunch
    data['New_dinner'] = New_dinner

    #### 5. 점심, 저녁에서 밥, 국, 메인반찬 칼럼 만들기
    #점심
    lunch = []
    for index in range(len(data)):
        tmp = data.loc[index,'중식메뉴'].split(' ') # 스페이스로 구분
        tmp = ' '.join(tmp).split()    # 빈칸 제거

        # ()안에 있는 내용 제거
        for menu in tmp:
            if '(' in menu:
                tmp.remove(menu)
        lunch.append(tmp)

    for index in range(len(data)):
        if '쌀밥' in lunch[index][0]:
            lunch[index][0] = '밥'
    
    rice=[]
    soup=[]
    main=[]

    for i in range(len(data)):
        if lunch[i][0]:
            rice.append(lunch[i][0])
        if lunch[i][1]:
            soup.append(lunch[i][1])
        if lunch[i][2]:
            main.append(lunch[i][2])

    data['lunch_rice'] = rice
    data['lunch_soup'] = soup
    data['lunch_main'] = main

    #저녁
    dinner = []

    for index in range(len(data)):
        tmp = data.loc[index,'석식메뉴'].split(' ')
        tmp = ' '.join(tmp).split()

        for menu_d in tmp:
            if '(' in menu_d:
                tmp.remove(menu_d)
        dinner.append(tmp)

    dinner_rice=[]
    dinner_soup=[]
    dinner_main=[]


    for word in dinner:
        
        
        if len(word) == 0:
            dinner_rice.append('None')
            dinner_soup.append('None')
            dinner_main.append('None')
        elif '*' in word:
            dinner_rice.append('None')
            dinner_soup.append('None')
            dinner_main.append('None')
        elif '가정의날' in word:
            dinner_rice.append('None') 
            dinner_soup.append('None')
            dinner_main.append('None')
        elif '가정의달' in word:
            dinner_rice.append('None') 
            dinner_soup.append('None')
            dinner_main.append('None')
        elif '자기계발의날' in word:
            dinner_rice.append('None') 
            dinner_soup.append('None')
            dinner_main.append('None')
        elif '*자기계발의날*' in word:
            dinner_rice.append('None') 
            dinner_soup.append('None')
            dinner_main.append('None')
        elif '자기개발의날' in word:
            dinner_rice.append('None') 
            dinner_soup.append('None')
            dinner_main.append('None')

        else:
            dinner_rice.append(word[0])
            dinner_soup.append(word[1])
            dinner_main.append(word[2])
    
    data['dinner_rice'] = dinner_rice
    data['dinner_soup'] = dinner_soup
    data['dinner_main'] = dinner_main

    for index in range(len(data)):
        if '쌀밥' in data['dinner_rice'][index]:
            data['dinner_rice'][index] = '밥'
        elif '흑미밥' in data['dinner_rice'][index]:
            data['dinner_rice'][index] = '밥'
        
    del data['중식메뉴']
    del data['석식메뉴']



    #### 6. 날짜 데이터 merge
    # merge 전 날짜 데이터 전처리
    weather['일자'] = pd.to_datetime(weather['일시'])
    del weather['일시']
    del weather['지점']
    del weather['지점명']

    #결측치 제거
    weather['일강수량(mm)']=weather['일강수량(mm)'].replace({np.NaN:0})
    weather['평균 상대습도(%)'] = weather['평균 상대습도(%)'].replace({np.NaN:0})

    #칼럼명 변경
    weather.columns = ['기온', '강수량', '풍속', '습도', '일자']

    # 불쾌지수, 체감온도 칼럼 생성
    weather['불쾌지수'] = 9/5 * weather['기온'] - 0.55 * (1-weather['습도']/100) * (9/5 * weather['기온'] - 26) + 32
    weather['체감온도'] = 13.12 + 0.6215 * weather['기온'] - 11.37 * (weather['풍속'] ** 0.16) + 0.3965 * (weather['풍속'] ** 0.16) * weather['기온']    #### 7. 칼럼명 영어로 바꾸기
    
    
    #### 7. 칼럼명 영어로 바꾸기
    df = pd.merge(data, weather, how='inner', on='일자')

    col_eng = ['datetime', 'weekdays', 'worker_number', 'real_number', 'vacation_number', 'biztrip_number', 'overtime_number', 'telecom_number', 'lunch_number',
    'dinner_number', 'month', 'date', 'season', 'vacation', 'new_lunch', 'new_dinner', 'lunch_rice', 'lunch_soup', 'lunch_main', 'dinner_rice', 'dinner_soup', 'dinner_main',
    'temperature', 'rain', 'wind', 'humidity', 'discomfort_index', 'perceived_temperature']

    df.columns=col_eng

    return df

In [5]:
def split(data):
    hr_data = data.loc[:, ["datetime", "worker_number", "real_number", "biztrip_number", "overtime_number", "telecom_number"]]
    lunch_data = data.loc[:, ["datetime", "new_lunch", "lunch_rice", "lunch_soup", "lunch_main", "lunch_number"]]
    dinner_data = data.loc[:, ["datetime", "new_dinner", "dinner_rice", "dinner_soup", "dinner_main", "dinner_number"]]
    weather_data = data.loc[:, ["datetime", "temperature", 'rain', 'wind', 'humidity', 'discomfort_index', 'perceived_temperature']]
    calendar_data = data.loc[:, ['datetime', 'month', 'date', 'weekdays', 'season', 'vacation']]

    return hr_data, lunch_data, dinner_data, weather_data, calendar_data

In [6]:
def save(hr_data, lunch_data, dinner_data, weather_data, calendar_data):
    hr_data.to_csv("data/hr_data.csv", encoding='euc-kr', index=False)
    lunch_data.to_csv("data/lunch_data.csv", encoding='euc-kr', index=False)
    dinner_data.to_csv("data/dinner_data.csv", encoding='euc-kr', index=False)
    weather_data.to_csv("data/weather_data.csv", encoding='euc-kr', index=False)
    calendar_data.to_csv("data/calendar_data.csv", encoding='euc-kr', index=False)

In [7]:
def hr_db():
    conn = sqlite3.connect("data/team3.db")
    cur = conn.cursor()

    file = csv.reader(open("data/hr_data.csv", "r"), delimiter=",")
    next(file)

    for row in file:
        cur.execute("insert into hr values(?, ?, ?, ?, ?, ?)", row)

    conn.commit()
    conn.close()

In [8]:
def lunch_db():
    conn = sqlite3.connect("data/team3.db")
    cur = conn.cursor()

    file = csv.reader(open("data/lunch_data.csv", "r"), delimiter=",")
    next(file)

    for row in file:
        cur.execute("insert into lunch values(?, ?, ?, ?, ?, ?)", row)

    conn.commit()
    conn.close()

In [9]:
def dinner_db():
    conn = sqlite3.connect("data/team3.db")
    cur = conn.cursor()

    file = csv.reader(open("data/dinner_data.csv", "r"), delimiter=",")
    next(file)

    for row in file:
        cur.execute("insert into dinner values(?, ?, ?, ?, ?, ?)", row)

    conn.commit()
    conn.close()

In [10]:
def weather_db():
    conn = sqlite3.connect("data/team3.db")
    cur = conn.cursor()

    file = csv.reader(open("data/weather_data.csv", "r"), delimiter=",")
    next(file)

    for row in file:
        cur.execute("insert into weather values(?, ?, ?, ?, ?, ?, ?)", row)

    conn.commit()
    conn.close()

In [11]:
def calendar_db():
    conn = sqlite3.connect("data/team3.db")
    cur = conn.cursor()

    file = csv.reader(open("data/calendar_data.csv", "r"), delimiter=",")
    next(file)

    for row in file:
        cur.execute("insert into calendar values(?, ?, ?, ?, ?, ?)", row)

    conn.commit()
    conn.close()

In [12]:
data, weather = data_read()
make_database()
df = transform(data, weather)
hr_data, lunch_data, dinner_data, weather_data, calendar_data = split(df)
save(hr_data, lunch_data, dinner_data, weather_data, calendar_data)
hr_db()
lunch_db()
dinner_db()
weather_db()
calendar_db()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['dinner_rice'][index] = '밥'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['dinner_rice'][index] = '밥'


## 2. sql 통합 데이터 테이블 생성 

In [82]:
cnt = 0
lndf = []
conn = sqlite3.connect("data/team3.db")
cur = conn.cursor()

cur.execute("""
SELECT * FROM lunch INNER JOIN hr USING (datetime)INNER JOIN weather USING (datetime)INNER JOIN calendar USING (datetime);""")

for row in cur:
    cnt += 1
    lndf.append(row)
    
conn.close()
print(cnt)
lndf = pd.DataFrame(lndf)
lndf

28896


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
0,2016-02-01,N,밥,오징어찌개,쇠불고기,1039,2601,2401,150,238,...,0.0,43.9,,,-8.512152,2,1,월,겨울,0
1,2016-02-01,N,밥,오징어찌개,쇠불고기,1039,2601,2401,150,238,...,0.0,43.9,,,-8.512152,2,1,월,겨울,0
2,2016-02-01,N,밥,오징어찌개,쇠불고기,1039,2601,2401,150,238,...,0.0,43.9,,,-8.512152,2,1,월,겨울,0
3,2016-02-01,N,밥,오징어찌개,쇠불고기,1039,2601,2401,150,238,...,0.0,43.9,,,-8.512152,2,1,월,겨울,0
4,2016-02-01,N,밥,오징어찌개,쇠불고기,1039,2601,2401,150,238,...,0.0,43.9,,,-8.512152,2,1,월,겨울,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28891,2021-01-26,N,밥,들깨미역국,교촌간장치킨,1015,2983,2369,183,551,...,10.3,94.5,,,-3.641776,1,26,화,겨울,0
28892,2021-01-26,N,밥,들깨미역국,교촌간장치킨,1015,2983,2369,183,551,...,10.3,94.5,,,-3.641776,1,26,화,겨울,0
28893,2021-01-26,N,밥,들깨미역국,교촌간장치킨,1015,2983,2369,183,551,...,10.3,94.5,,,-3.641776,1,26,화,겨울,0
28894,2021-01-26,N,밥,들깨미역국,교촌간장치킨,1015,2983,2369,183,551,...,10.3,94.5,,,-3.641776,1,26,화,겨울,0


In [83]:
columns1 = ["datetime", "new_ln", "ln_rice", "ln_soup", "ln_main", "ln_eater",
           "workers", "real-in-worker", "biztrip", "overtime", "remote-work",
          "temperature", "rain", "wind", "humidity", "discomfort_index", "perceived-temperature", 
           "month", "date", "weekdays", "season", "near-holiday"]

lndf.columns = columns1
lndf

lndf.to_csv("data/lunch_training.csv", encoding = "utf-8", index=False)

In [84]:
cnt = 0
dndf = []
conn = sqlite3.connect("data/team3.db")
cur = conn.cursor()

cur.execute("""
SELECT * FROM dinner INNER JOIN hr USING (datetime)INNER JOIN weather USING (datetime)INNER JOIN calendar USING (datetime)""")

for row in cur:
    cnt += 1
    dndf.append(row)
    
conn.close()
print(cnt)
dndf = pd.DataFrame(dndf)
dndf

28896


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,21
0,2016-02-01,N,밥,육개장,자반고등어구이,331,2601,2401,150,238,...,0.0,43.9,,,-8.512152,2,1,월,겨울,0
1,2016-02-01,N,밥,육개장,자반고등어구이,331,2601,2401,150,238,...,0.0,43.9,,,-8.512152,2,1,월,겨울,0
2,2016-02-01,N,밥,육개장,자반고등어구이,331,2601,2401,150,238,...,0.0,43.9,,,-8.512152,2,1,월,겨울,0
3,2016-02-01,N,밥,육개장,자반고등어구이,331,2601,2401,150,238,...,0.0,43.9,,,-8.512152,2,1,월,겨울,0
4,2016-02-01,N,밥,육개장,자반고등어구이,331,2601,2401,150,238,...,0.0,43.9,,,-8.512152,2,1,월,겨울,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28891,2021-01-26,Y,잔치국수,수제고기육전,쑥갓나물,480,2983,2369,183,551,...,10.3,94.5,,,-3.641776,1,26,화,겨울,0
28892,2021-01-26,Y,잔치국수,수제고기육전,쑥갓나물,480,2983,2369,183,551,...,10.3,94.5,,,-3.641776,1,26,화,겨울,0
28893,2021-01-26,Y,잔치국수,수제고기육전,쑥갓나물,480,2983,2369,183,551,...,10.3,94.5,,,-3.641776,1,26,화,겨울,0
28894,2021-01-26,Y,잔치국수,수제고기육전,쑥갓나물,480,2983,2369,183,551,...,10.3,94.5,,,-3.641776,1,26,화,겨울,0


In [85]:
columns2 = ["datetime", "new_dn", "dn_rice", "dn_soup", "dn_main", "dn_eater",
           "workers", "real-in-worker", "biztrip", "overtime", "remote-work",
          "temperature", "rain", "wind", "humidity", "discomfort_index", "perceived-temperature", 
           "month", "date", "weekdays", "season", "near-holiday"]

dndf.columns = columns2
dndf

dndf.to_csv("data/dinner_training.csv", encoding = "utf-8", index=False)

In [86]:
cnt = 0
lndndf = []
conn = sqlite3.connect("data/team3.db")
cur = conn.cursor()

cur.execute("""
SELECT * FROM lunch INNER JOIN dinner USING (datetime)INNER JOIN hr USING (datetime)
INNER JOIN weather USING (datetime)INNER JOIN calendar USING (datetime)""")

for row in cur:
    cnt += 1
    lndndf.append(row)
    
conn.close()
print(cnt)
lndndf = pd.DataFrame(lndndf)
lndndf

57792


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,17,18,19,20,21,22,23,24,25,26
0,2016-02-01,N,밥,오징어찌개,쇠불고기,1039,N,밥,육개장,자반고등어구이,...,0.0,43.9,,,-8.512152,2,1,월,겨울,0
1,2016-02-01,N,밥,오징어찌개,쇠불고기,1039,N,밥,육개장,자반고등어구이,...,0.0,43.9,,,-8.512152,2,1,월,겨울,0
2,2016-02-01,N,밥,오징어찌개,쇠불고기,1039,N,밥,육개장,자반고등어구이,...,0.0,43.9,,,-8.512152,2,1,월,겨울,0
3,2016-02-01,N,밥,오징어찌개,쇠불고기,1039,N,밥,육개장,자반고등어구이,...,0.0,43.9,,,-8.512152,2,1,월,겨울,0
4,2016-02-01,N,밥,오징어찌개,쇠불고기,1039,N,밥,육개장,자반고등어구이,...,0.0,43.9,,,-8.512152,2,1,월,겨울,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57787,2021-01-26,N,밥,들깨미역국,교촌간장치킨,1015,Y,잔치국수,수제고기육전,쑥갓나물,...,10.3,94.5,,,-3.641776,1,26,화,겨울,0
57788,2021-01-26,N,밥,들깨미역국,교촌간장치킨,1015,Y,잔치국수,수제고기육전,쑥갓나물,...,10.3,94.5,,,-3.641776,1,26,화,겨울,0
57789,2021-01-26,N,밥,들깨미역국,교촌간장치킨,1015,Y,잔치국수,수제고기육전,쑥갓나물,...,10.3,94.5,,,-3.641776,1,26,화,겨울,0
57790,2021-01-26,N,밥,들깨미역국,교촌간장치킨,1015,Y,잔치국수,수제고기육전,쑥갓나물,...,10.3,94.5,,,-3.641776,1,26,화,겨울,0


In [87]:
columns3 = ["datetime", "new_ln", "ln_rice", "ln_soup", "ln_main", "ln_eater", 
            "new_dn", "dn_rice", "dn_soup", "dn_main", "dn_eater",
           "workers", "real-in-worker", "biztrip", "overtime", "remote-work",
          "temperature", "rain", "wind", "humidity", "discomfort_index", "perceived-temperature", 
           "month", "date", "weekdays", "season", "near-holiday"]

lndndf.columns = columns3
lndndf

lndndf.to_csv("data/lunch_dinner_training.csv", encoding = "utf-8", index=False)

## 3. EDA- 쿼리문 작성 

### (1) 메뉴 관련 EDA

+ 일반 밥이냐 그 외 밥(볶음밥, 비빔밥, 특수메뉴)이냐에 따라 식수인원 차이가 존재할까?
+ 국/찌개/탕 에따라 식수인원 차이가 존재할까?
+ 면이 메뉴에 있을떄 없을떄 식수인원 차이가 날까? 
+ 신메뉴가 있으면 식수인원이 증가할까?

In [2]:
import sqlite3

In [28]:
#일반 '밥'아닌 메뉴 중식계 평균
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch WHERE lunch_rice != '밥'")

for row in cur:
    print(row)

(404, 851.009900990099, 1319, 387)


+ 일반밥 유무

In [29]:
#일반 '밥'인 메뉴 평균 중식계
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch WHERE lunch_rice = '밥'")

for row in cur:
    print(row)

(2004, 898.5249500998004, 1459, 296)


In [30]:
# 중식계의 경우 일반 밥+ 반찬 조합 평균이 높음 
# 일반 밥이 나오는 날이 더 자주 있기도 하지만 이용인원에 대한 평균값과 최대값이 크게 나타난다.(표준편차를 구할 필요도 있음)

In [31]:
#일반 '밥'아닌 메뉴 평균 석식계
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number)FROM dinner WHERE dinner_rice != '밥'")

for row in cur:
    print(row)

(898, 462.52561247216033, 776, 0)


In [32]:
#일반 '밥'인 메뉴 평균 중식계
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner WHERE dinner_rice = '밥'")

for row in cur:
    print(row)

(1510, 461.36688741721855, 905, 0)


In [33]:
# 석식계의 경우 일반 '밥'이 아닌 경우와 일반밥의 평균이 거의 차이 없음 

+ 일반밥이 아닌경우 (덮밥, 비빔밥, 볶음밥)

In [51]:
#덮밥, 비빔밥, 볶음밥 중식 식수 인원

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch WHERE lunch_rice like '%덮밥'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch WHERE lunch_rice like '%비빔밥'")

for row in cur:
    print(row)

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch WHERE lunch_rice like '%볶음밥'")

for row in cur:
    print(row)

(90, 840.6666666666666, 1260, 450)
(94, 883.5531914893617, 1109, 508)
(14, 687.8571428571429, 986, 387)


In [49]:
# 볶음밥 메뉴의 식수인원이 적은 이유?
# 단순히 메뉴에 대한 불호인지 요일(금요일)에 따른 식수인원이 적은것인지 확인 필요

In [52]:
#덮밥, 비빔밥, 볶음밥 석식 식수인원 

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner WHERE dinner_rice like '%덮밥'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner WHERE dinner_rice like '%비빔밥'")

for row in cur:
    print(row)

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner WHERE dinner_rice like '%볶음밥'")

for row in cur:
    print(row)

(84, 532.0714285714286, 668, 364)
(14, 527.8571428571429, 608, 449)
(190, 502.9789473684211, 776, 212)


In [53]:
#석식계는 평균적으로 덮밥, 비빔밥, 볶음밥 간 큰  차이를 보이고 있지 않음 

+ 국 탕 찌개

In [25]:
conn = sqlite3.connect("data/team3.db")
cur = conn.cursor()

cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch WHERE lunch_soup LIKE '%탕'")

for row in cur:
    print(row)

conn.close()


(230, 864.0434782608696, 1292, 408)


In [26]:
# 탕/찌개/ 국 중 선호하는 것 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number)FROM lunch WHERE lunch_soup LIKE '%찌개%'")

for row in cur:
    print(row)


(446, 888.9820627802691, 1372, 389)


In [27]:
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch WHERE lunch_soup LIKE '%국%'")

for row in cur:
    print(row)

(1636, 897.7946210268949, 1459, 296)


In [22]:
# 중식계의 경우 데이터가 많은 순(국, 찌개, 탕)으로 평균값도 크다, 그렇지만 편차도 큰 것 같다.

In [37]:
# 저녁 국 탕찌개 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner WHERE dinner_soup LIKE '%찌개%'")

for row in cur:
    print(row)

(436, 472.6605504587156, 905, 0)


In [38]:
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner WHERE dinner_soup LIKE '%국%'")

for row in cur:
    print(row)

(1360, 481.5720588235294, 776, 0)


In [39]:
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner WHERE dinner_soup LIKE '%탕%'")

for row in cur:
    print(row)

(176, 454.27272727272725, 712, 117)


In [25]:
# 석식계도 동일하게 국 찌개 탕 순위임 

+ 면 유무

In [88]:

# 면이 포함된 중식계 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch WHERE lunch_main LIKE '%면%'")

for row in cur:
    print(row)

(22, 967.7272727272727, 1329, 653)


In [44]:
# 면이 포함되지 않은 중식계

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch WHERE NOT lunch_main LIKE '%면%'")

for row in cur:
    print(row)

(2386, 889.8415758591785, 1459, 296)


In [28]:
#중식계의 경우 면이 있는 경우 평균적으로 더 많이 찾아옴 

In [45]:
# 면 포함된 석식계  확인 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner WHERE dinner_main LIKE '%면%'")

for row in cur:
    print(row)

(20, 447.1, 590, 197)


In [47]:
#석식계도 살펴보자 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner WHERE NOT dinner_main LIKE '%면%'")

for row in cur:
    print(row)

(2388, 461.9221105527638, 905, 0)


In [31]:
#석식계의 경우 평균적으로 큰 차이를 보이지 않고 있음  

+ 신메뉴

In [55]:
# 새 메뉴가 나왔을 때 식수인원

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch WHERE new_lunch == 'Y'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch WHERE new_lunch == 'N'")

for row in cur:
    print(row)
    
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner WHERE new_dinner == 'Y'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner WHERE new_dinner == 'N'")

for row in cur:
    print(row)
    

(52, 941.9230769230769, 1329, 570)
(2356, 889.4193548387096, 1459, 296)
(42, 459.4761904761905, 712, 131)
(2366, 461.84023668639054, 905, 0)


In [56]:
# 중식은 신메뉴가 나왔을 경우 식수인원이 증가, 석식은 신메뉴 여부에 따른 인원 평균차이가 적음 

## (2) 날씨 관련 EDA

+ 계절에 따라 식수인원 차이가 날까?  
+ 기온에 따라 식수인원 차이가 날까?  
+ 비가 오면 식수인원이 증가할까? 

+ 계절

In [57]:
# 계절을 join 사용하여 조회하고 불러오기 
# SQLite는 왼쪽 외부 조인을 하는 LEFT OUTER JOIN 절에서만 지원함 

#중식계 4계절 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch LEFT JOIN calendar ON lunch.rowid=calendar.rowid WHERE season = '봄'")

for row in cur:
    print(row)

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number)FROM lunch LEFT JOIN calendar ON lunch.rowid=calendar.rowid WHERE season = '여름'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch LEFT JOIN calendar ON lunch.rowid=calendar.rowid WHERE season = '가을'")

for row in cur:
    print(row)

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch LEFT JOIN calendar ON lunch.rowid=calendar.rowid WHERE season = '겨울'")

for row in cur:
    print(row)


(618, 903.4239482200647, 1362, 383)
(614, 855.1172638436482, 1292, 389)
(586, 889.3924914675767, 1363, 358)
(590, 915.1016949152543, 1459, 296)


In [46]:
#중식계의 경우 겨울 평균 식수인원이 915.1016949152543 가장 많고 여름이 855.1172638436482 가장 적음 

In [58]:
#석식계 4계절 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner LEFT JOIN calendar ON dinner.rowid=calendar.rowid WHERE season = '봄'")

for row in cur:
    print(row)

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner LEFT JOIN calendar ON dinner.rowid=calendar.rowid WHERE season = '여름'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner LEFT JOIN calendar ON dinner.rowid=calendar.rowid WHERE season = '가을'")

for row in cur:
    print(row)

    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner LEFT JOIN calendar ON dinner.rowid=calendar.rowid WHERE season = '겨울'")

for row in cur:
    print(row)


(618, 490.8252427184466, 771, 0)
(614, 451.79153094462544, 729, 0)
(586, 458.5665529010239, 905, 0)
(590, 445.02033898305086, 776, 0)


In [48]:
#석식계의 경우 봄 평균 식수인원이 490.8252427184466 으로 가장 많고 겨울이 445.02033898305086가장 적음 

+ 기온

In [59]:
# 중식계 기온- 1.영하인경우 

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch LEFT JOIN weather ON lunch.rowid=weather.rowid WHERE perceived_temperature LIKE'-%'")

for row in cur:
    print(row)

#기온- 2.0-25 사이인경우 

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number)FROM lunch LEFT JOIN weather ON lunch.rowid=weather.rowid WHERE perceived_temperature BETWEEN '0' AND '25'")

for row in cur:
    print(row)

#기온 25도 이상인 경우 

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch LEFT JOIN weather ON lunch.rowid=weather.rowid WHERE perceived_temperature LIKE '25%'")

for row in cur:
    print(row)


(664, 914.9879518072289, 1459, 296)
(1394, 889.3443328550933, 1363, 358)
(48, 878.0416666666666, 1258, 516)


In [60]:
#중식계의 경우 영하일때 평균 식수인원이 914.9879518072289으로 가장 많고 25도 이상이 878.0416666666666로 가장 적음 

In [61]:
# 석식계 기온- 1.영하인경우 

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner LEFT JOIN weather ON dinner.rowid=weather.rowid WHERE temperature LIKE'-%'")

for row in cur:
    print(row)

#기온- 2.0-25 사이인경우 

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number)  FROM dinner LEFT JOIN weather ON dinner.rowid=weather.rowid WHERE temperature BETWEEN '0' AND '25'")

for row in cur:
    print(row)

#기온 25도 이상인 경우 

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number) FROM dinner LEFT JOIN weather ON dinner.rowid=weather.rowid WHERE temperature LIKE '25%'")

for row in cur:
    print(row)


(176, 424.2386363636364, 689, 0)
(1922, 466.8907388137357, 905, 0)
(42, 436.76190476190476, 673, 0)


In [None]:
#석식계의 경우 기온이 2.0-25 사이일때 평균 식수인원이 466.8907388137357으로 가장 많고 영하가 424.2386363636364 ㄴ으로 가장 적음 

+ 비 여부 

In [62]:
# 비가 오는날 중식 식수 인원 평균

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number)FROM lunch, weather WHERE lunch.datetime=weather.datetime AND rain !=0")

for row in cur:
    print(row)
    
# 비가 안오는날 중식 식수 인원 평균

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) FROM lunch, weather WHERE lunch.datetime=weather.datetime AND rain ==0")

for row in cur:
    print(row)

(1236, 924.0291262135922, 1363, 383)
(3580, 878.995530726257, 1459, 296)


In [63]:
#평균적으로 비가 오는날 중식 식수인원 이 많음 

In [64]:
# 비가 오는날 석식계 식수 인원 평균

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number)FROM dinner, weather WHERE dinner.datetime=weather.datetime AND rain !=0")

for row in cur:
    print(row)
    
# 비가 안오는날 중식 식수 인원 평균

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(dinner_number), avg(dinner_number), max(dinner_number), min(dinner_number)FROM dinner, weather WHERE dinner.datetime=weather.datetime AND rain ==0")

for row in cur:
    print(row)

(1236, 463.1618122977346, 905, 0)
(3580, 461.3284916201117, 772, 0)


In [65]:
#석식계의 경우 비가 오나 오지 않으나 평균적으로 차이가 크지 않음 

## (4) 날짜 관련 EDA

+ 요일마다 식수인원 차이가 있을까? 
+ 연휴 전에는 직원들이 회사 밖에서 식사를해 식수인원이 줄어들까?
+ 월초 월중 월말간 식수인원 차이가 있을까? 
+ 연말에는 식수인원이 줄어들까?

+ 요일

In [67]:
# 요일별 중식 인원(평균, 최대, 최소)

conn = sqlite3.connect("data/team3.db")
cur = conn.cursor()

cur.execute("""SELECT weekdays, round(avg(lunch_number)), max(lunch_number), min(lunch_number) 
FROM calendar LEFT OUTER JOIN lunch ON calendar.datetime = lunch.datetime group by weekdays""")

for row in cur:
    print(row)

conn.close()

('금', 654.0, 1215, 387)
('목', 824.0, 1214, 311)
('수', 905.0, 1336, 571)
('월', 1144.0, 1459, 296)
('화', 926.0, 1324, 349)


In [68]:
# 요일별 석식 인원(평균, 최대, 최소)-단, 석식을 운영하지 않은 날은 고려되지 않도록

conn = sqlite3.connect("data/team3.db")
cur = conn.cursor()

cur.execute("""SELECT weekdays, round(avg(dinner_number)), max(dinner_number), min(dinner_number) 
FROM calendar LEFT OUTER JOIN dinner ON calendar.datetime = dinner.datetime 
where dinner_number != 0 group by weekdays""")

for row in cur:
    print(row)

conn.close()

('금', 408.0, 755, 171)
('목', 480.0, 743, 117)
('수', 437.0, 905, 123)
('월', 539.0, 772, 104)
('화', 522.0, 902, 176)


In [12]:
#석식계의 경우  월요일 평균 식수인원이 538.9336099585062 으로 가장 많고 수요일이 363.6150627615063으로 가장 적음 

+ 휴일
     + vacation 변수는 0이면 연휴 전이 아니란 뜻, 1이면 다음날부터 휴일이란 뜻

In [72]:
# 중식계 휴일 


conn = sqlite3.connect("data/team3.db")
cur = conn.cursor()

cur.execute("""SELECT vacation, round(avg(lunch_number)), max(lunch_number), min(lunch_number)
FROM calendar LEFT OUTER JOIN lunch ON calendar.datetime = lunch.datetime
group by vacation""")

for row in cur:
    print(row)

conn.close()

(0, 899.0, 1459, 311)
(1, 711.0, 1241, 296)


In [None]:
#휴일 전날 중식계 경우 월요일 평균 식수인원이 약 711 으로 평일 899 보다 적은 식수인원인것을 확인할 수 있음 

In [73]:
#석식계 휴일 

conn = sqlite3.connect("data/team3.db")
cur = conn.cursor()

cur.execute("""SELECT vacation, round(avg(dinner_number)), max(dinner_number), min(dinner_number)
FROM calendar LEFT OUTER JOIN dinner ON calendar.datetime = dinner.datetime
WHERE dinner_number != 0 group by vacation""")

for row in cur:
    print(row)

conn.close()

(0, 485.0, 905, 124)
(1, 345.0, 743, 104)


In [74]:
#휴일 전날 석식계 경우  평균 식수인원이 345으로 평일 485보다 적은 식수인원인것을 확인할 수 있음 

+ 월 초 중 말

In [75]:
# 월초 월중 ,월말 중식계


conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) From lunch  WHERE strftime('%d',datetime) BETWEEN'1'AND '10'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number)From lunch  WHERE strftime('%d',datetime) BETWEEN'11'AND '20'")

for row in cur:
    print(row)

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT count(lunch_number), avg(lunch_number), max(lunch_number), min(lunch_number) From lunch WHERE strftime('%d',datetime) BETWEEN'21'AND '31'")

for row in cur:
    print(row)


(82, 902.8536585365854, 1319, 548)
(818, 898.4376528117359, 1459, 405)
(834, 866.0215827338129, 1417, 296)


In [None]:
#월초중말 중식계의 경우 월초 평균 식수 인원이 902.8536585365854으로 가장 많고 월말이 866.0215827338129으로 가장 적은 식수인원인것을 확인할 수 있음 

In [87]:
# 월초 월중 ,월말 석식계
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("Select avg(dinner_number) AS avg From dinner WHERE strftime('%d',datetime) BETWEEN'1'AND '10'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("Select avg(dinner_number) AS avg From dinner WHERE strftime('%d',datetime) BETWEEN'11'AND '20'")

for row in cur:
    print(row)

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("Select avg(dinner_number) AS avg From dinner WHERE strftime('%d',datetime) BETWEEN'21'AND '31'")

for row in cur:
    print(row)

(488.219512195122,)
(476.3422982885086,)
(427.6378896882494,)


In [89]:
#월초중말 석식계의 경우 월초 평균 식수 인원이 488.219512195122으로 가장 많고 월말이 427.6378896882494으로 가장 적은 식수인원인것을 확인할 수 있음 

+ 연말

In [None]:
# 연말 중식, 석식 식수인원

# 2016년
SELECT avg(lunch_number) FROM lunch WHERE datetime BETWEEN '2016-01-01' AND '2016-12-14'
# 939
SELECT avg(lunch_number) FROM lunch WHERE datetime BETWEEN '2016-12-15' AND '2016-12-30'
# 809

SELECT avg(dinner_number) FROM dinner WHERE datetime BETWEEN '2016-01-01' AND '2016-12-14'
# 528
SELECT avg(dinner_number) FROM dinner WHERE datetime BETWEEN '2016-12-15' AND '2016-12-30'
# 360

SELECT lunch.datetime, lunch_number
from lunch left outer join calendar
on lunch.datetime=calendar.datetime 
WHERE month = 12 and date between 15 and 31;
# 739

SELECT dinner.datetime, dinner_number
from dinner left outer join calendar
on lunch.datetime=calendar.datetime 
WHERE month = 12 and date between 15 and 31;
# 315

# 연말에는 중식과 석식 식수인원이 감소하는 것을 확인할 수 있다

## (5) 기타 EDA

+  코로나 이전/이후 

+ 재택근무자가 50명 초과가 된 시점을 기준
+ 그 날짜를 기준으로 하여 재택근무자 수의 최대값, 최소값, 평균을 구하자

In [9]:
conn = sqlite3.connect("data/team3.db")
cur = conn.cursor()

cur.execute("select datetime, telecom_number from hr where telecom_number > 50") 
#2020.3.20부터 본격적으로 코로나 영향으로 대규모 재택근무 시작

for row in cur:
    print(row)

conn.close()

('2020-03-20', 135)
('2020-03-23', 285)
('2020-03-24', 303)
('2020-03-25', 303)
('2020-03-26', 288)
('2020-03-27', 260)
('2020-03-30', 488)
('2020-03-31', 533)
('2020-04-01', 420)
('2020-04-02', 395)
('2020-04-03', 340)
('2020-04-06', 477)
('2020-04-07', 490)
('2020-04-08', 479)
('2020-04-09', 488)
('2020-04-10', 417)
('2020-04-13', 447)
('2020-04-14', 485)
('2020-04-16', 447)
('2020-04-17', 389)
('2020-04-20', 471)
('2020-04-21', 450)
('2020-04-22', 443)
('2020-04-23', 437)
('2020-04-24', 387)
('2020-04-27', 418)
('2020-04-28', 410)
('2020-04-29', 396)
('2020-05-04', 189)
('2020-05-06', 291)
('2020-05-07', 285)
('2020-05-08', 246)
('2020-05-11', 245)
('2020-05-12', 250)
('2020-05-13', 236)
('2020-05-14', 212)
('2020-05-15', 201)
('2020-05-18', 246)
('2020-05-19', 237)
('2020-05-20', 258)
('2020-05-21', 218)
('2020-05-22', 206)
('2020-05-25', 229)
('2020-05-26', 200)
('2020-05-27', 235)
('2020-05-28', 212)
('2020-05-29', 202)
('2020-06-01', 215)
('2020-06-02', 213)
('2020-06-03', 191)


In [10]:
conn = sqlite3.connect("data/team3.db")
cur = conn.cursor()

cur.execute("select max(telecom_number), min(telecom_number), avg(telecom_number) from hr where datetime < '2020-03-20'")

for row in cur:
    print(row)

conn.close()

#코로나 이전에는 많아야 25명 정도 재택근무

(25, 0, 0.29038652130822595)


In [11]:
conn = sqlite3.connect("data/team3.db")
cur = conn.cursor()

cur.execute("select max(telecom_number), min(telecom_number), avg(telecom_number) from hr where datetime >= '2020-03-20'")

for row in cur:
    print(row)

conn.close()

#코로나 이후에는 늘 평균 267명 정도의 사원이 재택근무

(533, 95, 267.34358974358975)


+ 코로나 이전이후 식수인원

In [76]:
# 코로나 이전과 이후 중식계 차이 
conn = sqlite3.connect("data/team3.db")
cur = conn.cursor()

cur.execute("select max(lunch_number), min(lunch_number),avg(lunch_number) from lunch where datetime < '2020-03-20'")

for row in cur:
    print(row)

conn.close()

conn = sqlite3.connect("data/team3.db")
cur = conn.cursor()

cur.execute("select max(lunch_number), min(lunch_number),avg(lunch_number) from lunch where datetime >= '2020-03-20'")

for row in cur:
    print(row)

conn.close()

# 평균이 코로나 이전 대비 6명 정도 줄었는데 유의미한 변화인가?

(1459, 296, 891.4648166501487)
(1405, 358, 885.8358974358974)


In [81]:
#코로나 이전 중식계 경우 891.4648166501487으로  코로나 이후 885.8358974358974보다 많은 식수인원인것을 확인할 수 있음 

In [77]:
# 코로나 이전과 이후 석식계 차이 

#코로나 이전 
conn = sqlite3.connect("data/team3.db")
cur = conn.cursor()

cur.execute("select max(dinner_number), min(dinner_number),avg(dinner_number) from dinner where datetime < '2020-03-20' and dinner_number != 0")

for row in cur:
    print(row)

conn.close()

#코로나 이후 
conn = sqlite3.connect("data/team3.db")
cur = conn.cursor()

cur.execute("select max(dinner_number), min(dinner_number),avg(dinner_number) from dinner where datetime >= '2020-03-20' and dinner_number != 0")

for row in cur:
    print(row)

conn.close()


(905, 104, 490.32988624612204)
(754, 123, 421.94329896907215)


In [83]:
#코로나 이전 석식계 경우 490.32988624612204 으로 코로나 이후 421.94329896907215, 보다 많은 식수인원인것을 확인할 수 있음 

In [103]:
#모든 테이블 합쳐서 조회하기 

# #모든 테이블 합친 view 생성 -> 한번만 실행 
# conn = sqlite3.connect("data/team3.db")
# cur= conn.cursor()
# cur.execute("create view all_exencoding as SELECT *FROM lunch INNER JOIN dinner USING (datetime) INNER JOIN hr USING (datetime)INNER JOIN weather USING (datetime)INNER JOIN calendar USING (datetime);")

# for row in cur:
#     print(row)

#모든 테이블 조회 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("select * from all_exencoding;")


# lunch_train_exencoding view 생성 -> 한번만 실행 
# conn = sqlite3.connect("data/team3.db")
# cur= conn.cursor()
# cur.execute("create view lunch_train_exencoding as SELECT *FROM lunch INNER JOIN calendar USING (datetime) INNER JOIN hr USING (datetime)INNER JOIN weather USING (datetime);")

# for row in cur:
#     print(row)

#lunch_train_exencoding 조회  
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("select * from lunch_train_exencoding;")


# dinner_train_exencoding view 생성 -> 한번만 실행 
# conn = sqlite3.connect("data/team3.db")
# cur= conn.cursor()
# cur.execute("create view dinner_train_exencoding as SELECT *FROM dinner INNER JOIN calendar USING (datetime) INNER JOIN hr USING (datetime)INNER JOIN weather USING (datetime);")

# for row in cur:
#     print(row)

#dinner_train_exencoding 조회  
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("select * from dinner_train_exencoding;")




<sqlite3.Cursor at 0x288c4d38110>