In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re

import matplotlib.font_manager as fm
font_name = fm.FontProperties(fname="C:/Windows/Fonts/malgun.ttf").get_name()
plt.rc("font", family=font_name)
import matplotlib as mpl
mpl.rcParams["axes.unicode_minus"] = False

#SQLite 관련 library import
import sqlite3
from os import mkdir
import csv

## 1. DB 만들기 

In [2]:
def data_read():
    data = pd.read_csv("data/data.csv", encoding='utf-8')
    weather = pd.read_csv("data/weather.csv", encoding='euc-kr')
    return data, weather

In [3]:
def make_database():
    try:
        conn = sqlite3.connect('data/team3.db')
    except sqlite3.OperationalError:
        mkdir('folder_A')
    finally:
        conn = sqlite3.connect('data/team3.db')
        cur = conn.cursor()

    hr = """create table if not exists hr(
        datetime  datetime,
        worker_number   int,
        real_number int,
        biztrip_number  int,
        overtime_number int,
        telecom_number  int    
    )
    """
    cur.execute(hr)
    conn.commit()


    lunch = """create table if not exists lunch(
        datetime  datetime,
        new_lunch   varchar(10),
        lunch_rice varchar(20),
        lunch_soup  varchar(20),
        lunch_main varchar(20),
        lunch_number  int    
    )
    """
    cur.execute(lunch)
    conn.commit()


    dinner = """create table if not exists dinner(
        datetime  datetime,
        new_dinner   varchar(10),
        dinner_rice varchar(20),
        dinner_soup  varchar(20),
        dinner_main varchar(20),
        dinner_number  int    
    )
    """
    cur.execute(dinner)
    conn.commit()


    weather = """create table if not exists weather(
        datetime  datetime,
        temperature   float(32),
        rain float(32),
        wind float(32),
        humidity  float(32),
        discomfort_index    float(32),
        perceived_temperature   float(32)
    )
    """
    cur.execute(weather)
    conn.commit()

    calendar = """create table if not exists calendar(
        datetime  datetime,
        month   int,
        date int,
        weekdays  varchar(16),
        season varchar(16),
        vacation  int    
        )
        """
    cur.execute(calendar)
    conn.commit()


    conn.close()

In [4]:
def transform(data, weather):
    #### 1. 일자를 datetime 형태로 변경
    data['일자'] = pd.to_datetime(data['일자'])
    #### 2. 실질 사내 근무자 수
    data['실질정원수'] = data['본사정원수'] - (data['본사휴가자수'] + data['본사출장자수'] + data['현본사소속재택근무자수'])
    data = data.loc[:, ['일자', '요일', '본사정원수', '실질정원수', '본사휴가자수', '본사출장자수', '본사시간외근무명령서승인건수', '현본사소속재택근무자수', '중식메뉴', '석식메뉴', '중식계', '석식계']]
    #### 3. 월, 일 칼럼과 계절 칼럼, 연휴 칼럼 만들기
    data['Month'] = data['일자'].dt.strftime('%m')
    data['Date'] = data['일자'].dt.strftime('%d')
    season = []

    for index in range(len(data)):
        if data['Month'][index] == '03' or data['Month'][index] == '04' or data['Month'][index] == '05':
            season.append('봄')
        elif data['Month'][index] == '06' or data['Month'][index] == '07' or data['Month'][index] == '08':
            season.append('여름')
        elif data['Month'][index] == '09' or data['Month'][index] == '10' or data['Month'][index] == '11':
            season.append('가을')
        elif data['Month'][index] == '12' or data['Month'][index] == '01' or data['Month'][index] == '02':
            season.append('겨울')
    
    data['Season'] = season



    holiday_gap=[]

    for i in range(len(data)):
        if i == len(data) - 1:
            holiday_gap.append(0)
        elif int((pd.to_datetime(data['일자'][i+1])-pd.to_datetime(data['일자'][i])).days)==1:
            holiday_gap.append(0)
        elif int((pd.to_datetime(data['일자'][i+1])-pd.to_datetime(data['일자'][i])).days)==2:
            holiday_gap.append(1)
        elif int((pd.to_datetime(data['일자'][i+1])-pd.to_datetime(data['일자'][i])).days)==3:
            holiday_gap.append(0)
        else:
            holiday_gap.append(1)
            
    data['연휴'] = holiday_gap



    #### 4. 신메뉴 여부 칼럼 만들기 Y = 신메뉴 / N = 신메뉴 X
    New_lunch = []
    New_dinner = []

    for index in range(len(data)):
        if 'New' in data['중식메뉴'][index]:
            New_lunch.append('Y')
        else:
            New_lunch.append('N')

    for index in range(len(data)):
        if 'New' in data['석식메뉴'][index]:
            New_dinner.append('Y')
        else:
            New_dinner.append('N')
    
    data['New_lunch'] = New_lunch
    data['New_dinner'] = New_dinner

    #### 5. 점심, 저녁에서 밥, 국, 메인반찬 칼럼 만들기
    #점심
    lunch = []
    for index in range(len(data)):
        tmp = data.loc[index,'중식메뉴'].split(' ') # 스페이스로 구분
        tmp = ' '.join(tmp).split()    # 빈칸 제거

        # ()안에 있는 내용 제거
        for menu in tmp:
            if '(' in menu:
                tmp.remove(menu)
        lunch.append(tmp)

    for index in range(len(data)):
        if '쌀밥' in lunch[index][0]:
            lunch[index][0] = '밥'
    
    rice=[]
    soup=[]
    main=[]

    for i in range(len(data)):
        if lunch[i][0]:
            rice.append(lunch[i][0])
        if lunch[i][1]:
            soup.append(lunch[i][1])
        if lunch[i][2]:
            main.append(lunch[i][2])

    data['lunch_rice'] = rice
    data['lunch_soup'] = soup
    data['lunch_main'] = main

    #저녁
    dinner = []

    for index in range(len(data)):
        tmp = data.loc[index,'석식메뉴'].split(' ')
        tmp = ' '.join(tmp).split()

        for menu_d in tmp:
            if '(' in menu_d:
                tmp.remove(menu_d)
        dinner.append(tmp)

    dinner_rice=[]
    dinner_soup=[]
    dinner_main=[]


    for word in dinner:
        
        
        if len(word) == 0:
            dinner_rice.append('None')
            dinner_soup.append('None')
            dinner_main.append('None')
        elif '*' in word:
            dinner_rice.append('None')
            dinner_soup.append('None')
            dinner_main.append('None')
        elif '가정의날' in word:
            dinner_rice.append('None') 
            dinner_soup.append('None')
            dinner_main.append('None')
        elif '가정의달' in word:
            dinner_rice.append('None') 
            dinner_soup.append('None')
            dinner_main.append('None')
        elif '자기계발의날' in word:
            dinner_rice.append('None') 
            dinner_soup.append('None')
            dinner_main.append('None')
        elif '*자기계발의날*' in word:
            dinner_rice.append('None') 
            dinner_soup.append('None')
            dinner_main.append('None')
        elif '자기개발의날' in word:
            dinner_rice.append('None') 
            dinner_soup.append('None')
            dinner_main.append('None')

        else:
            dinner_rice.append(word[0])
            dinner_soup.append(word[1])
            dinner_main.append(word[2])
    
    data['dinner_rice'] = dinner_rice
    data['dinner_soup'] = dinner_soup
    data['dinner_main'] = dinner_main

    for index in range(len(data)):
        if '쌀밥' in data['dinner_rice'][index]:
            data['dinner_rice'][index] = '밥'
        elif '흑미밥' in data['dinner_rice'][index]:
            data['dinner_rice'][index] = '밥'
        
    del data['중식메뉴']
    del data['석식메뉴']



    #### 6. 날짜 데이터 merge
    # merge 전 날짜 데이터 전처리
    weather['일자'] = pd.to_datetime(weather['일시'])
    del weather['일시']
    del weather['지점']
    del weather['지점명']

    #결측치 제거
    weather['일강수량(mm)']=weather['일강수량(mm)'].replace({np.NaN:0})
    weather['평균 상대습도(%)'] = weather['평균 상대습도(%)'].replace({np.NaN:0})

    #칼럼명 변경
    weather.columns = ['기온', '강수량', '풍속', '습도', '일자']

    # 불쾌지수, 체감온도 칼럼 생성
    weather['불쾌지수'] = 9/5 * weather['기온'] - 0.55 * (1-weather['습도']/100) * (9/5 * weather['기온'] - 26) + 32
    weather['체감온도'] = 13.12 + 0.6215 * weather['기온'] - 11.37 * (weather['풍속'] ** 0.16) + 0.3965 * (weather['풍속'] ** 0.16) * weather['기온']    #### 7. 칼럼명 영어로 바꾸기
    
    
    #### 7. 칼럼명 영어로 바꾸기
    df = pd.merge(data, weather, how='inner', on='일자')

    col_eng = ['datetime', 'weekdays', 'worker_number', 'real_number', 'vacation_number', 'biztrip_number', 'overtime_number', 'telecom_number', 'lunch_number',
    'dinner_number', 'month', 'date', 'season', 'vacation', 'new_lunch', 'new_dinner', 'lunch_rice', 'lunch_soup', 'lunch_main', 'dinner_rice', 'dinner_soup', 'dinner_main',
    'temperature', 'rain', 'wind', 'humidity', 'discomfort_index', 'perceived_temperature']

    df.columns=col_eng

    return df

In [5]:
def split(data):
    hr_data = data.loc[:, ["datetime", "worker_number", "real_number", "biztrip_number", "overtime_number", "telecom_number"]]
    lunch_data = data.loc[:, ["datetime", "new_lunch", "lunch_rice", "lunch_soup", "lunch_main", "lunch_number"]]
    dinner_data = data.loc[:, ["datetime", "new_dinner", "dinner_rice", "dinner_soup", "dinner_main", "dinner_number"]]
    weather_data = data.loc[:, ["datetime", "temperature", 'rain', 'wind', 'humidity', 'discomfort_index', 'perceived_temperature']]
    calendar_data = data.loc[:, ['datetime', 'month', 'date', 'weekdays', 'season', 'vacation']]

    return hr_data, lunch_data, dinner_data, weather_data, calendar_data

In [6]:
def save(hr_data, lunch_data, dinner_data, weather_data, calendar_data):
    hr_data.to_csv("data/hr_data.csv", encoding='euc-kr', index=False)
    lunch_data.to_csv("data/lunch_data.csv", encoding='euc-kr', index=False)
    dinner_data.to_csv("data/dinner_data.csv", encoding='euc-kr', index=False)
    weather_data.to_csv("data/weather_data.csv", encoding='euc-kr', index=False)
    calendar_data.to_csv("data/calendar_data.csv", encoding='euc-kr', index=False)

In [7]:
def hr_db():
    conn = sqlite3.connect("data/team3.db")
    cur = conn.cursor()

    file = csv.reader(open("data/hr_data.csv", "r"), delimiter=",")
    next(file)

    for row in file:
        cur.execute("insert into hr values(?, ?, ?, ?, ?, ?)", row)

    conn.commit()
    conn.close()

In [8]:
def lunch_db():
    conn = sqlite3.connect("data/team3.db")
    cur = conn.cursor()

    file = csv.reader(open("data/lunch_data.csv", "r"), delimiter=",")
    next(file)

    for row in file:
        cur.execute("insert into lunch values(?, ?, ?, ?, ?, ?)", row)

    conn.commit()
    conn.close()

In [9]:
def dinner_db():
    conn = sqlite3.connect("data/team3.db")
    cur = conn.cursor()

    file = csv.reader(open("data/dinner_data.csv", "r"), delimiter=",")
    next(file)

    for row in file:
        cur.execute("insert into dinner values(?, ?, ?, ?, ?, ?)", row)

    conn.commit()
    conn.close()

In [10]:
def weather_db():
    conn = sqlite3.connect("data/team3.db")
    cur = conn.cursor()

    file = csv.reader(open("data/weather_data.csv", "r"), delimiter=",")
    next(file)

    for row in file:
        cur.execute("insert into weather values(?, ?, ?, ?, ?, ?, ?)", row)

    conn.commit()
    conn.close()

In [11]:
def calendar_db():
    conn = sqlite3.connect("data/team3.db")
    cur = conn.cursor()

    file = csv.reader(open("data/calendar_data.csv", "r"), delimiter=",")
    next(file)

    for row in file:
        cur.execute("insert into calendar values(?, ?, ?, ?, ?, ?)", row)

    conn.commit()
    conn.close()

In [12]:
data, weather = data_read()
make_database()
df = transform(data, weather)
hr_data, lunch_data, dinner_data, weather_data, calendar_data = split(df)
save(hr_data, lunch_data, dinner_data, weather_data, calendar_data)
hr_db()
lunch_db()
dinner_db()
weather_db()
calendar_db()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['dinner_rice'][index] = '밥'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['dinner_rice'][index] = '밥'


## 2. EDA- 쿼리문 작성 

### (1) 메뉴 관련 EDA

In [2]:
import sqlite3

In [3]:
#일반 '밥'아닌 메뉴 평균 중식계
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch WHERE lunch_rice != '밥'")

for row in cur:
    print(row)

(851.009900990099,)


In [15]:
#일반 '밥'인 메뉴 평균 중식계
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch WHERE lunch_rice = '밥'")

for row in cur:
    print(row)

(898.5249500998004,)


In [16]:
# 중식계의 경우 일반 밥+ 반찬 조합 평균이 높음 

In [17]:
#일반 '밥'아닌 메뉴 평균 석식계
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner WHERE dinner_rice != '밥'")

for row in cur:
    print(row)

(462.52561247216033,)


In [18]:
#일반 '밥'인 메뉴 평균 중식계
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner WHERE dinner_rice = '밥'")

for row in cur:
    print(row)

(461.36688741721855,)


In [19]:
# 석식계의 경우 일반 '밥'이 아닌 경우와 일반밥의 평균이 거의 차이 없음 

In [20]:
# 점심중 찌개/ 국 중 선호하는 것 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch WHERE lunch_soup LIKE '%찌개%'")

for row in cur:
    print(row)


(888.9820627802691,)


In [21]:
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch WHERE lunch_soup LIKE '%국%'")

for row in cur:
    print(row)

(897.7946210268949,)


In [22]:
# 중식계의 경우 국의 선호도가 더 높음

In [23]:
# 저녁중 찌개/ 국 중 선호하는 것 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner WHERE dinner_soup LIKE '%찌개%'")

for row in cur:
    print(row)

(472.6605504587156,)


In [24]:
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner WHERE dinner_soup LIKE '%국%'")

for row in cur:
    print(row)

(481.5720588235294,)


In [25]:
# 석식계도  국의 선호도가 더 높음

In [26]:
# 면이 포함된 날과 포함되지 않은날 차이가 존재할까?

# 면이 포함된 중식계 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch WHERE lunch_main LIKE '%면%'")

for row in cur:
    print(row)

(967.7272727272727,)


In [27]:
# 면이 포함되지 않은 중식계

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch WHERE NOT lunch_main LIKE '%면%'")

for row in cur:
    print(row)

(889.8415758591785,)


In [28]:
#중식계의 경우 면이 있는 경우 평균적으로 더 많이 찾아옴 

In [29]:
# 면 포함된 석식계  확인 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner WHERE dinner_main LIKE '%면%'")

for row in cur:
    print(row)

(447.1,)


In [30]:
#석식계도 살펴보자 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner WHERE NOT dinner_main LIKE '%면%'")

for row in cur:
    print(row)

(461.9221105527638,)


In [31]:
#석식계의 경우는 큰 차이를 보이고 있지 않음 

### (2) 중식계 상관분석

## (3) 날씨 관련 EDA

+ 계절
+ 기온 

In [45]:
# 계절을 join 사용하여 조회하고 불러오기 
# SQLite는 왼쪽 외부 조인을 하는 LEFT OUTER JOIN 절에서만 지원함 

#중식계 4계절 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch LEFT JOIN calendar ON lunch.rowid=calendar.rowid WHERE season = '봄'")

for row in cur:
    print(row)

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch LEFT JOIN calendar ON lunch.rowid=calendar.rowid WHERE season = '여름'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch LEFT JOIN calendar ON lunch.rowid=calendar.rowid WHERE season = '가을'")

for row in cur:
    print(row)

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch LEFT JOIN calendar ON lunch.rowid=calendar.rowid WHERE season = '겨울'")

for row in cur:
    print(row)


(903.4239482200647,)
(855.1172638436482,)
(889.3924914675767,)
(915.1016949152543,)


In [46]:
#중식계의 경우 겨울 평균 식수인원이 915.1016949152543 가장 많고 여름이 855.1172638436482 가장 적음 

In [47]:
#석식계 4계절 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner LEFT JOIN calendar ON dinner.rowid=calendar.rowid WHERE season = '봄'")

for row in cur:
    print(row)

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner LEFT JOIN calendar ON dinner.rowid=calendar.rowid WHERE season = '여름'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner LEFT JOIN calendar ON dinner.rowid=calendar.rowid WHERE season = '가을'")

for row in cur:
    print(row)

    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner LEFT JOIN calendar ON dinner.rowid=calendar.rowid WHERE season = '겨울'")

for row in cur:
    print(row)


(490.8252427184466,)
(451.79153094462544,)
(458.5665529010239,)
(445.02033898305086,)


In [48]:
#석식계의 경우 봄 평균 식수인원이 490.8252427184466 으로 가장 많고 겨울이 445.02033898305086가장 적음 

In [52]:
# 중식계 기온- 1.영하인경우 

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch LEFT JOIN weather ON lunch.rowid=weather.rowid WHERE perceived_temperature LIKE'-%'")

for row in cur:
    print(row)

#기온- 2.0-25 사이인경우 

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch LEFT JOIN weather ON lunch.rowid=weather.rowid WHERE perceived_temperature BETWEEN '0' AND '25'")

for row in cur:
    print(row)

#기온 25도 이상인 경우 

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch LEFT JOIN weather ON lunch.rowid=weather.rowid WHERE perceived_temperature LIKE '25%'")

for row in cur:
    print(row)


(914.9879518072289,)
(889.3443328550933,)
(878.0416666666666,)


In [53]:
#중식계의 경우 영하일때 평균 식수인원이 914.9879518072289으로 가장 많고 25도 이상이 878.0416666666666로 가장 적음 

In [86]:
# 석식계 기온- 1.영하인경우 

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner LEFT JOIN weather ON dinner.rowid=weather.rowid WHERE temperature LIKE'-%'")

for row in cur:
    print(row)

#기온- 2.0-25 사이인경우 

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner LEFT JOIN weather ON dinner.rowid=weather.rowid WHERE temperature BETWEEN '0' AND '25'")

for row in cur:
    print(row)

#기온 25도 이상인 경우 

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner LEFT JOIN weather ON dinner.rowid=weather.rowid WHERE temperature LIKE '25%'")

for row in cur:
    print(row)


(424.2386363636364,)
(466.8907388137357,)
(436.76190476190476,)


In [None]:
#석식계의 경우 기온이 2.0-25 사이일때 평균 식수인원이 466.8907388137357으로 가장 많고 영하가 424.2386363636364 ㄴ으로 가장 적음 

## (4) 날짜 관련 EDA

+ 요일
+ 휴일 
+ 월초 월중 월말 

In [9]:
# 중식계 요일 


conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch LEFT JOIN calendar ON lunch.rowid=calendar.rowid WHERE weekdays='월'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch LEFT JOIN calendar ON lunch.rowid=calendar.rowid WHERE weekdays='화'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch LEFT JOIN calendar ON lunch.rowid=calendar.rowid WHERE weekdays='수'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch LEFT JOIN calendar ON lunch.rowid=calendar.rowid WHERE weekdays='목'")

for row in cur:
    print(row)

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch LEFT JOIN calendar ON lunch.rowid=calendar.rowid WHERE weekdays='금'")

for row in cur:
    print(row)

(1144.331950207469,)
(925.6208333333333,)
(905.2133891213389,)
(823.9918032786885,)
(653.7208333333333,)


In [10]:
#중식계의 경우  월요일 평균 식수인원이 1144.331950207469 으로 가장 많고 금요일이 653.7208333333333 으로 가장 적음 

In [11]:
# 석식계 요일 


conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner LEFT JOIN calendar ON dinner.rowid=calendar.rowid WHERE weekdays='월'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner LEFT JOIN calendar ON dinner.rowid=calendar.rowid WHERE weekdays='화'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner LEFT JOIN calendar ON dinner.rowid=calendar.rowid WHERE weekdays='수'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner LEFT JOIN calendar ON dinner.rowid=calendar.rowid WHERE weekdays='목'")

for row in cur:
    print(row)

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner LEFT JOIN calendar ON dinner.rowid=calendar.rowid WHERE weekdays='금'")

for row in cur:
    print(row)

(538.9336099585062,)
(520.1291666666667,)
(363.6150627615063,)
(480.4016393442623,)
(404.875,)


In [12]:
#석식계의 경우  월요일 평균 식수인원이 538.9336099585062 으로 가장 많고 수요일이 363.6150627615063으로 가장 적음 

In [13]:
# 휴일 

In [14]:
# 중식계 휴일 


conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch LEFT JOIN calendar ON lunch.rowid=calendar.rowid WHERE vacation='1'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch LEFT JOIN calendar ON lunch.rowid=calendar.rowid WHERE vacation='0'")

for row in cur:
    print(row)

(711.3269230769231,)
(898.6432291666666,)


In [None]:
#휴일 전날 중식계 경우 월요일 평균 식수인원이 711.3269230769231 으로 평일 898.6432291666666 보다 적은 식수인원인것을 확인할 수 있음 

In [15]:
#석식계 휴일 


conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner LEFT JOIN calendar ON dinner.rowid=calendar.rowid WHERE vacation='1'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner LEFT JOIN calendar ON dinner.rowid=calendar.rowid WHERE vacation='0'")

for row in cur:
    print(row)

(318.1730769230769,)
(468.28211805555554,)


In [16]:
#휴일 전날 석식계 경우 월요일 평균 식수인원이 318.1730769230769으로 평일 468.28211805555554보다 적은 식수인원인것을 확인할 수 있음 

In [79]:
# 월초 월중 ,월말 중식계


conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("Select avg(lunch_number) AS avg From lunch WHERE strftime('%d',datetime) BETWEEN'1'AND '10'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("Select avg(lunch_number) AS avg From lunch WHERE strftime('%d',datetime) BETWEEN'11'AND '20'")

for row in cur:
    print(row)

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("Select avg(lunch_number) AS avg From lunch WHERE strftime('%d',datetime) BETWEEN'21'AND '31'")

for row in cur:
    print(row)


(902.8536585365854,)
(898.4376528117359,)
(866.0215827338129,)


In [None]:
#월초중말 중식계의 경우 월초 평균 식수 인원이 902.8536585365854으로 가장 많고 월말이 866.0215827338129으로 가장 적은 식수인원인것을 확인할 수 있음 

In [87]:
# 월초 월중 ,월말 석식계
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("Select avg(dinner_number) AS avg From dinner WHERE strftime('%d',datetime) BETWEEN'1'AND '10'")

for row in cur:
    print(row)
    
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("Select avg(dinner_number) AS avg From dinner WHERE strftime('%d',datetime) BETWEEN'11'AND '20'")

for row in cur:
    print(row)

conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("Select avg(dinner_number) AS avg From dinner WHERE strftime('%d',datetime) BETWEEN'21'AND '31'")

for row in cur:
    print(row)

(488.219512195122,)
(476.3422982885086,)
(427.6378896882494,)


In [89]:
#월초중말 석식계의 경우 월초 평균 식수 인원이 488.219512195122으로 가장 많고 월말이 427.6378896882494으로 가장 적은 식수인원인것을 확인할 수 있음 

## (5) 기타 EDA

+  코로나 이전/이후 

In [80]:
# 코로나 이전과 이후 중식계 차이 

#코로나 이전 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch WHERE datetime<'2020-03-20'")

for row in cur:
    print(row)

#코로나 이후 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(lunch_number) FROM lunch WHERE datetime>'2020-03-20'")

for row in cur:
    print(row)

(891.4648166501487,)
(887.1701030927835,)


In [81]:
#코로나 이전 중식계 경우 891.4648166501487으로  코로나 이후 887.1701030927835 보다 많은 식수인원인것을 확인할 수 있음 

In [82]:
# 코로나 이전과 이후 석식계 차이 

#코로나 이전 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner WHERE datetime<'2020-03-20'")

for row in cur:
    print(row)

#코로나 이후 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("SELECT avg(dinner_number) FROM dinner WHERE datetime>'2020-03-20'")

for row in cur:
    print(row)

(469.9197224975223,)
(420.0979381443299,)


In [83]:
#코로나 이전 석식계 경우 469.9197224975223 으로 코로나 이후 420.0979381443299, 보다 많은 식수인원인것을 확인할 수 있음 

In [103]:
#모든 테이블 합쳐서 조회하기 

# #모든 테이블 합친 view 생성 -> 한번만 실행 
# conn = sqlite3.connect("data/team3.db")
# cur= conn.cursor()
# cur.execute("create view all_exencoding as SELECT *FROM lunch INNER JOIN dinner USING (datetime) INNER JOIN hr USING (datetime)INNER JOIN weather USING (datetime)INNER JOIN calendar USING (datetime);")

# for row in cur:
#     print(row)

#모든 테이블 조회 
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("select * from all_exencoding;")


# lunch_train_exencoding view 생성 -> 한번만 실행 
# conn = sqlite3.connect("data/team3.db")
# cur= conn.cursor()
# cur.execute("create view lunch_train_exencoding as SELECT *FROM lunch INNER JOIN calendar USING (datetime) INNER JOIN hr USING (datetime)INNER JOIN weather USING (datetime);")

# for row in cur:
#     print(row)

#lunch_train_exencoding 조회  
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("select * from lunch_train_exencoding;")


# dinner_train_exencoding view 생성 -> 한번만 실행 
# conn = sqlite3.connect("data/team3.db")
# cur= conn.cursor()
# cur.execute("create view dinner_train_exencoding as SELECT *FROM dinner INNER JOIN calendar USING (datetime) INNER JOIN hr USING (datetime)INNER JOIN weather USING (datetime);")

# for row in cur:
#     print(row)

#dinner_train_exencoding 조회  
conn = sqlite3.connect("data/team3.db")
cur= conn.cursor()
cur.execute("select * from dinner_train_exencoding;")




<sqlite3.Cursor at 0x288c4d38110>