In [1]:
import pandas as pd
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn import preprocessing
import seaborn as sns
import holidays


In [3]:
# Load the 2019 passenger + weather table from the working directory.
# NOTE(review): the rendered frame has an 'Unnamed: 0' column, which is
# presumably a row index written by an earlier export -- confirm whether it
# should be kept before relying on it downstream.
new_data = pd.read_csv(filepath_or_buffer='passenger_with_weather19.csv')

In [4]:
# Display the freshly loaded frame to inspect its columns and row count.
new_data

Unnamed: 0,년,월,일,항공사,편명,출발지,예상_1,예상_2,현황,예상_승객,운항편수,Count,가시거리,온도,풍속
0,2019,1,1,42,240,9,0,15,1,1128,486,1,10000.0,-6.34,3.0
1,2019,1,1,65,1020,43,0,20,1,1128,486,1,10000.0,-6.34,3.0
2,2019,1,1,67,891,73,0,30,1,1128,486,1,10000.0,-6.34,3.0
3,2019,1,1,62,775,44,0,35,1,1128,486,1,10000.0,-6.34,3.0
4,2019,1,1,41,239,0,0,20,1,1128,486,1,10000.0,-6.34,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181107,2019,12,31,14,987,147,6,45,1,1224,501,1,8000.0,-5.70,20.0
181108,2019,12,31,79,257,62,6,50,1,1224,501,1,8000.0,-5.70,20.0
181109,2019,12,31,70,114,33,6,50,1,1224,501,1,8000.0,-5.70,20.0
181110,2019,12,31,57,537,52,6,50,1,1224,501,1,8000.0,-5.70,20.0


In [5]:
# Work on an independent copy rather than a second name for the same object,
# so that in-place column assignments made on `data` in later cells can never
# modify the raw `new_data` frame (which matters when cells are re-run or run
# out of order).
data = new_data.copy()

In [7]:
def classify_day_of_week(row):
    """Build 0/1 day-type indicators for the calendar date stored in ``row``.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by the keys '년' (year), '월' (month) and '일'
        (day). Each value is coerced with ``int()`` before the date is built
        (presumably because row-wise ``apply`` hands the values over as
        floats -- confirm).

    Returns
    -------
    dict
        Three integer flags, exactly one of which is 1:
        '평일'   -> Monday through Thursday (weekday index 0-3),
        '금요일' -> Friday (weekday index 4),
        '주말'   -> Saturday or Sunday (weekday index 5-6).

    Raises
    ------
    ValueError
        Propagated from ``datetime`` when the three parts do not form a
        valid calendar date.
    """
    year, month, day = (int(row[part]) for part in ('년', '월', '일'))
    weekday_index = datetime(year, month, day).weekday()  # 0 = Monday ... 6 = Sunday

    # weekday() never returns a negative number, so "< 4" is the same test as
    # "0 <= index <= 3".
    is_mon_to_thu = weekday_index < 4
    is_friday = weekday_index == 4
    is_weekend = weekday_index in (5, 6)

    return {
        '평일': int(is_mon_to_thu),
        '금요일': int(is_friday),
        '주말': int(is_weekend),
    }

# Classify every row's date; result_type='expand' turns the returned dicts
# into one DataFrame column per key ('평일', '금요일', '주말').
day_of_week_columns = data.apply(classify_day_of_week, axis=1, result_type='expand')

# Remove indicator columns left over from a previous run of this cell before
# attaching the new ones. Without this, re-running the cell would append a
# second set of identically named columns. errors='ignore' makes the first
# run (where the columns do not exist yet) a no-op.
data = pd.concat(
    [data.drop(columns=day_of_week_columns.columns, errors='ignore'), day_of_week_columns],
    axis=1,
)

In [8]:
# Display the frame to check the newly attached '평일', '금요일' and '주말' columns.
data

Unnamed: 0,년,월,일,항공사,편명,출발지,예상_1,예상_2,현황,예상_승객,운항편수,Count,가시거리,온도,풍속,평일,금요일,주말
0,2019,1,1,42,240,9,0,15,1,1128,486,1,10000.0,-6.34,3.0,1,0,0
1,2019,1,1,65,1020,43,0,20,1,1128,486,1,10000.0,-6.34,3.0,1,0,0
2,2019,1,1,67,891,73,0,30,1,1128,486,1,10000.0,-6.34,3.0,1,0,0
3,2019,1,1,62,775,44,0,35,1,1128,486,1,10000.0,-6.34,3.0,1,0,0
4,2019,1,1,41,239,0,0,20,1,1128,486,1,10000.0,-6.34,3.0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181107,2019,12,31,14,987,147,6,45,1,1224,501,1,8000.0,-5.70,20.0,1,0,0
181108,2019,12,31,79,257,62,6,50,1,1224,501,1,8000.0,-5.70,20.0,1,0,0
181109,2019,12,31,70,114,33,6,50,1,1224,501,1,8000.0,-5.70,20.0,1,0,0
181110,2019,12,31,57,537,52,6,50,1,1224,501,1,8000.0,-5.70,20.0,1,0,0


In [10]:
# South Korean holiday calendar, created once at cell level so that every
# per-row lookup below reuses the same object.
kr_holidays = holidays.SouthKorea()

def is_holiday(row):
    """Return 1 if the date stored in ``row`` is in ``kr_holidays``, else 0.

    ``row`` must be indexable by '년' (year), '월' (month) and '일' (day);
    each value is coerced with ``int()`` before the ``datetime`` is built.
    """
    year, month, day = (int(row[part]) for part in ('년', '월', '일'))
    return 1 if datetime(year, month, day) in kr_holidays else 0

# Evaluate is_holiday once per row and store the 0/1 result in a new
# '공휴일' column.
data['공휴일'] = data.apply(func=is_holiday, axis='columns')

In [11]:
# Display the frame to check the newly added '공휴일' column.
data

Unnamed: 0,년,월,일,항공사,편명,출발지,예상_1,예상_2,현황,예상_승객,운항편수,Count,가시거리,온도,풍속,평일,금요일,주말,공휴일
0,2019,1,1,42,240,9,0,15,1,1128,486,1,10000.0,-6.34,3.0,1,0,0,1
1,2019,1,1,65,1020,43,0,20,1,1128,486,1,10000.0,-6.34,3.0,1,0,0,1
2,2019,1,1,67,891,73,0,30,1,1128,486,1,10000.0,-6.34,3.0,1,0,0,1
3,2019,1,1,62,775,44,0,35,1,1128,486,1,10000.0,-6.34,3.0,1,0,0,1
4,2019,1,1,41,239,0,0,20,1,1128,486,1,10000.0,-6.34,3.0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181107,2019,12,31,14,987,147,6,45,1,1224,501,1,8000.0,-5.70,20.0,1,0,0,0
181108,2019,12,31,79,257,62,6,50,1,1224,501,1,8000.0,-5.70,20.0,1,0,0,0
181109,2019,12,31,70,114,33,6,50,1,1224,501,1,8000.0,-5.70,20.0,1,0,0,0
181110,2019,12,31,57,537,52,6,50,1,1224,501,1,8000.0,-5.70,20.0,1,0,0,0


In [12]:
# Persist the enriched frame; index=False keeps the DataFrame's row index out
# of the written file.
data.to_csv(path_or_buf="holidays_with_weather19.csv", index=False)