In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import os
import matplotlib.font_manager as fm
import matplotlib as mpl

# Font used for Korean labels in plots. The path is specific to one machine,
# so it is only applied when the file is actually there; loading a missing
# font file would otherwise raise and abort this whole imports cell.
_font_path = '/Users/hoon/Library/Fonts/NanumSquareRegular.ttf'
if os.path.exists(_font_path):
    font_name = fm.FontProperties(fname=_font_path).get_name()
    plt.rc("font", family=font_name)
else:
    # Keep matplotlib's current default family, but still define `font_name`
    # so any later reference to it keeps working.
    font_name = mpl.rcParams["font.family"][0]

# Render minus signs with an ASCII hyphen rather than the Unicode minus glyph.
mpl.rcParams["axes.unicode_minus"] = False

import requests as req
import time
import re


from sklearn.model_selection import train_test_split, cross_val_score, cross_validate, GridSearchCV, KFold # 데이터 나누기, 점검 및 훈련

from sklearn.metrics import explained_variance_score, accuracy_score, confusion_matrix, classification_report, roc_curve, precision_score, recall_score # 평가지표
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler, StandardScaler # Feature Scaling


from sklearn.neighbors import KNeighborsRegressor

from sklearn.datasets import load_iris, load_boston, load_breast_cancer, make_moons, load_digits, load_diabetes, load_wine, make_blobs # 데이터
import mglearn # 그래프

from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.svm import LinearSVC, LinearSVR

# conda install -c conda-forge xgboost
import xgboost as xgb
from xgboost import XGBRegressor
from xgboost import plot_importance, plot_tree

from sklearn.pipeline import make_pipeline
import multiprocessing
from sklearn.pipeline import Pipeline

from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
from sklearn.linear_model import Perceptron
from sklearn.neural_network import MLPRegressor

# pip install lightgbm
# conda install -c conda-forge lightgbm
from lightgbm import LGBMRegressor
from lightgbm import plot_importance, plot_tree, plot_metric

import graphviz

import statsmodels.api as sm

# SVC는 분류, SVR은 회귀
# 분류알고리즘에선 끝판왕으로 불림
from sklearn.svm import SVR, SVC

In [2]:
# Read the encoded lunch and dinner tables (lunch first, then dinner).
lunch, dinner = (
    pd.read_csv(csv_path)
    for csv_path in ("data/lunch_df_encoding.csv", "data/dinner_df_encoding.csv")
)

In [3]:
# Discard every row that contains at least one missing value.
lunch, dinner = lunch.dropna(), dinner.dropna()

In [4]:
def split_lunch(data, test_size=None, random_state=None):
    """Split the lunch dataset into train/test features and targets.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the "lunch_number" column (used as the target) and the
        "datetime" column (dropped). Every remaining column is a feature.
    test_size : float or int, optional
        Forwarded to ``train_test_split``. ``None`` keeps scikit-learn's
        default split proportion.
    random_state : int, optional
        Forwarded to ``train_test_split``. Pass an integer to get the same
        split on every run; ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.

    Raises
    ------
    KeyError
        If "lunch_number" or "datetime" is missing from ``data``.
    """
    features = data.drop(columns=["lunch_number", "datetime"])
    target = data.loc[:, "lunch_number"]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )

    return X_train, X_test, y_train, y_test

def split_dinner(data, test_size=None, random_state=None):
    """Split the dinner dataset into train/test features and targets.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the "dinner_number" column (used as the target) and the
        "datetime" column (dropped). Every remaining column is a feature.
    test_size : float or int, optional
        Forwarded to ``train_test_split``. ``None`` keeps scikit-learn's
        default split proportion.
    random_state : int, optional
        Forwarded to ``train_test_split``. Pass an integer to get the same
        split on every run; ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.

    Raises
    ------
    KeyError
        If "dinner_number" or "datetime" is missing from ``data``.
    """
    features = data.drop(columns=["dinner_number", "datetime"])
    target = data.loc[:, "dinner_number"]
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=test_size, random_state=random_state
    )

    return X_train, X_test, y_train, y_test

In [5]:
# Produce the four lunch splits, then the four dinner splits.
(lunch_X_train, lunch_X_test,
 lunch_y_train, lunch_y_test) = split_lunch(lunch)
(dinner_X_train, dinner_X_test,
 dinner_y_train, dinner_y_test) = split_dinner(dinner)

In [6]:
# Re-order every split by its row index so features and targets within each
# split line up in ascending index order.
lunch_X_train = lunch_X_train.sort_index()
lunch_X_test = lunch_X_test.sort_index()
lunch_y_train = lunch_y_train.sort_index()
# Bug fix: this line used to sort lunch_y_train, which replaced the lunch test
# labels with the training labels.
lunch_y_test = lunch_y_test.sort_index()
dinner_X_train = dinner_X_train.sort_index()
dinner_X_test = dinner_X_test.sort_index()
dinner_y_train = dinner_y_train.sort_index()
dinner_y_test = dinner_y_test.sort_index()

In [7]:
# Preview the first five rows of the lunch training features.
lunch_X_train.head(n=5)

Unnamed: 0,year,month,date,worker_number,real_number,vacation_number,biztrip_number,overtime_number,telecom_number,temperature,...,season_winter,weekdays_friday,weekdays_monday,weekdays_thursday,weekdays_tuesday,weekdays_wednesday,vacation_N,vacation_Y,new_lunch_N,new_lunch_Y
0,2016,2,1,2601,2401,50,150,238,0,-0.6,...,1,0,1,0,0,0,1,0,1,0
1,2016,2,2,2601,2378,50,173,319,0,-2.3,...,1,0,0,0,1,0,1,0,1,0
2,2016,2,3,2601,2365,56,180,111,0,-1.7,...,1,0,0,0,0,1,1,0,1,0
4,2016,2,5,2601,2142,278,181,34,0,1.3,...,1,1,0,0,0,0,0,1,1,0
5,2016,2,11,2601,2075,383,143,417,0,6.1,...,1,0,0,1,0,0,1,0,1,0


In [8]:
# Preview the first five lunch training targets.
lunch_y_train.head(n=5)

0    1039
1     867
2    1017
4     925
5    1045
Name: lunch_number, dtype: int64

In [9]:
# Write each split to disk as UTF-8 CSV without the index column.
# Destination paths are listed in the order the files are written.
_split_exports = [
    ("data/lunch_X_train", lunch_X_train),
    ("data/lunch_X_test", lunch_X_test),
    ("data/lunch_y_train", lunch_y_train),
    ("data/lunch_y_test", lunch_y_test),
    ("data/dinner_X_train", dinner_X_train),
    ("data/dinner_X_test", dinner_X_test),
    ("data/dinner_y_train", dinner_y_train),
    ("data/dinner_y_test", dinner_y_test),
]
for _export_path, _export_obj in _split_exports:
    _export_obj.to_csv(_export_path, encoding='utf-8', index=False)

## 데이터 정규화

In [11]:
# Read the encoded lunch and dinner tables again from disk (lunch first).
lunch, dinner = (
    pd.read_csv(csv_path)
    for csv_path in ("data/lunch_df_encoding.csv", "data/dinner_df_encoding.csv")
)

In [12]:
# Discard every row that contains at least one missing value.
lunch, dinner = lunch.dropna(), dinner.dropna()

In [13]:
def train_lunch(data):
    """Separate the lunch table into a feature frame and a target series.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the "lunch_number" and "datetime" columns.

    Returns
    -------
    tuple
        ``(features, target)``: ``features`` is ``data`` without the
        "lunch_number" and "datetime" columns, and ``target`` is the
        "lunch_number" column. No train/test split is performed.

    Raises
    ------
    KeyError
        If either required column is missing.
    """
    features = data.drop(columns=["lunch_number", "datetime"])
    target = data.loc[:, "lunch_number"]
    return features, target

def train_dinner(data):
    """Separate the dinner table into a feature frame and a target series.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the "dinner_number" and "datetime" columns.

    Returns
    -------
    tuple
        ``(features, target)``: ``features`` is ``data`` without the
        "dinner_number" and "datetime" columns, and ``target`` is the
        "dinner_number" column. No train/test split is performed.

    Raises
    ------
    KeyError
        If either required column is missing.
    """
    features = data.drop(columns=["dinner_number", "datetime"])
    target = data.loc[:, "dinner_number"]
    return features, target

In [14]:
# Build the (features, target) pair for lunch, then for dinner.
(lunch_train, lunch_target), (dinner_train, dinner_target) = (
    train_lunch(lunch),
    train_dinner(dinner),
)

In [15]:
# Preview the first five rows of the full lunch feature frame.
lunch_train.head(n=5)

Unnamed: 0,year,month,date,worker_number,real_number,vacation_number,biztrip_number,overtime_number,telecom_number,temperature,...,season_winter,weekdays_friday,weekdays_monday,weekdays_thursday,weekdays_tuesday,weekdays_wednesday,vacation_N,vacation_Y,new_lunch_N,new_lunch_Y
0,2016,2,1,2601,2401,50,150,238,0,-0.6,...,1,0,1,0,0,0,1,0,1,0
1,2016,2,2,2601,2378,50,173,319,0,-2.3,...,1,0,0,0,1,0,1,0,1,0
2,2016,2,3,2601,2365,56,180,111,0,-1.7,...,1,0,0,0,0,1,1,0,1,0
3,2016,2,4,2601,2277,104,220,355,0,-0.2,...,1,0,0,1,0,0,1,0,1,0
4,2016,2,5,2601,2142,278,181,34,0,1.3,...,1,1,0,0,0,0,0,1,1,0


In [None]:
# Preview the first five lunch training targets.
lunch_y_train.head(n=5)

0    1039
1     867
2    1017
4     925
5    1045
Name: lunch_number, dtype: int64

In [None]:
# Write each split to disk as UTF-8 CSV without the index column.
# Destination paths are listed in the order the files are written.
_split_exports = [
    ("data/lunch_X_train", lunch_X_train),
    ("data/lunch_X_test", lunch_X_test),
    ("data/lunch_y_train", lunch_y_train),
    ("data/lunch_y_test", lunch_y_test),
    ("data/dinner_X_train", dinner_X_train),
    ("data/dinner_X_test", dinner_X_test),
    ("data/dinner_y_train", dinner_y_train),
    ("data/dinner_y_test", dinner_y_test),
]
for _export_path, _export_obj in _split_exports:
    _export_obj.to_csv(_export_path, encoding='utf-8', index=False)