<a href="https://colab.research.google.com/github/kong-jh/capstone-design2/blob/main/bikePredict.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import warnings

# Silence every warning for the rest of the session. This also hides deprecation
# and FutureWarning messages raised by the libraries imported above.
warnings.filterwarnings('ignore')

In [2]:
import os

# Directory holding the input CSV files. Later cells build paths by plain string
# concatenation (base_dir + file name), so the trailing slash is required.
# NOTE(review): the path is relative to the working directory and looks like a
# mounted Google Drive folder; no mount step is visible here, so confirm.
base_dir = './drive/MyDrive/predict/data/'

## 1. 서울시 공공자전거 이용정보

In [3]:
# Load the monthly hourly-usage CSVs (files named ..._21.01.csv to ..._21.12.csv)
# and stack them into a single frame.
# All files are read first and concatenated in ONE call: calling pd.concat inside
# the loop copies every previously loaded row again on each iteration.
file_nm = '서울특별시 공공자전거 이용정보(시간대별)_21.'
months = ['02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']

monthly_frames = [
    pd.read_csv(base_dir + file_nm + month_code + '.csv', encoding='cp949')
    for month_code in ['01'] + months  # January first, same row order as before
]
# Default index handling is kept, so each month's original row labels survive
# exactly as they did with the incremental concat.
bike_df = pd.concat(monthly_frames)

# Station names repeat on every row; store them as a categorical column.
bike_df['대여소명'] = bike_df['대여소명'].astype('category')

In [4]:
# Derive calendar features from the rental date, then keep only the columns
# used downstream (the raw date column itself is dropped).
rental_date = bike_df['대여일자'].astype('datetime64[ns]').dt
calendar_features = {
    '월': rental_date.month,
    '일': rental_date.day,
    '요일': rental_date.dayofweek,  # pandas convention: Monday=0 ... Sunday=6
}
kept_columns = ['월', '일', '요일', '대여시간', '대여소번호', '대여소명', '이용건수', '이동거리', '사용시간']
bike_df = bike_df.assign(**calendar_features)[kept_columns]

In [7]:
# Restrict to rental station number 661 and total its usage count per
# (month, day, hour, weekday) combination.
is_station_661 = bike_df['대여소번호'].eq(661)
usage_by_slot = bike_df.loc[is_station_661].groupby(['월', '일', '대여시간', '요일'])['이용건수']
khu = usage_by_slot.sum().reset_index()

## 2. 기상정보

In [8]:
# Read the hourly weather observations and split the timestamp column into
# month / day / hour columns.
weather = pd.read_csv(base_dir + 'OBS_AWS_TIM_20221121230810.csv', encoding='cp949')
observed_at = weather['일시'].astype('datetime64[ns]').dt
weather['월'] = observed_at.month
weather['일'] = observed_at.day
weather['대여시간'] = observed_at.hour  # hour of day of the observation

# Keep the time keys plus temperature, wind direction, wind speed,
# precipitation and humidity; the raw timestamp is dropped.
weather_columns = ['월', '일', '대여시간', '기온(°C)', '풍향(deg)', '풍속(m/s)', '강수량(mm)', '습도(%)']
weather = weather[weather_columns]
weather.head()

Unnamed: 0,월,일,대여시간,기온(°C),풍향(deg),풍속(m/s),강수량(mm),습도(%)
0,1,1,1,-6.7,109.8,0.9,0.0,51.4
1,1,1,2,-6.9,94.8,0.6,0.0,55.4
2,1,1,3,-7.3,135.6,0.6,0.0,56.0
3,1,1,4,-7.6,72.6,0.6,0.0,59.5
4,1,1,5,-8.0,61.7,0.8,0.0,61.2


In [9]:
# Treat a missing precipitation reading as 0 mm, then drop any row that still
# has a missing value in another column.
# The fill goes through DataFrame.fillna with a per-column mapping and the result
# is rebound. The previous form, weather['강수량(mm)'].fillna(0, inplace=True), is
# a chained in-place call: under pandas Copy-on-Write it modifies a temporary
# Series only, leaving the NaNs in `weather`, so dropna would then discard every
# row without a precipitation reading.
weather = weather.fillna({'강수량(mm)': 0}).dropna(axis=0)

# Show the remaining null count per column (expected to be all zeros).
weather.isnull().sum()

월          0
일          0
대여시간       0
기온(°C)     0
풍향(deg)    0
풍속(m/s)    0
강수량(mm)    0
습도(%)      0
dtype: int64

## 3. 데이터

In [10]:
# Attach the weather readings to each usage row by matching month, day and hour
# (default inner join: rows without a match on either side are dropped).
usage_with_weather = pd.merge(khu, weather, on=['월', '일', '대여시간'])
usage_with_weather['이용건수'] = usage_with_weather['이용건수'].astype('int64')

# One-hot encode the weekday into columns prefixed with '요일_'.
weekday_onehot = pd.get_dummies(usage_with_weather['요일'], prefix='요일')
df = usage_with_weather.join(weekday_onehot)

In [11]:
# Feature columns: month, hour, the five weather readings, and the seven
# weekday indicator columns 요일_0 ... 요일_6.
weather_features = ['기온(°C)', '풍향(deg)', '풍속(m/s)', '강수량(mm)', '습도(%)']
weekday_features = ['요일_' + str(day_index) for day_index in range(7)]
cols = ['월', '대여시간'] + weather_features + weekday_features

## 4. 학습

In [12]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same on every run.
features = df[cols]
target = df['이용건수']
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

In [13]:
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
from scipy.stats import uniform, randint

# Number of candidate values drawn at random for each sampled hyper-parameter.
n = 3
# Seed for the candidate draws. Without it the grid itself changes on every run,
# so neither the selected model nor the printed MAE can be reproduced.
search_seed = 42

param_grid = {
    # n values drawn uniformly from [0, 0.5)
    "gamma": uniform(0, 0.5).rvs(n, random_state=search_seed),
    "max_depth": range(2, 7),
    # n integers drawn from [100, 150)
    "n_estimators": randint(100, 150).rvs(n, random_state=search_seed),
}

# Exhaustive search over the grid with 5-fold cross-validation on all cores.
# NOTE(review): no `scoring` is passed, so candidates are ranked by the
# estimator's own default score rather than by the MAE reported below; pass
# scoring='neg_mean_absolute_error' if selection should match; confirm intent.
grid = GridSearchCV(xgb.XGBRegressor(), cv=5, n_jobs=-1, param_grid=param_grid)
grid.fit(X_train, y_train)

# Evaluate the best candidate (refit on the full training set) on the hold-out rows.
model = grid.best_estimator_
predictions = model.predict(X_test)
mae = mean_absolute_error(y_test, predictions)

print(mae)

1.736959438669611


In [14]:
# Persist the fitted model next to the data directory.
model_path = './drive/MyDrive/predict/661.model'
model.save_model(model_path)