In [None]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import warnings

# Suppress every warning for the rest of the session. This also hides
# deprecation/future warnings emitted by pandas, scikit-learn and xgboost.
warnings.simplefilter('ignore')

In [None]:
import os

# Root folder of the input CSV files; every read_csv call below prefixes its
# file name with this path.
# NOTE(review): the relative path looks like a mounted Google Drive (Colab) —
# confirm before running in another environment.
base_dir = './drive/MyDrive/predict/data/'

## 1. 공공자전거 이용 정보

In [None]:
# Load the twelve monthly 2021 usage files (cp949-encoded CSVs) and stack them
# with a single concat. Concatenating inside the loop re-copies the accumulated
# frame on every iteration, which is quadratic in the number of rows.
file_nm = '서울특별시 공공자전거 이용정보(시간대별)_21.'
months = ['02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']

monthly_frames = [
  pd.read_csv(base_dir+file_nm+month+'.csv', encoding='cp949')
  for month in ['01'] + months  # January first, then the remaining months in order
]
# The per-file row index is kept (no ignore_index), exactly as before.
bike_df = pd.concat(monthly_frames)
# Station names repeat heavily; store them as a categorical column.
bike_df['대여소명'] = bike_df['대여소명'].astype('category')

In [None]:
# Display the concatenated usage frame (the notebook renders its first and last rows).
bike_df

Unnamed: 0,대여일자,대여시간,대여소번호,대여소명,대여구분코드,성별,연령대코드,이용건수,운동량,탄소량,이동거리,사용시간
0,2021-01-01,0,1347,1347. 길음래미안아파트817동 상가 앞,정기권,,~10대,2,28.90,0.23,992.78,13.0
1,2021-01-01,0,1349,1349. 월계2교 버스정류장 앞,정기권,,~10대,1,34.38,0.26,1127.50,10.0
2,2021-01-01,0,1168,1168. 마곡엠밸리10단지 앞,정기권,,~10대,1,0.00,0.00,0.00,7.0
3,2021-01-01,0,2619,2619. 석촌고분역 4번출구,정기권,,~10대,1,38.75,0.38,1658.34,14.0
4,2021-01-01,0,536,536. 행당역 2번출구 앞,정기권,,~10대,1,12.96,0.17,743.88,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...
1519846,2021-12-31,23,4561,4561. 양평역 1번출구,정기권,M,50대,1,22.95,0.19,840.00,4.0
1519847,2021-12-31,23,815,815. LIG강촌아파트 103동앞,정기권,M,50대,1,30.20,0.21,907.98,8.0
1519848,2021-12-31,23,2173,2173. 당곡사거리,정기권,M,60대,1,64.55,0.48,2063.26,13.0
1519849,2021-12-31,23,2718,2718.마곡수명산파크1단지,정기권,M,60대,1,27.84,0.22,950.00,8.0


In [None]:
# Parse the rental date once, derive the calendar features from it, and keep
# only the columns used downstream.
rental_date = bike_df['대여일자'].astype('datetime64[ns]')
kept_columns = ['월', '일', '요일', '대여시간', '대여소번호', '대여소명', '이용건수', '이동거리', '사용시간']
bike_df = bike_df.assign(
    월=rental_date.dt.month,
    일=rental_date.dt.day,
    요일=rental_date.dt.dayofweek,  # pandas convention: Monday=0 ... Sunday=6
)[kept_columns]

### 1-(1) : 동대문구 추출


In [None]:
# Station master table -> numbers of the stations whose district is 동대문구.
rentals = pd.read_csv(base_dir+'공공자전거 대여소 정보(21.12월 기준).csv', encoding='cp949')
in_ddm = rentals['자치구'] == '동대문구'
ddm_rentals = rentals.loc[in_ddm, '대여소번호'].unique()

# Keep only the usage rows recorded at those stations.
at_ddm_station = bike_df['대여소번호'].isin(ddm_rentals)
bike_df = bike_df.loc[at_ddm_station]
bike_df.head()

Unnamed: 0,월,일,요일,대여시간,대여소번호,대여소명,이용건수,이동거리,사용시간
23,1,1,4,0,656,656. 영휘원 교차로,1,2734.11,19.0
36,1,1,4,0,673,673.안암골벽산아파트(후문),1,0.0,10.0
40,1,1,4,0,4103,4103. 휘경sk뷰아파트 앞,1,0.0,15.0
78,1,1,4,0,4117,4117. 휘경119안전센터 앞,1,0.0,52.0
81,1,1,4,0,640,640. KEB하나은행 청량리역지점,1,1256.1,11.0


## 2. 기상정보

In [None]:
# Read the hourly weather observations and split the timestamp into the same
# month / day / hour keys that the bike usage frame carries.
weather = pd.read_csv(base_dir+'OBS_AWS_TIM_20221121230810.csv', encoding='cp949')
observed_at = weather['일시'].astype('datetime64[ns]')
weather = weather.assign(
    월=observed_at.dt.month,
    일=observed_at.dt.day,
    대여시간=observed_at.dt.hour,
)
weather_columns = ['월', '일', '대여시간', '기온(°C)', '풍향(deg)', '풍속(m/s)', '강수량(mm)', '습도(%)']
weather = weather[weather_columns]
weather.head()

Unnamed: 0,월,일,대여시간,기온(°C),풍향(deg),풍속(m/s),강수량(mm),습도(%)
0,1,1,1,-6.7,109.8,0.9,0.0,51.4
1,1,1,2,-6.9,94.8,0.6,0.0,55.4
2,1,1,3,-7.3,135.6,0.6,0.0,56.0
3,1,1,4,-7.6,72.6,0.6,0.0,59.5
4,1,1,5,-8.0,61.7,0.8,0.0,61.2


In [None]:
# Treat a missing precipitation reading as 0 mm, then drop rows that still
# have a gap in any other measurement.
# The fill is assigned back explicitly: `weather[col].fillna(0, inplace=True)`
# is a chained in-place call that, under pandas Copy-on-Write, only modifies a
# temporary and leaves `weather` untouched, so dropna would then discard every
# hour with missing precipitation.
weather = weather.assign(**{'강수량(mm)': weather['강수량(mm)'].fillna(0)})
weather = weather.dropna(axis=0)
# Sanity check: every column should now report zero missing values.
weather.isnull().sum()

월          0
일          0
대여시간       0
기온(°C)     0
풍향(deg)    0
풍속(m/s)    0
강수량(mm)    0
습도(%)      0
dtype: int64

## 3. 데이터

In [None]:
# Attach the weather observation to every rental row that shares its
# month / day / hour. This is pandas' default inner join, so rental rows
# without a matching observation are dropped.
merge_keys = ['월', '일', '대여시간']
df = pd.merge(bike_df, weather, on=merge_keys)
df

Unnamed: 0,월,일,요일,대여시간,대여소번호,대여소명,이용건수,이동거리,사용시간,기온(°C),풍향(deg),풍속(m/s),강수량(mm),습도(%)
0,1,1,4,1,670,670.삼육서울병원 버스정류장,1,1840.00,17.0,-6.7,109.8,0.9,0.0,51.4
1,1,1,4,1,616,616. 서울시립대 앞,1,0.00,3.0,-6.7,109.8,0.9,0.0,51.4
2,1,1,4,1,602,602. 장안동 사거리,1,0.00,25.0,-6.7,109.8,0.9,0.0,51.4
3,1,1,4,1,609,609. 제기2교,1,2866.89,26.0,-6.7,109.8,0.9,0.0,51.4
4,1,1,4,1,616,616. 서울시립대 앞,1,840.98,5.0,-6.7,109.8,0.9,0.0,51.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
929008,12,31,4,0,602,602. 장안동 사거리,1,2100.13,21.0,-4.3,348.8,2.8,0.0,31.7
929009,12,31,4,0,673,673.안암골벽산아파트(후문),1,440.00,4.0,-4.3,348.8,2.8,0.0,31.7
929010,12,31,4,0,602,602. 장안동 사거리,1,2030.00,19.0,-4.3,348.8,2.8,0.0,31.7
929011,12,31,4,0,677,677.청량리역 4번 5번출구 사이,1,3670.00,23.0,-4.3,348.8,2.8,0.0,31.7


In [None]:
# Make the rental count an explicit 64-bit integer column.
df = df.astype({'이용건수': 'int64'})

## 4. 학습

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
from scipy.stats import uniform, randint

# Train one XGBoost regressor per Dongdaemun-gu station: aggregate the hourly
# rental counts, one-hot encode the weekday, grid-search a small parameter
# grid, record the hold-out MAE and save the best model to disk.
n = 3
cols = ['월', '대여시간', '기온(°C)', '풍향(deg)', '풍속(m/s)', '강수량(mm)', '습도(%)', '요일_0', '요일_1', '요일_2', '요일_3', '요일_4', '요일_5', '요일_6']

seed = 42
# 5-fold CV on the 80% training split needs at least a handful of rows;
# stations with fewer aggregated rows are skipped instead of crashing the loop.
min_rows = 10
model_dir = './drive/MyDrive/predict/model/'
os.makedirs(model_dir, exist_ok=True)  # save_model does not create the folder

# Fixed weekday categories so every station gets all seven 요일_0..요일_6
# columns, even when a weekday never occurs in its data.
weekday_dtype = pd.CategoricalDtype(categories=list(range(7)))

# Draw the random candidates once, with a seed, so every station is tuned on
# the same reproducible grid.
param_grid = {
    "gamma": uniform(0, 0.5).rvs(n, random_state=seed),
    "max_depth": range(2, 7),
    "n_estimators": randint(100, 150).rvs(n, random_state=seed),
}

# station number -> mean absolute error on the 20% hold-out split
results = {}

for i in ddm_rentals:
  print(i, '번 대여소')

  _df = df[df['대여소번호']==i]
  _df = _df.groupby(['월', '일', '대여시간', '요일', '기온(°C)', '풍향(deg)', '풍속(m/s)', '강수량(mm)', '습도(%)'])['이용건수'].sum().reset_index()

  if len(_df) < min_rows:
    print('skipped: only', len(_df), 'rows')
    continue

  # Encode this station's own weekday column. Using the global df['요일'] here
  # would join by positional index and attach weekdays of unrelated rows.
  weekday = _df['요일'].astype(weekday_dtype)
  _df = _df.join(pd.get_dummies(weekday, prefix='요일', dtype=int))

  print(_df.shape)

  X_train, X_test, y_train, y_test = train_test_split(_df[cols], _df['이용건수'], test_size=0.2, random_state=42)

  grid = GridSearchCV(xgb.XGBRegressor(), cv=5, n_jobs=-1, param_grid=param_grid)
  grid = grid.fit(X_train, y_train)

  model = grid.best_estimator_
  predictions = model.predict(X_test)
  mae = mean_absolute_error(y_test, predictions)
  results[i] = mae  # keep the score instead of discarding it

  model_path = model_dir+str(i)+'.model'
  model.save_model(model_path)
  print(model_path)

600 번 대여소
(2085, 17)
./drive/MyDrive/predict/model/600.model
601 번 대여소
(4254, 17)
./drive/MyDrive/predict/model/601.model
602 번 대여소
(6859, 17)
./drive/MyDrive/predict/model/602.model
604 번 대여소
(4358, 17)
./drive/MyDrive/predict/model/604.model
605 번 대여소
(5209, 17)
./drive/MyDrive/predict/model/605.model
606 번 대여소
(2987, 17)
./drive/MyDrive/predict/model/606.model
607 번 대여소
(3754, 17)
./drive/MyDrive/predict/model/607.model
608 번 대여소
(2079, 17)
./drive/MyDrive/predict/model/608.model
609 번 대여소
(5901, 17)
./drive/MyDrive/predict/model/609.model
610 번 대여소
(4388, 17)
./drive/MyDrive/predict/model/610.model
612 번 대여소
(5859, 17)
./drive/MyDrive/predict/model/612.model
613 번 대여소
(5873, 17)
./drive/MyDrive/predict/model/613.model
614 번 대여소
(5817, 17)
./drive/MyDrive/predict/model/614.model
615 번 대여소
(5088, 17)
./drive/MyDrive/predict/model/615.model
616 번 대여소
(1580, 17)
./drive/MyDrive/predict/model/616.model
617 번 대여소
(5041, 17)
./drive/MyDrive/predict/model/617.model
621 번 대여소
(2542, 17)
./d

## test

In [None]:
import shutil

# Copy the model file the training loop wrote for station 661.
# Saving the in-memory `model` instead would store whichever estimator the
# loop fitted last, which is station 661 only if the loop happened to stop there.
# NOTE(review): assumes station 661 is among ddm_rentals and was not skipped,
# so that the source file exists — confirm.
shutil.copyfile('./drive/MyDrive/predict/model/661.model', './drive/MyDrive/predict/661.model')

In [None]:
# Smoke test: reload the saved station model and predict a single hour.
load_model = xgb.XGBRegressor() # initialise an empty model
load_model.load_model('./drive/MyDrive/predict/661.model')

# Same feature order as the training columns.
feature_names = ['월', '대여시간', '기온(°C)', '풍향(deg)', '풍속(m/s)', '강수량(mm)', '습도(%)', '요일_0', '요일_1', '요일_2', '요일_3', '요일_4', '요일_5', '요일_6']
# 대여시간 is the hour of day (0-23) in the training data, so 7 pm is 19;
# the earlier value 1900 was outside anything the model was trained on.
# Building the frame in one call lets pandas infer numeric dtypes per column.
data = pd.DataFrame(
    [[11, 19, 11.0, 287, 2, 0, 40, 0, 0, 1, 0, 0, 0, 0]],
    columns=feature_names,
)

load_model.predict(data)



array([1.9428086], dtype=float32)