# 전기 자동차 충전 데이터를 기반으로 충전기 현황 예측 및 추천 모델 개발

## 기간 : 2020년 8월 - 2020년 11월

## 프로젝트 내용
### 1. 전기 자동차 충전기 이용 현황 데이터에서 사용자의 이용 패턴을 전처리하여 분류 모델로 학습한다.
### 2. 학습된 모델을 통해 사용자가 원하는 시간과 장소에 최적의 충전소를 표시해준다.

In [None]:
import time
import datetime
import numpy as np
import pandas as pd
import warnings
import openpyxl
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import train_test_split
import time
import datetime
from datetime import datetime, timedelta
# Capture the current local time once. In the visible part of this notebook it
# is referenced only from commented-out extrapolation code further down.
now = datetime.now()

In [None]:
# Charging stations, ordered by station id (sid).
# Raw strings keep the Windows path separator from being read as an escape.
df_station = pd.read_csv(r'csv\charging_station(201015 ~ 16).csv')
df_station = df_station.sort_values(by=['sid'], ascending=True)

# Chargers, ordered by charger id (cid).
df_charger = pd.read_csv(r'csv\charger(201015 ~ 16).csv')
df_charger = df_charger.sort_values(by=['cid'], ascending=True)


# The Samchully Everland chargers have no charge amount, so 100 is filled in
# as a placeholder. The slice is label based: it covers the rows from index
# label 3595 to the end of the cid-sorted frame. A single .loc call on the
# frame (instead of chained df['amount'].loc[...]) guarantees the write lands
# in df_charger and avoids SettingWithCopyWarning.
df_charger.loc[3595:, 'amount'] = 100

# Charge amount per minute.
p = (df_charger['amount']/60)
df_charger['amount/m'] = p


# Usage history, delivered as two files covering consecutive date ranges.
df1 = pd.read_csv(r'csv\usage_status(201006 ~ 14).csv')
df2 = pd.read_csv(r'csv\usage_status(201015 ~ 16).csv')

# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
df = pd.concat([df1, df2])
df = df.sort_values(by=['id'], ascending=True)
# The usage files call the charger id 'id'; rename it to match the charger table.
df = df.rename({'id':'cid'}, axis='columns')
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


Unnamed: 0,cid,start_date,start_time,end_date,end_time
0,111100030,2020-10-06 Tue,17:44:27,2020-10-06 Tue,18:15:12
1,111100030,2020-10-06 Tue,18:29:43,2020-10-06 Tue,18:37:15
2,111100030,2020-10-06 Tue,19:06:53,2020-10-06 Tue,19:14:15
3,111100030,2020-10-13 Tue,15:25:35,2020-10-13 Tue,15:54:40
2,111100060,2020-10-16 Fri,00:18:41,2020-10-16 Fri,01:48:49
...,...,...,...,...,...
19629,ME19C4052,2020-10-15 Thu,12:06:49,\t,\t
3231,ME19C4052,2020-10-15 Thu,16:19:15,\t,\t
19630,ME19C4053,2020-10-06 Tue,16:13:07,2020-10-08 Thu,14:06:15
19631,ME19C4053,2020-10-15 Thu,12:06:49,\t,\t


In [None]:
# Expose the charger key as 'cid' and order the usage records by it.
df = df.rename(columns={'id': 'cid'}).sort_values('cid')


# Attach station (region) information to each charger, then to each usage record.
merge_inner = df_charger.merge(df_station, on='sid')

merge_inners = df.merge(merge_inner, on='cid')

# Charger/station attributes that are not needed for the usage features.
unused_columns = ['sid', 'c_name', 'typ', 'charge', 'c_using', 'amount',
                  'c_create_date', 'amount/m', 'name', 'u_time', 'agency', 'c_add',
                  'phone', 'fee', 'etc', 'create_date']
df = merge_inners.drop(columns=unused_columns)

# Put the columns in a fixed order and sort by charger and start date.
column_order = ['cid', 'do', 'city', 'gu', 'start_date', 'start_time',
                'end_date', 'end_time']
df = df[column_order].sort_values(['cid', 'start_date'])

# Renumber the rows 0..n-1; the pre-sort index is discarded.
df = df.reset_index(drop=True)

df

Unnamed: 0,cid,do,city,gu,start_date,start_time,end_date,end_time
0,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,17:44:27,2020-10-06 Tue,18:15:12
1,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,18:29:43,2020-10-06 Tue,18:37:15
2,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,19:06:53,2020-10-06 Tue,19:14:15
3,111100030,서울특별시,서울시,종로구,2020-10-13 Tue,15:25:35,2020-10-13 Tue,15:54:40
4,111100060,서울특별시,서울시,종로구,2020-10-07 Wed,06:30:45,2020-10-07 Wed,06:46:17
...,...,...,...,...,...,...,...,...
22859,ME19C4052,충청남도,보령시,명천동,2020-10-15 Thu,12:06:49,\t,\t
22860,ME19C4052,충청남도,보령시,명천동,2020-10-15 Thu,16:19:15,\t,\t
22861,ME19C4053,충청남도,보령시,명천동,2020-10-06 Tue,16:13:07,2020-10-08 Thu,14:06:15
22862,ME19C4053,충청남도,보령시,명천동,2020-10-15 Thu,12:06:49,\t,\t


# 전처리

In [None]:
# Join each date with its time into one 'date time' string so the start and
# the end of a charging session can each be handled as a single value.
date_time_parts = {
    'start_day': ('start_date', 'start_time'),
    'end_day': ('end_date', 'end_time'),
}
for combined_col, (date_col, time_col) in date_time_parts.items():
    df[combined_col] = df[date_col] + ' ' + df[time_col]

df

Unnamed: 0,cid,do,city,gu,start_date,start_time,end_date,end_time,start_day,end_day
0,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,17:44:27,2020-10-06 Tue,18:15:12,2020-10-06 Tue 17:44:27,2020-10-06 Tue 18:15:12
1,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,18:29:43,2020-10-06 Tue,18:37:15,2020-10-06 Tue 18:29:43,2020-10-06 Tue 18:37:15
2,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,19:06:53,2020-10-06 Tue,19:14:15,2020-10-06 Tue 19:06:53,2020-10-06 Tue 19:14:15
3,111100030,서울특별시,서울시,종로구,2020-10-13 Tue,15:25:35,2020-10-13 Tue,15:54:40,2020-10-13 Tue 15:25:35,2020-10-13 Tue 15:54:40
4,111100060,서울특별시,서울시,종로구,2020-10-07 Wed,06:30:45,2020-10-07 Wed,06:46:17,2020-10-07 Wed 06:30:45,2020-10-07 Wed 06:46:17
...,...,...,...,...,...,...,...,...,...,...
22859,ME19C4052,충청남도,보령시,명천동,2020-10-15 Thu,12:06:49,\t,\t,2020-10-15 Thu 12:06:49,\t \t
22860,ME19C4052,충청남도,보령시,명천동,2020-10-15 Thu,16:19:15,\t,\t,2020-10-15 Thu 16:19:15,\t \t
22861,ME19C4053,충청남도,보령시,명천동,2020-10-06 Tue,16:13:07,2020-10-08 Thu,14:06:15,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15
22862,ME19C4053,충청남도,보령시,명천동,2020-10-15 Thu,12:06:49,\t,\t,2020-10-15 Thu 12:06:49,\t \t


In [None]:
# Index labels of sessions with no recorded end: their end_date holds the
# two-character text backslash + 't' instead of a date.
has_no_end = df['end_date'] == '\\t'
t = df.loc[has_no_end].index
t

Int64Index([   23,    84,    85,   165,   166,   167,   168,   169,   175,
              176,
            ...
            22798, 22799, 22853, 22854, 22856, 22857, 22859, 22860, 22862,
            22863],
           dtype='int64', length=789)

In [None]:
# Keep only sessions that have an end time and renumber the rows 0..n-1
# (the old index is discarded rather than kept as a column).
befor = df.drop(index=t).reset_index(drop=True)
befor

Unnamed: 0,cid,do,city,gu,start_date,start_time,end_date,end_time,start_day,end_day
0,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,17:44:27,2020-10-06 Tue,18:15:12,2020-10-06 Tue 17:44:27,2020-10-06 Tue 18:15:12
1,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,18:29:43,2020-10-06 Tue,18:37:15,2020-10-06 Tue 18:29:43,2020-10-06 Tue 18:37:15
2,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,19:06:53,2020-10-06 Tue,19:14:15,2020-10-06 Tue 19:06:53,2020-10-06 Tue 19:14:15
3,111100030,서울특별시,서울시,종로구,2020-10-13 Tue,15:25:35,2020-10-13 Tue,15:54:40,2020-10-13 Tue 15:25:35,2020-10-13 Tue 15:54:40
4,111100060,서울특별시,서울시,종로구,2020-10-07 Wed,06:30:45,2020-10-07 Wed,06:46:17,2020-10-07 Wed 06:30:45,2020-10-07 Wed 06:46:17
...,...,...,...,...,...,...,...,...,...,...
22070,ME19C4043,대구광역시,대구시,수성구,2020-10-16 Fri,13:43:03,2020-10-16 Fri,13:58:44,2020-10-16 Fri 13:43:03,2020-10-16 Fri 13:58:44
22071,ME19C4050,충청남도,보령시,명천동,2020-10-06 Tue,16:13:07,2020-10-08 Thu,14:06:15,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15
22072,ME19C4051,충청남도,보령시,명천동,2020-10-06 Tue,16:13:07,2020-10-08 Thu,14:06:15,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15
22073,ME19C4052,충청남도,보령시,명천동,2020-10-06 Tue,16:13:07,2020-10-08 Thu,14:06:15,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15


In [None]:
# 충전 시작하는 날을 추출
a = []

for i in range(len(befor['start_date'])) :
    test = befor['start_date'][i]
    test = test[5:10]
    test = test.split('-')
    a.append(test[0]+test[1])

    
# 충전 시작하는 시간의 시를 추출
b = []

for i in range(len(befor['start_time'])) :
    text = befor['start_time'][i]
    text = text[0:2]
    b.append(text)

In [None]:
# Sanity check: number of extracted start-day strings.
len(a)

22075

In [None]:
# 충전 끝나는 날을 추출
r = []

for i in range(len(befor['end_date'])) :
    test = befor['end_date'][i]
    test = test[5:10]
    test = test.split('-')
    r.append(test[0]+test[1])
    

# 충전 끝나는 날의 시를 추출
bb = []

for i in range(len(befor['end_time'])) :
    text = befor['end_time'][i]
    text = text[0:2]
    bb.append(text)

In [None]:
# Sanity check: number of extracted end-day strings.
len(r)

22075

In [None]:
# 요일 변수를 인코딩하기 위해서 시작하는 요일만 추출

qq = pd.to_numeric(befor['start_date'], errors='ignore')

q = []
for i in range(len(qq)) :
    test = qq[i]
    test = test[11:14]
    q.append(test)

In [None]:
# 끝나는 시간의 요일 변수를 인코딩하기 위해서 요일만 추출

befor['end_date'] = pd.to_numeric(befor['end_date'], errors='ignore')

w = []
for i in range(0, len(befor)) :
    test = befor['end_date'][i]
    test = test[11:14]
    w.append(test)

In [None]:
# be는 끝나는 시간을 집어넣어 사용 가능으로 표시할 것이다.

be = befor
be['week'] = w
be['day'] = r
be['time'] = bb
be['use'] = 0      # 0이 사용 가능

be

Unnamed: 0,cid,do,city,gu,start_date,start_time,end_date,end_time,start_day,end_day,week,day,time,use
0,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,17:44:27,2020-10-06 Tue,18:15:12,2020-10-06 Tue 17:44:27,2020-10-06 Tue 18:15:12,Tue,1006,18,0
1,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,18:29:43,2020-10-06 Tue,18:37:15,2020-10-06 Tue 18:29:43,2020-10-06 Tue 18:37:15,Tue,1006,18,0
2,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,19:06:53,2020-10-06 Tue,19:14:15,2020-10-06 Tue 19:06:53,2020-10-06 Tue 19:14:15,Tue,1006,19,0
3,111100030,서울특별시,서울시,종로구,2020-10-13 Tue,15:25:35,2020-10-13 Tue,15:54:40,2020-10-13 Tue 15:25:35,2020-10-13 Tue 15:54:40,Tue,1013,15,0
4,111100060,서울특별시,서울시,종로구,2020-10-07 Wed,06:30:45,2020-10-07 Wed,06:46:17,2020-10-07 Wed 06:30:45,2020-10-07 Wed 06:46:17,Wed,1007,06,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22070,ME19C4043,대구광역시,대구시,수성구,2020-10-16 Fri,13:43:03,2020-10-16 Fri,13:58:44,2020-10-16 Fri 13:43:03,2020-10-16 Fri 13:58:44,Fri,1016,13,0
22071,ME19C4050,충청남도,보령시,명천동,2020-10-06 Tue,16:13:07,2020-10-08 Thu,14:06:15,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15,Thu,1008,14,0
22072,ME19C4051,충청남도,보령시,명천동,2020-10-06 Tue,16:13:07,2020-10-08 Thu,14:06:15,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15,Thu,1008,14,0
22073,ME19C4052,충청남도,보령시,명천동,2020-10-06 Tue,16:13:07,2020-10-08 Thu,14:06:15,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15,Thu,1008,14,0


In [None]:
# befor은 시작하는 시간을 집어 넣어 사용 중이라는 표시를 넣는다.

befor = df.drop(t)

befor['week'] = q
befor['day'] = a
befor['time'] = b
befor['use'] = 1             # 1은 사용 중이라는 표시


befor # 0이 들어가는 것고 같은 갯수

Unnamed: 0,cid,do,city,gu,start_date,start_time,end_date,end_time,start_day,end_day,week,day,time,use
0,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,17:44:27,2020-10-06 Tue,18:15:12,2020-10-06 Tue 17:44:27,2020-10-06 Tue 18:15:12,Tue,1006,17,1
1,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,18:29:43,2020-10-06 Tue,18:37:15,2020-10-06 Tue 18:29:43,2020-10-06 Tue 18:37:15,Tue,1006,18,1
2,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,19:06:53,2020-10-06 Tue,19:14:15,2020-10-06 Tue 19:06:53,2020-10-06 Tue 19:14:15,Tue,1006,19,1
3,111100030,서울특별시,서울시,종로구,2020-10-13 Tue,15:25:35,2020-10-13 Tue,15:54:40,2020-10-13 Tue 15:25:35,2020-10-13 Tue 15:54:40,Tue,1013,15,1
4,111100060,서울특별시,서울시,종로구,2020-10-07 Wed,06:30:45,2020-10-07 Wed,06:46:17,2020-10-07 Wed 06:30:45,2020-10-07 Wed 06:46:17,Wed,1007,06,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22851,ME19C4043,대구광역시,대구시,수성구,2020-10-16 Fri,13:43:03,2020-10-16 Fri,13:58:44,2020-10-16 Fri 13:43:03,2020-10-16 Fri 13:58:44,Fri,1016,13,1
22852,ME19C4050,충청남도,보령시,명천동,2020-10-06 Tue,16:13:07,2020-10-08 Thu,14:06:15,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15,Tue,1006,16,1
22855,ME19C4051,충청남도,보령시,명천동,2020-10-06 Tue,16:13:07,2020-10-08 Thu,14:06:15,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15,Tue,1006,16,1
22858,ME19C4052,충청남도,보령시,명천동,2020-10-06 Tue,16:13:07,2020-10-08 Thu,14:06:15,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15,Tue,1006,16,1


In [None]:
# befor에 be를 추가한 것

bef = befor.append(be)

bef.drop(['week', 'day', 'time', 'use'], axis=1)

Unnamed: 0,cid,do,city,gu,start_date,start_time,end_date,end_time,start_day,end_day
0,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,17:44:27,2020-10-06 Tue,18:15:12,2020-10-06 Tue 17:44:27,2020-10-06 Tue 18:15:12
1,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,18:29:43,2020-10-06 Tue,18:37:15,2020-10-06 Tue 18:29:43,2020-10-06 Tue 18:37:15
2,111100030,서울특별시,서울시,종로구,2020-10-06 Tue,19:06:53,2020-10-06 Tue,19:14:15,2020-10-06 Tue 19:06:53,2020-10-06 Tue 19:14:15
3,111100030,서울특별시,서울시,종로구,2020-10-13 Tue,15:25:35,2020-10-13 Tue,15:54:40,2020-10-13 Tue 15:25:35,2020-10-13 Tue 15:54:40
4,111100060,서울특별시,서울시,종로구,2020-10-07 Wed,06:30:45,2020-10-07 Wed,06:46:17,2020-10-07 Wed 06:30:45,2020-10-07 Wed 06:46:17
...,...,...,...,...,...,...,...,...,...,...
22070,ME19C4043,대구광역시,대구시,수성구,2020-10-16 Fri,13:43:03,2020-10-16 Fri,13:58:44,2020-10-16 Fri 13:43:03,2020-10-16 Fri 13:58:44
22071,ME19C4050,충청남도,보령시,명천동,2020-10-06 Tue,16:13:07,2020-10-08 Thu,14:06:15,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15
22072,ME19C4051,충청남도,보령시,명천동,2020-10-06 Tue,16:13:07,2020-10-08 Thu,14:06:15,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15
22073,ME19C4052,충청남도,보령시,명천동,2020-10-06 Tue,16:13:07,2020-10-08 Thu,14:06:15,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15


In [None]:
# 사용 중 충전기를 충전기, 사용시작시간 순으로 정렬
bef = bef.sort_values(by=['cid', 'day', 'time'], ascending=True)


# 새로 인덱스를 부여
befor = bef.reset_index(drop=False, inplace=False)


# 원래 있던 인덱스가 인덱스 행이 되어서 없앰
befor =befor.drop(['index', 'start_date', 'start_time', 'end_date', 'end_time'], axis=1)
# 시간만 남기고 날짜도 따로 집어넣었다.

befor

Unnamed: 0,cid,do,city,gu,start_day,end_day,week,day,time,use
0,111100030,서울특별시,서울시,종로구,2020-10-06 Tue 17:44:27,2020-10-06 Tue 18:15:12,Tue,1006,17,1
1,111100030,서울특별시,서울시,종로구,2020-10-06 Tue 18:29:43,2020-10-06 Tue 18:37:15,Tue,1006,18,1
2,111100030,서울특별시,서울시,종로구,2020-10-06 Tue 17:44:27,2020-10-06 Tue 18:15:12,Tue,1006,18,0
3,111100030,서울특별시,서울시,종로구,2020-10-06 Tue 18:29:43,2020-10-06 Tue 18:37:15,Tue,1006,18,0
4,111100030,서울특별시,서울시,종로구,2020-10-06 Tue 19:06:53,2020-10-06 Tue 19:14:15,Tue,1006,19,1
...,...,...,...,...,...,...,...,...,...,...
44145,ME19C4051,충청남도,보령시,명천동,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15,Thu,1008,14,0
44146,ME19C4052,충청남도,보령시,명천동,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15,Tue,1006,16,1
44147,ME19C4052,충청남도,보령시,명천동,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15,Thu,1008,14,0
44148,ME19C4053,충청남도,보령시,명천동,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15,Tue,1006,16,1


## 데이터 정밀도 올리기

In [None]:
# 요일을 인코딩하기 위해 임의로 숫자를 넣어준다.

# Lookup table that maps each weekday abbreviation to a number (Mon=0 .. Sun=6).
weekday_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
f = pd.DataFrame({
    'week': weekday_names,
    'no': list(range(len(weekday_names))),
})
f

Unnamed: 0,week,no
0,Mon,0
1,Tue,1
2,Wed,2
3,Thu,3
4,Fri,4
5,Sat,5
6,Sun,6


In [None]:
# merge_을 하면 순서가 꼬이기에 먼저 새로 인덱스를 부여
merge_ = befor.reset_index(drop=False, inplace=False)

# 인코딩한 요일을 요일을 기준으로 해서 합친다.
merge_inner = pd.merge(merge_, f, on='week')

merge_inner       # 요일에 대응하는 숫자가 들어간다.

Unnamed: 0,index,cid,do,city,gu,start_day,end_day,week,day,time,use,no
0,0,111100030,서울특별시,서울시,종로구,2020-10-06 Tue 17:44:27,2020-10-06 Tue 18:15:12,Tue,1006,17,1,1
1,1,111100030,서울특별시,서울시,종로구,2020-10-06 Tue 18:29:43,2020-10-06 Tue 18:37:15,Tue,1006,18,1,1
2,2,111100030,서울특별시,서울시,종로구,2020-10-06 Tue 17:44:27,2020-10-06 Tue 18:15:12,Tue,1006,18,0,1
3,3,111100030,서울특별시,서울시,종로구,2020-10-06 Tue 18:29:43,2020-10-06 Tue 18:37:15,Tue,1006,18,0,1
4,4,111100030,서울특별시,서울시,종로구,2020-10-06 Tue 19:06:53,2020-10-06 Tue 19:14:15,Tue,1006,19,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
44145,44105,ME19C4041,대구광역시,대구시,수성구,2020-10-16 Fri 13:43:03,2020-10-16 Fri 13:58:44,Fri,1016,13,0,4
44146,44122,ME19C4042,대구광역시,대구시,수성구,2020-10-16 Fri 13:43:03,2020-10-16 Fri 13:58:44,Fri,1016,13,1,4
44147,44123,ME19C4042,대구광역시,대구시,수성구,2020-10-16 Fri 13:43:03,2020-10-16 Fri 13:58:44,Fri,1016,13,0,4
44148,44140,ME19C4043,대구광역시,대구시,수성구,2020-10-16 Fri 13:43:03,2020-10-16 Fri 13:58:44,Fri,1016,13,1,4


In [None]:
# 충전기를 부여한 인덱스 순으로 정렬, (위에서 merge하면서 꼬여서)
merge_ = merge_inner.sort_values(by=['index'], ascending=True)

merge_ = merge_.reset_index(drop=False, inplace=False)

befor = merge_.drop(['level_0', 'index'], axis=1)

befor

Unnamed: 0,cid,do,city,gu,start_day,end_day,week,day,time,use,no
0,111100030,서울특별시,서울시,종로구,2020-10-06 Tue 17:44:27,2020-10-06 Tue 18:15:12,Tue,1006,17,1,1
1,111100030,서울특별시,서울시,종로구,2020-10-06 Tue 18:29:43,2020-10-06 Tue 18:37:15,Tue,1006,18,1,1
2,111100030,서울특별시,서울시,종로구,2020-10-06 Tue 17:44:27,2020-10-06 Tue 18:15:12,Tue,1006,18,0,1
3,111100030,서울특별시,서울시,종로구,2020-10-06 Tue 18:29:43,2020-10-06 Tue 18:37:15,Tue,1006,18,0,1
4,111100030,서울특별시,서울시,종로구,2020-10-06 Tue 19:06:53,2020-10-06 Tue 19:14:15,Tue,1006,19,1,1
...,...,...,...,...,...,...,...,...,...,...,...
44145,ME19C4051,충청남도,보령시,명천동,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15,Thu,1008,14,0,3
44146,ME19C4052,충청남도,보령시,명천동,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15,Tue,1006,16,1,1
44147,ME19C4052,충청남도,보령시,명천동,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15,Thu,1008,14,0,3
44148,ME19C4053,충청남도,보령시,명천동,2020-10-06 Tue 16:13:07,2020-10-08 Thu 14:06:15,Tue,1006,16,1,1


# befor의 중간중간 비어있는 시간을 채우기

In [None]:
# The missing hours in between events are filled in to inflate the data.
# This positional slice is evaluated but not displayed, because it is not the
# last expression of the cell.
befor.iloc[6700:6750]

# Narrow view of the event table: key, weekday, day, hour, state and district.
bf = befor.loc[:, ['cid', 'week', 'day', 'time', 'use', 'gu']]

In [None]:
def _fill_hourly_gaps(events):
    """Expand consecutive charger events into one record per covered hour.

    ``events`` must be ordered by charger, day and hour, and provide the
    columns ``cid``, ``no`` (weekday number, Mon=0 .. Sun=6), ``day`` ('MMDD'),
    ``time`` (hour of day) and ``use`` (1 = in use, 0 = available).

    Each event is compared with the event that follows it:

    * same charger, same day, same hour -> one record with use = 1
    * same charger, same day, later hour -> the event's state is repeated for
      every hour up to (not including) the next event's hour
    * same charger, later day -> the event's own hour keeps its state, the
      rest of that day, every full day in between, and the hours of the next
      day before the next event are recorded as available (0)
    * next event belongs to another charger, or there is no next event ->
      the event is recorded once, as is

    Returns a dict of equally long lists keyed by 'cid', 'week', 'day',
    'time' and 'use'.

    NOTE: days are handled as plain MMDD integers, so filling a gap is only
    correct while both events fall in the same month.
    """
    # Pull the columns into plain lists once; positional list access is far
    # cheaper than a Series lookup per row inside the loop.
    cids = events['cid'].tolist()
    weekdays = events['no'].tolist()
    days = [int(value) for value in events['day']]
    hours = [int(value) for value in events['time']]
    uses = [int(value) for value in events['use']]

    filled = {'cid': [], 'week': [], 'day': [], 'time': [], 'use': []}

    def emit(cid, week, day, hour, use):
        filled['cid'].append(cid)
        filled['week'].append(week)
        filled['day'].append(day)
        filled['time'].append(hour)
        filled['use'].append(use)

    last = len(cids) - 1
    for i in range(len(cids)):
        cid, week, day, hour, use = cids[i], weekdays[i], days[i], hours[i], uses[i]

        # Last event of a charger: record it as is. The final row of the frame
        # is included here; it used to be dropped because the loop stopped one
        # row early.
        if i == last or cids[i + 1] != cid:
            emit(cid, week, day, hour, use)
            continue

        next_week, next_day, next_hour = weekdays[i + 1], days[i + 1], hours[i + 1]

        if next_day == day:
            if next_hour == hour:
                # Two events inside the same hour: the hour counts as in use.
                emit(cid, week, day, hour, 1)
            else:
                # Carry the current state forward up to the hour before the
                # next event; the next event itself is handled on its own turn.
                for h in range(hour, next_hour):
                    emit(cid, week, day, h, use)
            continue

        # From here on the next event of this charger is on a different day.
        if use not in (0, 1):
            continue

        # Some records claim a charger was charging for several days. Such a
        # session is assumed to last one hour: only the event's own hour keeps
        # its state and the remaining hours of the day count as available.
        emit(cid, week, day, hour, use)
        for h in range(hour + 1, 24):
            emit(cid, week, day, h, 0)

        # Full days strictly between the two events, hours 0..23, all available.
        # The modulo keeps the weekday in 0..6 even when the gap exceeds a week.
        for offset in range(1, next_day - day):
            for h in range(24):
                emit(cid, (week + offset) % 7, day + offset, h, 0)

        # Hours of the next event's day before that event starts.
        for h in range(next_hour):
            emit(cid, next_week, next_day, h, 0)

    return filled


a = _fill_hourly_gaps(befor)

# Printing the whole dict exceeded the notebook's IOPub data rate limit, so
# only the number of generated values per column is reported.
print({column: len(values) for column, values in a.items()})
                



IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [None]:
# 데이터 전처리한 것을 데이터 프레임으로 만듬
df = pd.DataFrame(a)
df

Unnamed: 0,cid,week,day,time,use
0,111100030,1,1006,17,1
1,111100030,1,1006,18,1
2,111100030,1,1006,18,1
3,111100030,1,1006,18,0
4,111100030,1,1006,19,1
...,...,...,...,...,...
368005,ME19C4053,3,1008,9,0
368006,ME19C4053,3,1008,10,0
368007,ME19C4053,3,1008,11,0
368008,ME19C4053,3,1008,12,0


## 전국을 기준으로 가동횟수가 높은 것에 대한 구분
### 전국에서 충전이력에서 충전횟수가 높은 곳을 선정한 내용이다.

In [None]:
#dd = merge_inners.drop(['typ', 'charge', 'c_using', 'amount', 'start_date', 'start_time',
#                        'end_date', 'end_time', 'c_create_date', 'amount/m', 'name',
#                        'u_time', 'agency', 'phone', 'fee', 'etc', 'create_date', 'c_name',
#                       'do', 'city', 'gu', 'c_add'], axis=1)

#d = dd.drop_duplicates()


#sam = pd.merge(dd, befor, on='cid')
#sam['count'] = 1
#sam

In [None]:
#group = sam.groupby(['city','gu'])
#f = group.sum()
#f['비율'] = f['use']/f['count']
#f

In [None]:
#ff = f.sort_values(by=['비율', 'count'], ascending=False)
#ff[(ff['count'] >= 1000)][:50]

In [None]:
# 가동 횟수가 1000번 이상인 것들(충전기 수가 많은 곳)중에서
# 충전비율이 높은 5개

#성남시	중원구   191  /  1037  =  0.184185
#서울시	마포구   210  /  1722  =  0.121951
#천안시	서북구   183  /  1557  =  0.117534
#화성시	팔탄면   113  /  1000  =  0.113000
#서울시	관악구   147  /  1304  =  0.112730

In [None]:
#top1 = sam[(sam['city'] == '성남시') & (sam['gu'] == '중원구')]
#top1     # 0.184185

In [None]:
#top2 = sam[(sam['city'] == '서울시') & (sam['gu'] == '마포구')]
#top2     # 0.121951

In [None]:
#top3 = sam[(sam['city'] == '천안시') & (sam['gu'] == '서북구')]
#top3     # 0.117534

In [None]:
#top4 = sam[(sam['city'] == '화성시') & (sam['gu'] == '팔탄면')]
#top4     # 0.113000

In [None]:
#top5 = sam[(sam['city'] == '서울시') & (sam['gu'] == '관악구')]
#top5     # 0.112730

## 특정 장소에 대한 학습을 위한 전처리

In [None]:
# Convert the Jeju station workbook to CSV.
# Raw strings keep the Windows path separator from being read as an escape.
jj = pd.read_excel(r'csv\제주.xlsx')
# The row index is written as the first CSV column; it comes back as
# 'Unnamed: 0' when the file is read again.
jj.to_csv(r'csv\제주.csv')

In [None]:
# Load the Jeju stations and drop everything except the station id (sid) and
# district (gu). 'Unnamed: 0' is the row index that was saved with the CSV.
# The raw string keeps the Windows path separator from being read as an escape.
j = pd.read_csv(r'csv\제주.csv')
jj = j.drop(['Unnamed: 0', 'name', 'u_time', 'agency', 'do', 'city', 'c_add', 'phone', 'fee', 'etc', 
             'create_date'], axis = 1)
jj

Unnamed: 0,sid,gu
0,50110001,연동
1,50110009,이도2동
2,50110025,연동
3,50110040,애월읍
4,50110044,구좌읍
...,...,...
176,ME19B252,외도1동
177,ME19B253,구좌읍
178,ME19B404,법환동
179,ME19B405,봉개동


In [None]:
# List every charger of each Jeju station (stations without a charger are
# kept by the left join), ordered by station id, without the charger details.
sid = (
    jj.merge(df_charger, on='sid', how='left')
      .sort_values('sid')
      .drop(columns=['c_name', 'typ', 'charge', 'amount', 'c_create_date',
                     'amount/m'])
)

# Index labels of chargers whose status is '사용불가'.
si = sid.loc[sid['c_using'] == '사용불가'].index
sid

Unnamed: 0,sid,gu,cid,c_using
0,50110001,연동,501100010,사용중
1,50110009,이도2동,501100090,사용가능
2,50110025,연동,501100250,사용가능
3,50110040,애월읍,501100400,사용불가
4,50110044,구좌읍,501100440,사용불가
...,...,...,...,...
241,ME19B405,봉개동,ME19B4053,사용가능
243,ME19B406,영평동,ME19B4061,사용가능
244,ME19B406,영평동,ME19B4062,사용가능
242,ME19B406,영평동,ME19B4060,사용가능


In [None]:
# Remove the chargers collected in `si`; the status column is then no longer needed.
ss = sid.drop(index=si)
sid = ss.drop(columns='c_using')
sid

Unnamed: 0,sid,gu,cid
0,50110001,연동,501100010
1,50110009,이도2동,501100090
2,50110025,연동,501100250
5,50110053,조천읍,501100530
6,50110054,한경면,501100540
...,...,...,...
241,ME19B405,봉개동,ME19B4053
243,ME19B406,영평동,ME19B4061
244,ME19B406,영평동,ME19B4062
242,ME19B406,영평동,ME19B4060


In [None]:
# Strip the merged usage table of every column in the list below, then keep
# each remaining combination once.
non_key_columns = ['typ', 'charge', 'c_using', 'amount', 'start_date', 'start_time',
                   'end_date', 'end_time', 'c_create_date', 'amount/m', 'name',
                   'u_time', 'agency', 'phone', 'fee', 'etc', 'create_date', 'c_name',
                   'do', 'city', 'gu', 'c_add']
dd = merge_inners.drop(columns=non_key_columns)

d = dd.drop_duplicates()
d

Unnamed: 0,cid,sid
0,111100030,11110003
4,111100060,11110006
26,112000060,11200006
49,112300050,11230005
50,112300060,11230006
...,...,...
22843,ME19C4043,ME19C404
22852,ME19C4050,ME19C405
22855,ME19C4051,ME19C405
22858,ME19C4052,ME19C405


# 제주도의 train_set 만들기

In [None]:
# Restrict the event table to the chargers listed in `sid`, then look at the
# events of station '50130013'.
dj = befor.merge(sid, on='cid')
dj.loc[dj['sid'] == '50130013']

Unnamed: 0,cid,do,city,gu_x,start_day,end_day,week,day,time,use,no,sid,gu_y
1448,501300130,제주특별자치도,서귀포시,안덕면,2020-10-06 Tue 16:35:19,2020-10-06 Tue 17:05:30,Tue,1006,16,1,1,50130013,안덕면
1449,501300130,제주특별자치도,서귀포시,안덕면,2020-10-06 Tue 17:28:41,2020-10-06 Tue 17:36:43,Tue,1006,17,1,1,50130013,안덕면
1450,501300130,제주특별자치도,서귀포시,안덕면,2020-10-06 Tue 16:35:19,2020-10-06 Tue 17:05:30,Tue,1006,17,0,1,50130013,안덕면
1451,501300130,제주특별자치도,서귀포시,안덕면,2020-10-06 Tue 17:28:41,2020-10-06 Tue 17:36:43,Tue,1006,17,0,1,50130013,안덕면
1452,501300130,제주특별자치도,서귀포시,안덕면,2020-10-06 Tue 23:30:11,2020-10-07 Wed 00:16:19,Tue,1006,23,1,1,50130013,안덕면
1453,501300130,제주특별자치도,서귀포시,안덕면,2020-10-07 Wed 00:31:29,2020-10-07 Wed 00:47:46,Wed,1007,0,1,2,50130013,안덕면
1454,501300130,제주특별자치도,서귀포시,안덕면,2020-10-06 Tue 23:30:11,2020-10-07 Wed 00:16:19,Wed,1007,0,0,2,50130013,안덕면
1455,501300130,제주특별자치도,서귀포시,안덕면,2020-10-07 Wed 00:31:29,2020-10-07 Wed 00:47:46,Wed,1007,0,0,2,50130013,안덕면
1456,501300130,제주특별자치도,서귀포시,안덕면,2020-10-07 Wed 07:33:31,2020-10-07 Wed 08:05:11,Wed,1007,7,1,2,50130013,안덕면
1457,501300130,제주특별자치도,서귀포시,안덕면,2020-10-07 Wed 07:33:31,2020-10-07 Wed 08:05:11,Wed,1007,8,0,2,50130013,안덕면


In [None]:
# Keep the rows of `df` whose charger appears in `sid`, with sid and gu attached.
df_j = df.merge(sid, on='cid')
df_j   #31465

Unnamed: 0,cid,week,day,time,use,sid,gu
0,501100010,2,1007,12,1,50110001,연동
1,501100010,2,1007,13,0,50110001,연동
2,501100010,2,1007,14,1,50110001,연동
3,501100010,2,1007,14,0,50110001,연동
4,501100010,2,1007,15,0,50110001,연동
...,...,...,...,...,...,...,...
30158,ME19B2531,3,1015,12,1,ME19B253,구좌읍
30159,ME19B2531,3,1015,12,0,ME19B253,구좌읍
30160,ME19B2531,3,1015,13,0,ME19B253,구좌읍
30161,ME19B2531,3,1015,14,1,ME19B253,구좌읍


In [None]:
# 제주도의 충전 현황이 있는 읍, 동, 면
dj = df_j['gu'].drop_duplicates()
dj= dj.sort_values(ascending=True)
dj

8249      강정동
1872      건입동
1663      구좌읍
4105      남원읍
2048      노형동
9308      대정읍
5430      도남동
17483    도두1동
25429    도련2동
11732     동홍동
10216     법환동
12434     보목동
7351      봉개동
28356    삼도2도
2756     삼양2동
9733      상예동
21495     상효동
13471     색달동
12186     서귀동
13975     서호동
8947      서홍동
9010      성산읍
1582     아라1동
7832     아라2동
17010     아라동
8710      안덕면
3003      애월읍
0          연동
2526      영평동
22704     오등동
3668     오라1동
909      오라2동
6614     외도1동
6111      용강동
28074    용담1동
20538    용담2동
17991    이도1동
207      이도2동
5862     일도2동
442       조천읍
9056      중문동
7830      추자면
11958     토평동
8454      표선면
3866      하효동
685       한경면
18726     한림읍
1333      해안동
19293    화북1동
15507     회수동
Name: gu, dtype: object

# 제주도 지역을 나눔

In [None]:
# Region group A: rows whose district is one of the listed names.
group_a_districts = ['한경면', '한림읍', '애월읍']
A = df_j.loc[df_j['gu'].isin(group_a_districts)]
A.iloc[100:150]

Unnamed: 0,cid,week,day,time,use,sid,gu
785,501100540,5,1010,21,0,50110054,한경면
786,501100540,5,1010,22,0,50110054,한경면
787,501100540,5,1010,23,0,50110054,한경면
788,501100540,6,1011,0,0,50110054,한경면
789,501100540,6,1011,1,0,50110054,한경면
790,501100540,6,1011,2,0,50110054,한경면
791,501100540,6,1011,3,0,50110054,한경면
792,501100540,6,1011,4,0,50110054,한경면
793,501100540,6,1011,5,0,50110054,한경면
794,501100540,6,1011,6,0,50110054,한경면


In [None]:
# Save region group A. The raw string avoids the invalid '\A' escape sequence
# while producing the same path.
A.to_csv(r'csv\A.csv')

In [None]:
# Region group B: rows whose district is one of the listed names.
group_b_districts = ['추자면', '연동', '이도2동', '오라2동', '해안동', '아라1동',
                     '건입동', '노형동', '영평동', '삼양2동', '오라1동', '도남동',
                     '일도2동', '용강동', '외도1동', '봉개동', '아라2동', '아라동',
                     '도두1동', '이도1동', '화북1동', '용담2동', '오등동', '도련2동',
                     '용담1동', '삼도2도']
B = df_j.loc[df_j['gu'].isin(group_b_districts)]
B.iloc[90:110]

Unnamed: 0,cid,week,day,time,use,sid,gu
90,501100010,6,1011,4,0,50110001,연동
91,501100010,6,1011,5,0,50110001,연동
92,501100010,6,1011,6,0,50110001,연동
93,501100010,6,1011,7,0,50110001,연동
94,501100010,6,1011,8,0,50110001,연동
95,501100010,6,1011,9,0,50110001,연동
96,501100010,6,1011,10,0,50110001,연동
97,501100010,6,1011,11,0,50110001,연동
98,501100010,6,1011,12,0,50110001,연동
99,501100010,6,1011,13,0,50110001,연동


In [None]:
# Save group B. A raw string keeps the backslash literal: '\B' in a normal
# string is an invalid escape sequence and triggers a warning on newer Python.
B.to_csv(r'csv\B.csv')

In [None]:
# Group C: rows of df_j whose 'gu' value is one of the two listed names.
gu_in_c = df_j['gu'].isin(['조천읍', '구좌읍'])
C = df_j[gu_in_c]
C

Unnamed: 0,cid,week,day,time,use,sid,gu
442,501100530,1,1006,17,1,50110053,조천읍
443,501100530,1,1006,17,0,50110053,조천읍
444,501100530,1,1006,18,0,50110053,조천읍
445,501100530,1,1006,19,0,50110053,조천읍
446,501100530,1,1006,20,0,50110053,조천읍
...,...,...,...,...,...,...,...
30158,ME19B2531,3,1015,12,1,ME19B253,구좌읍
30159,ME19B2531,3,1015,12,0,ME19B253,구좌읍
30160,ME19B2531,3,1015,13,0,ME19B253,구좌읍
30161,ME19B2531,3,1015,14,1,ME19B253,구좌읍


In [None]:
# Save group C. A raw string keeps the backslash literal: '\C' in a normal
# string is an invalid escape sequence and triggers a warning on newer Python.
C.to_csv(r'csv\C.csv')

In [None]:
# Group D: rows of df_j whose 'gu' value is in the list below.
# NOTE(review): '하예동' and '대천동' do not appear in the unique 'gu' listing
# shown earlier in this notebook, so they presumably match no rows.
gu_in_d = df_j['gu'].isin([
    '대정읍', '안덕면', '하예동', '중문동', '대천동', '상예동',
    '색달동',
])
D = df_j[gu_in_d]
D

Unnamed: 0,cid,week,day,time,use,sid,gu
8710,501300130,1,1006,16,1,50130013,안덕면
8711,501300130,1,1006,17,1,50130013,안덕면
8712,501300130,1,1006,17,1,50130013,안덕면
8713,501300130,1,1006,17,0,50130013,안덕면
8714,501300130,1,1006,18,0,50130013,안덕면
...,...,...,...,...,...,...,...
24942,ME18A2721,2,1007,13,0,ME18A272,안덕면
24943,ME18A2730,0,1012,14,1,ME18A273,대정읍
24944,ME18A2730,0,1012,14,0,ME18A273,대정읍
24945,ME18A2731,0,1012,14,1,ME18A273,대정읍


In [None]:
# Collapse rows that share the same charger / weekday / day / hour / station /
# district (ignoring 'use'), keeping the first of each, and preview rows
# 162-209 of the result. D itself is not modified.
dedup_keys = ['cid', 'week', 'day', 'time', 'sid', 'gu']
D.drop_duplicates(subset=dedup_keys, keep='first').iloc[162:210]


Unnamed: 0,cid,week,day,time,use,sid,gu
8887,501300130,1,1013,10,1,50130013,안덕면
8888,501300130,1,1013,11,0,50130013,안덕면
8889,501300130,1,1013,12,1,50130013,안덕면
8891,501300130,1,1013,13,0,50130013,안덕면
8892,501300130,1,1013,14,1,50130013,안덕면
8893,501300130,1,1013,15,0,50130013,안덕면
8894,501300130,1,1013,16,1,50130013,안덕면
8895,501300130,1,1013,17,0,50130013,안덕면
8896,501300130,1,1013,18,1,50130013,안덕면
8897,501300130,1,1013,19,0,50130013,안덕면


In [None]:
# Save group D. A raw string keeps the backslash literal: '\D' in a normal
# string is an invalid escape sequence and triggers a warning on newer Python.
D.to_csv(r'csv\D.csv')

In [None]:
# Group E: rows of df_j whose 'gu' value is in the list below.
gu_in_e = df_j['gu'].isin([
    '하효동', '남원읍', '강정동', '서홍동', '법환동', '동홍동',
    '토평동', '서귀동', '보목동', '서호동', '회수동', '상효동',
])
E = df_j[gu_in_e]
# Preview the first 15 rows.
E.head(15)

Unnamed: 0,cid,week,day,time,use,sid,gu
3866,501100830,1,1006,16,1,50110083,하효동
3867,501100830,1,1006,16,1,50110083,하효동
3868,501100830,1,1006,16,0,50110083,하효동
3869,501100830,1,1006,17,0,50110083,하효동
3870,501100830,1,1006,18,0,50110083,하효동
3871,501100830,1,1006,19,0,50110083,하효동
3872,501100830,1,1006,20,0,50110083,하효동
3873,501100830,1,1006,21,0,50110083,하효동
3874,501100830,1,1006,22,0,50110083,하효동
3875,501100830,1,1006,23,0,50110083,하효동


In [None]:
# Save group E. A raw string keeps the backslash literal: '\E' in a normal
# string is an invalid escape sequence and triggers a warning on newer Python.
E.to_csv(r'csv\E.csv')

In [None]:
# Group F: rows of df_j whose 'gu' value is one of the two listed names.
gu_in_f = df_j['gu'].isin(['성산읍', '표선면'])
F = df_j[gu_in_f]
F

Unnamed: 0,cid,week,day,time,use,sid,gu
8454,501300090,1,1006,17,1,50130009,표선면
8455,501300090,1,1006,18,0,50130009,표선면
8456,501300090,1,1006,19,1,50130009,표선면
8457,501300090,1,1006,19,0,50130009,표선면
8458,501300090,1,1006,20,0,50130009,표선면
...,...,...,...,...,...,...,...
21246,ME1831013,1,1013,10,0,ME183101,성산읍
21247,ME1831013,1,1013,11,0,ME183101,성산읍
21248,ME1831013,1,1013,12,0,ME183101,성산읍
21249,ME1831013,1,1013,13,1,ME183101,성산읍


In [None]:
# Save group F. A raw string keeps the backslash literal: '\F' in a normal
# string is an invalid escape sequence and triggers a warning on newer Python.
F.to_csv(r'csv\F.csv')

## 충전기 인코딩하기

In [None]:
# Map every charger ID in df_j['cid'] to an integer label. The fitted encoder
# is kept so the mapping can be reused or inverted later.
encoder = LabelEncoder()
labels = encoder.fit_transform(df_j['cid'])

labels

array([  0,   0,   0, ..., 175, 175, 175])

In [None]:
# Overwrite the charger-ID column with the integer labels produced by the
# label encoder, then display the updated frame.
df_j['cid'] = labels
df_j

Unnamed: 0,cid,week,day,time,use,sid,gu
0,0,2,1007,12,1,50110001,연동
1,0,2,1007,13,0,50110001,연동
2,0,2,1007,14,1,50110001,연동
3,0,2,1007,14,0,50110001,연동
4,0,2,1007,15,0,50110001,연동
...,...,...,...,...,...,...,...
30158,175,3,1015,12,1,ME19B253,구좌읍
30159,175,3,1015,12,0,ME19B253,구좌읍
30160,175,3,1015,13,0,ME19B253,구좌읍
30161,175,3,1015,14,1,ME19B253,구좌읍


# 학습하기

In [None]:
df_j    # training features are cid, week, day, time; the target is use

Unnamed: 0,cid,week,day,time,use,sid,gu
0,0,2,1007,12,1,50110001,연동
1,0,2,1007,13,0,50110001,연동
2,0,2,1007,14,1,50110001,연동
3,0,2,1007,14,0,50110001,연동
4,0,2,1007,15,0,50110001,연동
...,...,...,...,...,...,...,...
30158,175,3,1015,12,1,ME19B253,구좌읍
30159,175,3,1015,12,0,ME19B253,구좌읍
30160,175,3,1015,13,0,ME19B253,구좌읍
30161,175,3,1015,14,1,ME19B253,구좌읍


In [None]:
from sklearn.model_selection import train_test_split

# Target: the 'use' flag. Features: every column except the target and the
# station / district identifiers.
y_target = df_j['use']
x_data = df_j.drop(columns=['use', 'sid', 'gu'])

# Hold out 20% of the rows for testing; the seed fixes the split.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_target,
    test_size=0.2,
    random_state=156,
)

# 실제 분석

## Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, mean_squared_error, r2_score

# Fit a default logistic regression on the training split and predict the
# held-out rows.
lr = LogisticRegression().fit(x_train, y_train)
y_pred_lr = lr.predict(x_test)

# Fraction of test rows classified correctly.
accuracy_score(y_test, y_pred_lr)
# author's noted value: 0.9330317110948072 (the saved output of this cell differs)

0.9076744571523289

## Support Vector Machine

In [None]:
from sklearn.svm import SVC

# Fit a support vector classifier with default settings and predict the
# held-out rows.
svc = SVC().fit(x_train, y_train)
y_pred_svc = svc.predict(x_test)

# Fraction of test rows classified correctly.
accuracy_score(y_test, y_pred_svc)
# recorded value: 0.9076744571523289

0.9076744571523289

## Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Seed the tree so repeated runs build the same model. Without a seed the
# result drifted between runs (0.8531410575169899 in the author's note versus
# 0.8529753025029008 in the saved output). Seed 0 matches the other
# tree-based models in this notebook.
dtc = DecisionTreeClassifier(random_state=0)
dtc.fit(x_train, y_train)
y_pred_dtc = dtc.predict(x_test)

# Fraction of test rows classified correctly.
accuracy_score(y_test, y_pred_dtc)

0.8529753025029008

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a seeded random forest and predict the held-out rows.
rf_clf = RandomForestClassifier(random_state=0).fit(x_train, y_train)
pred = rf_clf.predict(x_test)

# Fraction of test rows classified correctly.
accuracy_score(y_test, pred)
# recorded value: 0.8615945632355378

0.8615945632355378

## GBM(Gradient Boosting Machine)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Fit a seeded gradient-boosting classifier and predict the held-out rows.
# (Passing verbose to the constructor prints training progress.)
gb_clf = GradientBoostingClassifier(random_state=0).fit(x_train, y_train)
gb_pred = gb_clf.predict(x_test)

# Fraction of test rows classified correctly.
accuracy_score(y_test, gb_pred)
# recorded value: 0.9080059671805072

0.9080059671805072

## test 셋으로 검사

## 정밀도 확인 (가동 횟수를 기준)

In [None]:
# Put each model's test-set predictions next to the true labels. The frame
# takes its index from y_test; the prediction arrays line up by position.
model_outputs = {
    'LR': y_pred_lr,
    'SVM': y_pred_svc,
    'DT': y_pred_dtc,
    'RF': pred,
    'GBM': gb_pred,
    '정답': y_test,
}
ss = pd.DataFrame(model_outputs)
# Preview rows 100-109 (by position).
ss.iloc[100:110]

Unnamed: 0,LR,SVM,DT,RF,GBM,정답
29639,0,0,0,0,0,0
11479,0,0,0,0,0,0
5802,0,0,0,0,0,0
10070,0,0,0,0,0,0
23017,0,0,0,0,0,0
23420,0,0,0,0,0,0
6530,0,0,0,0,0,0
19772,0,0,0,0,0,1
29585,0,0,0,0,0,0
3271,0,0,0,0,0,0


In [None]:
# Number of test rows that logistic regression predicts as "in use" (1).
# A vectorized count replaces the index loop, which also printed one line
# for every positive prediction.
count = int((y_pred_lr == 1).sum())
print(count)            # 0 in the recorded run

0


In [None]:
# Number of test rows that the SVM predicts as "in use" (1).
# A vectorized count replaces the index loop, which also printed one line
# for every positive prediction.
count = int((y_pred_svc == 1).sum())
print(count)            # 0 in the recorded run

0


In [None]:
# Number of test rows that the decision tree predicts as "in use" (1).
# A vectorized count replaces the index loop, which printed one line for
# every positive prediction (hundreds of lines of "1").
count = int((y_pred_dtc == 1).sum())
print(count)            # 496 in the recorded run

1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
496


In [None]:
# Number of test rows that the random forest predicts as "in use" (1).
# A vectorized count replaces the index loop, which printed one line for
# every positive prediction (hundreds of lines of "1").
count = int((pred == 1).sum())
print(count)               # 460 in the recorded run

1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
460


In [None]:
# Number of test rows that gradient boosting predicts as "in use" (1).
# A vectorized count replaces the index loop, which also printed one line
# for every positive prediction.
count = int((gb_pred == 1).sum())
print(count)            # 10 in the recorded run

1
1
1
1
1
1
1
1
1
1
10


In [None]:
# Number of test rows whose true label is "in use" (1); this is the reference
# figure for the per-model counts. A vectorized count replaces the index
# loop, which printed one line for every positive row.
count = int((y_test == 1).sum())
print(count)              # 557 per the author's note

1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1


## 특정 값으로 검사

In [None]:
# One hand-made sample with shape (1, 4). The values follow the training
# column order: cid, week, day, time.
# NOTE(review): the saved label-encoder output shows cid labels ending at 175,
# so 682 looks outside the encoded range — confirm the intended charger.
test_data = np.array([[682, 4, 1016, 6]])
test_data

array([[ 682,    4, 1016,    6]])

In [None]:
# Ask every fitted model for its prediction on the single hand-made sample.
# predict() returns a one-element array here, so take element 0.
index_lr, index_svm, index_dt, index_rf, index_gbm = (
    model.predict(test_data)[0]
    for model in (lr, svc, dtc, rf_clf, gb_clf)
)

In [None]:
# Display the logistic-regression prediction for test_data.
index_lr

0

In [None]:
# Display the SVM prediction for test_data.
index_svm

0

In [None]:
# Display the decision-tree prediction for test_data.
index_dt

0

In [None]:
# Display the random-forest prediction for test_data.
index_rf

0

In [None]:
# Display the gradient-boosting prediction for test_data.
index_gbm

0