In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import os
from sklearn.preprocessing import MinMaxScaler

from pyproj import CRS
import pydeck as pdk
import matplotlib.font_manager as fm
from sklearn.preprocessing import LabelEncoder
fontprop = fm.FontProperties(fname='../font/NanumGothic.ttf', size=10)
import random
random.seed(42)

## data_load

In [None]:
%%time
#딕셔너리에 각 파일 순서대로 load
file_list = os.listdir('./data')
number = list(map(lambda x : int(x.split('.')[0]),file_list))
#local에서도 동일한 순서로 읽어오게끔
if 24 not in number:
    number = number +[2,8,23,24,25,26]
    file_list = file_list + ['','','','','','']
file_order = pd.DataFrame({'index':number, 'file_name':file_list})
file_order = file_order.sort_values('index')
file_order = file_order[:-1]
files = {}
for i,file in enumerate(file_order['file_name']):
    if len(file)>0:
        try :
            files[i] = pd.read_csv(f'./data/{file}')
        except : 
            files[i] = gpd.read_file(f'./data/{file}')

In [None]:
# Prefer a token supplied through the environment so it does not have to live in the notebook.
# NOTE(review): the literal fallback keeps existing runs working, but a committed token should be
# rotated and this fallback removed.
mapbox_key = os.environ.get(
    'MAPBOX_API_KEY',
    'pk.eyJ1IjoibGFpcjA4MjYiLCJhIjoiY2tkcGoxcnRzMDZvODJxbXk0MWhlcWN2aSJ9.5-yjt_SUq4w5JII7CvD4cA',
)
map_center = [127.07377, 37.14885]  # centre of Osan-si, as [longitude, latitude]
view_states = pdk.ViewState(longitude=map_center[0],
                            latitude=map_center[1],
                            zoom=12)
# Every figure and HTML map in this notebook is written under ./output/.
os.makedirs('./output/', exist_ok=True)

In [None]:
def add_coordinates(data):
    '''Return a copy of a GeoDataFrame with centroid and vertex-list columns added.

    Parameters
    ----------
    data : geopandas.GeoDataFrame
        Frame whose ``geometry`` column holds LineString, MultiLineString,
        Polygon or MultiPolygon objects.

    Returns
    -------
    Copy of ``data`` with two extra columns:

    * ``coord_center`` - ``[x, y]`` of each geometry's centroid.
    * ``coordinates``  - list of ``[x, y]`` vertices: the line's vertices, or
      the polygon's exterior ring. For multi-part geometries only the FIRST
      part is used.
    '''
    from shapely.geometry import Polygon, MultiLineString, LineString

    def get_coordinates(geom):
        # Multi-part geometries are accessed through `.geoms`; integer indexing
        # on the geometry itself (`geom[0]`) was removed in Shapely 2.0.
        if isinstance(geom, MultiLineString):
            lon, lat = geom.geoms[0].xy
        elif isinstance(geom, LineString):
            lon, lat = geom.xy
        elif isinstance(geom, Polygon):
            lon, lat = geom.exterior.xy
        else:
            # Everything else is treated as a MultiPolygon, as in the original code.
            lon, lat = geom.geoms[0].exterior.xy
        return [[x, y] for x, y in zip(lon, lat)]

    temp = data.copy()
    temp['coord_center'] = temp.geometry.apply(lambda geo: [geo.centroid.x, geo.centroid.y])
    temp['coordinates'] = temp.geometry.apply(get_coordinates)
    return temp

def add_minmax(data,col):
    '''Return a copy of ``data`` with ``col`` rescaled into a ``minmax_value`` column.

    The scaling is done by scikit-learn's ``MinMaxScaler`` with its default
    settings, fitted on ``data[col]`` itself. ``data`` is not modified.
    '''
    from sklearn.preprocessing import MinMaxScaler

    result = data.copy()
    # the scaler expects a 2-D array, hence the reshape to a single column
    column_2d = result[col].values.reshape(-1, 1)
    result['minmax_value'] = MinMaxScaler().fit_transform(column_2d)
    return result

def merge_to_grid(value_df,merged_df, col_name,lon = 'lon', lat = 'lat',):
    '''Count (or sum) the features of ``value_df`` that fall in each grid cell.

    Parameters
    ----------
    value_df : pandas.DataFrame or geopandas.GeoDataFrame
        Features to aggregate. A plain DataFrame is turned into points built
        from its ``lon`` / ``lat`` columns (EPSG:4326); a GeoDataFrame is used
        with its existing geometry. The caller's frame is never modified.
    merged_df : geopandas.GeoDataFrame
        Grid to aggregate onto; must have a ``gid`` column.
    col_name : str
        Name of the output column. If the joined frame already has a column
        with this name its values are summed per cell; otherwise every
        matched feature counts as 1.
    lon, lat : str
        Names of the longitude / latitude columns (plain DataFrame only).

    Returns
    -------
    pandas.DataFrame
        One row per ``gid`` with the columns ``gid`` and ``col_name``.
    '''
    # Work on a private copy: the original wrote a `geometry` column into the
    # caller's frame (and raised SettingWithCopyWarning when given a slice).
    value_df = value_df.copy()
    if 'gid' in value_df.columns:
        # avoid clashing with the grid's own `gid` in the spatial join
        value_df = value_df.rename(columns={'gid':'gid1'})

    if not isinstance(value_df, gpd.GeoDataFrame):
        value_df['geometry'] = value_df.apply(lambda row : make_Point(row[lon],row[lat]),axis= 1)
        value_df = gpd.GeoDataFrame(value_df)
        value_df.crs = CRS('epsg:4326')

    joined = gpd.sjoin(merged_df,value_df, how = 'left')
    if col_name not in joined.columns:
        # no value column supplied: each grid/feature match counts once
        joined.loc[~(joined['index_right'].isna()),col_name] = 1
    joined[col_name] = joined[col_name].fillna(0)

    return joined.groupby('gid')[col_name].sum().reset_index()

def draw_hist(data,col,title):
    '''Plot and save a histogram of ``col``, ignoring rows that are 0 or NaN.

    The figure is saved to ``./output/<title>.png`` and then shown. Returns
    the filtered rows after passing them through ``add_coordinates``.
    '''
    frame = data.copy()
    has_value = (frame[col] != 0) & frame[col].notna()
    kept = add_coordinates(frame[has_value])

    plt.hist(kept[col])
    plt.title(title, fontproperties=fontprop, fontsize=15)
    plt.box(False)
    plt.savefig(f'./output/{title}.png', bbox_inches='tight')
    plt.show()
    return kept

def make_Point(x,y):
    '''Build a shapely ``Point`` from an x and a y coordinate.

    Callers in this notebook pass longitude as ``x`` and latitude as ``y``.
    '''
    # Imported locally, as in the original; the docstring now comes first so
    # that it is actually registered as the function's __doc__.
    from shapely.geometry import Point
    return Point(x,y)

def move_df(main_df,merged_df, s_time, l_time, col_name):
    '''Attach the floating population of a time window to each grid cell.

    Parameters
    ----------
    main_df : pandas.DataFrame
        Floating-population table with ``STD_YM``, ``lon`` and ``lat`` columns
        plus one column per hour; the last two characters of each hour
        column's name are parsed as the hour.
    merged_df : geopandas.GeoDataFrame
        Grid to attach the values to (e.g. the child-accident grid); needs a
        ``gid`` column. The original ignored this argument and always used the
        global ``files[1]``; every call in this notebook passes ``files[1]``,
        so results are unchanged.
    s_time, l_time : int
        First and last hour of the window, both inclusive.
    col_name : str
        Name of the new column.

    Returns
    -------
    ``merged_df`` left-merged with a ``col_name`` column: for each ``gid``,
    the mean over the joined points of the per-point population summed across
    the window (cells with no point get 0).
    '''
    # wide -> long: one row per (month, point, hour)
    melted = pd.melt(main_df, id_vars = ['STD_YM','lon','lat'],var_name = 'time')
    melted.time = melted.time.apply(lambda x : int(x[-2:]))

    # total per point and hour, then keep only the requested hours
    grouped = melted.groupby(['lon','lat','time'])['value'].sum().reset_index()
    grouped_cut = grouped[(grouped.time>=s_time)&(grouped.time<=l_time)]

    # total over the window per point, then build the point geometry
    grouped_cut = grouped_cut.groupby(['lon','lat'])['value'].sum().reset_index()
    grouped_cut['geometry'] = grouped_cut.apply(lambda x : make_Point(x['lon'],x['lat']),axis= 1)

    geo_df = gpd.GeoDataFrame(grouped_cut)
    geo_df.crs = CRS('epsg:4326')

    # Spatial join onto the grid. "intersects" is sjoin's default predicate, so the
    # deprecated `op=` keyword (removed in newer geopandas) is simply omitted.
    joined = gpd.sjoin(merged_df,geo_df, how = 'left')
    joined['value'] = joined['value'].fillna(0)
    joined_g = joined.groupby('gid')['value'].mean()
    joined_g.name = col_name
    joined_g = joined_g.reset_index()

    return merged_df.merge(joined_g,how = 'left')

## EDA

### 1. 주정차단속

In [None]:
files[0].head(3)

In [None]:
layer = pdk.Layer(type = "ScatterplotLayer",
                  data = files[0],
                  get_position = 'coordinates',
                  get_radius=3,
                  get_fill_color=[255, 140, 0],
                  filled=True,
                  pickable=True,
                  opacity=0.8)

r = pdk.Deck(layers =[layer], 
             initial_view_state = view_states,
             map_style='mapbox://styles/mapbox/outdoors-v11',
             mapbox_key= mapbox_key)
r.to_html("./output/주정차단속_scatter.html")

In [None]:
#특정지역에서 주정차단속이 많음
files[0]['단속위치'].value_counts().head(10)

In [None]:
#상위 5개 시간대별 증감 확인
#단속횟수가 많은 상위 5개 지역은 연간 감소추세이나, 2020-04월 다소 증가 
files[0]['YYYYMM'] = files[0]['단속일자'].apply(lambda x: str(x)[:6])
top5 =files[0]['단속위치'].value_counts().head(5).index.values

top5_df = files[0][files[0]['단속위치'].isin(top5)]
counted = top5_df.groupby(['단속위치','YYYYMM'])['행정구역'].count()
counted.name = '횟수'
counted = counted.reset_index()
counted['YYYYMM'] = pd.to_datetime(counted['YYYYMM']+'01',format = '%Y%m%d')

plt.figure(figsize = (10,5))
sns.lineplot(x = 'YYYYMM',y = '횟수', hue ='단속위치',data = counted)
plt.title('상위5개 지역 월별 단속횟수', fontproperties=fontprop,fontsize= 15)
plt.ylabel('횟수', fontproperties=fontprop, fontsize= 13)
plt.xlabel('연월',fontproperties=fontprop, fontsize= 13)
plt.xticks(rotation = 45)
plt.legend(prop = fontprop, bbox_to_anchor = (1,0.5),loc = 'center left',frameon=False,fontsize= 12)
plt.grid(linestyle  = '--')
plt.box(False)
plt.savefig('./output/상위5개 지역월별 단속횟수.png',bbox_inches='tight')
plt.show()

In [None]:
# Top-5 enforcement locations of calendar year 2020, with their monthly counts.
# The original filter (`YYYYMM > '202010'`) kept only the months after October 2020,
# which does not match the "2020" in the title; every 2020 month is selected instead.
# NOTE(review): relies on the `YYYYMM` string column added to files[0] in the previous cell.
is_2020 = files[0]['YYYYMM'].str.startswith('2020')
top5_2020 = files[0].loc[is_2020,'단속위치'].value_counts().head(5).index.values
top5_df = files[0][files[0]['단속위치'].isin(top5_2020)]
counted = top5_df.groupby(['단속위치','YYYYMM'])['행정구역'].count()
counted.name = '횟수'
counted = counted.reset_index()
# first day of the month, so the x axis is a real date
counted['YYYYMM'] = pd.to_datetime(counted['YYYYMM']+'01',format = '%Y%m%d')

plt.figure(figsize = (7,5))
sns.lineplot(x = 'YYYYMM',y = '횟수', hue ='단속위치',data = counted)
plt.title('2020년 상위5개 지역 월별 단속횟수', fontproperties=fontprop,fontsize= 15)
plt.ylabel('횟수', fontproperties=fontprop, fontsize= 13)
plt.xlabel('연월',fontproperties=fontprop, fontsize= 13)
plt.xticks(rotation = 45)
plt.legend(prop = fontprop, bbox_to_anchor = (1,0.5),loc = 'center left',frameon=False,fontsize= 12)
plt.grid(linestyle  = '--')
plt.box(False)
plt.savefig('./output/2020년 상위5개 지역 월별 단속횟수.png',bbox_inches='tight')
plt.show()

In [None]:
# Aggregate per enforcement location: number of records and mean coordinates.
counted = files[0].groupby(['단속위치'])['행정구역'].count()
counted.name = '갯수'

# Several columns must be selected with a list (double brackets); the tuple form
# `[...]['a','b']` was deprecated and raises in current pandas.
mean_axis = files[0].groupby(['단속위치'])[['단속위치_경도','단속위치_위도']].agg('mean')
counted_df = pd.merge(mean_axis,counted,left_index = True, right_index = True).reset_index()

In [None]:
# Aggregated view: one circle per enforcement location, handy when zoomed out.
coordinates = [
    [lon, lat]
    for lon, lat in zip(counted_df['단속위치_경도'].values, counted_df['단속위치_위도'].values)
]
counted_df['coordinates'] = coordinates
center = counted_df['coordinates'][0]

# minmax_value drives the colour, log-scaled count drives the radius
counted_df = add_minmax(counted_df, '갯수')
counted_df['log_갯수'] = 10 * np.log1p(counted_df['갯수'])

bust_layer = pdk.Layer(
    'ScatterplotLayer',
    data=counted_df,
    get_position='coordinates',
    get_radius='log_갯수',
    get_fill_color='[10, 100*minmax_value,200,150]',
    fill=True,
    pickable=True,
    auto_highlight=True,
)

r = pdk.Deck(
    layers=[bust_layer],
    initial_view_state=view_states,
    map_style='mapbox://styles/mapbox/outdoors-v11',
    mapbox_key=mapbox_key,
)
r.to_html("./output/주정차단속_scatterplot_group.html")

### 2.어린이교통사고(비공개)

In [None]:
files[1].head(3)

In [None]:
f1_over0 = files[1][files[1].accident_cnt >0]
f1_over0 = add_coordinates(f1_over0)

In [None]:
#최대 사고 횟수는 10
f1_over0.sort_values('accident_cnt',ascending = False).head(5)

In [None]:
#대부분이 1건이며 최대10건
graph = draw_hist(f1_over0,'accident_cnt','사고횟수별분포')

In [None]:
#주정차 단속과 큰 연관성은
f1_over0 = add_minmax(f1_over0, 'accident_cnt')
f1_over0['accident_cnt_10'] = f1_over0['accident_cnt'] *10
layer = pdk.Layer('PolygonLayer', data = f1_over0,
                  pickable= True, auto_highlight=True,
                   get_polygon= 'coordinates',
                 get_fill_color='[1000*minmax_value, 1 ,1,150]')

r = pdk.Deck(layers = [bust_layer,layer],
            initial_view_state = view_states,
             map_style='mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key,
             tooltip={"text": "gid:{gid}\n 어린이교통사고: {accident_cnt}"}
            )
r.to_html("./output/사고지역분포도_grid.html")

In [None]:
f1_over0 = add_minmax(f1_over0, 'accident_cnt')
child_acc_layer = pdk.Layer('ScatterplotLayer', data = f1_over0,
                  pickable= True, extruded = True, auto_highlight=True,
                   get_position= 'coord_center',
                  get_radius = 'accident_cnt_10',
                 get_fill_color='[255, 300-70*minmax_value,0,150]')

r = pdk.Deck(layers = [child_acc_layer],
            initial_view_state = view_states,
             map_style='mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key,
             tooltip={"text": "gid:{gid}\n 사고별횟수: {accident_cnt}"}
            )
r.to_html("./output/사고지역분포도_scatter.html")

### 3.차량등록_격자

In [None]:
files[2].head(3)

In [None]:
f2_over0 = add_coordinates(files[2][files[2]['car_cnt']>0])

In [None]:
#자동차 등록집중도가 최대 3000까지 
ax = plt.boxplot(f2_over0.car_cnt,vert= False)
plt.title('자동차대수 분포_box',fontproperties = fontprop, fontsize= 15)
plt.yticks([])
plt.box(False)
plt.savefig('./output/자동차대수분포_box.png',bbox_inches='tight')
plt.show()

In [None]:
graph = draw_hist(f2_over0,'car_cnt','자동차대수분포')

In [None]:
f2_over0 = add_minmax(f2_over0, 'car_cnt')
car_layer = pdk.Layer('PolygonLayer',data= f2_over0,
                  get_polygon = 'coordinates',
                  get_fill_color = '[1,1000*minmax_value,3000*minmax_value,150]',
                 pickable = True, auto_highlight=True)

r = pdk.Deck(layers = [car_layer,child_acc_layer],
             initial_view_state = view_states,
             map_style='mapbox://styles/mapbox/outdoors-v11',
             mapbox_key = mapbox_key,
             tooltip={"text": "gid:{gid}\n 자동차대수: {car_cnt}"})
r.to_html('./output/자동차등록_grid.html')

### 4.거주격자(총인구)

In [None]:
files[3].head(3)

In [None]:
f3_over0 = add_coordinates(files[3][~((files[3].val==0)|(files[3].val.isna()))])

In [None]:
graph = draw_hist(f3_over0, 'val','총인구 분포')

In [None]:
#총인구랑은 크게 연관성 없는 듯
f3_over0 = add_minmax(f3_over0,'val')
t_people_layer = pdk.Layer('PolygonLayer', data = f3_over0,
                           get_polygon = 'coordinates',
                           get_fill_color = '[1,1000*minmax_value,1,150]',
                           pickable= True, auto_highlight=True)

r = pdk.Deck(layers =[t_people_layer,child_acc_layer],
            initial_view_state = view_states,
             map_style='mapbox://styles/mapbox/outdoors-v11',
             mapbox_key = mapbox_key,
             tooltip={"text": "gid:{gid}\n 총인구: {val}"})
r.to_html('./output/총인구_grid.html')

### 5.유소년인구

In [None]:
files[4].head(3)

In [None]:
f4_over0 = add_coordinates(files[4][~((files[4].val==0)|(files[4].val.isna()))])
graph = draw_hist(f4_over0, 'val','유소년인구 분포')

In [None]:
#애들이 사는곳보다 아닌곳에서 사고가 많이 발생
f4_over0 = add_minmax(f4_over0,'val')
t_people_layer = pdk.Layer('PolygonLayer', data = f4_over0,
                           get_polygon = 'coordinates',
                           get_fill_color = '[1,1000*minmax_value,1,150]',
                           pickable= True, auto_highlight=True)

r = pdk.Deck(layers =[t_people_layer,child_acc_layer],
            initial_view_state = view_states,
             map_style='mapbox://styles/mapbox/outdoors-v11',
             mapbox_key = mapbox_key,
             tooltip={"text": "gid:{gid}\n 유소년인구: {val}"})
r.to_html('./output/유소년인구_grid.html')

### 6.생산가능인구분포

In [None]:
files[5].head(3)

In [None]:
f5_over0 = draw_hist(files[5],'val','생산가능인구 분포')

In [None]:
f5_over0.head(2)

In [None]:
# Working-age population per grid cell (green), with the child-accident layer on top.
# NOTE(review): the previous comment on this cell was a copy of the youth-population
# remark and did not describe this map.
f5_over0 = add_minmax(f5_over0,'val')
# Alpha 150 added so this layer is semi-transparent like the other population maps;
# without a fourth component the polygons were drawn fully opaque.
t_people_layer = pdk.Layer('PolygonLayer', data = f5_over0,
                           get_polygon = 'coordinates',
                           get_fill_color = '[1,1000*minmax_value,1,150]',
                           pickable= True, auto_highlight=True)

r = pdk.Deck(layers =[t_people_layer,child_acc_layer],
            initial_view_state = view_states,
             map_style='mapbox://styles/mapbox/outdoors-v11',
             mapbox_key = mapbox_key,
             tooltip={"text": "gid:{gid}\n 생산가능인구: {val}"})
r.to_html('./output/생산가능인구_grid.html')

### 7.고령인구분포

In [None]:
files[6].head(3)

In [None]:
f6_over0 = draw_hist(files[6],'val','고령인구 분포')

In [None]:
f6_over0.head(2)

In [None]:
# Elderly population per grid cell (green channel scaled by minmax_value), with the child-accident layer on top.
# NOTE(review): the original comment here ("more accidents happen away from where children live")
# looks copied from the youth-population cell - confirm what conclusion applies to this map.
f6_over0 = add_minmax(f6_over0,'val')
t_people_layer = pdk.Layer('PolygonLayer', data = f6_over0,
                           get_polygon = 'coordinates',
                           get_fill_color = '[1,1000*minmax_value,1,150]',
                           pickable= True, auto_highlight=True)

r = pdk.Deck(layers =[t_people_layer,child_acc_layer],
            initial_view_state = view_states,
             map_style='mapbox://styles/mapbox/outdoors-v11',
             mapbox_key = mapbox_key,
             tooltip={"text": "gid:{gid}\n 고령인구: {val}"})
r.to_html('./output/고령인구_grid.html')

### 8.유동인구(비공개)

#### 월별 합계

In [None]:
files[7].head(3)

In [None]:
f7_melted = pd.melt(files[7], id_vars = ['STD_YM','lon','lat'],var_name = 'time')
f7_melted.time = f7_melted.time.apply(lambda x : int(x[-2:]))

In [None]:
f7_melted

In [None]:
#전체 유동인구가 여름철에는 7월, 겨울철에는 1월에 급감 
f7_g_month = f7_melted.groupby('STD_YM')['value'].sum()
plt.plot(f7_g_month)
plt.xticks(f7_g_month.index, range(1,13))
plt.title('오산시 월별 유동인구수',fontproperties = fontprop, size= 15)
plt.box(False)
plt.grid(linestyle = '-',color= 'lightgray')
plt.savefig('./output/오산시 월별 유동인구수.png',bbox_inches='tight')
plt.show()

In [None]:
#출근시간의 유동인구가 퇴근시간의 유동인구보다 낮음
f7_g_time = f7_melted.groupby('time')['value'].sum()
plt.plot(f7_g_time)
plt.xticks(f7_g_time.index, range(1,25))
plt.title('오산시 시간별 유동인구수',fontproperties = fontprop, size= 15)
plt.box(False)
plt.grid(linestyle = '-',color= 'lightgray')
plt.savefig('./output/오산시 시간별 유동인구수.png',bbox_inches='tight')
plt.show()

In [None]:
#각 월별로 패턴이 달라지진 않음
f7_g_time = f7_melted.groupby(['STD_YM','time'])['value'].sum()
f7_g_time = f7_g_time.reset_index()
sns.lineplot(x = 'time', y= 'value', hue= 'STD_YM', data= f7_g_time,palette='tab20')
plt.xticks(range(1,25), range(1,25))
plt.title('오산시 시간별,각월 유동인구수',fontproperties = fontprop, size= 15)
plt.box(False)
plt.grid(linestyle = '-',color= 'lightgray')
plt.legend(bbox_to_anchor = (1,1))
plt.savefig('./output/오산시 시간별,각월 유동인구수.png',bbox_inches='tight')
plt.show()

#### grid단위로 통합
* Point에 buffer 적용후 grid하는 방법은 아직 잘 모르겠음 (buffer의 단위가 미확실)

#### 7-9시 시각화

In [None]:
moved7_9_df = move_df(files[7],files[1],7,9,'moved7_9')

moved7_9_df = moved7_9_df[moved7_9_df['moved7_9'] != 0]

moved7_9_df = add_coordinates(moved7_9_df)
moved7_9_df = add_minmax(moved7_9_df,'moved7_9')

m7_9_layer = pdk.Layer('PolygonLayer', data = moved7_9_df,
                       get_polygon = 'coordinates',
                       get_fill_color = '[1,1,1000*minmax_value,150]',
                       pickable = True,
                       auto_highlight = True)
r = pdk.Deck(layers= [m7_9_layer,child_acc_layer],
            initial_view_state=view_states,
             map_style='mapbox://styles/mapbox/outdoors-v11',
             mapbox_key = mapbox_key,
             tooltip={"text": "gid:{gid}\n 7_9시유동인구: {moved7_9}"})
r.to_html('./output/7_9(출근시간)유동인구.html')      
                   

In [None]:
moved16_18_df = move_df(files[7],files[1],16,18,'moved16_18')

moved16_18_df = moved16_18_df[moved16_18_df['moved16_18'] != 0]

moved16_18_df = add_coordinates(moved16_18_df)
moved16_18_df = add_minmax(moved16_18_df,'moved16_18')

#### 16-18시 시각화

In [None]:
moved16_18_df = move_df(files[7],files[1],16,18,'moved16_18')

moved16_18_df = moved16_18_df[moved16_18_df['moved16_18'] != 0]

moved16_18_df = add_coordinates(moved16_18_df)
moved16_18_df = add_minmax(moved16_18_df,'moved16_18')

m16_18_layer = pdk.Layer('PolygonLayer', data = moved16_18_df,
                       get_polygon = 'coordinates',
                       get_fill_color = '[1,1,1000*minmax_value,150]',
                       pickable = True,
                       auto_highlight = True)
r = pdk.Deck(layers= [m16_18_layer,child_acc_layer],
            initial_view_state=view_states,
             map_style='mapbox://styles/mapbox/outdoors-v11',
             mapbox_key = mapbox_key,
             tooltip={"text": "gid:{gid}\n 16_18시유동인구: {moved16_18}\n accident_cnt: {accident_cnt}"})
r.to_html('./output/16_18(퇴근시간)유동인구.html')      

### 어린이보호구역
* 정의 : 어린이 보호구역이란 초등학교 및 유치원, 어린이집, 학원 등 만 13세 미만 어린이시설 **주변도로** 중 일정구간을 보호구역으로 지정하여 교통안전시설물 및 도로부속물 설치로 어린이들의 안전한 통학공간을 확보하여 교통사고를 예방하기 위한 제도이며 스쿨존(School Zone)이라고도 한다. [도로교통공단](https://www.koroad.or.kr/kp_web/trafficWeakPersonSafeZone3.do)
* 단순좌표, grid에 넣고 그 grid가 어린이 보호구역임을 확인하는게 맞을듯 하다
-> 건물용도정보 가져와서 학교 polygon따야할듯

In [None]:
files[8].shape

In [None]:
files[8].head(3)

In [None]:
files[8].시설종류.value_counts()

In [None]:
#데이터오류 해결
files[8].loc[(files[8]['CCTV설치여부']== 'N')&(files[8]['CCTV설치대수']>0),'CCTV설치여부'] = 'Y'

In [None]:
#초등학교에 CCTV가 많은편 
files[8].pivot_table(index= '시설종류',columns= 'CCTV설치여부',values = 'CCTV설치대수',aggfunc= ['count','sum'])

In [None]:
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
files[8]['시설_label'] = encoder.fit_transform(files[8]['시설종류'])

In [None]:
files[8] = pd.DataFrame(files[8])

In [None]:
# Child-protection zones drawn as small (radius 10) dots, coloured by facility label.
safe_zone_layer = pdk.Layer('ScatterplotLayer', data = files[8],
                           get_position = ['보호구역_경도','보호구역_위도'],
                           get_radius = 10,
                            get_fill_color = '[255,50,200*시설_label,100]',
                           pickable = True, auto_highlight = True)
r = pdk.Deck(layers = [safe_zone_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key,
            tooltip = {'text':'시설종류:{시설종류}\nCCTV설치대수:{CCTV설치대수}'})
# Written to its own file: the original used "안전구역위치_버퍼 300.html", which the
# radius-300 map in the next cell writes too, so this output was always overwritten.
r.to_html("./output/안전구역위치.html")

In [None]:
#https://deck.gl/docs/api-reference/layers/scatterplot-layer
#radius unit meter
safe_zone_layer = pdk.Layer('ScatterplotLayer', data = files[8],
                           get_position = ['보호구역_경도','보호구역_위도'],
                           get_radius = 300,
                            get_fill_color = '[255,50,200*시설_label,50]',
                           pickable = True, auto_highlight = True)
r = pdk.Deck(layers = [safe_zone_layer,child_acc_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key,
            tooltip = {'text':'시설종류:{시설종류}\nCCTV설치대수:{CCTV설치대수}\n사고횟수:{accident_cnt}'})
r.to_html("./output/안전구역위치_버퍼 300.html")

#### point buffer
* 버퍼에 따른 미터단위로 변환은 가능하나, pydeck에는 겹치므로 그림이 안그려지는 것으로 추정됨 

In [None]:
files[8]['point'] = files[8].apply(lambda x : make_Point(x['보호구역_경도'], x['보호구역_위도']),axis= 1)

In [None]:
#미터참고
#https://gis.stackexchange.com/questions/80881/what-is-unit-of-shapely-length-attribute 
files[8]['geometry'] = files[8]['point'].apply(lambda x : x.buffer(0.00247745741629065))
f8 = gpd.GeoDataFrame(files[8])
f8.crs = CRS('EPSG:4326')

In [None]:
from shapely.geometry import LineString
from shapely.ops import transform
from functools import partial  # no longer used in this cell; kept in case later cells rely on it
import pyproj

# Sanity check of how a length in degrees translates into metres.
line1 = LineString([(15.8, 40.0), (16.8,40.0)])  # one degree of longitude at latitude 40
print(str(line1.length) + " degrees")

# `pyproj.transform` (wrapped in functools.partial) is deprecated; a Transformer is the
# supported replacement. always_xy=True keeps the (lon, lat) order used by `line1`.
# NOTE: the figures that used to be quoted in comments here (0.0115 degrees / 1021.8 meters)
# belonged to a different example line and did not describe this cell's output.
project = pyproj.Transformer.from_crs('EPSG:4326', 'EPSG:32633', always_xy=True).transform

line2 = transform(project, line1)
print(str(line2.length) + " meters")

In [None]:
joined = merge_to_grid(f8,files[1],col_name = '어린이안전구역')
merged_df = files[1].merge(joined, how= 'left')

In [None]:
merged_df[merged_df['어린이안전구역']>0]

### 학교위치정보
* 지적도에서 지목 -학교로 확인가능할듯 함. intersect도 지목polygon으로 진행필요

In [None]:
files[9].head(2)

### 초등학교 통학구
* 오산전지역에 해당. 공통이 의미가 있다면 필요할듯 

In [None]:
files[10].head(3)

In [None]:
temp = add_coordinates(files[10])
ele_school = files[9][files[9]['학교명'].apply(lambda x : '초등' in x)]
ele_schoolzone_layer = pdk.Layer('PolygonLayer', data = temp,
                           get_polygon = 'coordinates',
                            get_fill_color = '[10,100,150,150]',
                           pickable = True, auto_highlight = True)
ele_school_layer = pdk.Layer('ScatterplotLayer', data = ele_school,
                            get_position = ['학교위치_경도','학교위치_위도'],
                            get_radius = 30,
                            get_fill_color = [200, 100,100],
                            pickable = True, auto_highlight = True)                      
r = pdk.Deck(layers = [ele_schoolzone_layer,ele_school_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key)
#             tooltip = {'text':'단속구분:{단속구분}}'})
r.to_html("./output/초등학교통학구.html")

### 중학교 통학구
* 오산밖까지 해당... 어떻게쓰나..?

In [None]:
files[11].head(3)

In [None]:
# Middle-school attendance zones (polygons) with the middle schools drawn on top.
temp = add_coordinates(files[11])
# The original filter looked only for '중등' and the point layer below was fed `ele_school`
# (the elementary schools from the previous cell), so no middle school was ever drawn.
# presumably middle-school names contain '중학교' - verify against files[9]['학교명'];
# '중등' is still accepted so nothing the old filter matched is lost.
mid_school = files[9][files[9]['학교명'].str.contains('중학교|중등', na=False)]
mid_schoolzone_layer = pdk.Layer('PolygonLayer', data = temp,
                           get_polygon = 'coordinates',
                            get_fill_color = '[10,100,150,150]',
                           pickable = True, auto_highlight = True)
mid_school_layer = pdk.Layer('ScatterplotLayer', data = mid_school,
                            get_position = ['학교위치_경도','학교위치_위도'],
                            get_radius = 30,
                            get_fill_color = [200, 100,100],
                            pickable = True, auto_highlight = True)
r = pdk.Deck(layers = [mid_schoolzone_layer,mid_school_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key)
r.to_html("./output/중학교통학구.html")

### 유치원현황

In [None]:
files[12].head(3)

In [None]:
joined = merge_to_grid(files[12],files[1], '유치원수', lon = '시설위치_경도',lat = '시설위치_위도')
merged_df = merged_df.merge(joined,how = 'left')

In [None]:
#학원밀집지역과 사고지역과 연관성이 있는듯 
temp = merged_df[merged_df['유치원수']>0]
temp = add_coordinates(temp)
temp = add_minmax(temp,'유치원수')
cross_layer = pdk.Layer('PolygonLayer', data = temp,
                           get_polygon = 'coordinates',
                            get_fill_color = '[10,255-200*minmax_value,10,150]',
                           pickable = True, auto_highlight = True)
r = pdk.Deck(layers = [cross_layer,child_acc_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key)
#             tooltip = {'text':'단속구분:{단속구분}}'})
r.to_html("./output/유치원수.html")

### 기상데이터
* 10~19년... 사용할방법이 딱히 보이지 않음

In [None]:
files[13].head(3)

In [None]:
files[13]['일시'] = pd.to_datetime(files[13]['일시'],format='%Y/%m/%d')

In [None]:
#특이사항 없는듯 
plt.plot(files[13]['일시'],files[13]['평균기온(°C)'])
plt.box(False)
plt.savefig('./output/연도별 기상.png',bbox_inches='tight')
plt.show()

### 무인단속카메라

In [None]:
files[14].head(3)

In [None]:
camera_layer = pdk.Layer('ScatterplotLayer', data = files[14],
                           get_position = ['설치위치_경도','설치위치_위도'],
                           get_radius = 30,
                            get_fill_color = '[250,10,10,150]',
                           pickable = True, auto_highlight = True)
r = pdk.Deck(layers = [camera_layer,child_acc_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key)
#             tooltip = {'text':'단속구분:{단속구분}}'}
r.to_html("./output/무인단속카메라.html")

### 도로안전표지표준데이터
* 다양한 종류가 있음. 대분류로 나눠야할까? - 규제표지는 없는거같음

In [None]:
files[15].head(3)

In [None]:
files[15]['표지종별'].unique()

In [None]:
# Number of road-safety signs per grid cell.
joined = merge_to_grid(files[15],files[1], '표지판수')
# Extend the running feature table instead of rebuilding it from files[1]: the original
# restart silently dropped the columns merged in earlier sections (어린이안전구역, 유치원수).
merged_df = merged_df.merge(joined,how = 'left')

In [None]:
f15 = merged_df[merged_df['표지판수']>0]
f15 = add_coordinates(f15)
sign_layer = pdk.Layer('PolygonLayer', data = f15,
                           get_polygon = 'coordinates',
                            get_fill_color = '[10,100*표지판수,200,150]',
                           pickable = True, auto_highlight = True)
r = pdk.Deck(layers = [sign_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key)
#             tooltip = {'text':'단속구분:{단속구분}}'})
r.to_html("./output/도로안전표지.html")

In [None]:
sign_layer = pdk.Layer('ScatterplotLayer', data = files[15],
                           get_position = ['lon','lat'],
                           get_radius = 10,
                            get_fill_color = '[10,10,200,150]',
                           pickable = True, auto_highlight = True)
r = pdk.Deck(layers = [sign_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key)
#             tooltip = {'text':'단속구분:{단속구분}}'})
r.to_html("./output/도로안전표지_scatter.html")

### 횡단보도

In [None]:
files[16].head(3)

In [None]:
joined = merge_to_grid(files[16],files[1],'횡단보도수')
merged_df = merged_df.merge(joined,how = 'left')

In [None]:
#횡단보도가 있는 곳에서 사고가 있는편
temp = merged_df[merged_df['횡단보도수']>0]
temp = add_coordinates(temp)
cross_layer = pdk.Layer('PolygonLayer', data = temp,
                           get_polygon = 'coordinates',
                            get_fill_color = '[10,100*횡단보도수,200,150]',
                           pickable = True, auto_highlight = True)
r = pdk.Deck(layers = [cross_layer,child_acc_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key)
#             tooltip = {'text':'단속구분:{단속구분}}'})
r.to_html("./output/횡단보도.html")

### 과속방지턱
* 보차분리 : 보행자의 안전과 생활 환경의 보호는 물론, 교통 소통을 원활하게 할 목적으로 보행자의 보행 공간과 차량의 주행 공간이 물리적으로 구분된 상태

In [None]:
files[17].head(3)

In [None]:
joined = merge_to_grid(files[17],files[1],'과속방지턱수',lon = '설치위치_경도',lat = '설치위치_위도')
merged_df = merged_df.merge(joined,how = 'left')

In [None]:
#과속방지턱 데이터가 적어서 그런지 연관성이 높아보지이 않음 
temp = merged_df[merged_df['과속방지턱수']>0]
temp = add_coordinates(temp)
cross_layer = pdk.Layer('PolygonLayer', data = temp,
                           get_polygon = 'coordinates',
                            get_fill_color = '[10,100*과속방지턱수,200,150]',
                           pickable = True, auto_highlight = True)
r = pdk.Deck(layers = [cross_layer,child_acc_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key)
#             tooltip = {'text':'단속구분:{단속구분}}'})
r.to_html("./output/과속방지턱.html")

### 신호등

In [None]:
files[18].head(3)

In [None]:
joined = merge_to_grid(files[18],files[1],'신호등수')
merged_df = merged_df.merge(joined,how = 'left')
#신호등없는데서 사고가 꽤 있는듯
temp = merged_df[merged_df['신호등수']>0]
temp = add_coordinates(temp)
cross_layer = pdk.Layer('PolygonLayer', data = temp,
                           get_polygon = 'coordinates',
                            get_fill_color = '[10,100*신호등수,200,150]',
                           pickable = True, auto_highlight = True)
r = pdk.Deck(layers = [cross_layer,child_acc_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key)
#             tooltip = {'text':'단속구분:{단속구분}}'})
r.to_html("./output/신호등.html")

### CCTV설치현황
* A - 방범, B - 도시공원, C - 어린이보호, D - 차량방범, E - 과속단속
* 과속단속관련이 꽤 적은편

In [None]:
files[19].head(3)

In [None]:
files[19]['CCTV 유형코드'].value_counts()

In [None]:
joined = merge_to_grid(files[19].iloc[:-1,:],files[1],'CCTV수',lon = '설치위치_경도',lat= '설치위치_위도')
merged_df = merged_df.merge(joined,how = 'left')
#CCTV없는데서 사고가 높다
temp = merged_df[merged_df['CCTV수']>0]
temp = add_coordinates(temp)
cross_layer = pdk.Layer('PolygonLayer', data = temp,
                           get_polygon = 'coordinates',
                            get_fill_color = '[10,100*CCTV수,200,150]',
                           pickable = True, auto_highlight = True)
r = pdk.Deck(layers = [cross_layer,child_acc_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key)
#             tooltip = {'text':'단속구분:{단속구분}}'})
r.to_html("./output/CCTV현황.html")

### 인도
* 도로.. 다소 없는 곳이 있음(실제로 있는데 없음) 
* QUAL(인도재질) : 0-미분류, 1-아스콘, 2-콘크리트 3-블록, 4-비포장, 5-아스콘/블록, 999-기타
* BYYN(자전거도로) : 0-미분류, 1-유, 2-무
* KIND(인도종류) : 0-미분류, 1-인도, 2-자전거도로

In [None]:
files[20].head(3)

In [None]:
files[20]['UFID'].apply(lambda x : x[3:-3]).value_counts()

In [None]:
files[20].plot()

In [None]:
# 큰의미없을듯
files[20].QUAL.value_counts()

In [None]:
#미분류없이 자전거/인도로만 분류됨
files[20].BYYN.value_counts(dropna=False)

In [None]:
bike_df = files[20][files[20].BYYN == 'BYC001']
walk_df = files[20][files[20].BYYN == 'BYC002']

joined = merge_to_grid(walk_df,files[1],'인도수')
merged_df = merged_df.merge(joined,how = 'left')

joined = merge_to_grid(bike_df,files[1],'자전거도로수')
merged_df = merged_df.merge(joined,how = 'left')

In [None]:
bike_df

In [None]:
#CCTV없는데서 사고가 높다
# temp = merged_df[merged_df['CCTV수']>0]
temp1 = walk_df
temp1 = add_coordinates(temp1)
walk_layer = pdk.Layer('PathLayer', data = temp1,
                        get_path = 'coordinates',
                        get_width = 10,
                        get_color = '[10,100,200,150]',
                        pickable = True, auto_highlight = True)
temp2 = bike_df
temp2 = add_coordinates(temp2)

bike_layer = pdk.Layer('PathLayer', data = temp2,
                        get_path = 'coordinates',
                        get_width = 10,
                        get_color = '[10,200,100,150]',
                        pickable = True, auto_highlight = True)

r = pdk.Deck(layers = [bike_layer,walk_layer,child_acc_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key)
#             tooltip = {'text':'단속구분:{단속구분}}'})
r.to_html("./output/인도.html")

### 버스정류장

In [None]:
files[21].head(3)

In [None]:
joined = merge_to_grid(files[21],files[1],'버스정류장수',lon = '정류장 위치_경도',lat = '정류장 위치_위도')
merged_df = merged_df.merge(joined,how = 'left')

In [None]:
# Observation: bus stops show no clear relationship with accidents.
# Only grid cells with at least one bus stop are drawn; the green channel of
# the fill colour scales with the stop count.
temp = add_coordinates(merged_df.loc[merged_df['버스정류장수'] > 0])

cross_layer = pdk.Layer(
    'PolygonLayer',
    data=temp,
    get_polygon='coordinates',
    get_fill_color='[10,100*버스정류장수,200,150]',
    pickable=True,
    auto_highlight=True,
)
r = pdk.Deck(
    layers=[cross_layer, child_acc_layer],
    initial_view_state=view_states,
    map_style='mapbox://styles/mapbox/outdoors-v11',
    mapbox_key=mapbox_key,
)
r.to_html("./output/버스정류장.html")

### 상세도로망(비공개)
* road_no(도로등급) : 101-고속국도 102-도시고속국도 103-일반국도, 104-특별광역시도 105-국가지원지방도 106-지방도 107-시군구 108-고속도로 연결램프
* link_type(링크종별) : 1-본선분리 2-연결로

In [None]:
# Preview the first rows of the detailed road network (files[22]).
files[22].head(3)

In [None]:
# About 600 values are missing...
# NOTE(review): value_counts() omits NaN by default, so missing speeds are not
# listed here unless they are encoded as an ordinary value - confirm.
files[22]['max_speed'].value_counts()

In [None]:
# Road network map: line width follows the road width (x10) and the colour
# expression uses minmax_value (presumably produced by add_minmax from
# max_speed - confirm against its definition).
temp = add_coordinates(files[22]).assign(
    width=lambda d: d['width'].astype(int),
    width_10=lambda d: d['width'] * 10,
)
temp = add_minmax(temp, 'max_speed')

cross_layer = pdk.Layer(
    'PathLayer',
    data=temp,
    get_path='coordinates',
    get_width='width_10',
    get_color='[255, 255-200*minmax_value,10,150]',
    pickable=True,
    auto_highlight=True,
)
r = pdk.Deck(
    layers=[cross_layer],
    initial_view_state=view_states,
    map_style='mapbox://styles/mapbox/outdoors-v11',
    mapbox_key=mapbox_key,
)
r.to_html("./output/도로.html")

In [None]:
# Same road layer as the plain road map, but with a different colour ramp and
# drawn over safe_zone_layer and child_acc_layer (both built in earlier cells);
# the tooltip shows the speed limit and the accident count.
temp = add_coordinates(files[22]).assign(
    width=lambda d: d['width'].astype(int),
    width_10=lambda d: d['width'] * 10,
)
temp = add_minmax(temp, 'max_speed')

cross_layer = pdk.Layer(
    'PathLayer',
    data=temp,
    get_path='coordinates',
    get_width='width_10',
    get_color='[10, 255-250*minmax_value,100,150]',
    pickable=True,
    auto_highlight=True,
)
r = pdk.Deck(
    layers=[safe_zone_layer, child_acc_layer, cross_layer],
    initial_view_state=view_states,
    map_style='mapbox://styles/mapbox/outdoors-v11',
    mapbox_key=mapbox_key,
    tooltip={'text':'최고속도:{max_speed},사고횟수:{accident_cnt}'},
)
r.to_html("./output/도로+안전구역.html")

### 추정교통량(비공개)

In [None]:
# Preview the first rows of the estimated traffic volume dataset (files[23]).
files[23].head(3)

In [None]:
# Build the join key: stringify 상세도로망_LinkID and drop its last two characters.
# NOTE(review): presumably this strips a trailing '.0' left by a float-typed id - confirm.
files[23]['link_id'] = files[23]['상세도로망_LinkID'].apply(lambda x : str(x)[:-2])

In [None]:
# Hourly traffic volume summed over all road links, one line per vehicle class.
# Drop the '전일' (whole-day) rows so only hourly records remain. Take a copy so
# the dtype conversion below writes to an independent frame, not a slice of files[23].
f23_gt = files[23][files[23]['시간적범위'] != '전일'].copy()

f23_gt['시간적범위'] = f23_gt['시간적범위'].astype(int)
# Select the value columns with a list; tuple-style selection on a GroupBy is
# rejected by current pandas.
f23_gt = f23_gt.groupby(['시간적범위'])[['전체 추정교통량', '승용차 추정교통량', '버스 추정교통량', '화물차 추정교통량']].sum().reset_index()
f23_m = pd.melt(f23_gt,id_vars = '시간적범위')
sns.lineplot(x= '시간적범위', y = 'value', hue = 'variable', data = f23_m)
plt.title('전 도로 시간대별 교통량',fontproperties = fontprop, fontsize= 15)
plt.xlabel('시간대',fontproperties=fontprop, fontsize= 13)
plt.legend(prop = fontprop, bbox_to_anchor = (1,0.5),loc = 'center left',frameon=False,fontsize= 12)
plt.grid(linestyle  = '--')
# Remove the axes frame before saving so the PNG matches the displayed figure.
plt.box(False)
plt.savefig('./output/전 도로 시간대별 교통량.png',bbox_inches='tight')
plt.show()

#### merge방법

In [None]:
# Reshape the traffic table to long (stacked) form, keeping link_id and the
# time-range column as identifiers; the former column names go into 'time'.
melted = pd.melt(files[23], id_vars = ['link_id','시간적범위'],var_name = 'time')
# melted.time = melted.time.apply(lambda x : int(x[-2:]))

In [None]:
# Traffic volume inside a time window (s_time..l_time, inclusive), summed per
# road link and then averaged over the links that intersect each grid cell.
s_time = 7
l_time = 9

# Restrict to the hourly records of the four volume columns.
# .copy() so the dtype conversion writes to an independent frame.
f23 = files[23][['link_id','시간적범위','전체 추정교통량', '승용차 추정교통량', '버스 추정교통량', '화물차 추정교통량']]
f23 = f23[f23['시간적범위'] != '전일'].copy()
f23['시간적범위'] = f23['시간적범위'].astype(int)
melted = pd.melt(f23, id_vars = ['link_id','시간적범위'])

# Sum the volumes per link inside the window. The columns are selected with a
# list because tuple-style selection on a GroupBy is rejected by current pandas.
f23_time_cut = f23[(f23['시간적범위']>=s_time)&(f23['시간적범위']<=l_time)]
f23_time_cut = f23_time_cut.groupby('link_id')[['전체 추정교통량', '승용차 추정교통량', '버스 추정교통량', '화물차 추정교통량']].sum().reset_index()

# Rename the value columns with a '<s_time>_<l_time>' suffix; the first
# column (the link_id key) keeps its name.
rename_ls = [col if i == 0 else col + f'{s_time}_{l_time}'
             for i, col in enumerate(f23_time_cut.columns)]
f23_time_cut.columns = rename_ls

# Spatially join grid cells with the road links and average per grid cell.
# Only the numeric volume columns are averaged: rename_ls[0] is the string
# link_id key, which cannot be averaged.
road_merged = files[22][['link_id','geometry']].merge(f23_time_cut,how = 'left')
temp = gpd.sjoin(merged_df[['gid','geometry']],road_merged)
temp = temp.groupby('gid')[rename_ls[1:]].mean().reset_index()
merged_df = merged_df.merge(temp,how = 'left')
# Grid cells without any intersecting link get 0.
merged_df = merged_df.fillna(0)

In [None]:
# Observation: traffic is concentrated on the main roads; highways may need
# to be excluded.
# Draw grid cells with a positive 07-09 total volume; the red channel of the
# fill colour is driven by minmax_value.
temp = add_minmax(
    add_coordinates(merged_df.loc[merged_df['전체 추정교통량7_9'] > 0]),
    '전체 추정교통량7_9',
)

cross_layer = pdk.Layer(
    'PolygonLayer',
    data=temp,
    get_polygon='coordinates',
    get_fill_color='[1000*minmax_value,10,10,150]',
    pickable=True,
    auto_highlight=True,
)
r = pdk.Deck(
    layers=[cross_layer, child_acc_layer],
    initial_view_state=view_states,
    map_style='mapbox://styles/mapbox/outdoors-v11',
    mapbox_key=mapbox_key,
)
r.to_html("./output/전체 추정교통량7_9_grid.html")

In [None]:
# Whole-day ('전일') traffic volume per road link, attached to the road network.
files[23]['link_id'] = files[23]['상세도로망_LinkID'].apply(lambda x : str(x)[:-2])
f23_gt = files[23][files[23]['시간적범위'] == '전일']

# Select the value columns with a list; tuple-style selection on a GroupBy is
# rejected by current pandas.
f23_gt = f23_gt.groupby(['link_id'])[['전체 추정교통량', '승용차 추정교통량', '버스 추정교통량', '화물차 추정교통량']].sum().reset_index()
road_merged = files[22].merge(f23_gt,how = 'left')
# Links without a traffic record get 0.
road_merged = road_merged.fillna(0)

In [None]:
# Road-level map of whole-day traffic volume; line width follows the road
# width (x10) and the colour is driven by minmax_value.
temp = add_coordinates(road_merged)
temp['width'] = temp['width'].astype(int)
temp['width_10'] = temp['width']*10
temp = add_minmax(temp, '전체 추정교통량')
# The colour expression previously read '[255, 255-200*minmax_value,,10]'.
# The stray empty element made it malformed; it is now the three-component
# form also used by the congestion-time road map.
cross_layer = pdk.Layer('PathLayer', data = temp,
                        get_path = 'coordinates',
                        get_width = 'width_10',
                        get_color = '[255, 255-200*minmax_value,10]',
                        pickable = True, auto_highlight = True)
r = pdk.Deck(layers = [cross_layer,child_acc_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key,
            tooltip = {'text':'전체 추정교통량:{전체 추정교통량}\n승용차 추정교통량:{승용차 추정교통량}\n버스 추정교통량:{버스 추정교통량}\n화물차 추정교통량:{화물차 추정교통량}\n accident_cnt: {accident_cnt}'})
r.to_html("./output/추정교통량.html")

### 혼잡빈도강도(비공개)

In [None]:
# Preview the first rows of the congestion-frequency-intensity dataset (files[24]).
files[24].head(3)

In [None]:
# Whole-day congestion-frequency intensity per road link, averaged over the
# links that intersect each grid cell and merged onto merged_df.
files[24]['link_id'] = files[24]['상세도로망_LinkID'].apply(lambda x : str(x)[:-2])
f24_gt = files[24][files[24]['시간적범위'] == '전일']
f24_gt = f24_gt.groupby(['link_id'])['혼잡빈도강도'].sum().reset_index()

# Use a dedicated name here instead of reassigning road_merged. The road-level
# cells that follow merge f24_gt onto road_merged and show the traffic-volume
# fields in their tooltip, which only works while road_merged still carries
# the traffic-volume columns.
road_congestion = files[22][['link_id','geometry']].merge(f24_gt,how = 'left')
temp = gpd.sjoin(merged_df[['gid','geometry']],road_congestion)
temp = temp.groupby('gid')['혼잡빈도강도'].mean().reset_index()

merged_df = merged_df.merge(temp,how = 'left')
# Grid cells without any intersecting link get 0.
merged_df = merged_df.fillna(0)

In [None]:
# Observation: possibly some relationship with congestion-frequency intensity?
# Draw grid cells with a positive value; the raw column value is used as the
# red channel of the fill colour.
temp = add_minmax(
    add_coordinates(merged_df.loc[merged_df['혼잡빈도강도'] > 0]),
    '혼잡빈도강도',
)

cross_layer = pdk.Layer(
    'PolygonLayer',
    data=temp,
    get_polygon='coordinates',
    get_fill_color='[혼잡빈도강도,10,10,150]',
    pickable=True,
    auto_highlight=True,
)
r = pdk.Deck(
    layers=[cross_layer, child_acc_layer],
    initial_view_state=view_states,
    map_style='mapbox://styles/mapbox/outdoors-v11',
    mapbox_key=mapbox_key,
)
r.to_html("./output/혼잡빈도강도_grid.html")

In [None]:
# Attach the whole-day ('전일') congestion-frequency intensity to the road-level
# frame road_merged (built in an earlier cell).
files[24]['link_id'] = files[24]['상세도로망_LinkID'].apply(lambda x : str(x)[:-2])
f24_gt = files[24][files[24]['시간적범위'] == '전일']

# Sum per link, left-join onto the road frame, and treat links without a record as 0.
f24_gt = f24_gt.groupby(['link_id'])['혼잡빈도강도'].sum().reset_index()
road_merged = road_merged.merge(f24_gt,how = 'left')
road_merged['혼잡빈도강도'] = road_merged['혼잡빈도강도'].fillna(0)

In [None]:
# Road-level map coloured by congestion-frequency intensity (via minmax_value).
temp = add_coordinates(road_merged)
temp = add_minmax(temp, '혼잡빈도강도')
# Two fixes relative to the previous version of this layer:
#  - the colour expression had a stray empty element
#    ('[255, 255-200*minmax_value,,10]'); it is now the three-component form
#    also used by the congestion-time road map;
#  - 'highlight' is replaced by 'auto_highlight', the hover-highlight option
#    every other layer in this notebook sets.
cross_layer = pdk.Layer('PathLayer', data = temp,
                        get_path = 'coordinates',
                        get_width = '10',
                        get_color = '[255, 255-200*minmax_value,10]',
                        pickable = True, auto_highlight = True)
r = pdk.Deck(layers = [cross_layer,child_acc_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key,
            tooltip = {'text':'전체 추정교통량:{전체 추정교통량}\n승용차 추정교통량:{승용차 추정교통량}\n버스 추정교통량:{버스 추정교통량}\n화물차 추정교통량:{화물차 추정교통량}\n혼잡빈도강도:{혼잡빈도강도}\n accident_cnt: {accident_cnt}'})
r.to_html("./output/혼잡빈도강도.html")

### 혼잡시간강도(비공개)

In [None]:
# Preview the first rows of the congestion-time-intensity dataset (files[25]).
files[25].head(3)

In [None]:
# Whole-day congestion-time intensity per road link, averaged over the links
# that intersect each grid cell and merged onto merged_df.
files[25]['link_id'] = files[25]['상세도로망_LinkID'].apply(lambda x : str(x)[:-2])
f25_gt = files[25][files[25]['시간적범위'] == '전일']
f25_gt = f25_gt.groupby(['link_id'])['혼잡시간강도'].sum().reset_index()

# Use a dedicated name here instead of reassigning road_merged. The road-level
# cells that follow merge f25_gt onto road_merged and show the columns
# accumulated so far in their tooltip, so road_merged must not be replaced by
# a frame that holds only link_id, geometry and this one measure.
road_congestion_time = files[22][['link_id','geometry']].merge(f25_gt,how = 'left')
temp = gpd.sjoin(merged_df[['gid','geometry']],road_congestion_time)
temp = temp.groupby('gid')['혼잡시간강도'].mean().reset_index()

merged_df = merged_df.merge(temp,how = 'left')
# Grid cells without any intersecting link get 0.
merged_df = merged_df.fillna(0)

In [None]:
# Observation: possibly some relationship with congestion-time intensity?
# Draw grid cells with a positive value; the raw column value is used as the
# red channel of the fill colour.
temp = add_minmax(
    add_coordinates(merged_df.loc[merged_df['혼잡시간강도'] > 0]),
    '혼잡시간강도',
)

cross_layer = pdk.Layer(
    'PolygonLayer',
    data=temp,
    get_polygon='coordinates',
    get_fill_color='[혼잡시간강도,10,10,150]',
    pickable=True,
    auto_highlight=True,
)
r = pdk.Deck(
    layers=[cross_layer, child_acc_layer],
    initial_view_state=view_states,
    map_style='mapbox://styles/mapbox/outdoors-v11',
    mapbox_key=mapbox_key,
)
r.to_html("./output/혼잡시간강도_grid.html")

In [None]:
# Attach the whole-day ('전일') congestion-time intensity to the road-level
# frame road_merged (built in an earlier cell).
files[25]['link_id'] = files[25]['상세도로망_LinkID'].apply(lambda x : str(x)[:-2])
f25_gt = files[25][files[25]['시간적범위'] == '전일']

# Sum per link, left-join onto the road frame, and treat links without a record as 0.
f25_gt = f25_gt.groupby(['link_id'])['혼잡시간강도'].sum().reset_index()
road_merged = road_merged.merge(f25_gt,how = 'left')
road_merged['혼잡시간강도'] = road_merged['혼잡시간강도'].fillna(0)

In [None]:
# Road-level map coloured by congestion-time intensity (via minmax_value);
# the tooltip lists the traffic and congestion fields plus the accident count.
temp = add_minmax(add_coordinates(road_merged), '혼잡시간강도')

cross_layer = pdk.Layer(
    'PathLayer',
    data=temp,
    get_path='coordinates',
    get_width='10',
    get_color='[255, 255-200*minmax_value,10]',
    pickable=True,
    auto_highlight=True,
)
r = pdk.Deck(
    layers=[cross_layer, child_acc_layer],
    initial_view_state=view_states,
    map_style='mapbox://styles/mapbox/outdoors-v11',
    mapbox_key=mapbox_key,
    tooltip={'text':'전체 추정교통량:{전체 추정교통량}\n승용차 추정교통량:{승용차 추정교통량}\n버스 추정교통량:{버스 추정교통량}\n화물차 추정교통량:{화물차 추정교통량}\n혼잡빈도강도:{혼잡빈도강도}\n혼잡시간강도:{혼잡시간강도}\n accident_cnt: {accident_cnt}'},
)
r.to_html("./output/혼잡시간강도.html")

### 27.오산시_도로명주소_건물
* BDTYP_CD(건물용도) : 01000-단독주택, 02000-공동주택, 03000-제1종근린생활시설, 04000-제2종근린생활시설 05000 - 문화및 집회시설 ..29000-장례식장까지
* BULD_NM(건축물대장 건물명) : 60%이상 결측
* BULD_NM_DC(상세건물명) : 88%이상 결측
* BULD_SE_CD(건물구분코드) : 전부 0 
* BUL_MAN_NO(건물일련번호) : 이상없음
* EMD_CD(읍면동코드)
* GRO_FLO_CO(지상층수)
* LNBR_MNNM(지번본번)
* LNBR_SLNO(지번부번)
* UND_FLO_CO(지하층수)

In [None]:
# Row/column count of the road-name-address building dataset (files[26]).
files[26].shape

In [None]:
# Preview the first rows of the building dataset.
files[26].head(3)

#### 데이터일반확인

In [None]:
# Share of missing values in the two building-name columns of files[26].
# The ratio was previously divided by the row count of files[27], a different
# dataset; isna().mean() is the missing fraction of the column itself.
print('BULD_NM 결측비율',files[26].BULD_NM.isna().mean())
print('BULD_NM_DC 결측비율',files[26].BULD_NM_DC.isna().mean())

In [None]:
# Check that the number of distinct building serial numbers equals the row count.
files[26].BUL_MAN_NO.nunique() == files[26].shape[0]

In [None]:
# Total floors per building: underground floors plus above-ground floors.
files[26]['floor'] = files[26]['UND_FLO_CO'] + files[26]['GRO_FLO_CO']

In [None]:
# Top-level building-use category.
# Open question: how should a dominant building use be assigned to each grid cell?
bulid_type = pd.read_csv('./참고자료/BDTYP_CD(건물용도).csv',encoding= 'cp949')
# Collapse the detailed code to its top-level code: first two characters + '000'.
files[26]['nomal_code']= files[26]['BDTYP_CD'].apply(lambda x : int(x[:2]+'000'))

# Rename the lookup columns so the merge key matches, then attach the category name.
bulid_type.columns = ['nomal_code','nomal_type']
files[26]  = files[26].merge(bulid_type,how = 'left',on = 'nomal_code')

In [None]:
# Ten most frequent top-level building-use categories.
files[26].nomal_type.value_counts().sort_values(ascending = False).head(10)

#### 시각화

In [None]:
# Helper column of ones, summed in a pivot with one row per building serial
# number and one column per top-level use category.
files[26]['temp_count'] = 1
f26_pivot = files[26].pivot_table(index='BUL_MAN_NO',columns = 'nomal_type',values= 'temp_count',aggfunc= 'sum')

In [None]:
# Building count: aggregate the buildings onto the grid (files[1]) as '건물개수'
# and left-join the result onto merged_df.
joined = merge_to_grid(files[26],files[1],'건물개수')
merged_df = merged_df.merge(joined,how = 'left')

In [None]:
# Observation: building count shows no clear relationship with accidents.
# Draw grid cells containing at least one building; the green channel of the
# fill colour is driven by minmax_value.
temp = add_minmax(
    add_coordinates(merged_df.loc[merged_df['건물개수'] > 0]),
    '건물개수',
)

cross_layer = pdk.Layer(
    'PolygonLayer',
    data=temp,
    get_polygon='coordinates',
    get_fill_color='[10,255-200*minmax_value,10,150]',
    pickable=True,
    auto_highlight=True,
)
r = pdk.Deck(
    layers=[cross_layer, child_acc_layer],
    initial_view_state=view_states,
    map_style='mapbox://styles/mapbox/outdoors-v11',
    mapbox_key=mapbox_key,
)
r.to_html("./output/건물개수_grid.html")

In [None]:
# Total floor count per grid cell.
# The column is renamed on `joined` before merging, and the merge is skipped
# when merged_df already has it. Re-running this cell therefore no longer adds
# a duplicate column, which merge-then-rename did.
joined = merge_to_grid(files[26],files[1],'floor').rename(columns={'floor':'총층수'})
if '총층수' not in merged_df.columns:
    merged_df = merged_df.merge(joined,how = 'left')

In [None]:
# Observation: total floor count is only weakly related to accidents; compared
# with the building-count map it is darker in residential areas.
temp = add_minmax(
    add_coordinates(merged_df.loc[merged_df['총층수'] > 0]),
    '총층수',
)

cross_layer = pdk.Layer(
    'PolygonLayer',
    data=temp,
    get_polygon='coordinates',
    get_fill_color='[10,255-220*minmax_value,10,150]',
    pickable=True,
    auto_highlight=True,
)
r = pdk.Deck(
    layers=[cross_layer, child_acc_layer],
    initial_view_state=view_states,
    map_style='mapbox://styles/mapbox/outdoors-v11',
    mapbox_key=mapbox_key,
)
r.to_html("./output/floor_grid.html")

### 28.오산시_건물연면적_격자.geojson

In [None]:
# Preview the first rows of the building gross-floor-area grid (files[27]).
files[27].head(3)

In [None]:
# Treat missing floor-area values as 0.
files[27].val = files[27].val.fillna(0)

In [None]:
# Building gross floor area per grid cell.
# The column is renamed on `joined` before merging, and the merge is skipped
# when merged_df already has it, so re-running this cell does not create a
# duplicate '건물연면적' column.
joined = merge_to_grid(files[27],files[1],'val').rename(columns={'val':'건물연면적'})
if '건물연면적' not in merged_df.columns:
    merged_df = merged_df.merge(joined,how = 'left')

In [None]:
# A few cells (apartment areas) have gross floor areas so large that they
# distort every chart; the charts are drawn after removing values of 1500000 or more.
# Horizontal box plot of the floor-area distribution, saved without the axes frame.
plt.boxplot(merged_df['건물연면적'],vert = False)
plt.title('건물연면적 분포_box',fontproperties = fontprop, fontsize= 15)
plt.yticks([])
plt.box(False)
plt.savefig('./output/건물연면적분포_box.png',bbox_inches='tight')
plt.show()

In [None]:
# Observation: accidents seem more frequent where the gross floor area is
# small than where it is large.
# Keep cells with a positive floor area below the outlier cut-off.
# NOTE(review): the cut-off used here is 150000, while the comment on the
# box-plot cell mentions 1500000 - confirm which value is intended.
in_range = (merged_df['건물연면적'] > 0) & (merged_df['건물연면적'] < 150000)
temp = add_minmax(add_coordinates(merged_df.loc[in_range]), '건물연면적')

cross_layer = pdk.Layer(
    'PolygonLayer',
    data=temp,
    get_polygon='coordinates',
    get_fill_color='[10,255-200*minmax_value,10,150]',
    pickable=True,
    auto_highlight=True,
)
r = pdk.Deck(
    layers=[cross_layer, child_acc_layer],
    initial_view_state=view_states,
    map_style='mapbox://styles/mapbox/outdoors-v11',
    mapbox_key=mapbox_key,
)
r.to_html("./output/건물연면적.html")

### 29.오산시_체육시설현황.csv

In [None]:
# Preview the first rows of the sports-facility dataset (files[28]).
files[28].head(3)

In [None]:
# Facility-type distribution (original note: "체육도장", i.e. martial-arts gyms).
files[28]['시설구분명'].value_counts()

In [None]:
# Aggregate sports facilities onto the grid (files[1]) as '체육시설수', using the
# facility longitude/latitude columns, then left-join onto merged_df.
joined = merge_to_grid(files[28],files[1],'체육시설수', lon = '설치위치_경도',lat = '설치위치_위도')
merged_df = merged_df.merge(joined,how = 'left')

In [None]:
# Grid cells containing at least one sports facility; the green channel of the
# fill colour scales with the facility count.
temp = add_coordinates(merged_df.loc[merged_df['체육시설수'] > 0])

cross_layer = pdk.Layer(
    'PolygonLayer',
    data=temp,
    get_polygon='coordinates',
    get_fill_color='[10,100*체육시설수,200,150]',
    pickable=True,
    auto_highlight=True,
)
r = pdk.Deck(
    layers=[cross_layer, child_acc_layer],
    initial_view_state=view_states,
    map_style='mapbox://styles/mapbox/outdoors-v11',
    mapbox_key=mapbox_key,
)
r.to_html("./output/체육시설수.html")

### 30.오산시_학원_및_교습소_현황.csv
* 교습과정명 종류 1120개-> 필요하다면 대분류필요할듯

In [None]:
# Preview the first rows of the private-academy / tutoring-centre dataset (files[29]).
files[29].head(3)

In [None]:
# Distribution of the business-type column.
files[29]['업종구분명'].value_counts()

In [None]:
# Distribution of the course-name column.
files[29]['교습과정명'].value_counts()

In [None]:
# Aggregate academies onto the grid (files[1]) as '학원수', using the facility
# longitude/latitude columns, then left-join onto merged_df.
joined = merge_to_grid(files[29],files[1],'학원수', lon = '시설위치_경도', lat = '시설위치_위도')
merged_df = merged_df.merge(joined,how = 'left')

In [None]:
# Observation: areas dense with academies appear related to accident areas.
# Draw grid cells containing at least one academy; the green channel of the
# fill colour is driven by minmax_value.
temp = add_minmax(
    add_coordinates(merged_df.loc[merged_df['학원수'] > 0]),
    '학원수',
)

cross_layer = pdk.Layer(
    'PolygonLayer',
    data=temp,
    get_polygon='coordinates',
    get_fill_color='[10,255-200*minmax_value,10,150]',
    pickable=True,
    auto_highlight=True,
)
r = pdk.Deck(
    layers=[cross_layer, child_acc_layer],
    initial_view_state=view_states,
    map_style='mapbox://styles/mapbox/outdoors-v11',
    mapbox_key=mapbox_key,
)
r.to_html("./output/학원수.html")

In [None]:
# Preview files[30] (used at the end of the notebook as the city-boundary layer).
files[30].head(3)

### 34.지적도
* 제외 지목 : ['묘','철','천','제','구','수']
* '가'는 뭐지

In [None]:
# Preview the first rows of the cadastral map (files[33]).
files[33].head(3)

In [None]:
# Land category (JIMOK) = last character of the JIBUN string.
files[33]['JIMOK'] = files[33]['JIBUN'].apply(lambda x: x[-1])

In [None]:
# Land categories excluded from the analysis; keep every parcel whose
# category is not in this list.
out_jimok = ['묘','철','천','제','구','수']
f33 = files[33][~(files[33]['JIMOK'].isin(out_jimok))]

In [None]:
# Parcels that intersect a grid cell with at least one accident.
f1_over0 = files[1][files[1]['accident_cnt']>0]
f33_joind = gpd.sjoin(f33,f1_over0,how = 'left')
# Keep only parcels matched to an accident cell. Take a copy so the column
# assignments below (and in later cells) write to an independent frame rather
# than a slice of f33_joind.
f33_joind_over0 = f33_joind[f33_joind['accident_cnt'].notna()].copy()
# A fresh encoder keeps this cell runnable on its own; fit_transform refits
# the encoder on the JIMOK values anyway.
encoder = LabelEncoder()
f33_joind_over0['JIMOK_label'] = encoder.fit_transform(f33_joind_over0['JIMOK'])

In [None]:
# Land-category distribution among parcels in accident cells.
f33_joind_over0['JIMOK'].value_counts()

In [None]:
# Assign one random colour per land category and store it per row.
color_lookup = pdk.data_utils.assign_random_colors(f33_joind_over0['JIMOK'])
f33_joind_over0['color'] = f33_joind_over0['JIMOK'].apply(lambda row: color_lookup.get(row))

In [None]:
# Cadastral parcels in accident cells, filled with the per-category colour.
# The 'color' column is already on f33_joind_over0 (assigned in the preceding
# cell) and add_coordinates works on a copy of its input, so temp carries it.
# The colours used to be re-randomised on f33_joind_over0 here, after temp had
# been built. That never reached the layer and left the frame's colours out
# of sync with the rendered map, so the step is dropped.
temp = add_coordinates(f33_joind_over0)

cross_layer = pdk.Layer('PolygonLayer', data = temp,
                           get_polygon = 'coordinates',
                            get_fill_color = 'color',
                           pickable = True, auto_highlight = True)

r = pdk.Deck(layers = [cross_layer,child_acc_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key)
r.to_html("./output/지적도.html")

In [None]:
# City boundary: transparent fill with a black outline.
f30 = add_coordinates(files[30])
osan_line_layer = pdk.Layer('PolygonLayer', data = f30,
                           get_polygon = 'coordinates',
                           get_fill_color = [255,255,255,0],
                           get_line_color = [0,0,0],
                           get_line_width = 10,
                           pickable=True)
r = pdk.Deck(layers = [osan_line_layer],
            initial_view_state = view_states,
            map_style = 'mapbox://styles/mapbox/outdoors-v11',
            mapbox_key = mapbox_key)
# Written to ./output/ like every other exported map in this notebook
# (previously saved to the working directory).
r.to_html("./output/시경계.html")