In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
import os
from sklearn.preprocessing import MinMaxScaler

from pyproj import CRS
import pydeck as pdk
import matplotlib.font_manager as fm
from sklearn.preprocessing import LabelEncoder
fontprop = fm.FontProperties(fname='../font/NanumGothic.ttf', size=10)

  import pandas.util.testing as tm


### data load

In [2]:
%%time
#딕셔너리에 각 파일 순서대로 load
file_list = os.listdir('./data')
file_list = file_list[:-1]
files = {}
for i,file in enumerate(file_list):
    try :
        files[i] = pd.read_csv(f'./data/{file}')
    except : 
        files[i] = gpd.read_file(f'./data/{file}')



CPU times: user 12.2 s, sys: 33 ms, total: 12.2 s
Wall time: 12.9 s


In [3]:
mapbox_key = 'pk.eyJ1IjoibGFpcjA4MjYiLCJhIjoiY2tkcGoxcnRzMDZvODJxbXk0MWhlcWN2aSJ9.5-yjt_SUq4w5JII7CvD4cA'
map_center = [127.07377,37.14885] #오산시 중심
view_states = pdk.ViewState(longitude=map_center[0],
                          latitude = map_center[1],
                           zoom = 12)

In [4]:
def add_coordinates(data):
    '''geodataframe에서 중점 coordinates로 컬럼 생성
    '''
    from shapely.geometry import Polygon,MultiLineString,LineString,MultiPolygon
    #coord_center
    temp = data.copy()
    coord_center = temp.geometry.apply(lambda geo : [geo.centroid.x, geo.centroid.y])
    temp['coord_center'] = coord_center
    
    #coordinates type별로 조금씩 차이가 있음
    type_mls = type(MultiLineString())
    type_ls = type(LineString())
    type_poly = type(Polygon())
    type_mpoly = type(MultiPolygon())
    def get_coordinates(x):
        if type(x) == type_mls: 
            lon,lat = x[0].xy
        elif type(x) == type_ls:
            lon,lat = x.xy
        elif type(x) == type_poly:
            lon,lat = x.exterior.xy
        else :
            lon, lat = x[0].exterior.xy        
        return [[x, y] for x, y in zip(lon, lat)]
    
    temp['coordinates'] = temp.geometry.apply(lambda geo : get_coordinates(geo))
    
    return temp

def add_minmax(data,col):
    '''minmax scale 추가
    '''
    from sklearn.preprocessing import MinMaxScaler
    scaler = MinMaxScaler()
    temp = data.copy()
    values = scaler.fit_transform(temp[col].values.reshape(-1,1))
    temp['minmax_value'] = values
    
    return temp

def merge_to_grid(value_df,merged_df, col_name,lon = 'lon', lat = 'lat',rename = ''):
    '''value_df가 merge_df포함되는 갯수를 체크하고 merge진행. 
    GeoDataFrame인 경우 point 생성부분은 pass
    Parameters
    ----------
    value_df(DF, GDF) : 병합할 값의 DF, GDF
    merged_df(GDF) : grid 속성으로 되어 있는 GDF
    col_name(string) : value_df내에서 병합할 컬럼명, 해당컬럼이 없을 경우 새컬럼을 만들고 count
    lon(string) : '경도', 한글 등 다른 명칭으로 되어 있을때 대체
    lat(string) : '위도', 한글 등 다른 명칭으로 되어 있을때 대체
    rename(string) : col_name 변경시 사용
    ----
    return 
    value_df가 merge_df가 join된 df
    '''
    type_gpd = type(gpd.GeoDataFrame())
    if 'gid' in value_df.columns:
        value_df = value_df.rename(columns={'gid':'gid1'})
    
    if type(value_df) != type_gpd:
        value_df['geometry'] = value_df.apply(lambda x : make_Point(x[lon],x[lat]),axis= 1)
        value_df = gpd.GeoDataFrame(value_df)
        value_df.crs = CRS('epsg:4326')

    joined = gpd.sjoin(merged_df,value_df, how = 'left')
    if col_name not in joined.columns:
        joined.loc[~(joined['index_right'].isna()),col_name] = 1
    joined[col_name] = joined[col_name].fillna(0)

    joined = joined.groupby('gid')[col_name].sum().reset_index()
    grid_df = merged_df.merge(joined, how= 'left')
    
    if len(rename)>0:
        grid_df = grid_df.rename(columns = {col_name:rename})
    return grid_df

def draw_hist(data,col,title):
    '''data를 넣으면 histogram을 그려주고, 0 or NaN 없는 df 리턴
    
    '''
    temp = data.copy()
    temp = add_coordinates(temp[~((temp[col]==0)|(temp[col].isna()))])
    plt.hist(temp[col])
    plt.title(title,fontproperties = fontprop, fontsize= 15)
    plt.box(False)
    plt.show()
    return temp

def make_Point(x,y):
    from shapely.geometry import Point
    '''x,y좌표값이 있으면, 합쳐서 Point로 만듬
    '''
    return Point(x,y)

def move_df(main_df,merged_df, s_time, l_time, col_name):
    '''
    main_df(df) : 유동인구 df
    merged_df(df) : 합칠 df, ex) 어린이교통사고_df
    s_time(int) : 유동인구 시작시간
    l_time(int) : 유동인구 끝 시간
    col_name(string) : 저장 colmn 이름
    
    -----
    return 
    merged_df에 분할시간대 유동인구의 평균값 컬럼생성
    '''
    temp = main_df.copy()
    #stack형태로 변경
    melted = pd.melt(temp, id_vars = ['STD_YM','lon','lat'],var_name = 'time')
    melted.time = melted.time.apply(lambda x : int(x[-2:]))
    
    #필요한 시간대로 분할
    grouped = melted.groupby(['lon','lat','time'])['value'].sum().reset_index()
    grouped_cut = grouped[(grouped.time>=s_time)&(grouped.time<=l_time)]
    
    #point생성

    grouped_cut = grouped_cut.groupby(['lon','lat'])['value'].sum().reset_index()
    grouped_cut['geometry'] = grouped_cut.apply(lambda x : make_Point(x['lon'],x['lat']),axis= 1)
    
    #geo dataframe화 
    geo_df = gpd.GeoDataFrame(grouped_cut)
    geo_df.crs = CRS('epsg:4326')
    
    #merge_df와 결합
    joined = gpd.sjoin(merged_df,geo_df, how = 'left', op = 'intersects')
    joined['value'] = joined['value'].fillna(0)
    joined_g = joined.groupby('gid')['value'].mean()
    joined_g.name = col_name
    joined_g = joined_g.reset_index()
    moved = merged_df.merge(joined_g,how = 'left')
    
    return moved

In [5]:
#주정차단속 merge
grid_df = merge_to_grid(files[0],files[1],'주정차단속수',lon = '단속위치_경도',lat = '단속위치_위도')

#자동차대수 merge
grid_df = merge_to_grid(files[2],grid_df,'car_cnt',lon = '단속위치_경도',lat = '단속위치_위도',rename = '자동차대수')

#총인구 merge
grid_df = merge_to_grid(files[3],grid_df,'val',rename = '총인구수')

#유소년인구 merge
grid_df = merge_to_grid(files[4],grid_df,'val',rename = '유소년인구수')

#생산가능인구 merge
grid_df = merge_to_grid(files[5],grid_df,'val',rename = '생산가능인구인구수')

#고령인구 merge
grid_df = merge_to_grid(files[6],grid_df,'val',rename = '고령인구수')

#유동인구 merge
grid_df = move_df(files[7],grid_df,7,9,'유동인구7_9')
grid_df = move_df(files[7],grid_df,16,18,'유동인구16_18')

#어린이보호구역 300m buffer
files[8]['point'] = files[8].apply(lambda x : make_Point(x['보호구역_경도'], x['보호구역_위도']),axis= 1)
files[8]['geometry'] = files[8]['point'].apply(lambda x : x.buffer(0.00247745741629065))
f8 = gpd.GeoDataFrame(files[8])
f8.crs = CRS('EPSG:4326')
grid_df = merge_to_grid(f8,files[1],col_name = '어린이안전구역')

#어린이보호구역 CCTV설치대수
grid_df = merge_to_grid(files[8],grid_df,'CCTV설치대수',lon = '보호구역_경도',lat = '보호구역_위도')

#유치원수
grid_df = merge_to_grid(files[12],grid_df, '유치원수', lon = '시설위치_경도',lat = '시설위치_위도')

#무인카메라수 
grid_df = merge_to_grid(files[14],grid_df, '무인단속카메라수', lon = '설치위치_경도',lat = '설치위치_위도')

#표지판수
grid_df = merge_to_grid(files[15],grid_df, '표지판수')

#횡단보도수
grid_df = merge_to_grid(files[16],grid_df,'횡단보도수')

#과속방지턱
grid_df = merge_to_grid(files[17],grid_df,'과속방지턱수',lon = '설치위치_경도',lat = '설치위치_위도')

#신호등 
grid_df = merge_to_grid(files[18],grid_df,'신호등수')

#CCTV
grid_df = merge_to_grid(files[19].iloc[:-1,:],grid_df,'CCTV수',lon = '설치위치_경도',lat= '설치위치_위도')

#인도/자전거도로 수-> 다소 에러가 있음 
bike_df = files[20][files[20].BYYN == 'BYC001']
walk_df = files[20][files[20].BYYN == 'BYC002']

grid_df = merge_to_grid(walk_df,grid_df,'인도수')
grid_df = merge_to_grid(bike_df,grid_df,'자전거도로수')

#버스정류장
grid_df = merge_to_grid(files[21],grid_df,'버스정류장수',lon = '정류장 위치_경도',lat = '정류장 위치_위도')

#체육시설수
grid_df = merge_to_grid(files[26],grid_df,'체육시설수', lon = '설치위치_경도',lat = '설치위치_위도')

#건물개수
grid_df = merge_to_grid(files[27],grid_df,'건물개수')

#층수 
grid_df = merge_to_grid(files[27],grid_df,'floor',rename = '총층수')

#연면적 
grid_df = merge_to_grid(files[28],grid_df,'val', rename = '건물연면적')

#학원수
grid_df = merge_to_grid(files[29],grid_df,'학원수', lon = '시설위치_경도', lat = '시설위치_위도')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [6]:
#추정교통량
s_time = 7
l_time = 9

#시간범위 설정
f23 = files[23].rename(columns = {'상세도로망_LinkID':'link_id'})
f23['link_id'] = f23['link_id'].apply(lambda x : str(x)[:-2])
f23['link_id'] = f23['link_id'].astype(str)
f23 = f23[['link_id','시간적범위','전체 추정교통량', '승용차 추정교통량', '버스 추정교통량', '화물차 추정교통량']]
f23 = f23[f23['시간적범위'] != '전일']
f23.시간적범위 = f23.시간적범위.astype(int)
melted = pd.melt(f23, id_vars = ['link_id','시간적범위'])

f23_time_cut = f23[(f23.시간적범위>=s_time)&(f23.시간적범위<=l_time)]
f23_time_cut = f23_time_cut.groupby('link_id')['전체 추정교통량', '승용차 추정교통량', '버스 추정교통량', '화물차 추정교통량'].sum().reset_index()

#컬럼이름 재설정
rename_ls = []
for i, col in enumerate(f23_time_cut.columns):
    if i> 0:
        new_name = col + f'{s_time}_{l_time}'
        rename_ls.append(new_name)
    else :
        new_name = col
        rename_ls.append(new_name)

f23_time_cut.columns = rename_ls

#intersect and merge
road_merged = files[22][['link_id','geometry']].merge(f23_time_cut,how = 'left')
temp = gpd.sjoin(grid_df[['gid','geometry']],road_merged)
temp = temp.groupby('gid')[rename_ls].mean().reset_index()
grid_df = grid_df.merge(temp,on = 'gid', how = 'left')
grid_df = grid_df.fillna(0)

  from ipykernel import kernelapp as app


In [7]:
#혼잡빈도강도

#intersect and merge
files[24]['link_id'] = files[24]['상세도로망_LinkID'].apply(lambda x : str(x)[:-2])
f24_gt = files[24][files[24]['시간적범위'] == '전일']
f24_gt = f24_gt.groupby(['link_id'])['혼잡빈도강도'].sum().reset_index()

road_merged = files[22][['link_id','geometry']].merge(f24_gt,how = 'left')
temp = gpd.sjoin(grid_df[['gid','geometry']],road_merged)
temp = temp.groupby('gid')['혼잡빈도강도'].mean().reset_index()

grid_df = grid_df.merge(temp,how = 'left')
grid_df = grid_df.fillna(0)

In [8]:
#혼잡시간강도 `
#intersect and merge
files[25]['link_id'] = files[25]['상세도로망_LinkID'].apply(lambda x : str(x)[:-2])
f25_gt = files[25][files[25]['시간적범위'] == '전일']
f25_gt = f25_gt.groupby(['link_id'])['혼잡시간강도'].sum().reset_index()

road_merged = files[22][['link_id','geometry']].merge(f25_gt,how = 'left')
temp = gpd.sjoin(grid_df[['gid','geometry']],road_merged)
temp = temp.groupby('gid')['혼잡시간강도'].mean().reset_index()

grid_df = grid_df.merge(temp,how = 'left')
grid_df = grid_df.fillna(0)

In [9]:
grid_df.columns

Index(['gid', 'accident_cnt', 'geometry', '어린이안전구역', 'CCTV설치대수', '유치원수',
       '무인단속카메라수', '표지판수', '횡단보도수', '과속방지턱수', '신호등수', 'CCTV수', '인도수', '자전거도로수',
       '버스정류장수', '체육시설수', '건물개수', '총층수', '건물연면적', '학원수', '전체 추정교통량7_9',
       '승용차 추정교통량7_9', '버스 추정교통량7_9', '화물차 추정교통량7_9', '혼잡빈도강도', '혼잡시간강도'],
      dtype='object')

In [10]:
pd.options.display.max_columns = 50

In [12]:
#안전구역 중 사고 발생이 있는 구역
grid_df[(grid_df.accident_cnt>0)&(grid_df['어린이안전구역']>0)]

Unnamed: 0,gid,accident_cnt,geometry,어린이안전구역,CCTV설치대수,유치원수,무인단속카메라수,표지판수,횡단보도수,과속방지턱수,신호등수,CCTV수,인도수,자전거도로수,버스정류장수,체육시설수,건물개수,총층수,건물연면적,학원수,전체 추정교통량7_9,승용차 추정교통량7_9,버스 추정교통량7_9,화물차 추정교통량7_9,혼잡빈도강도,혼잡시간강도
1056,다사585098,1,"MULTIPOLYGON (((127.03244 37.18606, 127.03244 ...",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26.0,26.0,1280.36,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1199,다사587098,1,"MULTIPOLYGON (((127.03470 37.18607, 127.03469 ...",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,21.0,21.0,1249.55,0.0,327.950000,212.400000,0.400000,115.150000,88.100000,94.500000
1202,다사587101,1,"MULTIPOLYGON (((127.03468 37.18877, 127.03467 ...",1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16.0,16.0,1779.56,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1321,다사589069,1,"MULTIPOLYGON (((127.03711 37.15994, 127.03710 ...",1.0,0.0,0.0,0.0,2.0,1.0,0.0,1.0,0.0,0.0,6.0,0.0,0.0,2.0,2.0,10566.67,0.0,2604.615000,2243.090000,32.420000,329.100000,103.715000,143.070000
1428,다사590098,2,"MULTIPOLYGON (((127.03808 37.18608, 127.03807 ...",1.0,0.0,0.0,0.0,3.0,3.0,0.0,3.0,0.0,1.0,4.0,0.0,0.0,2.0,2.0,17352.14,0.0,2595.807857,2198.132857,38.793571,358.885000,36.045714,67.565714
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4012,다사629042,1,"MULTIPOLYGON (((127.08229 37.13576, 127.08229 ...",3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,2.0,4.0,16.0,16.0,20932.99,12.0,992.620000,822.580000,5.250000,164.800000,144.580000,180.060000
4049,다사630040,2,"MULTIPOLYGON (((127.08343 37.13397, 127.08342 ...",4.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,30.0,30.0,5593.17,0.0,5002.290000,4102.950000,86.680000,812.630000,199.150000,199.770000
4086,다사631038,2,"MULTIPOLYGON (((127.08456 37.13217, 127.08456 ...",4.0,0.0,0.0,0.0,1.0,2.0,0.0,6.0,1.0,2.0,2.0,1.0,0.0,5.0,5.0,17056.61,9.0,2855.548000,2350.460000,56.276000,448.802000,169.330000,180.900000
4094,다사631046,1,"MULTIPOLYGON (((127.08452 37.13938, 127.08452 ...",1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1015.75,0.0,13571.806667,10307.670000,640.936667,2623.203333,46.783333,58.736667
