# 빅프9조

 필수 라이브러리 import

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from selenium import webdriver as wb
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time
import math

상수 선언

In [2]:
# Scale factors: latitude offsets are divided by NS_DIFF_1M and longitude
# offsets by EW_DIFF_1M before distances are compared with PATROL_HR /
# GRID_DIFF, and multiplied by them when grid offsets are generated.
# NOTE(review): the names suggest "degrees per 1 metre" (north-south /
# east-west); 1 m of latitude is commonly quoted as ~0.000009 degrees, so
# confirm these values are the intended approximation.
NS_DIFF_1M = 0.000007
EW_DIFF_1M = 0.000009
# Radius of the patrol circle, in the same unit as GRID_DIFF.
PATROL_HR = 900
# Spacing between neighbouring grid points.
GRID_DIFF = 90
# Directory prefix and text encoding used for most CSV reads/writes below.
data_path = "data/"
encoding = "utf-8-sig"
# Sanggye 1 (상계1)
# Centre point as [latitude, longitude].
police = np.array([37.679699, 127.055066], dtype=np.float64)

원형으로 필터링

In [3]:
def filtering(police, grid):
    """Keep only the grid points that lie inside the patrol circle.

    Latitude and longitude offsets from ``police`` are rescaled with
    ``NS_DIFF_1M`` and ``EW_DIFF_1M`` respectively, and the Euclidean norm
    of the rescaled offset is compared against ``PATROL_HR + 10``.

    Args:
        police: Centre point as ``[latitude, longitude]``.
        grid: 2-D NumPy array whose rows are ``[latitude, longitude]``.

    Returns:
        The rows of ``grid`` whose rescaled distance from ``police`` is at
        most ``PATROL_HR + 10``.
    """
    offset = grid - police
    north = offset[:, 0] / NS_DIFF_1M
    east = offset[:, 1] / EW_DIFF_1M
    distance = np.sqrt(np.square(north) + np.square(east))
    inside = distance <= PATROL_HR + 10
    return grid[inside, :]

격자점 반환

In [4]:
def grids(police):
    """Return the lattice of grid points around ``police`` inside the patrol circle.

    A square lattice with ``PATROL_HR // GRID_DIFF`` steps on each side of
    the centre is generated (spacing ``GRID_DIFF``, converted to degrees
    with ``NS_DIFF_1M`` / ``EW_DIFF_1M``) and then cropped by
    ``filtering``.

    Args:
        police: Centre point as ``[latitude, longitude]``.

    Returns:
        2-D array of ``[latitude, longitude]`` rows, ordered by latitude
        step first and longitude step second.
    """
    half_steps = PATROL_HR // GRID_DIFF
    steps = np.arange(-half_steps, half_steps + 1, dtype=np.float64)

    latitudes = police[0] + steps * (NS_DIFF_1M * GRID_DIFF)
    longitudes = police[1] + steps * (EW_DIFF_1M * GRID_DIFF)

    # "ij" indexing: first axis varies latitude, second axis varies longitude.
    lat_mesh, lon_mesh = np.meshgrid(latitudes, longitudes, indexing="ij")
    lattice = np.stack((lat_mesh, lon_mesh), axis=-1)

    return filtering(police, lattice.reshape(-1, 2))

In [5]:
def draw_marker(m, grid, color):
    """Add one circle marker per point of ``grid`` to the map ``m``.

    Args:
        m: folium map the markers are added to.
        grid: Iterable of locations, each passed as ``location`` to
            ``folium.CircleMarker``.
        color: Hex colour without the leading ``#`` (e.g. ``"ffffff"``).
    """
    hex_color = "#" + color
    for point in grid:
        marker = folium.CircleMarker(location=point, radius=5, color=hex_color)
        marker.add_to(m)

In [6]:
# Create the base map centred on the `police` coordinates.
# NOTE(review): zoom_start=60 is much larger than the zoom levels web map
# tiles are usually served at — confirm the intended value.
m = folium.Map(location = police, zoom_start=60)
# Mark the centre point itself with a star icon; the popup text is '지구대'.
folium.Marker(location=police, 
              popup='지구대',
              icon=folium.Icon(color='lightblue', icon='star')
).add_to(m)

<folium.map.Marker at 0x15e4eb91f50>

In [7]:
# Generate the grid points around the centre and draw them as white
# ("#ffffff") circle markers on the map.
police_grid = grids(police)
draw_marker(m, police_grid, color='ffffff')

In [8]:
# Wrap the grid array in a DataFrame with columns "위도" and "경도"
# and add an empty (NaN) "점수" column that is filled in later.
police_grid = pd.DataFrame(police_grid, columns=["위도", "경도"])
police_grid["점수"] = np.nan

In [9]:
m

# 샘플링

In [10]:
# Number of random points drawn around each grid point; sampled_grids()
# emits SAMPLING + 1 rows per grid point (the point itself plus the samples).
SAMPLING = 5

In [11]:
def sampled_grids(grid):
    """Expand every grid point into itself plus ``SAMPLING`` random neighbours.

    For each row of ``grid`` a block of ``SAMPLING + 1`` rows is produced:
    the first row is the grid point unchanged, the remaining rows are the
    grid point shifted by a uniform random offset of at most half a grid
    step (``GRID_DIFF / 2``) along each axis.

    Args:
        grid: 2-D array of ``[latitude, longitude]`` rows.

    Returns:
        Array of shape ``(len(grid) * (SAMPLING + 1), 2)``.
    """
    per_cell = SAMPLING + 1
    half_ns = (GRID_DIFF/2)*NS_DIFF_1M
    half_ew = (GRID_DIFF/2)*EW_DIFF_1M

    out = np.zeros((grid.shape[0] * per_cell, 2), dtype=np.float64)
    for start in range(0, out.shape[0], per_cell):
        # Row 0 of the block stays at zero offset, i.e. the grid point itself.
        jitter = np.zeros((per_cell, 2), dtype=np.float64)
        jitter[1:] = np.random.uniform(-1, 1, (per_cell - 1, 2))
        jitter[:, 0] *= half_ns
        jitter[:, 1] *= half_ew
        jitter += grid[start // per_cell]
        out[start:start + per_cell] += jitter
    return out

In [12]:
'''
police_sampled = sampled_grids(police_grid)
police_sampled

police_sampled = pd.DataFrame(police_sampled, columns = ["위도", "경도"])
police_sampled.to_csv(data_path + "상계1위경도샘플.csv", index=False, encoding=encoding)
'''

'\npolice_sampled = sampled_grids(police_grid)\npolice_sampled\n\npolice_sampled = pd.DataFrame(police_sampled, columns = ["위도", "경도"])\npolice_sampled.to_csv(data_path + "상계1위경도샘플.csv", index=False, encoding=encoding)\n'

In [13]:
# Load the address table for the sampled points.
police_addr_data=pd.read_csv(data_path + "상계1샘플주소.csv", encoding=encoding)
# Consecutive blocks of SAMPLING+1 rows share one index value — presumably
# the number of the grid point the samples belong to (sampled_grids() emits
# SAMPLING+1 rows per grid point).
police_addr_data.index=[i//(SAMPLING+1) for i in range(police_addr_data.shape[0])]
# Placeholder columns, filled in by later cells.
police_addr_data["유형"] = None
police_addr_data["확률"] = None
police_addr_data

Unnamed: 0,위도,경도,지번주소,도로명주소,유형,확률
0,37.673399,127.054256,서울특별시 노원구 상계동 1044,서울특별시 노원구 동일로231길 24,,
0,37.673447,127.054045,서울특별시 노원구 상계동 1044,서울특별시 노원구 동일로231길 24,,
0,37.673578,127.054242,서울특별시 노원구 상계동 1044,서울특별시 노원구 동일로231길 24,,
0,37.673156,127.054325,서울특별시 노원구 상계동 1044,서울특별시 노원구 동일로231길 24,,
0,37.673209,127.054497,서울특별시 노원구 상계동 1044-2,서울특별시 노원구 동일로 1625,,
...,...,...,...,...,...,...
324,37.686257,127.055793,서울특별시 노원구 상계동 산 12-1,서울특별시 노원구 동일로 1772,,
324,37.685699,127.055507,서울특별시 노원구 상계동 1202-1,서울특별시 노원구 누원로 18,,
324,37.686189,127.055881,서울특별시 노원구 상계동 산 12-1,서울특별시 노원구 동일로 1772,,
324,37.685953,127.055901,서울특별시 노원구 상계동 산 12-1,서울특별시 노원구 동일로 1772,,


# 유형 크롤링

In [14]:
# Pause (passed to time.sleep, i.e. seconds) after loading the page and
# after each search submission.
DELAY_TIME = 1
# Page opened by openweb().
map_url = "https://map.kakao.com/"

In [15]:
def openweb(mode="window"):
    """Start a Chrome WebDriver and load the map page.

    Args:
        mode: ``"no"`` starts Chrome with the ``headless`` argument; any
            other value starts it with default options.

    Returns:
        The WebDriver, after ``map_url`` has been requested and
        ``DELAY_TIME`` seconds have passed.
    """
    chrome_kwargs = {}
    if mode == "no":
        headless_options = wb.ChromeOptions()
        headless_options.add_argument('headless')
        chrome_kwargs["options"] = headless_options

    browser = wb.Chrome(**chrome_kwargs)
    browser.get(map_url)
    time.sleep(DELAY_TIME)
    return browser

In [16]:
def search(browser, address):
    """Type ``address`` into the map page's search box and submit it.

    Args:
        browser: Selenium WebDriver on which ``find_element`` is called.
        address: Text to search for.
    """
    # The search input is located by its element id.
    search_box = browser.find_element(By.ID, "search.keyword.query")
    # Clear any leftover query: select-all + backspace, repeated until the
    # input's "value" attribute is empty.
    while search_box.get_attribute("value"):
        search_box.send_keys(Keys.CONTROL, 'a')
        search_box.send_keys(Keys.BACKSPACE)
    search_box.send_keys(address)
    search_box.send_keys(Keys.RETURN)
    # Fixed pause after submitting — presumably to let the results render.
    time.sleep(DELAY_TIME)

In [17]:
def find_postage(browser, num_addr):
    """Classify a lot-number address by mountain-lot marker or postal code.

    Args:
        browser: Selenium WebDriver passed on to ``search``.
        num_addr: Lot-number address string (e.g. ``"... 상계동 산 12-1"``).

    Returns:
        ``"산"`` when the address carries the mountain-lot marker, the text
        of the element with class ``zip`` when the search result has one,
        and ``"nozip"`` otherwise.
    """
    # The mountain-lot marker is a standalone "산" token in front of the lot
    # number ("산 12-1"), or fused with it ("산12-1").  A plain substring
    # test would also fire on place names that merely contain the character
    # (e.g. "용산구", "부산"), so tokens are checked individually.
    for token in num_addr.split():
        if token == "산" or (token[:1] == "산" and token[1:2].isdigit()):
            return "산"

    search(browser, num_addr)
    try:
        postage = browser.find_element(By.CLASS_NAME, "zip")
    except NoSuchElementException:
        return "nozip"
    else:
        return postage.text
    
def find_build_name(browser, road_addr, num_addr):
    """Look up the building name for an address, with a postal-code fallback.

    Args:
        browser: Selenium WebDriver used for the searches.
        road_addr: Road-name address searched first.
        num_addr: Lot-number address handed to ``find_postage`` when the
            road-address result has no element with class ``building``.

    Returns:
        The text of the ``building`` element, or the result of
        ``find_postage(browser, num_addr)`` when no such element exists.
    """
    search(browser, road_addr)
    try:
        building = browser.find_element(By.CLASS_NAME, "building")
    except NoSuchElementException:
        building = None

    if building is None:
        return find_postage(browser, num_addr)
    return building.text

In [18]:
'''
browser = openweb("no")
for i in range(len(police_addr_data.index)):
    police_addr_data.iloc[i, 4] = find_build_name(browser, police_addr_data.iloc[i, 3], police_addr_data.iloc[i, 2])
police_addr_data.to_csv(data_path+"유형.csv",encoding=encoding)
'''

'\nbrowser = openweb("no")\nfor i in range(len(police_addr_data.index)):\n    police_addr_data.iloc[i, 4] = find_build_name(browser, police_addr_data.iloc[i, 3], police_addr_data.iloc[i, 2])\npolice_addr_data.to_csv(data_path+"유형.csv",encoding=encoding)\n'

In [19]:
# Reload the address table from the saved CSV.
police_addr_data = pd.read_csv(data_path + "유형.csv", encoding=encoding)
# "index" column: consecutive blocks of SAMPLING+1 rows get the same number —
# presumably the number of the grid point the samples belong to.
police_addr_data["index"]=[i//(SAMPLING+1) for i in range(police_addr_data.shape[0])]
police_addr_data

Unnamed: 0,위도,경도,지번주소,도로명주소,이름,확률,index
0,37.673399,127.054256,서울특별시 노원구 상계동 1044,서울특별시 노원구 동일로231길 24,노일중학교,,0
1,37.673447,127.054045,서울특별시 노원구 상계동 1044,서울특별시 노원구 동일로231길 24,노일중학교,,0
2,37.673578,127.054242,서울특별시 노원구 상계동 1044,서울특별시 노원구 동일로231길 24,노일중학교,,0
3,37.673156,127.054325,서울특별시 노원구 상계동 1044,서울특별시 노원구 동일로231길 24,노일중학교,,0
4,37.673209,127.054497,서울특별시 노원구 상계동 1044-2,서울특별시 노원구 동일로 1625,노일초등학교,,0
...,...,...,...,...,...,...,...
1945,37.686257,127.055793,서울특별시 노원구 상계동 산 12-1,서울특별시 노원구 동일로 1772,산,,324
1946,37.685699,127.055507,서울특별시 노원구 상계동 1202-1,서울특별시 노원구 누원로 18,수락리버시티4단지,,324
1947,37.686189,127.055881,서울특별시 노원구 상계동 산 12-1,서울특별시 노원구 동일로 1772,산,,324
1948,37.685953,127.055901,서울특별시 노원구 상계동 산 12-1,서울특별시 노원구 동일로 1772,산,,324


In [20]:
'''
crime_loc = pd.read_csv(data_path+"범죄장소.csv", encoding='cp949')
crime_loc = crime_loc.groupby("장소")
crime_loc = crime_loc.sum()
crime_loc /= crime_loc.sum()
crime_loc.to_csv(data_path + "범죄장소확률.csv", encoding=encoding)
'''

'\ncrime_loc = pd.read_csv(data_path+"범죄장소.csv", encoding=\'cp949\')\ncrime_loc = crime_loc.groupby("장소")\ncrime_loc = crime_loc.sum()\ncrime_loc /= crime_loc.sum()\ncrime_loc.to_csv(data_path + "범죄장소확률.csv", encoding=encoding)\n'

In [21]:
'''
per = pd.DataFrame()
per["공업"] = crime_per[["공사장광산", "공장", "창고"]].sum(axis=1)
per["교통"] = crime_per[["기타교통수단내", "지하철", "역,대합실"]].sum(axis=1)
per["사무"] = crime_per[["금융기관", "사무실"]].sum(axis=1)
per["숙박"] = crime_per[["숙박업소목욕탕"]].sum(axis=1)
per["상업"] = crime_per[["시장,노점", "상점", "흥행장"]].sum(axis=1)
per["아파트"] = crime_per[["아파트,연립다세대"]].sum(axis=1)
per["주택"] = crime_per[["단독주택"]].sum(axis=1)
per["유흥"] = crime_per[["유흥접객업소"]].sum(axis=1)
per["공원"] = crime_per[["유원지", "공지"]].sum(axis=1)
per["의료"] = crime_per[["의료기관"]].sum(axis=1)
per["종교"] = crime_per[["종교기관"]].sum(axis=1)
per["학교"] = crime_per[["학교"]].sum(axis=1)
per["기타"] = crime_per[["구금장소", "고속도로", "기타", "부대"]].sum(axis=1)
per["야외"] = crime_per[["노상", "산야"]].sum(axis=1)
per
'''

'\nper = pd.DataFrame()\nper["공업"] = crime_per[["공사장광산", "공장", "창고"]].sum(axis=1)\nper["교통"] = crime_per[["기타교통수단내", "지하철", "역,대합실"]].sum(axis=1)\nper["사무"] = crime_per[["금융기관", "사무실"]].sum(axis=1)\nper["숙박"] = crime_per[["숙박업소목욕탕"]].sum(axis=1)\nper["상업"] = crime_per[["시장,노점", "상점", "흥행장"]].sum(axis=1)\nper["아파트"] = crime_per[["아파트,연립다세대"]].sum(axis=1)\nper["주택"] = crime_per[["단독주택"]].sum(axis=1)\nper["유흥"] = crime_per[["유흥접객업소"]].sum(axis=1)\nper["공원"] = crime_per[["유원지", "공지"]].sum(axis=1)\nper["의료"] = crime_per[["의료기관"]].sum(axis=1)\nper["종교"] = crime_per[["종교기관"]].sum(axis=1)\nper["학교"] = crime_per[["학교"]].sum(axis=1)\nper["기타"] = crime_per[["구금장소", "고속도로", "기타", "부대"]].sum(axis=1)\nper["야외"] = crime_per[["노상", "산야"]].sum(axis=1)\nper\n'

In [22]:
#per.to_csv("범죄장소확률.csv", index=False, encoding='cp949')

In [23]:
# Load the per-category table (read as cp949) and display one cell as a
# check: row 0, column "교통".
crime_per = pd.read_csv(data_path+"범죄장소확률.csv", encoding='cp949')
crime_per.loc[0,"교통"]

0.04346287

In [24]:
# Build the lookup table "place name -> category" from a text file whose
# lines have the form "<name> - <category>".
namespace = dict()
with open(data_path+"이름유형분류.txt", 'r', encoding='UTF8') as file:
    for i in file:
        # Skip blank lines (e.g. a trailing empty line); splitting them
        # yields a single element and t[1] would raise IndexError.
        if not i.strip():
            continue
        t=i.split(" - ")
        namespace[t[0]] = t[1].strip()
namespace

{'(우) 01319': '주택',
 '(우) 01320': '주택',
 '(우) 01321': '주택',
 '(우) 01322': '주택',
 '(우) 01323': '주택',
 '(우) 01324': '주택',
 '(우) 01605': '주택',
 '(우) 01606': '주택',
 '(우) 01608': '주택',
 '(우) 01609': '주택',
 '(우) 01610': '주택',
 '(우) 01611': '주택',
 '(우) 01612': '주택',
 '(우) 01613': '주택',
 '(우) 01621': '주택',
 '(우) 01622': '주택',
 '(우) 01623': '주택',
 '(우) 01625': '주택',
 '(우) 01626': '주택',
 '(우) 01627': '주택',
 '(우) 01628': '주택',
 '(우) 01629': '주택',
 '(우) 01630': '주택',
 '(주)씨에스윈': '사무',
 '1': '기타',
 'GS-25': '상업',
 'K마트': '상업',
 'nozip': '야외',
 '가로판매대': '상업',
 '경서 레디빌 A동': '아파트',
 '경서아트빌': '아파트',
 '경일하우스': '주택',
 '계산노인정': '주택',
 '광산산업': '공업',
 '구두수선대': '상업',
 '구립상일경로당': '사무',
 '극동아파트': '아파트',
 '기독교대한성결교회영광교회': '종교',
 '기아자동차서비스도봉사업소': '공업',
 '남광하이빌': '아파트',
 '노원교회': '종교',
 '노원어린이집': '학교',
 '노일중학교': '학교',
 '노일초등학교': '학교',
 '대건주택': '주택',
 '대망드림힐아파트': '아파트',
 '대명하우스': '주택',
 '대원파크빌': '아파트',
 '대은빌딩': '상업',
 '대한빌딩': '상업',
 '도봉2 파출소': '사무',
 '도봉기적의도서관': '학교',
 '도봉동 실내스포츠센터': '공원',
 '도봉파크빌2단지': '아파트',
 '도봉한

In [25]:
# For every known place name, write the row-0 value of its category column
# in crime_per into "확률" for all rows carrying that name.  Rows whose
# "이름" is not a key of namespace are left unchanged.
for i in namespace:
    police_addr_data.loc[police_addr_data["이름"]==i, "확률"] = crime_per.loc[0, namespace[i]]
    
police_addr_data

Unnamed: 0,위도,경도,지번주소,도로명주소,이름,확률,index
0,37.673399,127.054256,서울특별시 노원구 상계동 1044,서울특별시 노원구 동일로231길 24,노일중학교,0.004735,0
1,37.673447,127.054045,서울특별시 노원구 상계동 1044,서울특별시 노원구 동일로231길 24,노일중학교,0.004735,0
2,37.673578,127.054242,서울특별시 노원구 상계동 1044,서울특별시 노원구 동일로231길 24,노일중학교,0.004735,0
3,37.673156,127.054325,서울특별시 노원구 상계동 1044,서울특별시 노원구 동일로231길 24,노일중학교,0.004735,0
4,37.673209,127.054497,서울특별시 노원구 상계동 1044-2,서울특별시 노원구 동일로 1625,노일초등학교,0.004735,0
...,...,...,...,...,...,...,...
1945,37.686257,127.055793,서울특별시 노원구 상계동 산 12-1,서울특별시 노원구 동일로 1772,산,0.324937,324
1946,37.685699,127.055507,서울특별시 노원구 상계동 1202-1,서울특별시 노원구 누원로 18,수락리버시티4단지,0.108198,324
1947,37.686189,127.055881,서울특별시 노원구 상계동 산 12-1,서울특별시 노원구 동일로 1772,산,0.324937,324
1948,37.685953,127.055901,서울특별시 노원구 상계동 산 12-1,서울특별시 노원구 동일로 1772,산,0.324937,324


In [26]:
# Sum the "확률" values of all rows that share the same "index" value.
score = police_addr_data[["index", "확률"]].groupby("index").sum()
# Assign the sums to the grid table — presumably matched by index label, so
# the "index" values are expected to line up with police_grid's row index.
police_grid["점수"] = score
# Min-max normalise the score to the range 0..255.
# NOTE(review): if maxs == mins the division below is by zero — confirm this
# cannot happen with real data.
mins = police_grid["점수"].min()
maxs = police_grid["점수"].max()
police_grid["점수"] = (police_grid["점수"] - mins) / (maxs - mins) * 255
police_grid

Unnamed: 0,위도,경도,점수
0,37.673399,127.054256,2.244119
1,37.673399,127.055066,17.807417
2,37.673399,127.055876,112.825908
3,37.674029,127.051826,98.784885
4,37.674029,127.052636,98.784885
...,...,...,...
320,37.685369,127.057496,255.000000
321,37.685369,127.058306,255.000000
322,37.685999,127.054256,83.914324
323,37.685999,127.055066,83.914324


In [27]:
def colorstr(num):
    """Map a 0-255 score to a white-to-red hex colour string.

    Args:
        num: Score; truncated with ``int()``.  Values outside 0-255 are
            clamped to that range.

    Returns:
        ``"#ffXXXX"`` where ``XX`` is the two-digit hex of
        ``255 - int(num)``: score 0 gives ``"#ffffff"``, score 255 gives
        ``"#ff0000"``.

    Raises:
        ValueError: If ``num`` is NaN (``int()`` cannot convert it).
    """
    level = 255 - int(num)
    # Clamp so out-of-range scores still yield a valid colour; unclamped,
    # a score above 255 produced strings like "#ffx2dx2d" and a negative
    # score produced a three-digit channel.
    level = max(0, min(255, level))
    channel = format(level, "02x")
    return "#ff" + channel * 2

In [28]:
# Draw one filled circle per grid point, coloured by its score (column 2)
# at the point's coordinates (columns 0-1).
# NOTE(review): index labels are fed to .iloc, which is positional; this
# assumes police_grid has the default 0..n-1 index — confirm.
for i in police_grid.index:
    folium.CircleMarker(location=police_grid.iloc[i,:2].values,
                        radius=20,
                        color=colorstr(police_grid.iloc[i,2]),
                        fill=True).add_to(m)

m

In [29]:
def location_count(grid, location):
    """Count the rows of ``grid`` within half a grid step of ``location``.

    Args:
        grid: DataFrame whose first two columns are latitude and longitude.
        location: Reference point as ``[latitude, longitude]``.

    Returns:
        Number of rows whose rescaled distance from ``location`` is at most
        ``GRID_DIFF / 2``.
    """
    offsets = grid - location
    offsets.iloc[:, 0] = offsets.iloc[:, 0] / NS_DIFF_1M
    offsets.iloc[:, 1] = offsets.iloc[:, 1] / EW_DIFF_1M
    distance = np.sqrt(np.square(offsets).sum(axis=1))
    nearby = grid.loc[distance <= GRID_DIFF / 2, :]
    return len(nearby.index)

def police_count(grid, police):
    """Count the rows of ``grid`` that fall inside the patrol circle.

    Args:
        grid: DataFrame whose first two columns are latitude and longitude.
        police: Centre point as ``[latitude, longitude]``.

    Returns:
        Number of rows whose rescaled distance from ``police`` is at most
        ``PATROL_HR + GRID_DIFF / 2``.
    """
    offsets = grid - police
    offsets.iloc[:, 0] = offsets.iloc[:, 0] / NS_DIFF_1M
    offsets.iloc[:, 1] = offsets.iloc[:, 1] / EW_DIFF_1M
    distance = np.sqrt(np.square(offsets).sum(axis=1))
    covered = grid.loc[distance <= PATROL_HR + GRID_DIFF / 2, :]
    return len(covered.index)

In [30]:
'''
data = pd.read_csv("안전시설물.csv", encoding='cp949')
def dong_guard_data(data, dong):
    re=""
    for i in dong:
        re+= i+"|"
    data = data.loc[data["읍면동명"].str.contains(re[:-1]) , ["포인트 wkt", "읍면동명"]]
    def decompose(word):
        temp = word.split("(")
        return temp[-1][:-1]
    def NS(word):
        temp = word.split(" ")
        return temp[1]
    def EW(word):
        temp = word.split(" ")
        return temp[0]
    data["포인트 wkt"] = data["포인트 wkt"].apply(decompose)
    data["위도"] = data["포인트 wkt"].apply(NS)
    data["위도"] = data["위도"].astype(np.float64)
    data["경도"] = data["포인트 wkt"].apply(EW)
    data["경도"] = data["경도"].astype(np.float64)
    return data.drop(columns=["포인트 wkt", "읍면동명"]).reset_index(drop=True)

data = dong_guard_data(data, ["도봉동", "상계동"])
'''

'\ndata = pd.read_csv("안전시설물.csv", encoding=\'cp949\')\ndef dong_guard_data(data, dong):\n    re=""\n    for i in dong:\n        re+= i+"|"\n    data = data.loc[data["읍면동명"].str.contains(re[:-1]) , ["포인트 wkt", "읍면동명"]]\n    def decompose(word):\n        temp = word.split("(")\n        return temp[-1][:-1]\n    def NS(word):\n        temp = word.split(" ")\n        return temp[1]\n    def EW(word):\n        temp = word.split(" ")\n        return temp[0]\n    data["포인트 wkt"] = data["포인트 wkt"].apply(decompose)\n    data["위도"] = data["포인트 wkt"].apply(NS)\n    data["위도"] = data["위도"].astype(np.float64)\n    data["경도"] = data["포인트 wkt"].apply(EW)\n    data["경도"] = data["경도"].astype(np.float64)\n    return data.drop(columns=["포인트 wkt", "읍면동명"]).reset_index(drop=True)\n\ndata = dong_guard_data(data, ["도봉동", "상계동"])\n'

In [31]:
# Load the saved facility coordinates (file "상계1안심시설물.csv") and display them.
ansim_data = pd.read_csv(data_path+"상계1안심시설물.csv", encoding=encoding)
ansim_data

Unnamed: 0,위도,경도
0,37.677522,127.044616
1,37.677482,127.044476
2,37.677482,127.044476
3,37.677289,127.044185
4,37.677170,127.044018
...,...,...
273,37.675134,127.053706
274,37.675134,127.053706
275,37.675047,127.054265
276,37.675130,127.054613


In [32]:
#draw_marker(m, ansim_data.values, color='0000ff')
m

In [33]:
# Load the two CCTV coordinate tables; unlike the utf-8-sig files above,
# these are read as cp949.
dobong_cctv=pd.read_csv(data_path+"dobong_cctv.csv", encoding='cp949')
nowon_cctv=pd.read_csv(data_path+"nowon_cctv.csv", encoding='cp949')

In [34]:
# Stack both tables (nowon first, then dobong).  ignore_index is not passed,
# so each part keeps its own row labels in the combined frame.
cctv = pd.concat([nowon_cctv, dobong_cctv])
cctv

Unnamed: 0,위도,경도
0,37.6578,127.0613
1,37.6575,127.0614
2,37.6591,127.0568
3,37.6590,127.0565
4,37.6683,127.0620
...,...,...
1593,37.6595,127.0450
1594,37.6545,127.0460
1595,37.6555,127.0420
1596,37.6530,127.0431


In [35]:
#draw_marker(m, cctv.values, color='00ffff')
m

In [36]:
# Save the combined CCTV table; index=False is not passed, so the row labels
# are written as the first column.
cctv.to_csv(data_path+"nowon_dobong_cctv.csv", encoding=encoding)

In [37]:
def light_cut(police, light_data):
    """Crop ``light_data`` to the square bounding box around the patrol area.

    The box extends ``PATROL_HR + GRID_DIFF / 2`` from ``police`` in each
    direction, converted to degrees with ``NS_DIFF_1M`` / ``EW_DIFF_1M``.
    Both edges are inclusive.

    Args:
        police: Centre point as ``[latitude, longitude]``.
        light_data: DataFrame with "위도" and "경도" columns.

    Returns:
        The rows of ``light_data`` that fall inside the box.
    """
    reach = PATROL_HR+GRID_DIFF/2
    lat_reach = reach*NS_DIFF_1M
    lon_reach = reach*EW_DIFF_1M

    in_lat = light_data["위도"].between(police[0] - lat_reach, police[0] + lat_reach)
    in_lon = light_data["경도"].between(police[1] - lon_reach, police[1] + lon_reach)
    return light_data.loc[in_lat & in_lon, :]

In [38]:
# Load and stack the two light coordinate tables (read as cp949).
# NOTE(review): these paths are not prefixed with data_path, unlike the other
# reads in this notebook — confirm the files really live in the working
# directory.
dobong_light = pd.read_csv("dobong_light.csv", encoding='cp949')
nowon_light = pd.read_csv("nowon_light.csv", encoding='cp949')
light_data = pd.concat([dobong_light, nowon_light])
light_data

Unnamed: 0,위도,경도
0,37.659426,127.036036
1,37.660503,127.040867
2,37.658717,127.038386
3,37.653005,127.035297
4,37.656666,127.037624
...,...,...
652486,37.623603,127.063553
652487,37.623819,127.063466
652488,37.623776,127.063446
652489,37.623573,127.063403


In [39]:
# Keep only the lights inside the bounding box around the centre point.
light_police = light_cut(police, light_data)

In [40]:
#draw_marker(m, light_police.values, color='ffff00')
m