In [1]:
# 패키지 준비
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# matplotlib에서 한글 폰트를 사용하는 설정

%matplotlib inline

import platform

if platform.system() == 'Darwin':
    mpl.rc('font', family='AppleGothic')
elif platform.system() == 'Windows':
    path = "c:/Windows/Fonts/malgun.ttf"
    font_name = mpl.font_manager.FontProperties(fname=path).get_name()
    mpl.rc('font', family=font_name)
else:
    print('Unknown system...') 

In [24]:
# Load the Seoul crime CSV (the variant that includes the district name) and preview it
crime_in_seoul = pd.read_csv(
    'data-files/crime-in-seoul-include-gu-name.csv',
    encoding='utf-8',
)
crime_in_seoul.head()

Unnamed: 0,관서명,살인 발생,살인 검거,강도 발생,강도 검거,강간 발생,강간 검거,절도 발생,절도 검거,폭력 발생,폭력 검거,구별
0,중부서,2,2,3,2,105,65,1395,477,1355,1170,중구
1,종로서,3,3,6,5,115,98,1070,413,1278,1070,종로구
2,남대문서,1,0,6,4,65,46,1153,382,869,794,중구
3,서대문서,2,2,5,4,154,124,1812,738,2056,1711,서대문구
4,혜화서,3,2,5,4,96,63,1114,424,1015,861,종로구


In [25]:
# Rebuild the DataFrame with the district column ('구별') as the index,
# summing every numeric column within each district.
#
# Only the numeric columns are passed as `values`: on pandas >= 2.0 a group-wise
# sum no longer drops text columns (e.g. '관서명') automatically, so they would
# otherwise end up string-concatenated in the result.
# The string 'sum' is used instead of np.sum because newer pandas emits a
# FutureWarning when a NumPy callable is passed as aggfunc.
numeric_columns = crime_in_seoul.select_dtypes(include='number').columns.tolist()
crime_in_seoul_by_gu = pd.pivot_table(
    crime_in_seoul,
    index='구별',
    values=numeric_columns,
    aggfunc='sum',
)

In [26]:
# Preview the first rows of the per-district table
crime_in_seoul_by_gu.head()

Unnamed: 0_level_0,강간 검거,강간 발생,강도 검거,강도 발생,살인 검거,살인 발생,절도 검거,절도 발생,폭력 검거,폭력 발생
구별,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
강남구,349,449,18,21,10,13,1650,3850,3705,4284
강동구,123,156,8,6,3,4,789,2366,2248,2712
강북구,126,153,13,14,8,7,618,1434,2348,2649
관악구,221,320,14,12,8,9,827,2706,2642,3298
광진구,220,240,26,14,4,4,1277,3026,2180,2625


In [27]:
# Replace each arrest-count column ('<crime> 검거') with an arrest-rate column
# ('<crime>검거율'), expressed as a percentage of the occurrence count.
crime_types = ['강간', '강도', '살인', '절도', '폭력']

for crime_type in crime_types:
    arrested = crime_in_seoul_by_gu[f'{crime_type} 검거']
    occurred = crime_in_seoul_by_gu[f'{crime_type} 발생']
    crime_in_seoul_by_gu[f'{crime_type}검거율'] = arrested / occurred * 100

# The raw arrest counts are no longer needed once the rates exist
arrest_count_columns = [f'{crime_type} 검거' for crime_type in crime_types]
crime_in_seoul_by_gu.drop(columns=arrest_count_columns, inplace=True)

crime_in_seoul_by_gu.head()

Unnamed: 0_level_0,강간 발생,강도 발생,살인 발생,절도 발생,폭력 발생,강간검거율,강도검거율,살인검거율,절도검거율,폭력검거율
구별,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
강남구,449,21,13,3850,4284,77.728285,85.714286,76.923077,42.857143,86.484594
강동구,156,6,4,2366,2712,78.846154,133.333333,75.0,33.347422,82.890855
강북구,153,14,7,1434,2649,82.352941,92.857143,114.285714,43.096234,88.637222
관악구,320,12,9,2706,3298,69.0625,116.666667,88.888889,30.561715,80.109157
광진구,240,14,4,3026,2625,91.666667,185.714286,100.0,42.200925,83.047619


In [40]:
# Cap every arrest rate at 100: any value above 100 is replaced by 100
con_list = ['강간검거율', '강도검거율', '살인검거율', '절도검거율', '폭력검거율']

crime_in_seoul_by_gu[con_list] = crime_in_seoul_by_gu[con_list].clip(upper=100)

crime_in_seoul_by_gu.head()

Unnamed: 0_level_0,강간,강도,살인,절도,폭력,강간검거율,강도검거율,살인검거율,절도검거율,폭력검거율
구별,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
강남구,449,21,13,3850,4284,77.728285,85.714286,76.923077,42.857143,86.484594
강동구,156,6,4,2366,2712,78.846154,100.0,75.0,33.347422,82.890855
강북구,153,14,7,1434,2649,82.352941,92.857143,100.0,43.096234,88.637222
관악구,320,12,9,2706,3298,69.0625,100.0,88.888889,30.561715,80.109157
광진구,240,14,4,3026,2625,91.666667,100.0,100.0,42.200925,83.047619


In [41]:
# Shorten the occurrence columns from '<crime> 발생' to just '<crime>'
occurrence_renames = {
    f'{crime} 발생': crime
    for crime in ('강간', '강도', '살인', '절도', '폭력')
}
crime_in_seoul_by_gu.rename(columns=occurrence_renames, inplace=True)
crime_in_seoul_by_gu.head()

Unnamed: 0_level_0,강간,강도,살인,절도,폭력,강간검거율,강도검거율,살인검거율,절도검거율,폭력검거율
구별,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
강남구,449,21,13,3850,4284,77.728285,85.714286,76.923077,42.857143,86.484594
강동구,156,6,4,2366,2712,78.846154,100.0,75.0,33.347422,82.890855
강북구,153,14,7,1434,2649,82.352941,92.857143,100.0,43.096234,88.637222
관악구,320,12,9,2706,3298,69.0625,100.0,88.888889,30.561715,80.109157
광진구,240,14,4,3026,2625,91.666667,100.0,100.0,42.200925,83.047619


In [44]:
# !pip install scikit-learn



In [45]:
from sklearn.preprocessing import MinMaxScaler

cols = ['강간', '강도', '살인', '절도', '폭력']

# Work on a copy so the unscaled per-district table stays available
crime_in_seoul_by_gu_norm = crime_in_seoul_by_gu.copy()

# Extract the crime-count columns as a NumPy ndarray
x = crime_in_seoul_by_gu[cols].to_numpy()

# Scale each column independently; manual equivalent: (value - min) / (max - min)
min_max_scaler = MinMaxScaler()
x_as_float = x.astype(float)
min_max_scaler.fit(x_as_float)
x_scaled = min_max_scaler.transform(x_as_float)

# Overwrite the count columns of the copy with their scaled values
crime_in_seoul_by_gu_norm[cols] = x_scaled

In [46]:
# Summary statistics of the normalized table (the scaled crime columns should span 0 to 1)
crime_in_seoul_by_gu_norm.describe()

Unnamed: 0,강간,강도,살인,절도,폭력,강간검거율,강도검거율,살인검거율,절도검거율,폭력검거율
count,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0,24.0
mean,0.360351,0.382353,0.399306,0.424721,0.290829,77.237094,85.144035,92.072626,39.145055,84.808901
std,0.263464,0.274814,0.297919,0.250679,0.212406,12.285098,13.583076,10.949588,5.759176,2.310261
min,0.0,0.0,0.0,0.0,0.0,48.77193,55.555556,66.666667,30.561715,80.109157
25%,0.154179,0.235294,0.166667,0.253934,0.161681,68.120404,76.025641,85.119048,35.295879,83.083024
50%,0.269452,0.352941,0.291667,0.38505,0.271932,79.831933,84.962406,100.0,38.19849,84.495162
75%,0.533862,0.529412,0.604167,0.543876,0.355167,84.10847,100.0,100.0,41.684685,86.633963
max,1.0,1.0,1.0,1.0,1.0,100.0,100.0,100.0,56.668794,88.637222


In [49]:
# Load the CCTV result table; index_col makes '구별' the index at read time,
# so no separate set_index call is needed afterwards.
result_CCTV = pd.read_csv(
    'data-files/cctv-result.csv',
    encoding='UTF-8',
    index_col='구별',
)
result_CCTV

Unnamed: 0_level_0,소계,최근증가율,인구수,한국인,외국인,고령자,외국인비율,고령자비율,CCTV비율,오차
구별,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
강남구,2780,150.619195,570500.0,565550.0,4950.0,63167.0,0.86766,11.072217,0.487292,1388.055355
강동구,773,166.490765,453233.0,449019.0,4214.0,54622.0,0.929765,12.051638,0.170552,465.422892
강북구,748,125.203252,330192.0,326686.0,3506.0,54813.0,1.061806,16.600342,0.226535,329.342026
강서구,884,134.793814,603772.0,597248.0,6524.0,72548.0,1.08054,12.015794,0.146413,551.503155
관악구,1496,149.29078,525515.0,507203.0,18312.0,68082.0,3.484582,12.955291,0.284673,162.948104
광진구,707,53.228621,372164.0,357211.0,14953.0,42214.0,4.017852,11.342849,0.18997,425.290264
구로구,1561,64.97373,447874.0,416487.0,31387.0,56833.0,7.007998,12.689506,0.348536,329.592918
금천구,1015,100.0,255082.0,236353.0,18729.0,32970.0,7.342345,12.925255,0.397911,35.989293
노원구,1265,188.929889,569384.0,565565.0,3819.0,71941.0,0.670725,12.634883,0.22217,125.483618
도봉구,485,246.638655,348646.0,346629.0,2017.0,51312.0,0.578524,14.717507,0.13911,616.501341


In [51]:
# Copy the population ('인구수') and the '소계' column (stored here as 'CCTV')
# from the CCTV table; values are matched to rows by the shared '구별' index.
for target_column, source_column in (('인구수', '인구수'), ('CCTV', '소계')):
    crime_in_seoul_by_gu_norm[target_column] = result_CCTV[source_column]
crime_in_seoul_by_gu_norm.head()

Unnamed: 0_level_0,강간,강도,살인,절도,폭력,강간검거율,강도검거율,살인검거율,절도검거율,폭력검거율,인구수,CCTV
구별,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
강남구,1.0,0.941176,0.916667,0.953472,0.661386,77.728285,85.714286,76.923077,42.857143,86.484594,570500.0,2780
강동구,0.15562,0.058824,0.166667,0.445775,0.289667,78.846154,100.0,75.0,33.347422,82.890855,453233.0,773
강북구,0.146974,0.529412,0.416667,0.126924,0.274769,82.352941,92.857143,100.0,43.096234,88.637222,330192.0,748
관악구,0.628242,0.411765,0.583333,0.562094,0.428234,69.0625,100.0,88.888889,30.561715,80.109157,525515.0,1496
광진구,0.397695,0.529412,0.166667,0.67157,0.269094,91.666667,100.0,100.0,42.200925,83.047619,372164.0,707


In [52]:
# '범죄': row-wise sum of the five scaled crime columns for each district
cols = ['강간', '강도', '살인', '절도', '폭력']
crime_in_seoul_by_gu_norm['범죄'] = crime_in_seoul_by_gu_norm[cols].sum(axis=1)
crime_in_seoul_by_gu_norm.head()

Unnamed: 0_level_0,강간,강도,살인,절도,폭력,강간검거율,강도검거율,살인검거율,절도검거율,폭력검거율,인구수,CCTV,범죄
구별,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
강남구,1.0,0.941176,0.916667,0.953472,0.661386,77.728285,85.714286,76.923077,42.857143,86.484594,570500.0,2780,4.472701
강동구,0.15562,0.058824,0.166667,0.445775,0.289667,78.846154,100.0,75.0,33.347422,82.890855,453233.0,773,1.116551
강북구,0.146974,0.529412,0.416667,0.126924,0.274769,82.352941,92.857143,100.0,43.096234,88.637222,330192.0,748,1.494746
관악구,0.628242,0.411765,0.583333,0.562094,0.428234,69.0625,100.0,88.888889,30.561715,80.109157,525515.0,1496,2.613667
광진구,0.397695,0.529412,0.166667,0.67157,0.269094,91.666667,100.0,100.0,42.200925,83.047619,372164.0,707,2.034438


In [53]:
# '검거': row-wise sum of the five arrest-rate columns for each district
cols = ['강간검거율', '강도검거율', '살인검거율', '절도검거율', '폭력검거율']
crime_in_seoul_by_gu_norm['검거'] = crime_in_seoul_by_gu_norm[cols].sum(axis=1)
crime_in_seoul_by_gu_norm.head()

Unnamed: 0_level_0,강간,강도,살인,절도,폭력,강간검거율,강도검거율,살인검거율,절도검거율,폭력검거율,인구수,CCTV,범죄,검거
구별,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
강남구,1.0,0.941176,0.916667,0.953472,0.661386,77.728285,85.714286,76.923077,42.857143,86.484594,570500.0,2780,4.472701,369.707384
강동구,0.15562,0.058824,0.166667,0.445775,0.289667,78.846154,100.0,75.0,33.347422,82.890855,453233.0,773,1.116551,370.084431
강북구,0.146974,0.529412,0.416667,0.126924,0.274769,82.352941,92.857143,100.0,43.096234,88.637222,330192.0,748,1.494746,406.94354
관악구,0.628242,0.411765,0.583333,0.562094,0.428234,69.0625,100.0,88.888889,30.561715,80.109157,525515.0,1496,2.613667,368.622261
광진구,0.397695,0.529412,0.166667,0.67157,0.269094,91.666667,100.0,100.0,42.200925,83.047619,372164.0,707,2.034438,416.915211
