In [None]:
pip install geopandas

In [None]:
import json
import os
import warnings
from functools import reduce

import geopandas as gpd
import numpy as np
import pandas as pd
import requests
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_samples, silhouette_score
warnings.filterwarnings(action='ignore')

# 서울시 행정구역 나누기

In [None]:
# Load the nationwide district boundaries, then derive the Seoul subset in one chain
# (no in-place mutation of a slice).
# NOTE(review): assumes the first 25 rows of the file are Seoul's districts — confirm.
korea = gpd.GeoDataFrame.from_file('/content/drive/MyDrive/Colab Notebooks/1차프로젝트/data/korea', encoding='cp949')
seoul = (
    korea.iloc[:25]
    .drop(['SIG_CD', 'SIG_ENG_NM'], axis=1)
    .to_crs(epsg=4326)  # reproject to WGS84 longitude/latitude
)

# 각 항목 최종 스코어 데이터 만들기 함수

In [None]:
def _vworld_reverse_geocode(x, y, timeout=10):
  """Return the address text VWorld reports for one EPSG:4326 point.

  Args:
    x, y: point coordinates, sent to the API as "x,y".
    timeout: seconds to wait for the HTTP response.

  Raises:
    requests.RequestException: on network failure or an HTTP error status.
    ValueError: when the API answers with a status other than 'OK'.
  """
  # NOTE(review): the fallback key is committed in the notebook; set VWORLD_API_KEY,
  # rotate the key, and remove the literal.
  key = os.environ.get('VWORLD_API_KEY', '0733E51F-F657-3DA2-B82B-41831D9A8528')
  point = str(x) + ',' + str(y)
  params = {'service': 'address',
            'request': 'getAddress', 'version': '2.0',
            'refine': 'true', 'simple': 'false', 'type': 'both',
            'crs': 'epsg:4326', 'format': 'json',
            'point': point,
            'key': key}
  res = requests.get("http://api.vworld.kr/req/address?", params=params, timeout=timeout)
  res.raise_for_status()
  body = res.json()['response']
  if body.get('status') != 'OK':
    # Fail with the offending point instead of an opaque KeyError on 'result'.
    raise ValueError(f"VWorld reverse geocoding failed for point {point}: status={body.get('status')!r}")
  return body['result'][0]['text']


def score_df(df, plot_li, best_cluster, coeff_point):
  """Build the per-district score table for one facility type.

  Steps:
    1. rate = rows in the district / rows in the whole table (grouped by '자치구').
    2. K-means on the two coordinate columns; per-sample silhouette values are
       averaged per cluster.
    3. Each cluster centre is reverse-geocoded and assigned to the district named
       by the second space-separated token of the returned address.
    4. score = coeff_point * cluster mean silhouette * district rate.

  Args:
    df: table with a '자치구' column and two coordinate columns.
    plot_li: positional indices of the two coordinate columns; the first is sent
      to the geocoder as x, the second as y.
    best_cluster: number of K-means clusters.
    coeff_point: correlation coefficient used as the score multiplier.

  Returns:
    DataFrame with columns gu, count, rate, silhouette_coeff, score. There is one
    row per cluster centre that falls in a district present in ``df``; districts
    without a cluster centre are absent (inner merge).
  """
  # 1. rate: district row count / total row count
  cnt = (
      df.groupby('자치구')
      .agg(count=('자치구', 'count'))
      .reset_index()
      .rename(columns={'자치구': 'gu'})
  )
  cnt['rate'] = cnt['count'] / cnt['count'].sum()

  # 2. clustering on the coordinate columns
  plot = df.iloc[:, plot_li].astype('float')

  # n_init is pinned because its default changed across scikit-learn releases.
  kmeans = KMeans(n_clusters=best_cluster, init='k-means++', n_init=10,
                  max_iter=300, random_state=0)
  plot['target'] = kmeans.fit_predict(plot)
  plot['silhouette_coeff'] = silhouette_samples(plot.iloc[:, [0, 1]], plot['target'])
  centers = kmeans.cluster_centers_

  # Mean silhouette per cluster, indexed 0..k-1 so it lines up with `centers`.
  cluster = (
      plot.groupby('target')['silhouette_coeff']
      .mean()
      .reindex(range(len(centers)))
      .to_frame()
  )

  # Cluster centre coordinates -> address -> district (second token of the address).
  addresses = pd.Series(
      [_vworld_reverse_geocode(x, y) for x, y in centers],
      index=cluster.index,
      dtype='object',
  )
  cluster['gu'] = addresses.str.split(' ').str[1]

  result = pd.merge(cnt, cluster, on='gu')
  # Final score: correlation coefficient x cluster mean silhouette x district share of the total count.
  result['score'] = coeff_point * result['silhouette_coeff'] * result['rate']

  return result

# 범죄 점수

## CCTV SCORE

범죄와 cctv 변화량 간의 상관 계수(-0.577) X 각 자치구의 실루엣 계수 X (각 자치구의 CCTV 수 / 서울시 총 CCTV 수)
<br>

cctv: 
```
df = cctv
plot_li = [2,1]
best_cluster = 8
coeff_point = -0.577
```

In [None]:
# Load the CCTV location table from Drive.
cctv = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/1차프로젝트/data/CCTV_위치.csv')

In [None]:
# CCTV score: coordinate columns at positions [2, 1], 8 clusters, coefficient -0.577.
cctv_score = score_df(cctv, [2,1], 8, -0.577)

In [None]:
# Save the CCTV score table (index column omitted).
cctv_score.to_csv('/content/drive/MyDrive/Colab Notebooks/1차프로젝트/make_df/cctv_score.csv',index=False)

## 유흥주점(bar) SCORE

범죄와 유흥주점 간의 상관 계수(0.683) X 각 자치구의 실루엣 계수 X (각 자치구의 유흥주점 수 / 서울시 총 유흥주점 수)
<br>

bar:
``` 
df = bar_df
plot_li = [1,2]
best_cluster = 8
coeff_point = 0.683
```

In [None]:
# Read only columns 7 and 15 of the bar workbook
# (presumably '상세영업상태명' and '지번주소', the two columns bar_df() reads — confirm).
bar = pd.read_excel('/content/drive/MyDrive/Colab Notebooks/1차프로젝트/data/유흥주점.xlsx',usecols = [7,15])

In [None]:
def _vworld_geocode_parcel(address, timeout=10):
  """Resolve one parcel address to an (x, y) pair through the VWorld address API.

  Returns the coordinates exactly as the API delivers them, or None when the
  request fails, the response is malformed, or its status is not 'OK'.
  """
  # NOTE(review): the fallback key is committed in the notebook; set VWORLD_API_KEY,
  # rotate the key, and remove the literal.
  key = os.environ.get('VWORLD_API_KEY', '0733E51F-F657-3DA2-B82B-41831D9A8528')
  params = {'service': "address",
            'request': "getcoord", 'version': '2.0',
            'type': 'PARCEL',
            'crs': 'epsg:4326', 'format': 'json',
            'address': address,
            'key': key}
  try:
    res = requests.get("http://api.vworld.kr/req/address?", params=params, timeout=timeout)
    res.raise_for_status()
    body = res.json()['response']
    if body['status'] != 'OK':
      return None
    point = body['result']['point']
    return point['x'], point['y']
  except (requests.RequestException, ValueError, KeyError, TypeError):
    # Best effort: an address that cannot be resolved is dropped by the caller.
    # Only lookup failures are caught, so Ctrl-C still interrupts the loop.
    return None


def bar_df(bar_df):
  """Geocode the bars that are currently operating and tag each with its district.

  Args:
    bar_df: table with '상세영업상태명' (business status) and '지번주소'
      (parcel address) columns. It is not modified.

  Returns:
    New DataFrame holding only rows whose status is '영업', that have no missing
    values, and whose address could be geocoded. 'Longitude' and 'Latitude'
    are added after the input columns, '자치구' is the second space-separated
    token of the address, and '지번주소' itself is removed. The index is 0..n-1.
  """
  bar = (
      bar_df.loc[bar_df['상세영업상태명'] == '영업']
      .dropna()
      .reset_index(drop=True)
  )

  # Address -> coordinates. Whole columns are assigned at once rather than
  # cell-by-cell chained assignment, which is silently lost under Copy-on-Write.
  coords = [_vworld_geocode_parcel(address) for address in bar['지번주소']]
  bar['Longitude'] = pd.Series([c[0] if c else None for c in coords],
                               index=bar.index, dtype='object')
  bar['Latitude'] = pd.Series([c[1] if c else None for c in coords],
                              index=bar.index, dtype='object')

  # Drop rows that could not be geocoded.
  bar = bar.dropna(subset=['Longitude', 'Latitude']).reset_index(drop=True)

  # Keep only the district part of the address.
  bar['자치구'] = bar['지번주소'].str.split(' ').str[1]
  return bar.drop('지번주소', axis=1)

In [None]:
bar_df = bar_df(bar)

In [None]:
bar_score = score_df(bar_df, [1,2], 8, 0.683)

In [None]:
# Save the bar score table (index column omitted).
bar_score.to_csv('/content/drive/MyDrive/Colab Notebooks/1차프로젝트/make_df/bar_score.csv',index=False)

# 교통점수

## SPEED SCORE

교통사고와 단속카메라 간의 상관 계수(0.398) X 각 자치구의 실루엣 계수 X (각 자치구의 단속카메라 수 / 서울시 총 단속카메라 수)
<br>

speed: 
```
df = speed
plot_li = [2,1]
best_cluster = 7
coeff_point = 0.398
```

In [None]:
# Load the speed-camera locations, supplying the column labels explicitly.
# NOTE(review): with names= and no header= argument pandas treats the first line of the
# file as data — confirm the CSV has no header row.
speed = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/1차프로젝트/data/단속카메라_위치.csv',  
                    names = ['자치구','Latitude','Longitude'])

In [None]:
# Speed-camera score: coordinate columns at positions [2, 1], 7 clusters, coefficient 0.398.
# Positions 2 and 1 are 'Longitude' and 'Latitude' if the three names given at load
# time map to columns 0-2 — confirm.
speed_score = score_df(speed, [2,1], 7, 0.398)

In [None]:
# Save the speed-camera score table (index column omitted).
speed_score.to_csv('/content/drive/MyDrive/Colab Notebooks/1차프로젝트/make_df/speed_score.csv',index=False)

## 어린이 보호구역(protect) SCORE

교통사고와 어린이 보호구역 간의 상관 계수(-0.203) X 각 자치구의 실루엣 계수 X (각 자치구의 어린이 보호구역 수 / 서울시 총 어린이 보호구역 수)
<br>

protect: 
```
df = protect
plot_li = [1,2]
best_cluster = 8
coeff_point = -0.203
```

In [None]:
# Load the child-protection-zone location table from Drive.
protect = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/1차프로젝트/data/어린이보호구역_위치.csv')

In [None]:
# Protection-zone score: coordinate columns at positions [1, 2], 8 clusters, coefficient -0.203.
protect_score = score_df(protect, [1,2], 8, -0.203)

In [None]:
# Save the protection-zone score table (index column omitted).
protect_score.to_csv('/content/drive/MyDrive/Colab Notebooks/1차프로젝트/make_df/protect_score.csv',index=False)

# 주변시설 점수

## 병원(h) SCORE

어린이와 병원 간의 상관 계수(0.573) X 각 자치구의 실루엣 계수 X (각 자치구의 병원 수 / 서울시 총 병원 수)
<br>

hospital: 
```
df = hospital
plot_li = [2,3]
best_cluster = 6
coeff_point = 0.573
```

In [None]:
# Read four columns of the hospital/clinic CSV by position and label them.
df_H = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/1차프로젝트/data/병의원.csv',
    usecols=[1, 3, 31, 32],
    names=['주소', '병원분류명', 'Longitude', 'Latitude'],
)

# 1. Clean the data: keep the three facility classes of interest, renumber the rows,
#    and take the district from the second space-separated token of the address.
hospital = df_H[df_H['병원분류명'].isin(['의원', '병원', '종합병원'])].reset_index(drop=True)
hospital['자치구'] = hospital['주소'].str.split(' ').str[1]

In [None]:
# Hospital score: coordinate columns at positions [2, 3] ('Longitude', 'Latitude'),
# 6 clusters, coefficient 0.573.
hospital_score = score_df(hospital, [2,3], 6, 0.573)

In [None]:
# Save the hospital score table as h_score.csv (index column omitted).
hospital_score.to_csv('/content/drive/MyDrive/Colab Notebooks/1차프로젝트/make_df/h_score.csv',index=False)

## 돌봄센터(care) SCORE

어린이와 돌봄센터 간의 상관 계수(0.422) X 각 자치구의 실루엣 계수 X (각 자치구의 돌봄센터 수 / 서울시 총 돌봄센터 수)
<br>

care: 
```
df = care
plot_li = [3,4]
best_cluster = 8
coeff_point = 0.422
```

In [None]:
# The three care-facility files are read with the same column positions.
care_paths = [
    '/content/drive/MyDrive/Colab Notebooks/1차프로젝트/data/열린육아방.csv',
    '/content/drive/MyDrive/Colab Notebooks/1차프로젝트/data/우리동네키움센터.csv',
    '/content/drive/MyDrive/Colab Notebooks/1차프로젝트/data/지역아동센터.csv',
]
df_care1, df_care2, df_care3 = (
    pd.read_csv(path, usecols=[1, 5, 7, 8, 9]) for path in care_paths
)

# Stack the three tables, drop incomplete rows, renumber, and apply common labels.
care = (
    pd.concat([df_care1, df_care2, df_care3])
    .dropna()
    .reset_index(drop=True)
)
care.columns = ['시설명', '자치구', '연령구분', 'Longitude', 'Latitude']

# Save the combined care-centre table.
care.to_csv('/content/drive/MyDrive/Colab Notebooks/1차프로젝트/data/돌봄센터.csv', index=False)

In [None]:
# Care-centre score: coordinate columns at positions [3, 4] ('Longitude', 'Latitude'),
# 8 clusters, coefficient 0.422.
care_score = score_df(care, [3,4], 8, 0.422)

In [None]:
# Save the care-centre score table (index column omitted).
care_score.to_csv('/content/drive/MyDrive/Colab Notebooks/1차프로젝트/make_df/care_score.csv',index=False)

# 각 분야별 점수 합산

In [None]:
def _sum_scores_by_gu(first, second, total_name):
    """Outer-merge two score tables on their shared columns and sum 'score' per district."""
    combined = pd.merge(first, second, how='outer')
    totals = combined.groupby('gu').agg(**{total_name: ('score', 'sum')})
    return totals.reset_index()


# One total per district for each category: crime, traffic accidents, surroundings.
crime = _sum_scores_by_gu(cctv_score, bar_score, 'crime_score')
car_accident = _sum_scores_by_gu(speed_score, protect_score, 'acc_score')
surroundings = _sum_scores_by_gu(hospital_score, care_score, 'sur_score')

# 구별 점수 합산

In [None]:
# 1. 서울시 행정구역 불러오기
seoul_gu = seoul.iloc[:,[0]]
seoul_gu.columns = ['gu']

# 2. 각 분야별 점수 목록 합치기
df_list = [seoul_gu, crime, car_accident, surroundings]
final_df = reduce(lambda x, y : pd.merge(x,y,how='outer'), df_list)

# 3. 최종 점수 데이터
final_df = final_df.fillna(0)
final_df['FINAL_SCORE'] = 1 - final_df['crime_score'] - final_df['acc_score'] + final_df['sur_score']
final_df = final_df.sort_values('FINAL_SCORE', ascending=False).reset_index(drop=True)

In [None]:
# Display the ranked table (sorted by FINAL_SCORE, descending).
final_df

Unnamed: 0,gu,crime_score,acc_score,sur_score,FINAL_SCORE
0,강남구,0.007505,-0.005236,0.065002,1.062733
1,중랑구,-0.016144,0.0,0.008853,1.024997
2,관악구,-0.020473,-0.003772,0.0,1.024245
3,양천구,-0.004073,-0.00654,0.009438,1.020051
4,성북구,0.0,0.0,0.017254,1.017254
5,서대문구,0.0,-0.001965,0.009418,1.011383
6,노원구,0.0,0.005628,0.015089,1.009461
7,강동구,0.0,-0.004833,0.0,1.004833
8,동작구,0.009974,0.005623,0.019599,1.004002
9,도봉구,0.001878,-0.004101,0.0,1.002223


In [None]:
# Save the final ranked table (index column omitted).
final_df.to_csv('/content/drive/MyDrive/Colab Notebooks/1차프로젝트/make_df/final_df.csv',index=False)