In [20]:
import numpy as np
import pandas as pd
import folium
import re

In [25]:
df = pd.read_csv('./data1/서울시 주요 공원현황.csv', encoding='EUC-KR')
df.shape

(132, 20)

# 1.공원 면적 정리

In [26]:
area = df.면적.astype(str).apply(lambda x: x.strip('총 ').split()[0])
area[-5:]

127    109635㎡
128    140022㎡
129    211392㎡
130    157535㎡
131        nan
Name: 면적, dtype: object

In [27]:
area.unique()

array(['2896887㎡', '80683㎡', '9132690m²', '480994㎡', '2284085㎡',
       '8948.1㎡', '휴양', '6456㎥', '80309㎡', '1980.4㎡', '5197.7㎡', '11467㎡',
       '229539㎡', '15179.7', '16734.60㎥', '61544㎡', '424106㎡', '560552㎡',
       '26696.8㎥', '75900㎡', '113021.7m2', '201779㎥', '258991㎡',
       '10420819.08㎡', '171294.1㎥', '15000㎡', '297926㎡', '411972㎡',
       '98470㎡', '2038741.3㎡', '603715.2㎡', '108120㎡', '29619.3㎥',
       '29843.8㎡', '126026㎡', '135002.7㎡', '384026㎡', '180000㎥',
       '416031㎡', '160720㎡', '2277252㎡', '19599.70㎥', '138596㎡',
       '185295㎥', '254648㎡', '29816.5㎡', '762642㎡', '664905㎡', '99599.9㎡',
       '5802㎡', '16500㎥', '585652㎥', '649709.6㎡', '72132㎡', '66027.8㎡',
       '265582㎡', '2088704.9㎡', '112070㎡', '993724㎡', '219167㎡',
       '1349556.4㎡', '1511198㎡', '1097565㎡', '328018㎡', '1447122㎡',
       '207790㎡', '1018689㎡', '954553㎡', '1667848㎡', '13224㎡', '130345㎡',
       '5332422㎡', '5107964㎡', '47832㎡', '213552㎡', '168099.87㎡',
       '359435.2㎡', '36660㎡', '38073

In [28]:
area[(area == 'nan') | (area == '휴양')]

6       휴양
109    nan
119    nan
131    nan
Name: 면적, dtype: object

In [29]:
area[(area == 'nan') | (area == '휴양')] = '0㎡'

In [30]:
area.unique() # 단위표시가 모두 다르기 때문에 정규표현식으로 제거

array(['2896887㎡', '80683㎡', '9132690m²', '480994㎡', '2284085㎡',
       '8948.1㎡', '0㎡', '6456㎥', '80309㎡', '1980.4㎡', '5197.7㎡', '11467㎡',
       '229539㎡', '15179.7', '16734.60㎥', '61544㎡', '424106㎡', '560552㎡',
       '26696.8㎥', '75900㎡', '113021.7m2', '201779㎥', '258991㎡',
       '10420819.08㎡', '171294.1㎥', '15000㎡', '297926㎡', '411972㎡',
       '98470㎡', '2038741.3㎡', '603715.2㎡', '108120㎡', '29619.3㎥',
       '29843.8㎡', '126026㎡', '135002.7㎡', '384026㎡', '180000㎥',
       '416031㎡', '160720㎡', '2277252㎡', '19599.70㎥', '138596㎡',
       '185295㎥', '254648㎡', '29816.5㎡', '762642㎡', '664905㎡', '99599.9㎡',
       '5802㎡', '16500㎥', '585652㎥', '649709.6㎡', '72132㎡', '66027.8㎡',
       '265582㎡', '2088704.9㎡', '112070㎡', '993724㎡', '219167㎡',
       '1349556.4㎡', '1511198㎡', '1097565㎡', '328018㎡', '1447122㎡',
       '207790㎡', '1018689㎡', '954553㎡', '1667848㎡', '13224㎡', '130345㎡',
       '5332422㎡', '5107964㎡', '47832㎡', '213552㎡', '168099.87㎡',
       '359435.2㎡', '36660㎡', '38073

In [31]:
area = area.apply(lambda x: float(re.sub('[㎡m²㎥]','',x)))

In [32]:
area = area.apply(lambda x: int(np.round(x)))

In [33]:
area

0      2896887
1        80683
2      9132690
3       480994
4      2284085
        ...   
127     109635
128     140022
129     211392
130     157535
131          0
Name: 면적, Length: 132, dtype: int64

# 공원의 면적에 따라 분류

In [35]:
area_criteria = [-1, 100000,  1000000,  12000000]
labels = ['소형', '중형', '대형']
size_info = [3, 7, 15]
scale = pd.cut(area, area_criteria, labels=labels)
size = pd.cut(area, area_criteria, labels=size_info)

In [37]:
scale[:5]

0    대형
1    소형
2    대형
3    중형
4    대형
Name: 면적, dtype: category
Categories (3, object): ['소형' < '중형' < '대형']

# 3.새로운 프레임만들기

In [38]:
df = df[['공원명','지역', 'X좌표(WGS84)', 'Y좌표(WGS84)']]
df.columns=['공원명', '지역', '경도', '위도']

In [39]:
df.head(3)

Unnamed: 0,공원명,지역,경도,위도
0,남산도시자연공원,중구,126.990377,37.55014
1,길동생태공원,강동구,127.154779,37.540394
2,서울대공원,과천시,127.019846,37.426449


In [40]:
df['면적'] = area
df['분류'] = scale
df['크기'] = size

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['면적'] = area
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['분류'] = scale
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['크기'] = size


Unnamed: 0,공원명,지역,경도,위도,면적,분류,크기
0,남산도시자연공원,중구,126.990377,37.55014,2896887,대형,15
1,길동생태공원,강동구,127.154779,37.540394,80683,소형,3
2,서울대공원,과천시,127.019846,37.426449,9132690,대형,15


In [41]:
df.head(3)

Unnamed: 0,공원명,지역,경도,위도,면적,분류,크기
0,남산도시자연공원,중구,126.990377,37.55014,2896887,대형,15
1,길동생태공원,강동구,127.154779,37.540394,80683,소형,3
2,서울대공원,과천시,127.019846,37.426449,9132690,대형,15


In [42]:
df.isnull().sum()

공원명    0
지역     1
경도     1
위도     1
면적     0
분류     0
크기     0
dtype: int64

In [43]:
df[df.지역.isna()]

Unnamed: 0,공원명,지역,경도,위도,면적,분류,크기
129,경춘선숲길,,127.076482,37.627077,211392,중형,7


In [44]:
df[df.경도.isna()]

Unnamed: 0,공원명,지역,경도,위도,면적,분류,크기
131,서울로7017,중구,,,0,소형,3


In [45]:
df = df.dropna()

In [46]:
df.to_csv('서울공원요약.csv', index=False)

# 4.공원 시각화

In [47]:
map = folium.Map(location=[37.5502, 126.982], zoom_start=11)
for i in df.index:
    folium.CircleMarker([df.위도[i], df.경도[i]],
                        radius=int(df.크기[i]),
                       tooltip=f"{df.공원명[i]}({df.면적[i]: ,d})㎡",
                        color='crimson', 
                        fill_color='crimson').add_to(map)

In [48]:
map