### 임상도 데이터 전처리

In [2]:
import geopandas as gpd

In [None]:
# Load the raw forest-map shapefile for Seoul into a GeoDataFrame.
gdf = gpd.read_file("../data/raw/임상도_서울특별시/11.shp")

In [None]:
# Reproject to EPSG:5179 only when the layer is not already in that CRS,
# and report when a conversion actually happened.
if not (gdf.crs == "epsg:5179"):
    gdf = gdf.to_crs(epsg=5179)
    print("Coordinate Reference System Converted")

In [None]:
# Give the FRTP_CD column its Korean label ("수종코드") used by the later cells.
gdf = gdf.rename({"FRTP_CD": "수종코드"}, axis="columns")

In [None]:
# Code -> label lookup for the "수종코드" column.
# NOTE(review): keys are strings, so this assumes the codes were read as
# strings; any code not listed here (or missing) ends up as "기타".
frtp_map = {
    '1': '침엽수',
    '2': '활엽수',
    '3': '혼효림',
}
gdf["수종"] = gdf["수종코드"].map(frtp_map)
gdf["수종"] = gdf["수종"].fillna("기타")

In [None]:
# Store each feature's area in the "면적" column, in the units of the layer's
# current CRS (presumably square metres after the EPSG:5179 step — confirm).
gdf["면적"] = gdf.area

In [None]:
# Columns to remove from the forest-map layer before it is exported.
# NOTE(review): grouping below is by apparent naming pattern only; the
# meaning of each attribute should be confirmed against the layer schema.
drop_cols = [
    # stand / species attributes
    'STORUNST',
    'FROR_CD',
    'KOFTR_GROU',
    'KOFTR_NM',
    # height attributes
    'HEIGHT',
    'HEIGHT_NM',
    # labelling / bookkeeping attributes
    'LDMARK_STN',
    'MAP_LABEL',
    '갱신년도',
    'ETC_PCMTT',
    # remaining *_CD code columns and *_NM name columns
    'FRTP_NM',
    'DMCLS_CD',
    'AGCLS_CD',
    'DNST_CD',
    'DMCLS_NM',
    'AGCLS_NM',
    'DNST_NM',
    # precomputed shape metrics shipped with the shapefile
    'Shape_Leng',
    'Shape_Area',
]

In [None]:
# Build the trimmed layer: every column listed in drop_cols is removed
# (a missing column raises KeyError), and the result is an independent copy.
gdf_cleaned = gdf.drop(drop_cols, axis=1).copy()

# Show which columns survived.
print(list(gdf_cleaned.columns))

In [None]:
# Preview the first rows of the trimmed layer as a sanity check.
gdf_cleaned.head()

In [None]:
# Persist the trimmed forest-map layer as a GeoPackage under data/processed.
gdf_cleaned.to_file("../data/processed/서울_임상도_전처리.gpkg", driver="GPKG")

### 대한민국 행정구역 데이터 전처리

In [27]:
# Load the administrative-district shapefile; attribute text is decoded as
# EUC-KR. This rebinds `gdf`, replacing the forest-map layer loaded earlier.
gdf = gpd.read_file("../data/raw/sig_20230729/sig.shp", encoding="euc-kr")

In [28]:
# 1. Set the coordinate reference system.
# set_crs only declares the CRS on the layer (in place); it does not transform
# coordinates. NOTE(review): assumes the shapefile coordinates are already in
# EPSG:5179 — confirm against the data source.
gdf.set_crs(epsg=5179, inplace=True)

Unnamed: 0,SIG_CD,SIG_ENG_NM,SIG_KOR_NM,geometry
0,11110,Jongno-gu,종로구,"POLYGON ((956615.453 1953567.199, 956621.579 1..."
1,11140,Jung-gu,중구,"POLYGON ((957890.386 1952616.746, 957909.908 1..."
2,11170,Yongsan-gu,용산구,"POLYGON ((953115.761 1950834.084, 953114.206 1..."
3,11200,Seongdong-gu,성동구,"POLYGON ((959681.109 1952649.605, 959842.412 1..."
4,11215,Gwangjin-gu,광진구,"POLYGON ((964825.058 1952633.25, 964875.565 19..."
...,...,...,...,...
245,51790,Hwacheon-gun,화천군,"POLYGON ((1027779.614 2032000.017, 1027951.841..."
246,51800,Yanggu-gun,양구군,"MULTIPOLYGON (((1046391.747 2032451.649, 10463..."
247,51810,Inje-gun,인제군,"POLYGON ((1041948.853 1995343.843, 1042054.076..."
248,51820,Goseong-gun,고성군,"MULTIPOLYGON (((1091705.056 2034023.203, 10917..."


In [29]:
# Keep only the needed columns and give them Korean names.
gdf = gdf[["SIG_CD", "SIG_KOR_NM", "geometry"]].rename(
    columns={"SIG_CD": "시군구코드", "SIG_KOR_NM": "시군구명"}
)

# Geometry validity check: rows with invalid geometry are dropped.
# The copy is taken AFTER filtering so the column assignments below write to
# an independent frame instead of a filtered slice (avoids pandas'
# SettingWithCopyWarning).
# NOTE(review): dropping silently removes whole districts if their geometry
# is invalid; consider repairing the geometry instead if that ever happens.
gdf = gdf[gdf.is_valid].copy()

# Add the area column (computed once), then remove zero-area rows, if any.
gdf["면적"] = gdf.geometry.area
gdf = gdf[gdf["면적"] > 0].copy()

# Extract the province code: the first two characters of the district code
# (optional).
gdf["시도코드"] = gdf["시군구코드"].str[:2]

In [35]:
# Province-level code (first two characters of the district code) -> name.
sido_map = {
    '11': '서울특별시',
    '26': '부산광역시',
    '27': '대구광역시',
    '28': '인천광역시',
    '29': '광주광역시',
    '30': '대전광역시',
    '31': '울산광역시',
    '36': '세종특별자치시',
    '41': '경기도',
    '42': '강원도',
    '43': '충청북도',
    '44': '충청남도',
    '45': '전라북도',
    '46': '전라남도',
    '47': '경상북도',
    '48': '경상남도',
    '49': '제주도',
    '50': '제주특별자치도',
    # 51xxx rows in this dataset are Gangwon districts (e.g. 51790 Hwacheon-gun,
    # 51810 Inje-gun), so 51 is Gangwon's newer code rather than "other".
    '51': '강원특별자치도',
    # Not present in this snapshot; included so newer snapshots map cleanly.
    '52': '전북특별자치도',
}

# Codes missing from sido_map yield NaN in the "시도명" column.
gdf["시도명"] = gdf["시도코드"].map(sido_map)

In [36]:
# Preview the first rows to check the added area, province-code and
# province-name columns.
gdf.head()

Unnamed: 0,시군구코드,시군구명,geometry,면적,시도코드,시도명
0,11110,종로구,"POLYGON ((956615.453 1953567.199, 956621.579 1...",23971610.0,11,서울특별시
1,11140,중구,"POLYGON ((957890.386 1952616.746, 957909.908 1...",9962768.0,11,서울특별시
2,11170,용산구,"POLYGON ((953115.761 1950834.084, 953114.206 1...",21897560.0,11,서울특별시
3,11200,성동구,"POLYGON ((959681.109 1952649.605, 959842.412 1...",16800780.0,11,서울특별시
4,11215,광진구,"POLYGON ((964825.058 1952633.25, 964875.565 19...",17028810.0,11,서울특별시


In [37]:
# Persist the cleaned district layer as a GeoPackage under data/processed.
gdf.to_file("../data/processed/대한민국_시군구.gpkg", driver="GPKG")