# 5th_2010

## Raw Data

In [1]:
import pandas as pd

def convert_github_url_to_raw(github_url):
    """
    깃허브 blob URL을 raw URL로 변환

    Parameters:
    -----------
    github_url : str
        깃허브 파일 URL

    Returns:
    --------
    str
        raw URL (pandas가 직접 읽을 수 있는 형태)
    """
    if '/blob/' in github_url:
        return github_url.replace('/blob/', '/raw/')
    return github_url


def process_5th_governor_election(
    file_path_or_url,
    header_rows=(2, 4),  # 엑셀 기준 행 번호 (2행부터 4행까지)
    filter_column=None,
    filter_value=None
):
    """
    선거 데이터를 유연하게 처리하는 함수

    Parameters:
    -----------
    file_path_or_url : str
        로컬 파일 경로 또는 GitHub URL
    header_rows : tuple
        헤더로 사용할 행 범위 (엑셀 기준 행 번호)
        예: (2, 4) = 2행부터 4행까지
    filter_column : str, optional
        필터링할 컬럼명 (예: '읍면동명')
    filter_value : str, optional
        필터링할 값 (예: '합계')

    Returns:
    --------
    pandas.DataFrame
        처리된 데이터프레임

    Example:
    --------
    # 2~4행을 헤더로, '읍면동명'이 '합계'인 행만 추출
    df = process_5th_governor_election(
        'https://github.com/.../강원도지사선거.xlsx',
        header_rows=(2, 4),
        filter_column='읍면동명',
        filter_value='합계'
    )
    """

    # GitHub URL인 경우 raw URL로 변환
    if file_path_or_url.startswith('https://github.com'):
        file_path_or_url = convert_github_url_to_raw(file_path_or_url)

    # 파일을 header 없이 읽기
    df_raw = pd.read_excel(file_path_or_url, header=None)

    # 엑셀 행 번호를 파이썬 인덱스로 변환 (엑셀은 1부터, 파이썬은 0부터)
    start_idx = header_rows[0] - 1
    end_idx = header_rows[1] - 1

    # 지정된 행들을 가져와서 컬럼명 생성
    header_rows_data = []
    for i in range(start_idx, end_idx + 1):
        header_rows_data.append(df_raw.iloc[i].fillna('_').astype(str))

    # 컬럼명 생성
    new_columns = []
    for col_idx in range(len(header_rows_data[0])):
        parts = []

        # 각 행의 값이 '_'가 아닌 경우만 추가
        for row_data in header_rows_data:
            if row_data[col_idx] != '_':
                parts.append(row_data[col_idx])

        # parts가 비어있으면 '_', 아니면 '_'로 연결
        if parts:
            new_columns.append('_'.join(parts))
        else:
            new_columns.append('_')

    # 데이터프레임 재구성 (헤더 다음 행부터 데이터로 사용)
    data_start_idx = end_idx + 1
    df = df_raw.iloc[data_start_idx:].copy()
    df.columns = new_columns
    df = df.reset_index(drop=True)

    print(f"헤더 행: {header_rows[0]}행 ~ {header_rows[1]}행")
    print(f"생성된 컬럼 수: {len(new_columns)}")
    print(f"데이터 행 수: {len(df)}")

    # 컬럼명 샘플 출력
    print("\n생성된 컬럼명 (처음 10개):")
    for i, col in enumerate(new_columns[:10]):
        print(f"{i}: {col}")

    # 필터링 적용
    if filter_column and filter_value:
        # 필터링할 컬럼 찾기
        matching_col = None
        for col in df.columns:
            if filter_column in col:
                matching_col = col
                break

        if matching_col:
            print(f"\n'{filter_column}' 컬럼 발견: {matching_col}")

            # 필터링 적용
            filtered_df = df[df[matching_col] == filter_value].copy()

            print(f"필터링 전: {len(df)}행 → 필터링 후: {len(filtered_df)}행")
            print(f"'{matching_col}' == '{filter_value}'인 행만 추출")

            return filtered_df
        else:
            print(f"\n경고: '{filter_column}'을 포함하는 컬럼을 찾을 수 없습니다.")
            print("필터링 없이 전체 데이터를 반환합니다.")

    return df


# 사용 예시

# 1. 강원도지사선거 - 2~4행을 헤더로, '읍면동명'이 '합 계'인 행만
# url = "https://github.com/.../강원도지사선거.xlsx"
# summary_df = process_5th_governor_election(
#     url,
#     header_rows=(2, 4),
#     filter_column='읍면동명',
#     filter_value='합 계'
# )

# 2. 다른 파일 - 1~3행을 헤더로, 필터링 없이
# df_all = process_5th_governor_election(
#     'other_file.xlsx',
#     header_rows=(1, 3)
# )

# 3. 로컬 파일 - 기본값 사용
# df = process_5th_governor_election('강원도지사선거.xlsx')

## Seoul


In [2]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_seoul = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/01_%EC%84%9C%EC%9A%B8%ED%8A%B9%EB%B3%84%EC%8B%9C.xls'

seoul_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_seoul,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 12
데이터 행 수: 500

생성된 컬럼명 (처음 10개):
0: 구시군명
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_한나라당
오세훈
5: 민주당
한명숙
6: 자유선진당
지상욱
7: 진보신당
노회찬
8: 미래연합
석종현
9: 계

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 500행 → 필터링 후: 26행
'읍면동명' == '합계'인 행만 추출


In [3]:
seoul_5th

Unnamed: 0,구시군명,읍면동명,선거인수,투표수,후보자별 득표수_한나라당\n오세훈,민주당\n한명숙,자유선진당\n지상욱,진보신당\n노회찬,미래연합\n석종현,계,무효투표수,기권수
0,서울특별시,합계,8211461,4426182,2086127,2059715,90032,143459,18339,4397672,28510,3785279
1,종로구,합계,138917,77812,35476,36910,1719,2653,288,77046,766,61105
22,중구,합계,109000,60763,28648,28410,1198,1618,363,60237,526,48237
40,용산구,합계,198044,105005,53285,44706,2533,3160,480,104164,841,93039
59,성동구,합계,250316,135277,63448,63966,2534,3891,526,134365,912,115039
79,광진구,합계,301603,158963,72749,76420,3089,4878,707,157843,1120,142640
97,동대문구,합계,297206,160497,74536,76032,3272,4759,738,159337,1160,136709
114,중랑구,합계,343810,172251,80577,81517,3432,4757,670,170953,1298,171559
133,성북구,합계,383508,204667,91907,99316,4093,7236,808,203360,1307,178841
156,강북구,합계,276171,142061,63231,70115,2553,4539,569,141007,1054,134110


In [4]:
seoul_5th.columns.tolist()

['구시군명',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_한나라당\n오세훈',
 '민주당\n한명숙',
 '자유선진당\n지상욱',
 '진보신당\n노회찬',
 '미래연합\n석종현',
 '계',
 '무효투표수',
 '기권수']

In [5]:
rename_seoul = {
    '구시군명': '구시군',
    '후보자별 득표수_한나라당\n오세훈': '득표수_1_한나라당_오세훈',
    '민주당\n한명숙': '득표수_2_민주당_한명숙',
    '자유선진당\n지상욱': '득표수_3_자유선진당_지상욱',
    '진보신당\n노회찬': '득표수_7_진보신당_노회찬',
    '미래연합\n석종현': '득표수_8_미래연합_석종현',
    '계': '득표수_계'
    }

In [6]:
seoul_5th = seoul_5th.rename(columns=rename_seoul).drop(columns=['읍면동명'])
seoul_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_한나라당_오세훈,득표수_2_민주당_한명숙,득표수_3_자유선진당_지상욱,득표수_7_진보신당_노회찬,득표수_8_미래연합_석종현,득표수_계,무효투표수,기권수
0,서울특별시,8211461,4426182,2086127,2059715,90032,143459,18339,4397672,28510,3785279
1,종로구,138917,77812,35476,36910,1719,2653,288,77046,766,61105
22,중구,109000,60763,28648,28410,1198,1618,363,60237,526,48237
40,용산구,198044,105005,53285,44706,2533,3160,480,104164,841,93039
59,성동구,250316,135277,63448,63966,2534,3891,526,134365,912,115039
79,광진구,301603,158963,72749,76420,3089,4878,707,157843,1120,142640
97,동대문구,297206,160497,74536,76032,3272,4759,738,159337,1160,136709
114,중랑구,343810,172251,80577,81517,3432,4757,670,170953,1298,171559
133,성북구,383508,204667,91907,99316,4093,7236,808,203360,1307,178841
156,강북구,276171,142061,63231,70115,2553,4539,569,141007,1054,134110


In [7]:
seoul_5th = seoul_5th.assign(
    시도='서울특별시'
)[['시도'] + seoul_5th.columns.tolist()]

In [8]:
seoul_5th.loc[0, '구시군'] = '합계'

In [9]:
seoul_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_오세훈,득표수_2_민주당_한명숙,득표수_3_자유선진당_지상욱,득표수_7_진보신당_노회찬,득표수_8_미래연합_석종현,득표수_계,무효투표수,기권수
0,서울특별시,합계,8211461,4426182,2086127,2059715,90032,143459,18339,4397672,28510,3785279
1,서울특별시,종로구,138917,77812,35476,36910,1719,2653,288,77046,766,61105
22,서울특별시,중구,109000,60763,28648,28410,1198,1618,363,60237,526,48237
40,서울특별시,용산구,198044,105005,53285,44706,2533,3160,480,104164,841,93039
59,서울특별시,성동구,250316,135277,63448,63966,2534,3891,526,134365,912,115039
79,서울특별시,광진구,301603,158963,72749,76420,3089,4878,707,157843,1120,142640
97,서울특별시,동대문구,297206,160497,74536,76032,3272,4759,738,159337,1160,136709
114,서울특별시,중랑구,343810,172251,80577,81517,3432,4757,670,170953,1298,171559
133,서울특별시,성북구,383508,204667,91907,99316,4093,7236,808,203360,1307,178841
156,서울특별시,강북구,276171,142061,63231,70115,2553,4539,569,141007,1054,134110


In [10]:
seoul_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 26 entries, 0 to 479
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   시도               26 non-null     object
 1   구시군              26 non-null     object
 2   선거인수             26 non-null     object
 3   투표수              26 non-null     object
 4   득표수_1_한나라당_오세훈   26 non-null     object
 5   득표수_2_민주당_한명숙    26 non-null     object
 6   득표수_3_자유선진당_지상욱  26 non-null     object
 7   득표수_7_진보신당_노회찬   26 non-null     object
 8   득표수_8_미래연합_석종현   26 non-null     object
 9   득표수_계            26 non-null     object
 10  무효투표수            26 non-null     object
 11  기권수              26 non-null     object
dtypes: object(12)
memory usage: 3.7+ KB


In [11]:
seoul_5th = seoul_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [12]:
seoul_5th_with_total = seoul_5th

In [13]:
seoul_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_오세훈,득표수_2_민주당_한명숙,득표수_3_자유선진당_지상욱,득표수_7_진보신당_노회찬,득표수_8_미래연합_석종현,득표수_계,무효투표수,기권수
0,서울특별시,합계,8211461,4426182,2086127,2059715,90032,143459,18339,4397672,28510,3785279
1,서울특별시,종로구,138917,77812,35476,36910,1719,2653,288,77046,766,61105
22,서울특별시,중구,109000,60763,28648,28410,1198,1618,363,60237,526,48237
40,서울특별시,용산구,198044,105005,53285,44706,2533,3160,480,104164,841,93039
59,서울특별시,성동구,250316,135277,63448,63966,2534,3891,526,134365,912,115039
79,서울특별시,광진구,301603,158963,72749,76420,3089,4878,707,157843,1120,142640
97,서울특별시,동대문구,297206,160497,74536,76032,3272,4759,738,159337,1160,136709
114,서울특별시,중랑구,343810,172251,80577,81517,3432,4757,670,170953,1298,171559
133,서울특별시,성북구,383508,204667,91907,99316,4093,7236,808,203360,1307,178841
156,서울특별시,강북구,276171,142061,63231,70115,2553,4539,569,141007,1054,134110


In [14]:
seoul_5th_with_total.to_csv("temp1_governor_seoul_5.csv", index=False, encoding="utf-8-sig")


## Busan


In [15]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_busan = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/02_%EB%B6%80%EC%82%B0%EA%B4%91%EC%97%AD%EC%8B%9C.xls'

busan_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_busan,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 9
데이터 행 수: 264

생성된 컬럼명 (처음 10개):
0: 구시군명
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_한나라당
허남식
5: 민주당
김정길
6: 계
7: 무효투표수
8: 기권수

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 264행 → 필터링 후: 17행
'읍면동명' == '합계'인 행만 추출


In [16]:
busan_5th

Unnamed: 0,구시군명,읍면동명,선거인수,투표수,후보자별 득표수_한나라당\n허남식,민주당\n김정길,계,무효투표수,기권수
0,부산광역시,합계,2849895,1410126,770507,619565,1390072,20054,1439769
1,중구,합계,41637,21350,12901,7996,20897,453,20287
13,서구,합계,104507,49172,29328,19131,48459,713,55335
29,동구,합계,85200,43996,25784,17380,43164,832,41204
46,영도구,합계,122392,59112,30750,27332,58082,1030,63280
60,부산진구,합계,322385,157895,85247,70355,155602,2293,164490
88,동래구,합계,224693,109659,60901,47442,108343,1316,115034
105,남구,합계,240421,116776,63602,51993,115595,1181,123645
127,북구,합계,243993,121921,62561,57857,120418,1503,122072
143,해운대구,합계,328744,157525,85110,70341,155451,2074,171219


In [17]:
busan_5th.columns.tolist()

['구시군명',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_한나라당\n허남식',
 '민주당\n김정길',
 '계',
 '무효투표수',
 '기권수']

In [18]:
rename_busan = {
    '구시군명': '구시군',
    '후보자별 득표수_한나라당\n허남식': '득표수_1_한나라당_허남식',
    '민주당\n김정길': '득표수_2_민주당_김정길',
    '계': '득표수_계'
    }

In [19]:
busan_5th = busan_5th.rename(columns=rename_busan).drop(columns=['읍면동명'])
busan_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_한나라당_허남식,득표수_2_민주당_김정길,득표수_계,무효투표수,기권수
0,부산광역시,2849895,1410126,770507,619565,1390072,20054,1439769
1,중구,41637,21350,12901,7996,20897,453,20287
13,서구,104507,49172,29328,19131,48459,713,55335
29,동구,85200,43996,25784,17380,43164,832,41204
46,영도구,122392,59112,30750,27332,58082,1030,63280
60,부산진구,322385,157895,85247,70355,155602,2293,164490
88,동래구,224693,109659,60901,47442,108343,1316,115034
105,남구,240421,116776,63602,51993,115595,1181,123645
127,북구,243993,121921,62561,57857,120418,1503,122072
143,해운대구,328744,157525,85110,70341,155451,2074,171219


In [20]:
busan_5th = busan_5th.assign(
    시도='부산광역시'
)[['시도'] + busan_5th.columns.tolist()]

In [21]:
busan_5th.loc[0, '구시군'] = '합계'

In [22]:
busan_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_허남식,득표수_2_민주당_김정길,득표수_계,무효투표수,기권수
0,부산광역시,합계,2849895,1410126,770507,619565,1390072,20054,1439769
1,부산광역시,중구,41637,21350,12901,7996,20897,453,20287
13,부산광역시,서구,104507,49172,29328,19131,48459,713,55335
29,부산광역시,동구,85200,43996,25784,17380,43164,832,41204
46,부산광역시,영도구,122392,59112,30750,27332,58082,1030,63280
60,부산광역시,부산진구,322385,157895,85247,70355,155602,2293,164490
88,부산광역시,동래구,224693,109659,60901,47442,108343,1316,115034
105,부산광역시,남구,240421,116776,63602,51993,115595,1181,123645
127,부산광역시,북구,243993,121921,62561,57857,120418,1503,122072
143,부산광역시,해운대구,328744,157525,85110,70341,155451,2074,171219


In [23]:
busan_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 17 entries, 0 to 249
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   시도              17 non-null     object
 1   구시군             17 non-null     object
 2   선거인수            17 non-null     object
 3   투표수             17 non-null     object
 4   득표수_1_한나라당_허남식  17 non-null     object
 5   득표수_2_민주당_김정길   17 non-null     object
 6   득표수_계           17 non-null     object
 7   무효투표수           17 non-null     object
 8   기권수             17 non-null     object
dtypes: object(9)
memory usage: 1.9+ KB


In [24]:
busan_5th = busan_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [25]:
busan_5th_with_total = busan_5th

In [26]:
busan_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_허남식,득표수_2_민주당_김정길,득표수_계,무효투표수,기권수
0,부산광역시,합계,2849895,1410126,770507,619565,1390072,20054,1439769
1,부산광역시,중구,41637,21350,12901,7996,20897,453,20287
13,부산광역시,서구,104507,49172,29328,19131,48459,713,55335
29,부산광역시,동구,85200,43996,25784,17380,43164,832,41204
46,부산광역시,영도구,122392,59112,30750,27332,58082,1030,63280
60,부산광역시,부산진구,322385,157895,85247,70355,155602,2293,164490
88,부산광역시,동래구,224693,109659,60901,47442,108343,1316,115034
105,부산광역시,남구,240421,116776,63602,51993,115595,1181,123645
127,부산광역시,북구,243993,121921,62561,57857,120418,1503,122072
143,부산광역시,해운대구,328744,157525,85110,70341,155451,2074,171219


In [27]:
busan_5th_with_total.to_csv("temp1_governor_busan_5.csv", index=False, encoding="utf-8-sig")


## Daegu


In [28]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_daegu = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/03_%EB%8C%80%EA%B5%AC%EA%B4%91%EC%97%AD%EC%8B%9C.xls'

daegu_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_daegu,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 10
데이터 행 수: 168

생성된 컬럼명 (처음 10개):
0: 구시군명
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_한나라당
김범일
5: 민주당
이승천
6: 진보신당
조명래
7: 계
8: 무효투표수
9: 기권수

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 168행 → 필터링 후: 9행
'읍면동명' == '합계'인 행만 추출


In [29]:
daegu_5th

Unnamed: 0,구시군명,읍면동명,선거인수,투표수,후보자별 득표수_한나라당\n김범일,민주당\n이승천,진보신당\n조명래,계,무효투표수,기권수
0,대구광역시,합계,1928835,886035,633118,146458,88599,868175,17860,1042800
1,중구,합계,65102,31334,22764,4867,3074,30705,629,33768
17,동구,합계,266834,123904,87541,22096,11529,121166,2738,142930
40,서구,합계,183845,83922,61781,12012,7938,81731,2191,99923
60,남구,합계,142035,61391,45380,9059,5874,60313,1078,80644
76,북구,합계,339490,150682,106071,26213,15629,147913,2769,188808
103,수성구,합계,342990,166958,120726,25761,17209,163696,3262,176032
129,달서구,합계,454805,200176,141106,35453,20292,196851,3325,254629
156,달성군,합계,133734,67668,47749,10997,7054,65800,1868,66066


In [30]:
daegu_5th.columns.tolist()

['구시군명',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_한나라당\n김범일',
 '민주당\n이승천',
 '진보신당\n조명래',
 '계',
 '무효투표수',
 '기권수']

In [31]:
rename_daegu = {
    '구시군명': '구시군',
    '후보자별 득표수_한나라당\n김범일': '득표수_1_한나라당_김범일',
    '민주당\n이승천': '득표수_2_민주당_이승천',
    '진보신당\n조명래': '득표수_7_진보신당_조명래',
    '계': '득표수_계'
    }

In [32]:
daegu_5th = daegu_5th.rename(columns=rename_daegu).drop(columns=['읍면동명'])
daegu_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_한나라당_김범일,득표수_2_민주당_이승천,득표수_7_진보신당_조명래,득표수_계,무효투표수,기권수
0,대구광역시,1928835,886035,633118,146458,88599,868175,17860,1042800
1,중구,65102,31334,22764,4867,3074,30705,629,33768
17,동구,266834,123904,87541,22096,11529,121166,2738,142930
40,서구,183845,83922,61781,12012,7938,81731,2191,99923
60,남구,142035,61391,45380,9059,5874,60313,1078,80644
76,북구,339490,150682,106071,26213,15629,147913,2769,188808
103,수성구,342990,166958,120726,25761,17209,163696,3262,176032
129,달서구,454805,200176,141106,35453,20292,196851,3325,254629
156,달성군,133734,67668,47749,10997,7054,65800,1868,66066


In [33]:
daegu_5th = daegu_5th.assign(
    시도='대구광역시'
)[['시도'] + daegu_5th.columns.tolist()]

In [34]:
daegu_5th.loc[0, '구시군'] = '합계'

In [35]:
daegu_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_김범일,득표수_2_민주당_이승천,득표수_7_진보신당_조명래,득표수_계,무효투표수,기권수
0,대구광역시,합계,1928835,886035,633118,146458,88599,868175,17860,1042800
1,대구광역시,중구,65102,31334,22764,4867,3074,30705,629,33768
17,대구광역시,동구,266834,123904,87541,22096,11529,121166,2738,142930
40,대구광역시,서구,183845,83922,61781,12012,7938,81731,2191,99923
60,대구광역시,남구,142035,61391,45380,9059,5874,60313,1078,80644
76,대구광역시,북구,339490,150682,106071,26213,15629,147913,2769,188808
103,대구광역시,수성구,342990,166958,120726,25761,17209,163696,3262,176032
129,대구광역시,달서구,454805,200176,141106,35453,20292,196851,3325,254629
156,대구광역시,달성군,133734,67668,47749,10997,7054,65800,1868,66066


In [36]:
daegu_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 9 entries, 0 to 156
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   시도              9 non-null      object
 1   구시군             9 non-null      object
 2   선거인수            9 non-null      object
 3   투표수             9 non-null      object
 4   득표수_1_한나라당_김범일  9 non-null      object
 5   득표수_2_민주당_이승천   9 non-null      object
 6   득표수_7_진보신당_조명래  9 non-null      object
 7   득표수_계           9 non-null      object
 8   무효투표수           9 non-null      object
 9   기권수             9 non-null      object
dtypes: object(10)
memory usage: 1.1+ KB


In [37]:
daegu_5th = daegu_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [38]:
daegu_5th_with_total = daegu_5th

In [39]:
daegu_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_김범일,득표수_2_민주당_이승천,득표수_7_진보신당_조명래,득표수_계,무효투표수,기권수
0,대구광역시,합계,1928835,886035,633118,146458,88599,868175,17860,1042800
1,대구광역시,중구,65102,31334,22764,4867,3074,30705,629,33768
17,대구광역시,동구,266834,123904,87541,22096,11529,121166,2738,142930
40,대구광역시,서구,183845,83922,61781,12012,7938,81731,2191,99923
60,대구광역시,남구,142035,61391,45380,9059,5874,60313,1078,80644
76,대구광역시,북구,339490,150682,106071,26213,15629,147913,2769,188808
103,대구광역시,수성구,342990,166958,120726,25761,17209,163696,3262,176032
129,대구광역시,달서구,454805,200176,141106,35453,20292,196851,3325,254629
156,대구광역시,달성군,133734,67668,47749,10997,7054,65800,1868,66066


In [40]:
daegu_5th_with_total.to_csv("temp1_governor_daegu_5.csv", index=False, encoding="utf-8-sig")


## Incheon


In [41]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_incheon = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/04_%EC%9D%B8%EC%B2%9C%EA%B4%91%EC%97%AD%EC%8B%9C.xls'

incheon_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_incheon,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 11
데이터 행 수: 170

생성된 컬럼명 (처음 10개):
0: 구시군명
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_한나라당
안상수
5: 민주당
송영길
6: 진보신당
김상하
7: 평화민주당
백석두
8: 계
9: 무효투표수

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 170행 → 필터링 후: 11행
'읍면동명' == '합계'인 행만 추출


In [42]:
incheon_5th

Unnamed: 0,구시군명,읍면동명,선거인수,투표수,후보자별 득표수_한나라당\n안상수,민주당\n송영길,진보신당\n김상하,평화민주당\n백석두,계,무효투표수,기권수
0,인천광역시,합계,2096853,1067431,469040,556902,19580,11258,1056780,10651,1029422
1,중구,합계,73354,37876,17846,18216,827,458,37347,529,35478
14,동구,합계,62253,35099,15714,17486,968,421,34589,510,27154
28,남구,합계,337880,165592,75286,84170,2823,1721,164000,1592,172288
52,연수구,합계,206407,110292,53027,53495,1978,974,109474,818,96115
66,남동구,합계,355642,177496,76011,95283,2993,1764,176051,1445,178146
87,부평구,합계,435655,218198,88370,121650,4190,2258,216468,1730,217457
112,계양구,합계,259089,131148,51211,75847,1869,1291,130218,930,127941
126,서구,합계,294322,143331,61389,76308,2586,1783,142066,1265,150991
145,강화군,합계,56586,37059,23160,10946,1103,386,35595,1464,19527


In [43]:
incheon_5th.columns.tolist()

['구시군명',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_한나라당\n안상수',
 '민주당\n송영길',
 '진보신당\n김상하',
 '평화민주당\n백석두',
 '계',
 '무효투표수',
 '기권수']

In [44]:
rename_incheon = {
    '구시군명': '구시군',
    '후보자별 득표수_한나라당\n안상수': '득표수_1_한나라당_안상수',
    '민주당\n송영길': '득표수_2_민주당_송영길',
    '진보신당\n김상하': '득표수_7_진보신당_김상하',
    '평화민주당\n백석두': '득표수_8_평화민주당_백석두',
    '계': '득표수_계'
    }

In [45]:
incheon_5th = incheon_5th.rename(columns=rename_incheon).drop(columns=['읍면동명'])
incheon_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_한나라당_안상수,득표수_2_민주당_송영길,득표수_7_진보신당_김상하,득표수_8_평화민주당_백석두,득표수_계,무효투표수,기권수
0,인천광역시,2096853,1067431,469040,556902,19580,11258,1056780,10651,1029422
1,중구,73354,37876,17846,18216,827,458,37347,529,35478
14,동구,62253,35099,15714,17486,968,421,34589,510,27154
28,남구,337880,165592,75286,84170,2823,1721,164000,1592,172288
52,연수구,206407,110292,53027,53495,1978,974,109474,818,96115
66,남동구,355642,177496,76011,95283,2993,1764,176051,1445,178146
87,부평구,435655,218198,88370,121650,4190,2258,216468,1730,217457
112,계양구,259089,131148,51211,75847,1869,1291,130218,930,127941
126,서구,294322,143331,61389,76308,2586,1783,142066,1265,150991
145,강화군,56586,37059,23160,10946,1103,386,35595,1464,19527


In [46]:
incheon_5th = incheon_5th.assign(
    시도='인천광역시'
)[['시도'] + incheon_5th.columns.tolist()]

In [47]:
incheon_5th.loc[0, '구시군'] = '합계'

In [48]:
incheon_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_안상수,득표수_2_민주당_송영길,득표수_7_진보신당_김상하,득표수_8_평화민주당_백석두,득표수_계,무효투표수,기권수
0,인천광역시,합계,2096853,1067431,469040,556902,19580,11258,1056780,10651,1029422
1,인천광역시,중구,73354,37876,17846,18216,827,458,37347,529,35478
14,인천광역시,동구,62253,35099,15714,17486,968,421,34589,510,27154
28,인천광역시,남구,337880,165592,75286,84170,2823,1721,164000,1592,172288
52,인천광역시,연수구,206407,110292,53027,53495,1978,974,109474,818,96115
66,인천광역시,남동구,355642,177496,76011,95283,2993,1764,176051,1445,178146
87,인천광역시,부평구,435655,218198,88370,121650,4190,2258,216468,1730,217457
112,인천광역시,계양구,259089,131148,51211,75847,1869,1291,130218,930,127941
126,인천광역시,서구,294322,143331,61389,76308,2586,1783,142066,1265,150991
145,인천광역시,강화군,56586,37059,23160,10946,1103,386,35595,1464,19527


In [49]:
incheon_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 11 entries, 0 to 161
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   시도               11 non-null     object
 1   구시군              11 non-null     object
 2   선거인수             11 non-null     object
 3   투표수              11 non-null     object
 4   득표수_1_한나라당_안상수   11 non-null     object
 5   득표수_2_민주당_송영길    11 non-null     object
 6   득표수_7_진보신당_김상하   11 non-null     object
 7   득표수_8_평화민주당_백석두  11 non-null     object
 8   득표수_계            11 non-null     object
 9   무효투표수            11 non-null     object
 10  기권수              11 non-null     object
dtypes: object(11)
memory usage: 1.3+ KB


In [50]:
incheon_5th = incheon_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [51]:
incheon_5th_with_total = incheon_5th

In [52]:
incheon_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_안상수,득표수_2_민주당_송영길,득표수_7_진보신당_김상하,득표수_8_평화민주당_백석두,득표수_계,무효투표수,기권수
0,인천광역시,합계,2096853,1067431,469040,556902,19580,11258,1056780,10651,1029422
1,인천광역시,중구,73354,37876,17846,18216,827,458,37347,529,35478
14,인천광역시,동구,62253,35099,15714,17486,968,421,34589,510,27154
28,인천광역시,남구,337880,165592,75286,84170,2823,1721,164000,1592,172288
52,인천광역시,연수구,206407,110292,53027,53495,1978,974,109474,818,96115
66,인천광역시,남동구,355642,177496,76011,95283,2993,1764,176051,1445,178146
87,인천광역시,부평구,435655,218198,88370,121650,4190,2258,216468,1730,217457
112,인천광역시,계양구,259089,131148,51211,75847,1869,1291,130218,930,127941
126,인천광역시,서구,294322,143331,61389,76308,2586,1783,142066,1265,150991
145,인천광역시,강화군,56586,37059,23160,10946,1103,386,35595,1464,19527


In [53]:
incheon_5th_with_total.to_csv("temp1_governor_incheon_5.csv", index=False, encoding="utf-8-sig")


## Gwangju


In [54]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_gwangju = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/05_%EA%B4%91%EC%A3%BC%EA%B4%91%EC%97%AD%EC%8B%9C.xls'

gwangju_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_gwangju,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 13
데이터 행 수: 108

생성된 컬럼명 (처음 10개):
0: 구시군명
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_한나라당
정용화
5: 민주당
강운태
6: 민주노동당
장원섭
7: 진보신당
윤난실
8: 국민참여당
정찬용
9: 평화민주당
조홍규

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 108행 → 필터링 후: 6행
'읍면동명' == '합계'인 행만 추출


In [55]:
gwangju_5th

Unnamed: 0,구시군명,읍면동명,선거인수,투표수,후보자별 득표수_한나라당\n정용화,민주당\n강운태,민주노동당\n장원섭,진보신당\n윤난실,국민참여당\n정찬용,평화민주당\n조홍규,계,무효투표수,기권수
0,광주광역시,합계,1064913,529901,74490,297003,39455,30834,75830,5871,523483,6418,535012
1,동구,합계,84206,44017,8096,23991,2178,2360,6247,520,43392,625,40189
17,서구,합계,222260,113765,17783,61188,8643,6657,16981,1148,112400,1365,108495
37,남구,합계,164726,87054,12510,53512,4336,3929,10883,847,86017,1037,77672
56,북구,합계,351029,168022,22380,93471,13361,10385,25015,1663,166275,1747,183007
85,광산구,합계,242692,117043,13721,64841,10937,7503,16704,1693,115399,1644,125649


In [56]:
gwangju_5th.columns.tolist()

['구시군명',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_한나라당\n정용화',
 '민주당\n강운태',
 '민주노동당\n장원섭',
 '진보신당\n윤난실',
 '국민참여당\n정찬용',
 '평화민주당\n조홍규',
 '계',
 '무효투표수',
 '기권수']

In [57]:
rename_gwangju = {
    '구시군명': '구시군',
    '후보자별 득표수_한나라당\n정용화': '득표수_1_한나라당_정용화',
    '민주당\n강운태': '득표수_2_민주당_강운태',
    '민주노동당\n장원섭': '득표수_5_민주노동당_장원섭',
    '진보신당\n윤난실': '득표수_7_진보신당_윤난실',
    '국민참여당\n정찬용': '득표수_8_국민참여당_정찬용',
    '평화민주당\n조홍규': '득표수_9_평화민주당_조홍규',
    '계': '득표수_계'
    }

In [58]:
gwangju_5th = gwangju_5th.rename(columns=rename_gwangju).drop(columns=['읍면동명'])
gwangju_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_한나라당_정용화,득표수_2_민주당_강운태,득표수_5_민주노동당_장원섭,득표수_7_진보신당_윤난실,득표수_8_국민참여당_정찬용,득표수_9_평화민주당_조홍규,득표수_계,무효투표수,기권수
0,광주광역시,1064913,529901,74490,297003,39455,30834,75830,5871,523483,6418,535012
1,동구,84206,44017,8096,23991,2178,2360,6247,520,43392,625,40189
17,서구,222260,113765,17783,61188,8643,6657,16981,1148,112400,1365,108495
37,남구,164726,87054,12510,53512,4336,3929,10883,847,86017,1037,77672
56,북구,351029,168022,22380,93471,13361,10385,25015,1663,166275,1747,183007
85,광산구,242692,117043,13721,64841,10937,7503,16704,1693,115399,1644,125649


In [59]:
gwangju_5th = gwangju_5th.assign(
    시도='광주광역시'
)[['시도'] + gwangju_5th.columns.tolist()]

In [60]:
gwangju_5th.loc[0, '구시군'] = '합계'

In [61]:
gwangju_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_정용화,득표수_2_민주당_강운태,득표수_5_민주노동당_장원섭,득표수_7_진보신당_윤난실,득표수_8_국민참여당_정찬용,득표수_9_평화민주당_조홍규,득표수_계,무효투표수,기권수
0,광주광역시,합계,1064913,529901,74490,297003,39455,30834,75830,5871,523483,6418,535012
1,광주광역시,동구,84206,44017,8096,23991,2178,2360,6247,520,43392,625,40189
17,광주광역시,서구,222260,113765,17783,61188,8643,6657,16981,1148,112400,1365,108495
37,광주광역시,남구,164726,87054,12510,53512,4336,3929,10883,847,86017,1037,77672
56,광주광역시,북구,351029,168022,22380,93471,13361,10385,25015,1663,166275,1747,183007
85,광주광역시,광산구,242692,117043,13721,64841,10937,7503,16704,1693,115399,1644,125649


In [62]:
gwangju_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 0 to 85
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   시도               6 non-null      object
 1   구시군              6 non-null      object
 2   선거인수             6 non-null      object
 3   투표수              6 non-null      object
 4   득표수_1_한나라당_정용화   6 non-null      object
 5   득표수_2_민주당_강운태    6 non-null      object
 6   득표수_5_민주노동당_장원섭  6 non-null      object
 7   득표수_7_진보신당_윤난실   6 non-null      object
 8   득표수_8_국민참여당_정찬용  6 non-null      object
 9   득표수_9_평화민주당_조홍규  6 non-null      object
 10  득표수_계            6 non-null      object
 11  무효투표수            6 non-null      object
 12  기권수              6 non-null      object
dtypes: object(13)
memory usage: 844.0+ bytes


In [63]:
gwangju_5th = gwangju_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [64]:
gwangju_5th_with_total = gwangju_5th

In [65]:
gwangju_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_정용화,득표수_2_민주당_강운태,득표수_5_민주노동당_장원섭,득표수_7_진보신당_윤난실,득표수_8_국민참여당_정찬용,득표수_9_평화민주당_조홍규,득표수_계,무효투표수,기권수
0,광주광역시,합계,1064913,529901,74490,297003,39455,30834,75830,5871,523483,6418,535012
1,광주광역시,동구,84206,44017,8096,23991,2178,2360,6247,520,43392,625,40189
17,광주광역시,서구,222260,113765,17783,61188,8643,6657,16981,1148,112400,1365,108495
37,광주광역시,남구,164726,87054,12510,53512,4336,3929,10883,847,86017,1037,77672
56,광주광역시,북구,351029,168022,22380,93471,13361,10385,25015,1663,166275,1747,183007
85,광주광역시,광산구,242692,117043,13721,64841,10937,7503,16704,1693,115399,1644,125649


In [66]:
gwangju_5th_with_total.to_csv("temp1_governor_gwangju_5.csv", index=False, encoding="utf-8-sig")


## Daejeon


In [67]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_daejeon = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/06_%EB%8C%80%EC%A0%84%EA%B4%91%EC%97%AD%EC%8B%9C.xls'

daejeon_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_daejeon,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 11
데이터 행 수: 93

생성된 컬럼명 (처음 10개):
0: 구시군명
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_한나라당
박성효
5: 민주당
김원웅
6: 자유선진당
염홍철
7: 진보신당
김윤기
8: 계
9: 무효투표수

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 93행 → 필터링 후: 6행
'읍면동명' == '합계'인 행만 추출


In [68]:
daejeon_5th

Unnamed: 0,구시군명,읍면동명,선거인수,투표수,후보자별 득표수_한나라당\n박성효,민주당\n김원웅,자유선진당\n염홍철,진보신당\n김윤기,계,무효투표수,기권수
0,대전광역시,합계,1127547,596683,168616,137751,276122,9074,591563,5120,530864
1,동구,합계,195072,97967,28132,21795,45415,1535,96877,1090,97105
20,중구,합계,206384,109744,34128,21922,51150,1538,108738,1006,96640
40,서구,합계,375073,197443,55962,44371,92799,2927,196059,1384,177630
66,유성구,합계,193868,109646,28689,29079,49288,1831,108887,759,84222
78,대덕구,합계,157150,81883,21705,20584,37470,1243,81002,881,75267


In [69]:
daejeon_5th.columns.tolist()

['구시군명',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_한나라당\n박성효',
 '민주당\n김원웅',
 '자유선진당\n염홍철',
 '진보신당\n김윤기',
 '계',
 '무효투표수',
 '기권수']

In [70]:
rename_daejeon = {
    '구시군명': '구시군',
    '후보자별 득표수_한나라당\n박성효': '득표수_1_한나라당_박성효',
    '민주당\n김원웅': '득표수_2_민주당_김원웅',
    '자유선진당\n염홍철': '득표수_3_자유선진당_염홍철',
    '진보신당\n김윤기': '득표수_7_진보신당_김윤기',
    '계': '득표수_계'
    }

In [71]:
daejeon_5th = daejeon_5th.rename(columns=rename_daejeon).drop(columns=['읍면동명'])
daejeon_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_한나라당_박성효,득표수_2_민주당_김원웅,득표수_3_자유선진당_염홍철,득표수_7_진보신당_김윤기,득표수_계,무효투표수,기권수
0,대전광역시,1127547,596683,168616,137751,276122,9074,591563,5120,530864
1,동구,195072,97967,28132,21795,45415,1535,96877,1090,97105
20,중구,206384,109744,34128,21922,51150,1538,108738,1006,96640
40,서구,375073,197443,55962,44371,92799,2927,196059,1384,177630
66,유성구,193868,109646,28689,29079,49288,1831,108887,759,84222
78,대덕구,157150,81883,21705,20584,37470,1243,81002,881,75267


In [72]:
daejeon_5th = daejeon_5th.assign(
    시도='대전광역시'
)[['시도'] + daejeon_5th.columns.tolist()]

In [73]:
daejeon_5th.loc[0, '구시군'] = '합계'

In [74]:
daejeon_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_박성효,득표수_2_민주당_김원웅,득표수_3_자유선진당_염홍철,득표수_7_진보신당_김윤기,득표수_계,무효투표수,기권수
0,대전광역시,합계,1127547,596683,168616,137751,276122,9074,591563,5120,530864
1,대전광역시,동구,195072,97967,28132,21795,45415,1535,96877,1090,97105
20,대전광역시,중구,206384,109744,34128,21922,51150,1538,108738,1006,96640
40,대전광역시,서구,375073,197443,55962,44371,92799,2927,196059,1384,177630
66,대전광역시,유성구,193868,109646,28689,29079,49288,1831,108887,759,84222
78,대전광역시,대덕구,157150,81883,21705,20584,37470,1243,81002,881,75267


In [75]:
daejeon_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 0 to 78
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   시도               6 non-null      object
 1   구시군              6 non-null      object
 2   선거인수             6 non-null      object
 3   투표수              6 non-null      object
 4   득표수_1_한나라당_박성효   6 non-null      object
 5   득표수_2_민주당_김원웅    6 non-null      object
 6   득표수_3_자유선진당_염홍철  6 non-null      object
 7   득표수_7_진보신당_김윤기   6 non-null      object
 8   득표수_계            6 non-null      object
 9   무효투표수            6 non-null      object
 10  기권수              6 non-null      object
dtypes: object(11)
memory usage: 748.0+ bytes


In [76]:
daejeon_5th = daejeon_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [77]:
daejeon_5th_with_total = daejeon_5th

In [78]:
daejeon_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_박성효,득표수_2_민주당_김원웅,득표수_3_자유선진당_염홍철,득표수_7_진보신당_김윤기,득표수_계,무효투표수,기권수
0,대전광역시,합계,1127547,596683,168616,137751,276122,9074,591563,5120,530864
1,대전광역시,동구,195072,97967,28132,21795,45415,1535,96877,1090,97105
20,대전광역시,중구,206384,109744,34128,21922,51150,1538,108738,1006,96640
40,대전광역시,서구,375073,197443,55962,44371,92799,2927,196059,1384,177630
66,대전광역시,유성구,193868,109646,28689,29079,49288,1831,108887,759,84222
78,대전광역시,대덕구,157150,81883,21705,20584,37470,1243,81002,881,75267


In [79]:
daejeon_5th_with_total.to_csv("temp1_governor_daejeon_5.csv", index=False, encoding="utf-8-sig")


## Ulsan


In [80]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_ulsan = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/07_%EC%9A%B8%EC%82%B0%EA%B4%91%EC%97%AD%EC%8B%9C.xls'

ulsan_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_ulsan,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 10
데이터 행 수: 72

생성된 컬럼명 (처음 10개):
0: 구시군명
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_한나라당
박맹우
5: 민주노동당
김창현
6: 진보신당
노옥희
7: 계
8: 무효투표수
9: 기권수

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 72행 → 필터링 후: 6행
'읍면동명' == '합계'인 행만 추출


In [81]:
ulsan_5th

Unnamed: 0,구시군명,읍면동명,선거인수,투표수,후보자별 득표수_한나라당\n박맹우,민주노동당\n김창현,진보신당\n노옥희,계,무효투표수,기권수
0,울산광역시,합계,838805,462103,279421,133437,43256,456114,5989,376702
1,중구,합계,178158,98144,63911,24441,8265,96617,1527,80014
17,남구,합계,258699,137133,86398,38703,10599,135700,1433,121566
34,동구,합계,132325,76495,41337,24839,9490,75666,829,55830
46,북구,합계,122563,67957,35719,24584,6957,67260,697,54606
57,울주군,합계,147060,82374,52056,20870,7945,80871,1503,64686


In [82]:
ulsan_5th.columns.tolist()

['구시군명',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_한나라당\n박맹우',
 '민주노동당\n김창현',
 '진보신당\n노옥희',
 '계',
 '무효투표수',
 '기권수']

In [83]:
rename_ulsan = {
    '구시군명': '구시군',
    '후보자별 득표수_한나라당\n박맹우': '득표수_1_한나라당_박맹우',
    '민주노동당\n김창현': '득표수_5_민주노동당_김창현',
    '진보신당\n노옥희': '득표수_7_진보신당_노옥희',
    '계': '득표수_계'
    }

In [84]:
ulsan_5th = ulsan_5th.rename(columns=rename_ulsan).drop(columns=['읍면동명'])
ulsan_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_한나라당_박맹우,득표수_5_민주노동당_김창현,득표수_7_진보신당_노옥희,득표수_계,무효투표수,기권수
0,울산광역시,838805,462103,279421,133437,43256,456114,5989,376702
1,중구,178158,98144,63911,24441,8265,96617,1527,80014
17,남구,258699,137133,86398,38703,10599,135700,1433,121566
34,동구,132325,76495,41337,24839,9490,75666,829,55830
46,북구,122563,67957,35719,24584,6957,67260,697,54606
57,울주군,147060,82374,52056,20870,7945,80871,1503,64686


In [85]:
ulsan_5th = ulsan_5th.assign(
    시도='울산광역시'
)[['시도'] + ulsan_5th.columns.tolist()]

In [86]:
ulsan_5th.loc[0, '구시군'] = '합계'

In [87]:
ulsan_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_박맹우,득표수_5_민주노동당_김창현,득표수_7_진보신당_노옥희,득표수_계,무효투표수,기권수
0,울산광역시,합계,838805,462103,279421,133437,43256,456114,5989,376702
1,울산광역시,중구,178158,98144,63911,24441,8265,96617,1527,80014
17,울산광역시,남구,258699,137133,86398,38703,10599,135700,1433,121566
34,울산광역시,동구,132325,76495,41337,24839,9490,75666,829,55830
46,울산광역시,북구,122563,67957,35719,24584,6957,67260,697,54606
57,울산광역시,울주군,147060,82374,52056,20870,7945,80871,1503,64686


In [88]:
ulsan_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6 entries, 0 to 57
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   시도               6 non-null      object
 1   구시군              6 non-null      object
 2   선거인수             6 non-null      object
 3   투표수              6 non-null      object
 4   득표수_1_한나라당_박맹우   6 non-null      object
 5   득표수_5_민주노동당_김창현  6 non-null      object
 6   득표수_7_진보신당_노옥희   6 non-null      object
 7   득표수_계            6 non-null      object
 8   무효투표수            6 non-null      object
 9   기권수              6 non-null      object
dtypes: object(10)
memory usage: 700.0+ bytes


In [89]:
ulsan_5th = ulsan_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [90]:
ulsan_5th_with_total = ulsan_5th

In [91]:
ulsan_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_박맹우,득표수_5_민주노동당_김창현,득표수_7_진보신당_노옥희,득표수_계,무효투표수,기권수
0,울산광역시,합계,838805,462103,279421,133437,43256,456114,5989,376702
1,울산광역시,중구,178158,98144,63911,24441,8265,96617,1527,80014
17,울산광역시,남구,258699,137133,86398,38703,10599,135700,1433,121566
34,울산광역시,동구,132325,76495,41337,24839,9490,75666,829,55830
46,울산광역시,북구,122563,67957,35719,24584,6957,67260,697,54606
57,울산광역시,울주군,147060,82374,52056,20870,7945,80871,1503,64686


In [92]:
ulsan_5th_with_total.to_csv("temp1_governor_ulsan_5.csv", index=False, encoding="utf-8-sig")


## Gyeonggi


In [93]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_gyeonggi = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/08_%EA%B2%BD%EA%B8%B0%EB%8F%84.xls'

gyeonggi_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_gyeonggi,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 9
데이터 행 수: 666

생성된 컬럼명 (처음 10개):
0: 구시군명
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_한나라당
김문수
5: 국민참여당
유시민
6: 계
7: 무효투표수
8: 기권수

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 666행 → 필터링 후: 45행
'읍면동명' == '합계'인 행만 추출


In [94]:
gyeonggi_5th

Unnamed: 0,구시군명,읍면동명,선거인수,투표수,후보자별 득표수_한나라당\n김문수,국민참여당\n유시민,계,무효투표수,기권수
0,경기도,합계,8761840,4534771,2271492,2079892,4351384,183387,4227069
1,수원시장안구,합계,218041,118498,56126,58386,114512,3986,99543
14,수원시권선구,합계,229747,113478,53993,55330,109323,4155,116269
28,수원시팔달구,합계,174518,85692,43066,39357,82423,3269,88826
41,수원시영통구,합계,183747,101911,46917,52842,99759,2152,81836
51,성남시수정구,합계,194233,92330,38249,48655,86904,5426,101903
70,성남시중원구,합계,205070,97065,41382,50010,91392,5673,108005
84,성남시분당구,합계,360189,208482,117016,87384,204400,4082,151707
108,의정부시,합계,328299,161722,80574,73877,154451,7271,166577
126,안양시만안구,합계,208176,111311,52386,54447,106833,4478,96865


In [95]:
gyeonggi_5th.columns.tolist()

['구시군명',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_한나라당\n김문수',
 '국민참여당\n유시민',
 '계',
 '무효투표수',
 '기권수']

In [96]:
rename_gyeonggi = {
    '구시군명': '구시군',
    '후보자별 득표수_한나라당\n김문수': '득표수_1_한나라당_김문수',
    '국민참여당\n유시민': '득표수_8_국민참여당_유시민',
    '계': '득표수_계'
    }

In [97]:
gyeonggi_5th = gyeonggi_5th.rename(columns=rename_gyeonggi).drop(columns=['읍면동명'])
gyeonggi_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_한나라당_김문수,득표수_8_국민참여당_유시민,득표수_계,무효투표수,기권수
0,경기도,8761840,4534771,2271492,2079892,4351384,183387,4227069
1,수원시장안구,218041,118498,56126,58386,114512,3986,99543
14,수원시권선구,229747,113478,53993,55330,109323,4155,116269
28,수원시팔달구,174518,85692,43066,39357,82423,3269,88826
41,수원시영통구,183747,101911,46917,52842,99759,2152,81836
51,성남시수정구,194233,92330,38249,48655,86904,5426,101903
70,성남시중원구,205070,97065,41382,50010,91392,5673,108005
84,성남시분당구,360189,208482,117016,87384,204400,4082,151707
108,의정부시,328299,161722,80574,73877,154451,7271,166577
126,안양시만안구,208176,111311,52386,54447,106833,4478,96865


In [98]:
gyeonggi_5th = gyeonggi_5th.assign(
    시도='경기도'
)[['시도'] + gyeonggi_5th.columns.tolist()]

In [99]:
gyeonggi_5th.loc[0, '구시군'] = '합계'

In [100]:
gyeonggi_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_김문수,득표수_8_국민참여당_유시민,득표수_계,무효투표수,기권수
0,경기도,합계,8761840,4534771,2271492,2079892,4351384,183387,4227069
1,경기도,수원시장안구,218041,118498,56126,58386,114512,3986,99543
14,경기도,수원시권선구,229747,113478,53993,55330,109323,4155,116269
28,경기도,수원시팔달구,174518,85692,43066,39357,82423,3269,88826
41,경기도,수원시영통구,183747,101911,46917,52842,99759,2152,81836
51,경기도,성남시수정구,194233,92330,38249,48655,86904,5426,101903
70,경기도,성남시중원구,205070,97065,41382,50010,91392,5673,108005
84,경기도,성남시분당구,360189,208482,117016,87384,204400,4082,151707
108,경기도,의정부시,328299,161722,80574,73877,154451,7271,166577
126,경기도,안양시만안구,208176,111311,52386,54447,106833,4478,96865


In [101]:
gyeonggi_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 45 entries, 0 to 658
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   시도               45 non-null     object
 1   구시군              45 non-null     object
 2   선거인수             45 non-null     object
 3   투표수              45 non-null     object
 4   득표수_1_한나라당_김문수   45 non-null     object
 5   득표수_8_국민참여당_유시민  45 non-null     object
 6   득표수_계            45 non-null     object
 7   무효투표수            45 non-null     object
 8   기권수              45 non-null     object
dtypes: object(9)
memory usage: 4.6+ KB


In [102]:
gyeonggi_5th = gyeonggi_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [103]:
gyeonggi_5th_with_total = gyeonggi_5th

In [104]:
gyeonggi_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_김문수,득표수_8_국민참여당_유시민,득표수_계,무효투표수,기권수
0,경기도,합계,8761840,4534771,2271492,2079892,4351384,183387,4227069
1,경기도,수원시장안구,218041,118498,56126,58386,114512,3986,99543
14,경기도,수원시권선구,229747,113478,53993,55330,109323,4155,116269
28,경기도,수원시팔달구,174518,85692,43066,39357,82423,3269,88826
41,경기도,수원시영통구,183747,101911,46917,52842,99759,2152,81836
51,경기도,성남시수정구,194233,92330,38249,48655,86904,5426,101903
70,경기도,성남시중원구,205070,97065,41382,50010,91392,5673,108005
84,경기도,성남시분당구,360189,208482,117016,87384,204400,4082,151707
108,경기도,의정부시,328299,161722,80574,73877,154451,7271,166577
126,경기도,안양시만안구,208176,111311,52386,54447,106833,4478,96865


In [105]:
gyeonggi_5th_with_total.to_csv("temp1_governor_gyeonggi_5.csv", index=False, encoding="utf-8-sig")


## Gangwon


In [106]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_gangwon = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/09_%EA%B0%95%EC%9B%90%EB%8F%84.xls'

gangwon_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_gangwon,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 9
데이터 행 수: 241

생성된 컬럼명 (처음 10개):
0: 구시군명
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_한나라당
이계진
5: 민주당
이광재
6: 계
7: 무효투표수
8: 기권수

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 241행 → 필터링 후: 19행
'읍면동명' == '합계'인 행만 추출


In [107]:
gangwon_5th

Unnamed: 0,구시군명,읍면동명,선거인수,투표수,후보자별 득표수_한나라당\n이계진,민주당\n이광재,계,무효투표수,기권수
0,강원도,합계,1190509,741724,326111,388443,714554,27170,448785
1,춘천시,합계,206172,124839,47183,73858,121041,3798,81333
29,원주시,합계,234095,133502,58889,70966,129855,3647,100593
57,강릉시,합계,170703,99246,45759,50120,95879,3367,71457
81,동해시,합계,73611,44933,21132,22150,43282,1651,28678
94,삼척시,합계,58602,40390,17971,20511,38482,1908,18212
108,태백시,합계,40415,26819,9958,15946,25904,915,13596
119,정선군,합계,33811,24529,8311,15190,23501,1028,9282
131,속초시,합계,64606,37582,19112,17209,36321,1261,27024
142,고성군,합계,25324,18943,10352,7584,17936,1007,6381


In [108]:
gangwon_5th.columns.tolist()

['구시군명',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_한나라당\n이계진',
 '민주당\n이광재',
 '계',
 '무효투표수',
 '기권수']

In [109]:
rename_gangwon = {
    '구시군명': '구시군',
    '후보자별 득표수_한나라당\n이계진': '득표수_1_한나라당_이계진',
    '민주당\n이광재': '득표수_2_민주당_이광재',
    '계': '득표수_계'
    }

In [110]:
gangwon_5th = gangwon_5th.rename(columns=rename_gangwon).drop(columns=['읍면동명'])
gangwon_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_한나라당_이계진,득표수_2_민주당_이광재,득표수_계,무효투표수,기권수
0,강원도,1190509,741724,326111,388443,714554,27170,448785
1,춘천시,206172,124839,47183,73858,121041,3798,81333
29,원주시,234095,133502,58889,70966,129855,3647,100593
57,강릉시,170703,99246,45759,50120,95879,3367,71457
81,동해시,73611,44933,21132,22150,43282,1651,28678
94,삼척시,58602,40390,17971,20511,38482,1908,18212
108,태백시,40415,26819,9958,15946,25904,915,13596
119,정선군,33811,24529,8311,15190,23501,1028,9282
131,속초시,64606,37582,19112,17209,36321,1261,27024
142,고성군,25324,18943,10352,7584,17936,1007,6381


In [111]:
gangwon_5th = gangwon_5th.assign(
    시도='강원도'
)[['시도'] + gangwon_5th.columns.tolist()]

In [112]:
gangwon_5th.loc[0, '구시군'] = '합계'

In [113]:
gangwon_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_이계진,득표수_2_민주당_이광재,득표수_계,무효투표수,기권수
0,강원도,합계,1190509,741724,326111,388443,714554,27170,448785
1,강원도,춘천시,206172,124839,47183,73858,121041,3798,81333
29,강원도,원주시,234095,133502,58889,70966,129855,3647,100593
57,강원도,강릉시,170703,99246,45759,50120,95879,3367,71457
81,강원도,동해시,73611,44933,21132,22150,43282,1651,28678
94,강원도,삼척시,58602,40390,17971,20511,38482,1908,18212
108,강원도,태백시,40415,26819,9958,15946,25904,915,13596
119,강원도,정선군,33811,24529,8311,15190,23501,1028,9282
131,강원도,속초시,64606,37582,19112,17209,36321,1261,27024
142,강원도,고성군,25324,18943,10352,7584,17936,1007,6381


In [114]:
gangwon_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 19 entries, 0 to 231
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   시도              19 non-null     object
 1   구시군             19 non-null     object
 2   선거인수            19 non-null     object
 3   투표수             19 non-null     object
 4   득표수_1_한나라당_이계진  19 non-null     object
 5   득표수_2_민주당_이광재   19 non-null     object
 6   득표수_계           19 non-null     object
 7   무효투표수           19 non-null     object
 8   기권수             19 non-null     object
dtypes: object(9)
memory usage: 2.0+ KB


In [115]:
gangwon_5th = gangwon_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [116]:
gangwon_5th_with_total = gangwon_5th

In [117]:
gangwon_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_이계진,득표수_2_민주당_이광재,득표수_계,무효투표수,기권수
0,강원도,합계,1190509,741724,326111,388443,714554,27170,448785
1,강원도,춘천시,206172,124839,47183,73858,121041,3798,81333
29,강원도,원주시,234095,133502,58889,70966,129855,3647,100593
57,강원도,강릉시,170703,99246,45759,50120,95879,3367,71457
81,강원도,동해시,73611,44933,21132,22150,43282,1651,28678
94,강원도,삼척시,58602,40390,17971,20511,38482,1908,18212
108,강원도,태백시,40415,26819,9958,15946,25904,915,13596
119,강원도,정선군,33811,24529,8311,15190,23501,1028,9282
131,강원도,속초시,64606,37582,19112,17209,36321,1261,27024
142,강원도,고성군,25324,18943,10352,7584,17936,1007,6381


In [118]:
gangwon_5th_with_total.to_csv("temp1_governor_gangwon_5.csv", index=False, encoding="utf-8-sig")


## Chungbuk


In [119]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_chungbuk = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/10_%EC%B6%A9%EC%B2%AD%EB%B6%81%EB%8F%84.xls'

chungbuk_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_chungbuk,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 10
데이터 행 수: 194

생성된 컬럼명 (처음 10개):
0: 구시군명
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_한나라당
정우택
5: 민주당
이시종
6: 진보신당
김백규
7: 계
8: 무효투표수
9: 기권수

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 194행 → 필터링 후: 14행
'읍면동명' == '합계'인 행만 추출


In [120]:
chungbuk_5th

Unnamed: 0,구시군명,읍면동명,선거인수,투표수,후보자별 득표수_한나라당\n정우택,민주당\n이시종,진보신당\n김백규,계,무효투표수,기권수
0,충청북도,합계,1183811,696393,313646,349913,19551,683110,13283,487418
1,청주시상당구,합계,182613,100216,46172,50754,2289,99215,1001,82397
17,청주시흥덕구,합계,296977,159875,69434,85301,3916,158651,1224,137102
37,충주시,합계,161522,94688,33714,57677,1753,93144,1544,66834
65,제천시,합계,107034,63341,31942,28593,1661,62196,1145,43693
85,단양군,합계,26512,18918,9662,8005,614,18281,637,7594
96,청원군,합계,117270,69613,28394,37551,2145,68090,1523,47657
113,영동군,합계,41711,28879,14834,11227,1459,27520,1359,12832
127,보은군,합계,29345,21759,10039,9811,1026,20876,883,7586
141,옥천군,합계,44298,30568,14162,13583,1628,29373,1195,13730


In [121]:
chungbuk_5th.columns.tolist()

['구시군명',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_한나라당\n정우택',
 '민주당\n이시종',
 '진보신당\n김백규',
 '계',
 '무효투표수',
 '기권수']

In [122]:
rename_chungbuk = {
    '구시군명': '구시군',
    '후보자별 득표수_한나라당\n정우택': '득표수_1_한나라당_정우택',
    '민주당\n이시종': '득표수_2_민주당_이시종',
    '진보신당\n김백규': '득표수_7_진보신당_김백규',
    '계': '득표수_계'
    }

In [123]:
chungbuk_5th = chungbuk_5th.rename(columns=rename_chungbuk).drop(columns=['읍면동명'])
chungbuk_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_한나라당_정우택,득표수_2_민주당_이시종,득표수_7_진보신당_김백규,득표수_계,무효투표수,기권수
0,충청북도,1183811,696393,313646,349913,19551,683110,13283,487418
1,청주시상당구,182613,100216,46172,50754,2289,99215,1001,82397
17,청주시흥덕구,296977,159875,69434,85301,3916,158651,1224,137102
37,충주시,161522,94688,33714,57677,1753,93144,1544,66834
65,제천시,107034,63341,31942,28593,1661,62196,1145,43693
85,단양군,26512,18918,9662,8005,614,18281,637,7594
96,청원군,117270,69613,28394,37551,2145,68090,1523,47657
113,영동군,41711,28879,14834,11227,1459,27520,1359,12832
127,보은군,29345,21759,10039,9811,1026,20876,883,7586
141,옥천군,44298,30568,14162,13583,1628,29373,1195,13730


In [124]:
chungbuk_5th = chungbuk_5th.assign(
    시도='충청북도'
)[['시도'] + chungbuk_5th.columns.tolist()]

In [125]:
chungbuk_5th.loc[0, '구시군'] = '합계'

In [126]:
chungbuk_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_정우택,득표수_2_민주당_이시종,득표수_7_진보신당_김백규,득표수_계,무효투표수,기권수
0,충청북도,합계,1183811,696393,313646,349913,19551,683110,13283,487418
1,충청북도,청주시상당구,182613,100216,46172,50754,2289,99215,1001,82397
17,충청북도,청주시흥덕구,296977,159875,69434,85301,3916,158651,1224,137102
37,충청북도,충주시,161522,94688,33714,57677,1753,93144,1544,66834
65,충청북도,제천시,107034,63341,31942,28593,1661,62196,1145,43693
85,충청북도,단양군,26512,18918,9662,8005,614,18281,637,7594
96,충청북도,청원군,117270,69613,28394,37551,2145,68090,1523,47657
113,충청북도,영동군,41711,28879,14834,11227,1459,27520,1359,12832
127,충청북도,보은군,29345,21759,10039,9811,1026,20876,883,7586
141,충청북도,옥천군,44298,30568,14162,13583,1628,29373,1195,13730


In [127]:
chungbuk_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 14 entries, 0 to 189
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   시도              14 non-null     object
 1   구시군             14 non-null     object
 2   선거인수            14 non-null     object
 3   투표수             14 non-null     object
 4   득표수_1_한나라당_정우택  14 non-null     object
 5   득표수_2_민주당_이시종   14 non-null     object
 6   득표수_7_진보신당_김백규  14 non-null     object
 7   득표수_계           14 non-null     object
 8   무효투표수           14 non-null     object
 9   기권수             14 non-null     object
dtypes: object(10)
memory usage: 1.7+ KB


In [128]:
chungbuk_5th = chungbuk_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [129]:
chungbuk_5th_with_total = chungbuk_5th

In [130]:
chungbuk_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_정우택,득표수_2_민주당_이시종,득표수_7_진보신당_김백규,득표수_계,무효투표수,기권수
0,충청북도,합계,1183811,696393,313646,349913,19551,683110,13283,487418
1,충청북도,청주시상당구,182613,100216,46172,50754,2289,99215,1001,82397
17,충청북도,청주시흥덕구,296977,159875,69434,85301,3916,158651,1224,137102
37,충청북도,충주시,161522,94688,33714,57677,1753,93144,1544,66834
65,충청북도,제천시,107034,63341,31942,28593,1661,62196,1145,43693
85,충청북도,단양군,26512,18918,9662,8005,614,18281,637,7594
96,충청북도,청원군,117270,69613,28394,37551,2145,68090,1523,47657
113,충청북도,영동군,41711,28879,14834,11227,1459,27520,1359,12832
127,충청북도,보은군,29345,21759,10039,9811,1026,20876,883,7586
141,충청북도,옥천군,44298,30568,14162,13583,1628,29373,1195,13730


In [131]:
chungbuk_5th_with_total.to_csv("temp1_governor_chungbuk_5.csv", index=False, encoding="utf-8-sig")


## Chungnam


In [132]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_chungnam = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/11_%EC%B6%A9%EC%B2%AD%EB%82%A8%EB%8F%84.xls'

chungnam_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_chungnam,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 10
데이터 행 수: 260

생성된 컬럼명 (처음 10개):
0: 구시군명
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_한나라당
박해춘
5: 민주당
안희정
6: 자유선진당
박상돈
7: 계
8: 무효투표수
9: 기권수

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 260행 → 필터링 후: 18행
'읍면동명' == '합계'인 행만 추출


In [133]:
chungnam_5th

Unnamed: 0,구시군명,읍면동명,선거인수,투표수,후보자별 득표수_한나라당\n박해춘,민주당\n안희정,자유선진당\n박상돈,계,무효투표수,기권수
0,충청남도,합계,1595587,901863,154723,367288,347265,869271,32587,693724
1,천안시서북구,합계,221580,104685,16178,44605,42683,103466,1219,116895
15,천안시동남구,합계,183142,89013,14851,36201,36603,87655,1358,94129
35,공주시,합계,99709,61209,6888,27856,24127,58871,2338,38500
54,보령시,합계,85687,53693,8829,18112,24168,51109,2584,31994
73,아산시,합계,195443,99872,18577,44329,34579,97485,2387,95571
93,서산시,합계,121199,66626,12333,24599,27427,64359,2267,54573
111,태안군,합계,52517,36024,6315,12754,14216,33285,2739,16493
122,금산군,합계,46562,28884,6328,11927,9118,27373,1511,17678
134,연기군,합계,64534,37808,4265,18037,14244,36546,1262,26726


In [134]:
chungnam_5th.columns.tolist()

['구시군명',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_한나라당\n박해춘',
 '민주당\n안희정',
 '자유선진당\n박상돈',
 '계',
 '무효투표수',
 '기권수']

In [135]:
rename_chungnam = {
    '구시군명': '구시군',
    '후보자별 득표수_한나라당\n박해춘': '득표수_1_한나라당_박해춘',
    '민주당\n안희정': '득표수_2_민주당_안희정',
    '자유선진당\n박상돈': '득표수_3_자유선진당_박상돈',
    '계': '득표수_계'
    }

In [136]:
chungnam_5th = chungnam_5th.rename(columns=rename_chungnam).drop(columns=['읍면동명'])
chungnam_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_한나라당_박해춘,득표수_2_민주당_안희정,득표수_3_자유선진당_박상돈,득표수_계,무효투표수,기권수
0,충청남도,1595587,901863,154723,367288,347265,869271,32587,693724
1,천안시서북구,221580,104685,16178,44605,42683,103466,1219,116895
15,천안시동남구,183142,89013,14851,36201,36603,87655,1358,94129
35,공주시,99709,61209,6888,27856,24127,58871,2338,38500
54,보령시,85687,53693,8829,18112,24168,51109,2584,31994
73,아산시,195443,99872,18577,44329,34579,97485,2387,95571
93,서산시,121199,66626,12333,24599,27427,64359,2267,54573
111,태안군,52517,36024,6315,12754,14216,33285,2739,16493
122,금산군,46562,28884,6328,11927,9118,27373,1511,17678
134,연기군,64534,37808,4265,18037,14244,36546,1262,26726


In [137]:
chungnam_5th = chungnam_5th.assign(
    시도='충청남도'
)[['시도'] + chungnam_5th.columns.tolist()]

In [138]:
chungnam_5th.loc[0, '구시군'] = '합계'

In [139]:
chungnam_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_박해춘,득표수_2_민주당_안희정,득표수_3_자유선진당_박상돈,득표수_계,무효투표수,기권수
0,충청남도,합계,1595587,901863,154723,367288,347265,869271,32587,693724
1,충청남도,천안시서북구,221580,104685,16178,44605,42683,103466,1219,116895
15,충청남도,천안시동남구,183142,89013,14851,36201,36603,87655,1358,94129
35,충청남도,공주시,99709,61209,6888,27856,24127,58871,2338,38500
54,충청남도,보령시,85687,53693,8829,18112,24168,51109,2584,31994
73,충청남도,아산시,195443,99872,18577,44329,34579,97485,2387,95571
93,충청남도,서산시,121199,66626,12333,24599,27427,64359,2267,54573
111,충청남도,태안군,52517,36024,6315,12754,14216,33285,2739,16493
122,충청남도,금산군,46562,28884,6328,11927,9118,27373,1511,17678
134,충청남도,연기군,64534,37808,4265,18037,14244,36546,1262,26726


In [140]:
chungnam_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 18 entries, 0 to 245
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   시도               18 non-null     object
 1   구시군              18 non-null     object
 2   선거인수             18 non-null     object
 3   투표수              18 non-null     object
 4   득표수_1_한나라당_박해춘   18 non-null     object
 5   득표수_2_민주당_안희정    18 non-null     object
 6   득표수_3_자유선진당_박상돈  18 non-null     object
 7   득표수_계            18 non-null     object
 8   무효투표수            18 non-null     object
 9   기권수              18 non-null     object
dtypes: object(10)
memory usage: 2.1+ KB


In [141]:
chungnam_5th = chungnam_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [142]:
chungnam_5th_with_total = chungnam_5th

In [143]:
chungnam_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_박해춘,득표수_2_민주당_안희정,득표수_3_자유선진당_박상돈,득표수_계,무효투표수,기권수
0,충청남도,합계,1595587,901863,154723,367288,347265,869271,32587,693724
1,충청남도,천안시서북구,221580,104685,16178,44605,42683,103466,1219,116895
15,충청남도,천안시동남구,183142,89013,14851,36201,36603,87655,1358,94129
35,충청남도,공주시,99709,61209,6888,27856,24127,58871,2338,38500
54,충청남도,보령시,85687,53693,8829,18112,24168,51109,2584,31994
73,충청남도,아산시,195443,99872,18577,44329,34579,97485,2387,95571
93,충청남도,서산시,121199,66626,12333,24599,27427,64359,2267,54573
111,충청남도,태안군,52517,36024,6315,12754,14216,33285,2739,16493
122,충청남도,금산군,46562,28884,6328,11927,9118,27373,1511,17678
134,충청남도,연기군,64534,37808,4265,18037,14244,36546,1262,26726


In [144]:
chungnam_5th_with_total.to_csv("temp1_governor_chungnam_5.csv", index=False, encoding="utf-8-sig")


## Jeonbuk


In [145]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_jeonbuk = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/12_%EC%A0%84%EB%9D%BC%EB%B6%81%EB%8F%84.xls'

jeonbuk_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_jeonbuk,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 12
데이터 행 수: 284

생성된 컬럼명 (처음 10개):
0: 구시군명
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_한나라당
정운천
5: 민주당
김완주
6: 민주노동당
하연호
7: 진보신당
염경석
8: 평화민주당
김대식
9: 계

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 284행 → 필터링 후: 16행
'읍면동명' == '합계'인 행만 추출


In [146]:
jeonbuk_5th

Unnamed: 0,구시군명,읍면동명,선거인수,투표수,후보자별 득표수_한나라당\n정운천,민주당\n김완주,민주노동당\n하연호,진보신당\n염경석,평화민주당\n김대식,계,무효투표수,기권수
0,전라북도,합계,1442805,856111,151064,569980,52331,35565,20990,829930,26181,586694
1,전주시완산구,합계,262926,140319,32656,86435,9616,7025,2685,138417,1902,122607
22,전주시덕진구,합계,210631,107769,22696,66511,7299,8168,1685,106359,1410,102862
40,군산시,합계,207328,113614,17270,81040,6297,4187,1766,110560,3054,93714
70,익산시,합계,234104,126680,23394,84786,7783,3928,3158,123049,3631,107424
102,정읍시,합계,96698,64655,8977,46746,3312,1568,1419,62022,2633,32043
128,남원시,합계,69420,47635,5343,35825,1705,1535,1527,45935,1700,21785
154,김제시,합계,78217,51879,8034,34695,4092,1122,1515,49458,2421,26338
176,완주군,합계,67270,41784,6653,26880,3807,1893,1156,40389,1395,25486
192,진안군,합계,23443,17962,2874,11212,1024,894,1149,17153,809,5481


In [147]:
jeonbuk_5th.columns.tolist()

['구시군명',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_한나라당\n정운천',
 '민주당\n김완주',
 '민주노동당\n하연호',
 '진보신당\n염경석',
 '평화민주당\n김대식',
 '계',
 '무효투표수',
 '기권수']

In [148]:
rename_jeonbuk = {
    '구시군명': '구시군',
    '후보자별 득표수_한나라당\n정운천': '득표수_1_한나라당_정운천',
    '민주당\n김완주': '득표수_2_민주당_김완주',
    '민주노동당\n하연호': '득표수_3_민주노동당_하연호',
    '진보신당\n염경석': '득표수_7_진보신당_염경석',
    '평화민주당\n김대식': '득표수_8_평화민주당_김대식',
    '계': '득표수_계'
    }

In [149]:
jeonbuk_5th = jeonbuk_5th.rename(columns=rename_jeonbuk).drop(columns=['읍면동명'])
jeonbuk_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_한나라당_정운천,득표수_2_민주당_김완주,득표수_3_민주노동당_하연호,득표수_7_진보신당_염경석,득표수_8_평화민주당_김대식,득표수_계,무효투표수,기권수
0,전라북도,1442805,856111,151064,569980,52331,35565,20990,829930,26181,586694
1,전주시완산구,262926,140319,32656,86435,9616,7025,2685,138417,1902,122607
22,전주시덕진구,210631,107769,22696,66511,7299,8168,1685,106359,1410,102862
40,군산시,207328,113614,17270,81040,6297,4187,1766,110560,3054,93714
70,익산시,234104,126680,23394,84786,7783,3928,3158,123049,3631,107424
102,정읍시,96698,64655,8977,46746,3312,1568,1419,62022,2633,32043
128,남원시,69420,47635,5343,35825,1705,1535,1527,45935,1700,21785
154,김제시,78217,51879,8034,34695,4092,1122,1515,49458,2421,26338
176,완주군,67270,41784,6653,26880,3807,1893,1156,40389,1395,25486
192,진안군,23443,17962,2874,11212,1024,894,1149,17153,809,5481


In [150]:
jeonbuk_5th = jeonbuk_5th.assign(
    시도='전라북도'
)[['시도'] + jeonbuk_5th.columns.tolist()]

In [151]:
jeonbuk_5th.loc[0, '구시군'] = '합계'

In [152]:
jeonbuk_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_정운천,득표수_2_민주당_김완주,득표수_3_민주노동당_하연호,득표수_7_진보신당_염경석,득표수_8_평화민주당_김대식,득표수_계,무효투표수,기권수
0,전라북도,합계,1442805,856111,151064,569980,52331,35565,20990,829930,26181,586694
1,전라북도,전주시완산구,262926,140319,32656,86435,9616,7025,2685,138417,1902,122607
22,전라북도,전주시덕진구,210631,107769,22696,66511,7299,8168,1685,106359,1410,102862
40,전라북도,군산시,207328,113614,17270,81040,6297,4187,1766,110560,3054,93714
70,전라북도,익산시,234104,126680,23394,84786,7783,3928,3158,123049,3631,107424
102,전라북도,정읍시,96698,64655,8977,46746,3312,1568,1419,62022,2633,32043
128,전라북도,남원시,69420,47635,5343,35825,1705,1535,1527,45935,1700,21785
154,전라북도,김제시,78217,51879,8034,34695,4092,1122,1515,49458,2421,26338
176,전라북도,완주군,67270,41784,6653,26880,3807,1893,1156,40389,1395,25486
192,전라북도,진안군,23443,17962,2874,11212,1024,894,1149,17153,809,5481


In [153]:
jeonbuk_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 0 to 268
Data columns (total 12 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   시도               16 non-null     object
 1   구시군              16 non-null     object
 2   선거인수             16 non-null     object
 3   투표수              16 non-null     object
 4   득표수_1_한나라당_정운천   16 non-null     object
 5   득표수_2_민주당_김완주    16 non-null     object
 6   득표수_3_민주노동당_하연호  16 non-null     object
 7   득표수_7_진보신당_염경석   16 non-null     object
 8   득표수_8_평화민주당_김대식  16 non-null     object
 9   득표수_계            16 non-null     object
 10  무효투표수            16 non-null     object
 11  기권수              16 non-null     object
dtypes: object(12)
memory usage: 2.2+ KB


In [154]:
jeonbuk_5th = jeonbuk_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [155]:
jeonbuk_5th_with_total = jeonbuk_5th

In [156]:
jeonbuk_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_정운천,득표수_2_민주당_김완주,득표수_3_민주노동당_하연호,득표수_7_진보신당_염경석,득표수_8_평화민주당_김대식,득표수_계,무효투표수,기권수
0,전라북도,합계,1442805,856111,151064,569980,52331,35565,20990,829930,26181,586694
1,전라북도,전주시완산구,262926,140319,32656,86435,9616,7025,2685,138417,1902,122607
22,전라북도,전주시덕진구,210631,107769,22696,66511,7299,8168,1685,106359,1410,102862
40,전라북도,군산시,207328,113614,17270,81040,6297,4187,1766,110560,3054,93714
70,전라북도,익산시,234104,126680,23394,84786,7783,3928,3158,123049,3631,107424
102,전라북도,정읍시,96698,64655,8977,46746,3312,1568,1419,62022,2633,32043
128,전라북도,남원시,69420,47635,5343,35825,1705,1535,1527,45935,1700,21785
154,전라북도,김제시,78217,51879,8034,34695,4092,1122,1515,49458,2421,26338
176,전라북도,완주군,67270,41784,6653,26880,3807,1893,1156,40389,1395,25486
192,전라북도,진안군,23443,17962,2874,11212,1024,894,1149,17153,809,5481


In [157]:
jeonbuk_5th_with_total.to_csv("temp1_governor_jeonbuk_5.csv", index=False, encoding="utf-8-sig")


## Jeonnam


In [158]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_jeonnam = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/13_%EC%A0%84%EB%9D%BC%EB%82%A8%EB%8F%84.xls'

jeonnam_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_jeonnam,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 11
데이터 행 수: 361

생성된 컬럼명 (처음 10개):
0: 구시군명
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_한나라당
김대식
5: 민주당
박준영
6: 민주노동당
박웅두
7: 평화민주당
김경재
8: 계
9: 무효투표수

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 361행 → 필터링 후: 23행
'읍면동명' == '합계'인 행만 추출


In [159]:
jeonnam_5th

Unnamed: 0,구시군명,읍면동명,선거인수,투표수,후보자별 득표수_한나라당\n김대식,민주당\n박준영,민주노동당\n박웅두,평화민주당\n김경재,계,무효투표수,기권수
0,전라남도,합계,1504902,967338,123548,629984,100581,68220,922333,45005,537564
1,목포시,합계,181872,91633,11266,60315,11789,5835,89205,2428,90239
26,여수시,합계,225147,130630,14500,89520,12618,9993,126631,3999,94517
56,순천시,합계,200321,122961,15447,79796,12483,10961,118687,4274,77360
83,나주시,합계,75246,52381,5423,34572,6582,3007,49584,2797,22865
105,광양시,합계,104440,65132,9687,40665,8515,4391,63258,1874,39308
120,담양군,합계,39981,26959,3363,17679,2551,1974,25567,1392,13022
135,장성군,합계,38299,27488,3596,18042,2247,2193,26078,1410,10811
149,곡성군,합계,26917,20586,2123,12527,3185,1442,19277,1309,6331
162,구례군,합계,22984,18290,2213,11513,1972,1311,17009,1281,4694


In [160]:
jeonnam_5th.columns.tolist()

['구시군명',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_한나라당\n김대식',
 '민주당\n박준영',
 '민주노동당\n박웅두',
 '평화민주당\n김경재',
 '계',
 '무효투표수',
 '기권수']

In [161]:
rename_jeonnam = {
    '구시군명': '구시군',
    '후보자별 득표수_한나라당\n김대식': '득표수_1_한나라당_김대식',
    '민주당\n박준영': '득표수_2_민주당_박준영',
    '민주노동당\n박웅두': '득표수_5_민주노동당_박웅두',
    '평화민주당\n김경재': '득표수_7_평화민주당_김경재',
    '계': '득표수_계'
    }

In [162]:
jeonnam_5th = jeonnam_5th.rename(columns=rename_jeonnam).drop(columns=['읍면동명'])
jeonnam_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_한나라당_김대식,득표수_2_민주당_박준영,득표수_5_민주노동당_박웅두,득표수_7_평화민주당_김경재,득표수_계,무효투표수,기권수
0,전라남도,1504902,967338,123548,629984,100581,68220,922333,45005,537564
1,목포시,181872,91633,11266,60315,11789,5835,89205,2428,90239
26,여수시,225147,130630,14500,89520,12618,9993,126631,3999,94517
56,순천시,200321,122961,15447,79796,12483,10961,118687,4274,77360
83,나주시,75246,52381,5423,34572,6582,3007,49584,2797,22865
105,광양시,104440,65132,9687,40665,8515,4391,63258,1874,39308
120,담양군,39981,26959,3363,17679,2551,1974,25567,1392,13022
135,장성군,38299,27488,3596,18042,2247,2193,26078,1410,10811
149,곡성군,26917,20586,2123,12527,3185,1442,19277,1309,6331
162,구례군,22984,18290,2213,11513,1972,1311,17009,1281,4694


In [163]:
jeonnam_5th = jeonnam_5th.assign(
    시도='전라남도'
)[['시도'] + jeonnam_5th.columns.tolist()]

In [164]:
jeonnam_5th.loc[0, '구시군'] = '합계'

In [165]:
jeonnam_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_김대식,득표수_2_민주당_박준영,득표수_5_민주노동당_박웅두,득표수_7_평화민주당_김경재,득표수_계,무효투표수,기권수
0,전라남도,합계,1504902,967338,123548,629984,100581,68220,922333,45005,537564
1,전라남도,목포시,181872,91633,11266,60315,11789,5835,89205,2428,90239
26,전라남도,여수시,225147,130630,14500,89520,12618,9993,126631,3999,94517
56,전라남도,순천시,200321,122961,15447,79796,12483,10961,118687,4274,77360
83,전라남도,나주시,75246,52381,5423,34572,6582,3007,49584,2797,22865
105,전라남도,광양시,104440,65132,9687,40665,8515,4391,63258,1874,39308
120,전라남도,담양군,39981,26959,3363,17679,2551,1974,25567,1392,13022
135,전라남도,장성군,38299,27488,3596,18042,2247,2193,26078,1410,10811
149,전라남도,곡성군,26917,20586,2123,12527,3185,1442,19277,1309,6331
162,전라남도,구례군,22984,18290,2213,11513,1972,1311,17009,1281,4694


In [166]:
jeonnam_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 23 entries, 0 to 344
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   시도               23 non-null     object
 1   구시군              23 non-null     object
 2   선거인수             23 non-null     object
 3   투표수              23 non-null     object
 4   득표수_1_한나라당_김대식   23 non-null     object
 5   득표수_2_민주당_박준영    23 non-null     object
 6   득표수_5_민주노동당_박웅두  23 non-null     object
 7   득표수_7_평화민주당_김경재  23 non-null     object
 8   득표수_계            23 non-null     object
 9   무효투표수            23 non-null     object
 10  기권수              23 non-null     object
dtypes: object(11)
memory usage: 2.7+ KB


In [167]:
jeonnam_5th = jeonnam_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [168]:
jeonnam_5th_with_total = jeonnam_5th

In [169]:
jeonnam_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_김대식,득표수_2_민주당_박준영,득표수_5_민주노동당_박웅두,득표수_7_평화민주당_김경재,득표수_계,무효투표수,기권수
0,전라남도,합계,1504902,967338,123548,629984,100581,68220,922333,45005,537564
1,전라남도,목포시,181872,91633,11266,60315,11789,5835,89205,2428,90239
26,전라남도,여수시,225147,130630,14500,89520,12618,9993,126631,3999,94517
56,전라남도,순천시,200321,122961,15447,79796,12483,10961,118687,4274,77360
83,전라남도,나주시,75246,52381,5423,34572,6582,3007,49584,2797,22865
105,전라남도,광양시,104440,65132,9687,40665,8515,4391,63258,1874,39308
120,전라남도,담양군,39981,26959,3363,17679,2551,1974,25567,1392,13022
135,전라남도,장성군,38299,27488,3596,18042,2247,2193,26078,1410,10811
149,전라남도,곡성군,26917,20586,2123,12527,3185,1442,19277,1309,6331
162,전라남도,구례군,22984,18290,2213,11513,1972,1311,17009,1281,4694


In [170]:
jeonnam_5th_with_total.to_csv("temp1_governor_jeonnam_5.csv", index=False, encoding="utf-8-sig")


## Gyeongbuk


In [171]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_gyeongbuk = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/14_%EA%B2%BD%EC%83%81%EB%B6%81%EB%8F%84.xls'

gyeongbuk_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_gyeongbuk,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 11
데이터 행 수: 398

생성된 컬럼명 (처음 10개):
0: 구시군별
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_한나라당
김관용
5: 민주당
홍의락
6: 민주노동당
윤병태
7: 국민참여당
유성찬
8: 계
9: 무효투표수

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 398행 → 필터링 후: 25행
'읍면동명' == '합계'인 행만 추출


In [172]:
gyeongbuk_5th

Unnamed: 0,구시군별,읍면동명,선거인수,투표수,후보자별 득표수_한나라당\n김관용,민주당\n홍의락,민주노동당\n윤병태,국민참여당\n유성찬,계,무효투표수,기권수
0,경상북도,합계,2122905,1260591,913812,143347,68015,87346,1212520,48071,862314
1,포항시북구,합계,199281,107009,81746,10702,4425,7961,104834,2175,92272
19,포항시남구,합계,197630,105544,77479,12764,5305,7620,103168,2376,92086
36,울릉군,합계,9072,7404,5825,648,288,326,7087,317,1668
41,경주시,합계,211517,124949,91802,12039,9766,7002,120609,4340,86568
67,김천시,합계,109593,67692,51237,6967,2910,4023,65137,2555,41901
91,안동시,합계,134118,84290,63228,8297,3524,5909,80958,3332,49828
118,구미시,합계,291865,138048,90050,21610,9804,13233,134697,3351,153817
148,영주시,합계,90908,60339,43016,7707,2879,3896,57498,2841,30569
170,영천시,합계,85134,52224,37647,5719,2211,4173,49750,2474,32910


In [173]:
gyeongbuk_5th.columns.tolist()

['구시군별',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_한나라당\n김관용',
 '민주당\n홍의락',
 '민주노동당\n윤병태',
 '국민참여당\n유성찬',
 '계',
 '무효투표수',
 '기권수']

In [174]:
rename_gyeongbuk = {
    '구시군별': '구시군',
    '후보자별 득표수_한나라당\n김관용': '득표수_1_한나라당_김관용',
    '민주당\n홍의락': '득표수_2_민주당_홍의락',
    '민주노동당\n윤병태': '득표수_5_민주노동당_윤병태',
    '국민참여당\n유성찬': '득표수_7_국민참여당_유성찬',
    '계': '득표수_계'
    }

In [175]:
gyeongbuk_5th = gyeongbuk_5th.rename(columns=rename_gyeongbuk).drop(columns=['읍면동명'])
gyeongbuk_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_한나라당_김관용,득표수_2_민주당_홍의락,득표수_5_민주노동당_윤병태,득표수_7_국민참여당_유성찬,득표수_계,무효투표수,기권수
0,경상북도,2122905,1260591,913812,143347,68015,87346,1212520,48071,862314
1,포항시북구,199281,107009,81746,10702,4425,7961,104834,2175,92272
19,포항시남구,197630,105544,77479,12764,5305,7620,103168,2376,92086
36,울릉군,9072,7404,5825,648,288,326,7087,317,1668
41,경주시,211517,124949,91802,12039,9766,7002,120609,4340,86568
67,김천시,109593,67692,51237,6967,2910,4023,65137,2555,41901
91,안동시,134118,84290,63228,8297,3524,5909,80958,3332,49828
118,구미시,291865,138048,90050,21610,9804,13233,134697,3351,153817
148,영주시,90908,60339,43016,7707,2879,3896,57498,2841,30569
170,영천시,85134,52224,37647,5719,2211,4173,49750,2474,32910


In [176]:
gyeongbuk_5th = gyeongbuk_5th.assign(
    시도='경상북도'
)[['시도'] + gyeongbuk_5th.columns.tolist()]

In [177]:
gyeongbuk_5th.loc[0, '구시군'] = '합계'

In [178]:
gyeongbuk_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_김관용,득표수_2_민주당_홍의락,득표수_5_민주노동당_윤병태,득표수_7_국민참여당_유성찬,득표수_계,무효투표수,기권수
0,경상북도,합계,2122905,1260591,913812,143347,68015,87346,1212520,48071,862314
1,경상북도,포항시북구,199281,107009,81746,10702,4425,7961,104834,2175,92272
19,경상북도,포항시남구,197630,105544,77479,12764,5305,7620,103168,2376,92086
36,경상북도,울릉군,9072,7404,5825,648,288,326,7087,317,1668
41,경상북도,경주시,211517,124949,91802,12039,9766,7002,120609,4340,86568
67,경상북도,김천시,109593,67692,51237,6967,2910,4023,65137,2555,41901
91,경상북도,안동시,134118,84290,63228,8297,3524,5909,80958,3332,49828
118,경상북도,구미시,291865,138048,90050,21610,9804,13233,134697,3351,153817
148,경상북도,영주시,90908,60339,43016,7707,2879,3896,57498,2841,30569
170,경상북도,영천시,85134,52224,37647,5719,2211,4173,49750,2474,32910


In [179]:
gyeongbuk_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 25 entries, 0 to 385
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   시도               25 non-null     object
 1   구시군              25 non-null     object
 2   선거인수             25 non-null     object
 3   투표수              25 non-null     object
 4   득표수_1_한나라당_김관용   25 non-null     object
 5   득표수_2_민주당_홍의락    25 non-null     object
 6   득표수_5_민주노동당_윤병태  25 non-null     object
 7   득표수_7_국민참여당_유성찬  25 non-null     object
 8   득표수_계            25 non-null     object
 9   무효투표수            25 non-null     object
 10  기권수              25 non-null     object
dtypes: object(11)
memory usage: 2.9+ KB


In [180]:
gyeongbuk_5th = gyeongbuk_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [181]:
gyeongbuk_5th_with_total = gyeongbuk_5th

In [182]:
gyeongbuk_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_김관용,득표수_2_민주당_홍의락,득표수_5_민주노동당_윤병태,득표수_7_국민참여당_유성찬,득표수_계,무효투표수,기권수
0,경상북도,합계,2122905,1260591,913812,143347,68015,87346,1212520,48071,862314
1,경상북도,포항시북구,199281,107009,81746,10702,4425,7961,104834,2175,92272
19,경상북도,포항시남구,197630,105544,77479,12764,5305,7620,103168,2376,92086
36,경상북도,울릉군,9072,7404,5825,648,288,326,7087,317,1668
41,경상북도,경주시,211517,124949,91802,12039,9766,7002,120609,4340,86568
67,경상북도,김천시,109593,67692,51237,6967,2910,4023,65137,2555,41901
91,경상북도,안동시,134118,84290,63228,8297,3524,5909,80958,3332,49828
118,경상북도,구미시,291865,138048,90050,21610,9804,13233,134697,3351,153817
148,경상북도,영주시,90908,60339,43016,7707,2879,3896,57498,2841,30569
170,경상북도,영천시,85134,52224,37647,5719,2211,4173,49750,2474,32910


In [183]:
gyeongbuk_5th_with_total.to_csv("temp1_governor_gyeongbuk_5.csv", index=False, encoding="utf-8-sig")


## Gyeongnam


In [184]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_gyeongnam = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/15_%EA%B2%BD%EC%83%81%EB%82%A8%EB%8F%84.xls'

gyeongnam_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_gyeongnam,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 9
데이터 행 수: 378

생성된 컬럼명 (처음 10개):
0: 구시군명
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_한나라당
이달곤
5: 무소속
김두관
6: 계
7: 무효투표수
8: 기권수

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 378행 → 필터링 후: 21행
'읍면동명' == '합계'인 행만 추출


In [185]:
gyeongnam_5th

Unnamed: 0,구시군명,읍면동명,선거인수,투표수,후보자별 득표수_한나라당\n이달곤,무소속\n김두관,계,무효투표수,기권수
0,경상남도,합계,2506393,1549690,705986,812336,1518322,31368,956703
1,창원시,합계,371540,224868,97364,125343,222707,2161,146672
19,마산시,합계,322188,191127,97429,91410,188839,2288,131061
54,진주시,합계,254687,164337,72465,89377,161842,2495,90350
94,진해시,합계,130168,77361,38717,37384,76101,1260,52807
112,통영시,합계,108127,67402,35601,30323,65924,1478,40725
133,고성군,합계,47716,32473,15614,15784,31398,1075,15243
150,사천시,합계,89018,61019,29234,30079,59313,1706,27999
167,김해시,합계,359474,195079,73316,119063,192379,2700,164395
187,밀양시,합계,90189,58178,28365,28100,56465,1713,32011


In [186]:
gyeongnam_5th.columns.tolist()

['구시군명',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_한나라당\n이달곤',
 '무소속\n김두관',
 '계',
 '무효투표수',
 '기권수']

In [187]:
rename_gyeongnam = {
    '구시군명': '구시군',
    '후보자별 득표수_한나라당\n이달곤': '득표수_1_한나라당_이달곤',
    '무소속\n김두관': '득표수_7_무소속_김두관',
    '계': '득표수_계'
    }

In [188]:
gyeongnam_5th = gyeongnam_5th.rename(columns=rename_gyeongnam).drop(columns=['읍면동명'])
gyeongnam_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_한나라당_이달곤,득표수_7_무소속_김두관,득표수_계,무효투표수,기권수
0,경상남도,2506393,1549690,705986,812336,1518322,31368,956703
1,창원시,371540,224868,97364,125343,222707,2161,146672
19,마산시,322188,191127,97429,91410,188839,2288,131061
54,진주시,254687,164337,72465,89377,161842,2495,90350
94,진해시,130168,77361,38717,37384,76101,1260,52807
112,통영시,108127,67402,35601,30323,65924,1478,40725
133,고성군,47716,32473,15614,15784,31398,1075,15243
150,사천시,89018,61019,29234,30079,59313,1706,27999
167,김해시,359474,195079,73316,119063,192379,2700,164395
187,밀양시,90189,58178,28365,28100,56465,1713,32011


In [189]:
gyeongnam_5th = gyeongnam_5th.assign(
    시도='경상남도'
)[['시도'] + gyeongnam_5th.columns.tolist()]

In [190]:
gyeongnam_5th.loc[0, '구시군'] = '합계'

In [191]:
gyeongnam_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_이달곤,득표수_7_무소속_김두관,득표수_계,무효투표수,기권수
0,경상남도,합계,2506393,1549690,705986,812336,1518322,31368,956703
1,경상남도,창원시,371540,224868,97364,125343,222707,2161,146672
19,경상남도,마산시,322188,191127,97429,91410,188839,2288,131061
54,경상남도,진주시,254687,164337,72465,89377,161842,2495,90350
94,경상남도,진해시,130168,77361,38717,37384,76101,1260,52807
112,경상남도,통영시,108127,67402,35601,30323,65924,1478,40725
133,경상남도,고성군,47716,32473,15614,15784,31398,1075,15243
150,경상남도,사천시,89018,61019,29234,30079,59313,1706,27999
167,경상남도,김해시,359474,195079,73316,119063,192379,2700,164395
187,경상남도,밀양시,90189,58178,28365,28100,56465,1713,32011


In [192]:
gyeongnam_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 21 entries, 0 to 358
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   시도              21 non-null     object
 1   구시군             21 non-null     object
 2   선거인수            21 non-null     object
 3   투표수             21 non-null     object
 4   득표수_1_한나라당_이달곤  21 non-null     object
 5   득표수_7_무소속_김두관   21 non-null     object
 6   득표수_계           21 non-null     object
 7   무효투표수           21 non-null     object
 8   기권수             21 non-null     object
dtypes: object(9)
memory usage: 2.2+ KB


In [193]:
gyeongnam_5th = gyeongnam_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [194]:
gyeongnam_5th_with_total = gyeongnam_5th

In [195]:
gyeongnam_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_한나라당_이달곤,득표수_7_무소속_김두관,득표수_계,무효투표수,기권수
0,경상남도,합계,2506393,1549690,705986,812336,1518322,31368,956703
1,경상남도,창원시,371540,224868,97364,125343,222707,2161,146672
19,경상남도,마산시,322188,191127,97429,91410,188839,2288,131061
54,경상남도,진주시,254687,164337,72465,89377,161842,2495,90350
94,경상남도,진해시,130168,77361,38717,37384,76101,1260,52807
112,경상남도,통영시,108127,67402,35601,30323,65924,1478,40725
133,경상남도,고성군,47716,32473,15614,15784,31398,1075,15243
150,경상남도,사천시,89018,61019,29234,30079,59313,1706,27999
167,경상남도,김해시,359474,195079,73316,119063,192379,2700,164395
187,경상남도,밀양시,90189,58178,28365,28100,56465,1713,32011


In [196]:
gyeongnam_5th_with_total.to_csv("temp1_governor_gyeongnam_5.csv", index=False, encoding="utf-8-sig")


## Jeju

In [197]:
# 깃허브 blob URL로 불러오는 경우 (자동으로 raw URL로 변환됨)
blob_url5_jeju = 'https://github.com/sw1kwon/korean-elections/blob/main/original/Local_Elections_Governor/5th_2010/16_%EC%A0%9C%EC%A3%BC%ED%8A%B9%EB%B3%84%EC%9E%90%EC%B9%98%EB%8F%84.xls'

jeju_5th = process_5th_governor_election(
    file_path_or_url = blob_url5_jeju,
    header_rows = (4, 6),
    filter_column = '읍면동명',
    filter_value = '합계'
)

헤더 행: 4행 ~ 6행
생성된 컬럼 수: 10
데이터 행 수: 50

생성된 컬럼명 (처음 10개):
0: 구시군명
1: 읍면동명
2: 선거인수
3: 투표수
4: 후보자별 득표수_민주당
고희범
5: 무소속
현명관
6: 무소속
우근민
7: 계
8: 무효투표수
9: 기권수

'읍면동명' 컬럼 발견: 읍면동명
필터링 전: 50행 → 필터링 후: 3행
'읍면동명' == '합계'인 행만 추출


In [198]:
jeju_5th

Unnamed: 0,구시군명,읍면동명,선거인수,투표수,후보자별 득표수_민주당\n고희범,무소속\n현명관,무소속\n우근민,계,무효투표수,기권수
0,제주특별자치도,합계,424098,276056,48186,108344,110603,267133,8923,148042
1,제주시,합계,305765,195450,36447,74905,78514,189866,5584,110315
30,서귀포시,합계,118333,80606,11739,33439,32089,77267,3339,37727


In [199]:
jeju_5th.columns.tolist()

['구시군명',
 '읍면동명',
 '선거인수',
 '투표수',
 '후보자별 득표수_민주당\n고희범',
 '무소속\n현명관',
 '무소속\n우근민',
 '계',
 '무효투표수',
 '기권수']

In [200]:
rename_jeju = {
    '구시군명': '구시군',
    '후보자별 득표수_민주당\n고희범': '득표수_1_민주당_고희범',
    '무소속\n현명관': '득표수_8_무소속_현명관',
    '무소속\n우근민': '득표수_9_무소속_우근민',
    '계': '득표수_계'
    }

In [201]:
jeju_5th = jeju_5th.rename(columns=rename_jeju).drop(columns=['읍면동명'])
jeju_5th

Unnamed: 0,구시군,선거인수,투표수,득표수_1_민주당_고희범,득표수_8_무소속_현명관,득표수_9_무소속_우근민,득표수_계,무효투표수,기권수
0,제주특별자치도,424098,276056,48186,108344,110603,267133,8923,148042
1,제주시,305765,195450,36447,74905,78514,189866,5584,110315
30,서귀포시,118333,80606,11739,33439,32089,77267,3339,37727


In [202]:
jeju_5th = jeju_5th.assign(
    시도='제주특별자치도'
)[['시도'] + jeju_5th.columns.tolist()]

In [203]:
jeju_5th.loc[0, '구시군'] = '합계'

In [204]:
jeju_5th

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_민주당_고희범,득표수_8_무소속_현명관,득표수_9_무소속_우근민,득표수_계,무효투표수,기권수
0,제주특별자치도,합계,424098,276056,48186,108344,110603,267133,8923,148042
1,제주특별자치도,제주시,305765,195450,36447,74905,78514,189866,5584,110315
30,제주특별자치도,서귀포시,118333,80606,11739,33439,32089,77267,3339,37727


In [205]:
jeju_5th.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3 entries, 0 to 30
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   시도             3 non-null      object
 1   구시군            3 non-null      object
 2   선거인수           3 non-null      object
 3   투표수            3 non-null      object
 4   득표수_1_민주당_고희범  3 non-null      object
 5   득표수_8_무소속_현명관  3 non-null      object
 6   득표수_9_무소속_우근민  3 non-null      object
 7   득표수_계          3 non-null      object
 8   무효투표수          3 non-null      object
 9   기권수            3 non-null      object
dtypes: object(10)
memory usage: 372.0+ bytes


In [206]:
jeju_5th = jeju_5th.apply(
    lambda col: col.astype(int)
    if col.dtype == 'object' and col.astype(str).str.fullmatch(r'\d+').all()
    else col
)

In [207]:
jeju_5th_with_total = jeju_5th

In [208]:
jeju_5th_with_total

Unnamed: 0,시도,구시군,선거인수,투표수,득표수_1_민주당_고희범,득표수_8_무소속_현명관,득표수_9_무소속_우근민,득표수_계,무효투표수,기권수
0,제주특별자치도,합계,424098,276056,48186,108344,110603,267133,8923,148042
1,제주특별자치도,제주시,305765,195450,36447,74905,78514,189866,5584,110315
30,제주특별자치도,서귀포시,118333,80606,11739,33439,32089,77267,3339,37727


In [209]:
jeju_5th_with_total.to_csv("temp1_governor_jeju_5.csv", index=False, encoding="utf-8-sig")

# Batch CSV Files to ZIP

In [210]:
import zipfile
import glob

# Find all CSV files in current directory
csv_files = glob.glob('*.csv')

# Create ZIP file
with zipfile.ZipFile('all_csv_files.zip', 'w') as zipf:
   for file in csv_files:
       zipf.write(file)
       print(f"Added: {file}")  # Show progress

print(f"Total {len(csv_files)} files compressed.")

Added: temp1_governor_gyeonggi_5.csv
Added: temp1_governor_ulsan_5.csv
Added: temp1_governor_incheon_5.csv
Added: temp1_governor_gwangju_5.csv
Added: temp1_governor_jeonnam_5.csv
Added: temp1_governor_jeju_5.csv
Added: temp1_governor_gyeongbuk_5.csv
Added: temp1_governor_gyeongnam_5.csv
Added: temp1_governor_chungnam_5.csv
Added: temp1_governor_gangwon_5.csv
Added: temp1_governor_daegu_5.csv
Added: temp1_governor_seoul_5.csv
Added: temp1_governor_chungbuk_5.csv
Added: temp1_governor_busan_5.csv
Added: temp1_governor_daejeon_5.csv
Added: temp1_governor_jeonbuk_5.csv
Total 16 files compressed.
