# Functions

In [1]:
import pandas as pd
import numpy as np
import re
from typing import Dict, Tuple

def process_governor_election_data(region_name: str) -> pd.DataFrame:
    """
    특정 지역의 지사 선거 데이터를 처리하여 병합된 결과를 반환

    Args:
        region_name (str): 지역명 (예: 'busan', 'seoul', 'gyeonggi' 등)

    Returns:
        pd.DataFrame: 병합된 지사 선거 데이터
    """

    # URL 생성
    df1_url = f"https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v1_g/5th_2010/temp1_governor_{region_name}_5.csv"
    df2_url = f"https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_g/5th_2010/temp2_2_governor_{region_name}_5.csv"

    print(f"=== {region_name} 지사 선거 데이터 처리 시작 ===")
    print(f"상세 데이터 URL: {df1_url}")
    print(f"요약 데이터 URL: {df2_url}")

    try:
        # 첫 번째 CSV 파일 처리 (상세 득표 데이터)
        df1 = pd.read_csv(df1_url)
        print(f"상세 데이터 로드 완료: {df1.shape}")

        # 득표수_{숫자}_정당_이름 패턴의 컬럼들 찾기
        vote_columns = [col for col in df1.columns if col.startswith('득표수_') and col != '득표수_계']
        print(f"득표수 관련 컬럼 수: {len(vote_columns)}")

        # 각 행별로 1위와 2위 후보의 번호 찾기
        def find_top_two_candidates(row):
            vote_data = []

            # 모든 후보의 득표수와 번호를 수집
            for col in vote_columns:
                votes = row[col] if pd.notna(row[col]) else 0
                # 컬럼명에서 숫자 추출
                match = re.search(r'득표수_(\d+)_', col)
                if match:
                    candidate_number = int(match.group(1))
                    vote_data.append((votes, candidate_number, col))

            # 득표수 기준으로 내림차순 정렬
            vote_data.sort(key=lambda x: x[0], reverse=True)

            # 1위와 2위 후보 반환
            first_place = vote_data[0] if len(vote_data) > 0 else (0, None, None)
            second_place = vote_data[1] if len(vote_data) > 1 else (0, None, None)

            return first_place, second_place

        # 1위 후보 번호 찾기
        def find_winner_number(row):
            first_place, _ = find_top_two_candidates(row)
            return first_place[1]

        # 2위 후보 번호 찾기
        def find_second_number(row):
            _, second_place = find_top_two_candidates(row)
            return second_place[1]

        # 1위 득표율 계산
        def calculate_vote_rate_1st(row):
            first_place, _ = find_top_two_candidates(row)
            max_votes = first_place[0]
            total_votes = row['득표수_계'] if pd.notna(row['득표수_계']) else 0

            if total_votes > 0:
                return round(max_votes / total_votes, 4)
            else:
                return 0

        # 2위 득표율 계산
        def calculate_vote_rate_2nd(row):
            _, second_place = find_top_two_candidates(row)
            second_votes = second_place[0]
            total_votes = row['득표수_계'] if pd.notna(row['득표수_계']) else 0

            if total_votes > 0:
                return round(second_votes / total_votes, 4)
            else:
                return 0

        # 새로운 컬럼 추가 (1위, 2위)
        df1['득표_1위_후보번호'] = df1.apply(find_winner_number, axis=1)
        df1['득표_1위_득표율'] = df1.apply(calculate_vote_rate_1st, axis=1)
        df1['득표_2위_후보번호'] = df1.apply(find_second_number, axis=1)
        df1['득표_2위_득표율'] = df1.apply(calculate_vote_rate_2nd, axis=1)

        # 정당 카테고리 매핑 딕셔너리 (지역별로 다를 수 있음)
        category_mapping = get_governor_category_mapping(region_name, vote_columns)
        print(f"생성된 카테고리 매핑: {category_mapping}")

        # 1위, 2위 후보번호를 카테고리로 매핑
        # fillna()로 매핑되지 않은 후보는 '기타'로 처리
        df1['득표_1위_정당'] = df1['득표_1위_후보번호'].map(category_mapping).fillna('기타')
        df1['득표_2위_정당'] = df1['득표_2위_후보번호'].map(category_mapping).fillna('기타')

        # 매핑 결과 확인
        print("매핑 후 1위 정당 분포 (처리 중):")
        print(df1['득표_1위_정당'].value_counts())

        # 매핑되지 않은 후보 확인
        unmapped_1st = df1[df1['득표_1위_정당'] == '기타']['득표_1위_후보번호'].unique()
        unmapped_2nd = df1[df1['득표_2위_정당'] == '기타']['득표_2위_후보번호'].unique()
        if len(unmapped_1st) > 0:
            print(f"경고: 1위에서 매핑되지 않은 후보번호: {unmapped_1st}")
        if len(unmapped_2nd) > 0:
            print(f"경고: 2위에서 매핑되지 않은 후보번호: {unmapped_2nd}")

        # 카테고리별 후보자 수 계산 (모든 카테고리 포함)
        candidate_counts = {}
        all_categories = ['보수정당', '진보정당', '그외정당', '무소속']

        # 먼저 모든 카테고리를 0으로 초기화
        for category in all_categories:
            candidate_counts[category] = 0

        # 실제 매핑에서 카운트
        for candidate_num, category in category_mapping.items():
            if category in candidate_counts:
                candidate_counts[category] += 1
            else:
                candidate_counts[category] = 1

        print(f"카테고리별 후보자 수: {candidate_counts}")

        # 각 카테고리별로 개별 컬럼 생성 (없는 카테고리도 0으로 포함)
        for category in all_categories:
            candidate_count = candidate_counts.get(category, 0)  # 없으면 0
            df1[f'{category}_후보자수'] = candidate_count
            print(f"  {category}_후보자수: {candidate_count}")

        # 필요한 컬럼들만 선택 (병합용)
        merge_columns = ['시도', '구시군', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
                         '득표_1위_정당', '득표_2위_정당'] + [f'{cat}_후보자수' for cat in all_categories]

        df1_for_merge = df1[merge_columns].copy()

        # 두 번째 CSV 파일 읽기 (요약 데이터)
        df2 = pd.read_csv(df2_url)
        print(f"요약 데이터 로드 완료: {df2.shape}")

        # 시도, 구시군을 키로 하여 병합
        merged_df = pd.merge(df2, df1_for_merge, on=['시도', '구시군'], how='left')

        # 병합 결과 검증
        missing_data = merged_df[merged_df['득표_1위_후보번호'].isna()]
        if len(missing_data) > 0:
            print(f"경고: 병합되지 않은 데이터가 {len(missing_data)}개 있습니다")
        else:
            print("모든 데이터가 성공적으로 병합되었습니다!")

        print(f"최종 데이터 형태: {merged_df.shape}")
        print(f"1위 정당 분포:")
        print(merged_df['득표_1위_정당'].value_counts())
        print(f"=== {region_name} 지사 선거 데이터 처리 완료 ===\n")

        return merged_df

    except Exception as e:
        print(f"오류 발생: {e}")
        return None

def get_governor_category_mapping(region_name: str, vote_columns: list) -> Dict[int, str]:
    """
    지역별 지사 선거 정당 카테고리 매핑을 반환
    각 지역마다 후보자와 정당이 다르므로 수동으로 설정 필요

    Args:
        region_name: 지역명
        vote_columns: 득표수 컬럼 리스트 (후보 확인용)

    Returns:
        해당 지역의 후보번호별 카테고리 매핑
    """

    print(f"\n=== {region_name} 지사 선거 후보 정보 ===")
    print("실제 후보 컬럼들:")
    for col in vote_columns:
        print(f"  {col}")

    # 지역별 매핑 설정 (2010년 제5회 지방선거 기준)
    if region_name == 'seoul':  # 서울
        mapping = {
            1: '보수정당',
            2: '진보정당',
            3: '그외정당',
            7: '그외정당',
            8: '그외정당'
        }

    elif region_name == 'busan':  # 부산
        mapping = {
            1: '보수정당',
            2: '진보정당'
        }

    elif region_name == 'daegu':  # 대구
        mapping = {
            1: '보수정당',
            2: '진보정당',
            7: '그외정당'
        }

    elif region_name == 'incheon':  # 인천
        mapping = {
            1: '보수정당',
            2: '진보정당',
            7: '그외정당',
            8: '그외정당'
        }

    elif region_name == 'gwangju':  # 광주
        mapping = {
            1: '보수정당',
            2: '진보정당',
            5: '그외정당',
            7: '그외정당',
            8: '그외정당',
            9: '그외정당'
        }

    elif region_name == 'daejeon':  # 대전
        mapping = {
            1: '보수정당',
            2: '진보정당',
            3: '그외정당',
            7: '그외정당'
        }

    elif region_name == 'ulsan':  # 울산
        mapping = {
            1: '보수정당',
            5: '그외정당',
            7: '그외정당'
        }

    elif region_name == 'gyeonggi':  # 경기
        mapping = {
            1: '보수정당',
            8: '그외정당'
        }

    elif region_name == 'gangwon':  # 강원
        mapping = {
            1: '보수정당',
            2: '진보정당'
        }

    elif region_name == 'chungbuk':  # 충북
        mapping = {
            1: '보수정당',
            2: '진보정당',
            7: '그외정당'
        }

    elif region_name == 'chungnam':  # 충남
        mapping = {
            1: '보수정당',
            2: '진보정당',
            3: '그외정당'
        }

    elif region_name == 'jeonbuk':  # 전북
        mapping = {
            1: '보수정당',
            2: '진보정당',
            3: '그외정당',
            7: '그외정당',
            8: '그외정당'
        }

    elif region_name == 'jeonnam':  # 전남
        mapping = {
            1: '보수정당',
            2: '진보정당',
            5: '그외정당',
            7: '그외정당'
        }

    elif region_name == 'gyeongbuk':  # 경북
        mapping = {
            1: '보수정당',
            2: '진보정당',
            5: '그외정당',
            7: '그외정당'
        }

    elif region_name == 'gyeongnam':  # 경남
        mapping = {
            1: '보수정당',
            7: '무소속'
        }

    elif region_name == 'jeju':  # 제주
        mapping = {
            2: '진보정당',
            8: '무소속',
            9: '무소속'
        }

    else:
        print(f"경고: {region_name} 지역에 대한 매핑이 정의되지 않았습니다.")
        print("기본 매핑을 사용합니다. 수동으로 매핑을 추가해주세요.")
        # 기본 매핑 (수정 필요)
        mapping = {
            1: '보수정당',
            2: '진보정당',
            3: '그외정당',
            4: '그외정당',
            5: '무소속'
        }

    # 실제 존재하는 후보 번호만 추출
    existing_candidates = set()
    for col in vote_columns:
        match = re.search(r'득표수_(\d+)_', col)
        if match:
            existing_candidates.add(int(match.group(1)))

    print(f"실제 존재하는 후보번호: {sorted(existing_candidates)}")

    # 존재하는 후보에 대해서만 매핑 적용
    filtered_mapping = {k: v for k, v in mapping.items() if k in existing_candidates}

    # 매핑되지 않은 후보 번호 확인
    unmapped_candidates = existing_candidates - set(mapping.keys())
    if unmapped_candidates:
        print(f"경고: 매핑되지 않은 후보번호들: {sorted(unmapped_candidates)}")
        print("이 후보들은 '기타' 카테고리로 분류됩니다.")

    print(f"적용된 매핑: {filtered_mapping}")
    print("=" * 50)

    return filtered_mapping

def process_multiple_governor_elections(region_names: list) -> Dict[str, pd.DataFrame]:
    """
    여러 지역의 지사 선거를 일괄 처리하여 df_지역명 형태로 변수 저장

    Args:
        region_names (list): 처리할 지역명 리스트

    Returns:
        Dict[str, pd.DataFrame]: 지역별 처리된 데이터프레임 딕셔너리
    """
    results = {}

    for region_name in region_names:
        print(f"\n{'='*50}")
        result_df = process_governor_election_data(region_name)

        if result_df is not None:
            # df_busan, df_seoul 형태로 변수명 지정
            var_name = f'df_{region_name}'
            results[var_name] = result_df

            print(f"데이터프레임 저장: {var_name} (shape: {result_df.shape})")
        else:
            print(f"{region_name} 지사 선거 데이터 처리 실패")

    return results

# 지역명들
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

# # 사용 예시
# if __name__ == "__main__":
#     # 방법 1: 특정 지역들만 처리
#     selected_regions = ['busan', 'seoul', 'gyeonggi', 'incheon']
#     governor_results = process_multiple_governor_elections(selected_regions)

#     # 개별 접근 예시:
#     # df_busan = governor_results['df_busan']
#     # df_seoul = governor_results['df_seoul']

#     # 방법 2: 모든 지역 일괄 처리
#     # all_governor_results = process_multiple_governor_elections(AVAILABLE_REGIONS)

#     print(f"\n사용 가능한 지역들: {AVAILABLE_REGIONS}")

# Preprocessing & Merge

In [2]:
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

election_results = process_multiple_governor_elections(AVAILABLE_REGIONS)


=== seoul 지사 선거 데이터 처리 시작 ===
상세 데이터 URL: https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v1_g/5th_2010/temp1_governor_seoul_5.csv
요약 데이터 URL: https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_g/5th_2010/temp2_2_governor_seoul_5.csv
상세 데이터 로드 완료: (26, 12)
득표수 관련 컬럼 수: 5

=== seoul 지사 선거 후보 정보 ===
실제 후보 컬럼들:
  득표수_1_한나라당_오세훈
  득표수_2_민주당_한명숙
  득표수_3_자유선진당_지상욱
  득표수_7_진보신당_노회찬
  득표수_8_미래연합_석종현
실제 존재하는 후보번호: [1, 2, 3, 7, 8]
적용된 매핑: {1: '보수정당', 2: '진보정당', 3: '그외정당', 7: '그외정당', 8: '그외정당'}
생성된 카테고리 매핑: {1: '보수정당', 2: '진보정당', 3: '그외정당', 7: '그외정당', 8: '그외정당'}
매핑 후 1위 정당 분포 (처리 중):
득표_1위_정당
진보정당    17
보수정당     9
Name: count, dtype: int64
카테고리별 후보자 수: {'보수정당': 1, '진보정당': 1, '그외정당': 3, '무소속': 0}
  보수정당_후보자수: 1
  진보정당_후보자수: 1
  그외정당_후보자수: 3
  무소속_후보자수: 0
요약 데이터 로드 완료: (26, 11)
모든 데이터가 성공적으로 병합되었습니다!
최종 데이터 형태: (26, 21)
1위 정당 분포:
득표_1위_정당
진보정당    17
보수정당     9
Name: count, dtype: int64
=== seoul 지사 선거 데이터 처리 완료 ===

데이터프레임 저장: df_se

# Governor Election 5th

## Seoul

In [3]:
df_seoul = election_results['df_seoul']

In [4]:
df_seoul

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,서울특별시,합계,8211461,4426182,2086127,2059715,251830,0,4397672,28510,...,1,0.4744,2,0.4684,보수정당,진보정당,1,1,3,0
1,서울특별시,종로구,138917,77812,35476,36910,4660,0,77046,766,...,2,0.4791,1,0.4605,진보정당,보수정당,1,1,3,0
2,서울특별시,중구,109000,60763,28648,28410,3179,0,60237,526,...,1,0.4756,2,0.4716,보수정당,진보정당,1,1,3,0
3,서울특별시,용산구,198044,105005,53285,44706,6173,0,104164,841,...,1,0.5115,2,0.4292,보수정당,진보정당,1,1,3,0
4,서울특별시,성동구,250316,135277,63448,63966,6951,0,134365,912,...,2,0.4761,1,0.4722,진보정당,보수정당,1,1,3,0
5,서울특별시,광진구,301603,158963,72749,76420,8674,0,157843,1120,...,2,0.4842,1,0.4609,진보정당,보수정당,1,1,3,0
6,서울특별시,동대문구,297206,160497,74536,76032,8769,0,159337,1160,...,2,0.4772,1,0.4678,진보정당,보수정당,1,1,3,0
7,서울특별시,중랑구,343810,172251,80577,81517,8859,0,170953,1298,...,2,0.4768,1,0.4713,진보정당,보수정당,1,1,3,0
8,서울특별시,성북구,383508,204667,91907,99316,12137,0,203360,1307,...,2,0.4884,1,0.4519,진보정당,보수정당,1,1,3,0
9,서울특별시,강북구,276171,142061,63231,70115,7661,0,141007,1054,...,2,0.4972,1,0.4484,진보정당,보수정당,1,1,3,0


### preprocessing

In [5]:
df_seoul = (
    df_seoul
    .assign(시도=lambda df: df['시도'].replace('서울특별시', '서울'))
)

In [6]:
df_seoul.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [7]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_seoul.columns if col not in fixed_cols]

# 메소드 체이닝
df_seoul = (
    df_seoul
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [8]:
df_seoul

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,합계,2010,광역단체장,보수정당,진보정당,1,0.4744,2,0.4684,...,0,8211461,4426182,2086127,2059715,251830,0,4397672,28510,3785279
1,서울,종로구,2010,광역단체장,진보정당,보수정당,2,0.4791,1,0.4605,...,0,138917,77812,35476,36910,4660,0,77046,766,61105
2,서울,중구,2010,광역단체장,보수정당,진보정당,1,0.4756,2,0.4716,...,0,109000,60763,28648,28410,3179,0,60237,526,48237
3,서울,용산구,2010,광역단체장,보수정당,진보정당,1,0.5115,2,0.4292,...,0,198044,105005,53285,44706,6173,0,104164,841,93039
4,서울,성동구,2010,광역단체장,진보정당,보수정당,2,0.4761,1,0.4722,...,0,250316,135277,63448,63966,6951,0,134365,912,115039
5,서울,광진구,2010,광역단체장,진보정당,보수정당,2,0.4842,1,0.4609,...,0,301603,158963,72749,76420,8674,0,157843,1120,142640
6,서울,동대문구,2010,광역단체장,진보정당,보수정당,2,0.4772,1,0.4678,...,0,297206,160497,74536,76032,8769,0,159337,1160,136709
7,서울,중랑구,2010,광역단체장,진보정당,보수정당,2,0.4768,1,0.4713,...,0,343810,172251,80577,81517,8859,0,170953,1298,171559
8,서울,성북구,2010,광역단체장,진보정당,보수정당,2,0.4884,1,0.4519,...,0,383508,204667,91907,99316,12137,0,203360,1307,178841
9,서울,강북구,2010,광역단체장,진보정당,보수정당,2,0.4972,1,0.4484,...,0,276171,142061,63231,70115,7661,0,141007,1054,134110


### v4.1 ~ v4.3

In [9]:
# 1. 전체 데이터 저장
df_seoul.to_csv("temp4_1_governor_seoul_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_seoul2 = df_seoul.query("구시군 != '합계'")
df_seoul2.to_csv("temp4_2_governor_seoul_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_seoul3 = df_seoul.query("구시군 == '합계'").drop(columns="구시군")
df_seoul3.to_csv("temp4_3_governor_seoul_5.csv", index=False, encoding="utf-8-sig")

## Busan

In [10]:
df_busan = election_results['df_busan']

In [11]:
df_busan

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,부산광역시,합계,2849895,1410126,770507,619565,0,0,1390072,20054,...,1,0.5543,2,0.4457,보수정당,진보정당,1,1,0,0
1,부산광역시,중구,41637,21350,12901,7996,0,0,20897,453,...,1,0.6174,2,0.3826,보수정당,진보정당,1,1,0,0
2,부산광역시,서구,104507,49172,29328,19131,0,0,48459,713,...,1,0.6052,2,0.3948,보수정당,진보정당,1,1,0,0
3,부산광역시,동구,85200,43996,25784,17380,0,0,43164,832,...,1,0.5973,2,0.4027,보수정당,진보정당,1,1,0,0
4,부산광역시,영도구,122392,59112,30750,27332,0,0,58082,1030,...,1,0.5294,2,0.4706,보수정당,진보정당,1,1,0,0
5,부산광역시,부산진구,322385,157895,85247,70355,0,0,155602,2293,...,1,0.5479,2,0.4521,보수정당,진보정당,1,1,0,0
6,부산광역시,동래구,224693,109659,60901,47442,0,0,108343,1316,...,1,0.5621,2,0.4379,보수정당,진보정당,1,1,0,0
7,부산광역시,남구,240421,116776,63602,51993,0,0,115595,1181,...,1,0.5502,2,0.4498,보수정당,진보정당,1,1,0,0
8,부산광역시,북구,243993,121921,62561,57857,0,0,120418,1503,...,1,0.5195,2,0.4805,보수정당,진보정당,1,1,0,0
9,부산광역시,해운대구,328744,157525,85110,70341,0,0,155451,2074,...,1,0.5475,2,0.4525,보수정당,진보정당,1,1,0,0


### preprocessing

In [12]:
df_busan = (
    df_busan
    .assign(시도=lambda df: df['시도'].replace('부산광역시', '부산'))
)

In [13]:
df_busan.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [14]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_busan.columns if col not in fixed_cols]

# 메소드 체이닝
df_busan = (
    df_busan
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [15]:
df_busan

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,부산,합계,2010,광역단체장,보수정당,진보정당,1,0.5543,2,0.4457,...,0,2849895,1410126,770507,619565,0,0,1390072,20054,1439769
1,부산,중구,2010,광역단체장,보수정당,진보정당,1,0.6174,2,0.3826,...,0,41637,21350,12901,7996,0,0,20897,453,20287
2,부산,서구,2010,광역단체장,보수정당,진보정당,1,0.6052,2,0.3948,...,0,104507,49172,29328,19131,0,0,48459,713,55335
3,부산,동구,2010,광역단체장,보수정당,진보정당,1,0.5973,2,0.4027,...,0,85200,43996,25784,17380,0,0,43164,832,41204
4,부산,영도구,2010,광역단체장,보수정당,진보정당,1,0.5294,2,0.4706,...,0,122392,59112,30750,27332,0,0,58082,1030,63280
5,부산,부산진구,2010,광역단체장,보수정당,진보정당,1,0.5479,2,0.4521,...,0,322385,157895,85247,70355,0,0,155602,2293,164490
6,부산,동래구,2010,광역단체장,보수정당,진보정당,1,0.5621,2,0.4379,...,0,224693,109659,60901,47442,0,0,108343,1316,115034
7,부산,남구,2010,광역단체장,보수정당,진보정당,1,0.5502,2,0.4498,...,0,240421,116776,63602,51993,0,0,115595,1181,123645
8,부산,북구,2010,광역단체장,보수정당,진보정당,1,0.5195,2,0.4805,...,0,243993,121921,62561,57857,0,0,120418,1503,122072
9,부산,해운대구,2010,광역단체장,보수정당,진보정당,1,0.5475,2,0.4525,...,0,328744,157525,85110,70341,0,0,155451,2074,171219


### v4.1 ~ v4.3

In [16]:
# 1. 전체 데이터 저장
df_busan.to_csv("temp4_1_governor_busan_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_busan2 = df_busan.query("구시군 != '합계'")
df_busan2.to_csv("temp4_2_governor_busan_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_busan3 = df_busan.query("구시군 == '합계'").drop(columns="구시군")
df_busan3.to_csv("temp4_3_governor_busan_5.csv", index=False, encoding="utf-8-sig")

## Daegu

In [17]:
df_daegu = election_results['df_daegu']

In [18]:
df_daegu

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,대구광역시,합계,1928835,886035,633118,146458,88599,0,868175,17860,...,1,0.7293,2,0.1687,보수정당,진보정당,1,1,1,0
1,대구광역시,중구,65102,31334,22764,4867,3074,0,30705,629,...,1,0.7414,2,0.1585,보수정당,진보정당,1,1,1,0
2,대구광역시,동구,266834,123904,87541,22096,11529,0,121166,2738,...,1,0.7225,2,0.1824,보수정당,진보정당,1,1,1,0
3,대구광역시,서구,183845,83922,61781,12012,7938,0,81731,2191,...,1,0.7559,2,0.147,보수정당,진보정당,1,1,1,0
4,대구광역시,남구,142035,61391,45380,9059,5874,0,60313,1078,...,1,0.7524,2,0.1502,보수정당,진보정당,1,1,1,0
5,대구광역시,북구,339490,150682,106071,26213,15629,0,147913,2769,...,1,0.7171,2,0.1772,보수정당,진보정당,1,1,1,0
6,대구광역시,수성구,342990,166958,120726,25761,17209,0,163696,3262,...,1,0.7375,2,0.1574,보수정당,진보정당,1,1,1,0
7,대구광역시,달서구,454805,200176,141106,35453,20292,0,196851,3325,...,1,0.7168,2,0.1801,보수정당,진보정당,1,1,1,0
8,대구광역시,달성군,133734,67668,47749,10997,7054,0,65800,1868,...,1,0.7257,2,0.1671,보수정당,진보정당,1,1,1,0


### preprocessing

In [19]:
df_daegu = (
    df_daegu
    .assign(시도=lambda df: df['시도'].replace('대구광역시', '대구'))
)

In [20]:
df_daegu.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [21]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_daegu.columns if col not in fixed_cols]

# 메소드 체이닝
df_daegu = (
    df_daegu
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [22]:
df_daegu

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,대구,합계,2010,광역단체장,보수정당,진보정당,1,0.7293,2,0.1687,...,0,1928835,886035,633118,146458,88599,0,868175,17860,1042800
1,대구,중구,2010,광역단체장,보수정당,진보정당,1,0.7414,2,0.1585,...,0,65102,31334,22764,4867,3074,0,30705,629,33768
2,대구,동구,2010,광역단체장,보수정당,진보정당,1,0.7225,2,0.1824,...,0,266834,123904,87541,22096,11529,0,121166,2738,142930
3,대구,서구,2010,광역단체장,보수정당,진보정당,1,0.7559,2,0.147,...,0,183845,83922,61781,12012,7938,0,81731,2191,99923
4,대구,남구,2010,광역단체장,보수정당,진보정당,1,0.7524,2,0.1502,...,0,142035,61391,45380,9059,5874,0,60313,1078,80644
5,대구,북구,2010,광역단체장,보수정당,진보정당,1,0.7171,2,0.1772,...,0,339490,150682,106071,26213,15629,0,147913,2769,188808
6,대구,수성구,2010,광역단체장,보수정당,진보정당,1,0.7375,2,0.1574,...,0,342990,166958,120726,25761,17209,0,163696,3262,176032
7,대구,달서구,2010,광역단체장,보수정당,진보정당,1,0.7168,2,0.1801,...,0,454805,200176,141106,35453,20292,0,196851,3325,254629
8,대구,달성군,2010,광역단체장,보수정당,진보정당,1,0.7257,2,0.1671,...,0,133734,67668,47749,10997,7054,0,65800,1868,66066


### v4.1 ~ v4.3

In [23]:
# 1. 전체 데이터 저장
df_daegu.to_csv("temp4_1_governor_daegu_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_daegu2 = df_daegu.query("구시군 != '합계'")
df_daegu2.to_csv("temp4_2_governor_daegu_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_daegu3 = df_daegu.query("구시군 == '합계'").drop(columns="구시군")
df_daegu3.to_csv("temp4_3_governor_daegu_5.csv", index=False, encoding="utf-8-sig")

## Incheon

In [24]:
df_incheon = election_results['df_incheon']

In [25]:
df_incheon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,인천광역시,합계,2096853,1067431,469040,556902,30838,0,1056780,10651,...,2,0.527,1,0.4438,진보정당,보수정당,1,1,2,0
1,인천광역시,중구,73354,37876,17846,18216,1285,0,37347,529,...,2,0.4878,1,0.4778,진보정당,보수정당,1,1,2,0
2,인천광역시,동구,62253,35099,15714,17486,1389,0,34589,510,...,2,0.5055,1,0.4543,진보정당,보수정당,1,1,2,0
3,인천광역시,남구,337880,165592,75286,84170,4544,0,164000,1592,...,2,0.5132,1,0.4591,진보정당,보수정당,1,1,2,0
4,인천광역시,연수구,206407,110292,53027,53495,2952,0,109474,818,...,2,0.4887,1,0.4844,진보정당,보수정당,1,1,2,0
5,인천광역시,남동구,355642,177496,76011,95283,4757,0,176051,1445,...,2,0.5412,1,0.4318,진보정당,보수정당,1,1,2,0
6,인천광역시,부평구,435655,218198,88370,121650,6448,0,216468,1730,...,2,0.562,1,0.4082,진보정당,보수정당,1,1,2,0
7,인천광역시,계양구,259089,131148,51211,75847,3160,0,130218,930,...,2,0.5825,1,0.3933,진보정당,보수정당,1,1,2,0
8,인천광역시,서구,294322,143331,61389,76308,4369,0,142066,1265,...,2,0.5371,1,0.4321,진보정당,보수정당,1,1,2,0
9,인천광역시,강화군,56586,37059,23160,10946,1489,0,35595,1464,...,1,0.6507,2,0.3075,보수정당,진보정당,1,1,2,0


### preprocessing

In [26]:
df_incheon = (
    df_incheon
    .assign(시도=lambda df: df['시도'].replace('인천광역시', '인천'))
)

In [27]:
df_incheon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [28]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_incheon.columns if col not in fixed_cols]

# 메소드 체이닝
df_incheon = (
    df_incheon
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [29]:
df_incheon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,인천,합계,2010,광역단체장,진보정당,보수정당,2,0.527,1,0.4438,...,0,2096853,1067431,469040,556902,30838,0,1056780,10651,1029422
1,인천,중구,2010,광역단체장,진보정당,보수정당,2,0.4878,1,0.4778,...,0,73354,37876,17846,18216,1285,0,37347,529,35478
2,인천,동구,2010,광역단체장,진보정당,보수정당,2,0.5055,1,0.4543,...,0,62253,35099,15714,17486,1389,0,34589,510,27154
3,인천,남구,2010,광역단체장,진보정당,보수정당,2,0.5132,1,0.4591,...,0,337880,165592,75286,84170,4544,0,164000,1592,172288
4,인천,연수구,2010,광역단체장,진보정당,보수정당,2,0.4887,1,0.4844,...,0,206407,110292,53027,53495,2952,0,109474,818,96115
5,인천,남동구,2010,광역단체장,진보정당,보수정당,2,0.5412,1,0.4318,...,0,355642,177496,76011,95283,4757,0,176051,1445,178146
6,인천,부평구,2010,광역단체장,진보정당,보수정당,2,0.562,1,0.4082,...,0,435655,218198,88370,121650,6448,0,216468,1730,217457
7,인천,계양구,2010,광역단체장,진보정당,보수정당,2,0.5825,1,0.3933,...,0,259089,131148,51211,75847,3160,0,130218,930,127941
8,인천,서구,2010,광역단체장,진보정당,보수정당,2,0.5371,1,0.4321,...,0,294322,143331,61389,76308,4369,0,142066,1265,150991
9,인천,강화군,2010,광역단체장,보수정당,진보정당,1,0.6507,2,0.3075,...,0,56586,37059,23160,10946,1489,0,35595,1464,19527


### v4.1 ~ v4.3

In [30]:
# 1. 전체 데이터 저장
df_incheon.to_csv("temp4_1_governor_incheon_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_incheon2 = df_incheon.query("구시군 != '합계'")
df_incheon2.to_csv("temp4_2_governor_incheon_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_incheon3 = df_incheon.query("구시군 == '합계'").drop(columns="구시군")
df_incheon3.to_csv("temp4_3_governor_incheon_5.csv", index=False, encoding="utf-8-sig")

## Gwangju

In [31]:
df_gwangju = election_results['df_gwangju']

In [32]:
df_gwangju

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,광주광역시,합계,1064913,529901,74490,297003,151990,0,523483,6418,...,2,0.5674,8,0.1449,진보정당,그외정당,1,1,4,0
1,광주광역시,동구,84206,44017,8096,23991,11305,0,43392,625,...,2,0.5529,1,0.1866,진보정당,보수정당,1,1,4,0
2,광주광역시,서구,222260,113765,17783,61188,33429,0,112400,1365,...,2,0.5444,1,0.1582,진보정당,보수정당,1,1,4,0
3,광주광역시,남구,164726,87054,12510,53512,19995,0,86017,1037,...,2,0.6221,1,0.1454,진보정당,보수정당,1,1,4,0
4,광주광역시,북구,351029,168022,22380,93471,50424,0,166275,1747,...,2,0.5621,8,0.1504,진보정당,그외정당,1,1,4,0
5,광주광역시,광산구,242692,117043,13721,64841,36837,0,115399,1644,...,2,0.5619,8,0.1447,진보정당,그외정당,1,1,4,0


### preprocessing

In [33]:
df_gwangju = (
    df_gwangju
    .assign(시도=lambda df: df['시도'].replace('광주광역시', '광주'))
)

In [34]:
df_gwangju.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [35]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_gwangju.columns if col not in fixed_cols]

# 메소드 체이닝
df_gwangju = (
    df_gwangju
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [36]:
df_gwangju

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,광주,합계,2010,광역단체장,진보정당,그외정당,2,0.5674,8,0.1449,...,0,1064913,529901,74490,297003,151990,0,523483,6418,535012
1,광주,동구,2010,광역단체장,진보정당,보수정당,2,0.5529,1,0.1866,...,0,84206,44017,8096,23991,11305,0,43392,625,40189
2,광주,서구,2010,광역단체장,진보정당,보수정당,2,0.5444,1,0.1582,...,0,222260,113765,17783,61188,33429,0,112400,1365,108495
3,광주,남구,2010,광역단체장,진보정당,보수정당,2,0.6221,1,0.1454,...,0,164726,87054,12510,53512,19995,0,86017,1037,77672
4,광주,북구,2010,광역단체장,진보정당,그외정당,2,0.5621,8,0.1504,...,0,351029,168022,22380,93471,50424,0,166275,1747,183007
5,광주,광산구,2010,광역단체장,진보정당,그외정당,2,0.5619,8,0.1447,...,0,242692,117043,13721,64841,36837,0,115399,1644,125649


### v4.1 ~ v4.3

In [37]:
# 1. 전체 데이터 저장
df_gwangju.to_csv("temp4_1_governor_gwangju_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_gwangju2 = df_gwangju.query("구시군 != '합계'")
df_gwangju2.to_csv("temp4_2_governor_gwangju_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_gwangju3 = df_gwangju.query("구시군 == '합계'").drop(columns="구시군")
df_gwangju3.to_csv("temp4_3_governor_gwangju_5.csv", index=False, encoding="utf-8-sig")

## Daejeon

In [38]:
df_daejeon = election_results['df_daejeon']

In [39]:
df_daejeon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,대전광역시,합계,1127547,596683,168616,137751,285196,0,591563,5120,...,3,0.4668,1,0.285,그외정당,보수정당,1,1,2,0
1,대전광역시,동구,195072,97967,28132,21795,46950,0,96877,1090,...,3,0.4688,1,0.2904,그외정당,보수정당,1,1,2,0
2,대전광역시,중구,206384,109744,34128,21922,52688,0,108738,1006,...,3,0.4704,1,0.3139,그외정당,보수정당,1,1,2,0
3,대전광역시,서구,375073,197443,55962,44371,95726,0,196059,1384,...,3,0.4733,1,0.2854,그외정당,보수정당,1,1,2,0
4,대전광역시,유성구,193868,109646,28689,29079,51119,0,108887,759,...,3,0.4527,2,0.2671,그외정당,진보정당,1,1,2,0
5,대전광역시,대덕구,157150,81883,21705,20584,38713,0,81002,881,...,3,0.4626,1,0.268,그외정당,보수정당,1,1,2,0


### preprocessing

In [40]:
df_daejeon = (
    df_daejeon
    .assign(시도=lambda df: df['시도'].replace('대전광역시', '대전'))
)

In [41]:
df_daejeon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [42]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_daejeon.columns if col not in fixed_cols]

# 메소드 체이닝
df_daejeon = (
    df_daejeon
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [43]:
df_daejeon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,대전,합계,2010,광역단체장,그외정당,보수정당,3,0.4668,1,0.285,...,0,1127547,596683,168616,137751,285196,0,591563,5120,530864
1,대전,동구,2010,광역단체장,그외정당,보수정당,3,0.4688,1,0.2904,...,0,195072,97967,28132,21795,46950,0,96877,1090,97105
2,대전,중구,2010,광역단체장,그외정당,보수정당,3,0.4704,1,0.3139,...,0,206384,109744,34128,21922,52688,0,108738,1006,96640
3,대전,서구,2010,광역단체장,그외정당,보수정당,3,0.4733,1,0.2854,...,0,375073,197443,55962,44371,95726,0,196059,1384,177630
4,대전,유성구,2010,광역단체장,그외정당,진보정당,3,0.4527,2,0.2671,...,0,193868,109646,28689,29079,51119,0,108887,759,84222
5,대전,대덕구,2010,광역단체장,그외정당,보수정당,3,0.4626,1,0.268,...,0,157150,81883,21705,20584,38713,0,81002,881,75267


### v4.1 ~ v4.3

In [44]:
# 1. 전체 데이터 저장
df_daejeon.to_csv("temp4_1_governor_daejeon_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_daejeon2 = df_daejeon.query("구시군 != '합계'")
df_daejeon2.to_csv("temp4_2_governor_daejeon_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_daejeon3 = df_daejeon.query("구시군 == '합계'").drop(columns="구시군")
df_daejeon3.to_csv("temp4_3_governor_daejeon_5.csv", index=False, encoding="utf-8-sig")

## Ulsan

In [45]:
df_ulsan = election_results['df_ulsan']

In [46]:
df_ulsan

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,울산광역시,합계,838805,462103,279421,0,176693,0,456114,5989,...,1,0.6126,5,0.2926,보수정당,그외정당,1,0,2,0
1,울산광역시,중구,178158,98144,63911,0,32706,0,96617,1527,...,1,0.6615,5,0.253,보수정당,그외정당,1,0,2,0
2,울산광역시,남구,258699,137133,86398,0,49302,0,135700,1433,...,1,0.6367,5,0.2852,보수정당,그외정당,1,0,2,0
3,울산광역시,동구,132325,76495,41337,0,34329,0,75666,829,...,1,0.5463,5,0.3283,보수정당,그외정당,1,0,2,0
4,울산광역시,북구,122563,67957,35719,0,31541,0,67260,697,...,1,0.5311,5,0.3655,보수정당,그외정당,1,0,2,0
5,울산광역시,울주군,147060,82374,52056,0,28815,0,80871,1503,...,1,0.6437,5,0.2581,보수정당,그외정당,1,0,2,0


### preprocessing

In [47]:
df_ulsan = (
    df_ulsan
    .assign(시도=lambda df: df['시도'].replace('울산광역시', '울산'))
)

In [48]:
df_ulsan.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [49]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_ulsan.columns if col not in fixed_cols]

# 메소드 체이닝
df_ulsan = (
    df_ulsan
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [50]:
df_ulsan

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,울산,합계,2010,광역단체장,보수정당,그외정당,1,0.6126,5,0.2926,...,0,838805,462103,279421,0,176693,0,456114,5989,376702
1,울산,중구,2010,광역단체장,보수정당,그외정당,1,0.6615,5,0.253,...,0,178158,98144,63911,0,32706,0,96617,1527,80014
2,울산,남구,2010,광역단체장,보수정당,그외정당,1,0.6367,5,0.2852,...,0,258699,137133,86398,0,49302,0,135700,1433,121566
3,울산,동구,2010,광역단체장,보수정당,그외정당,1,0.5463,5,0.3283,...,0,132325,76495,41337,0,34329,0,75666,829,55830
4,울산,북구,2010,광역단체장,보수정당,그외정당,1,0.5311,5,0.3655,...,0,122563,67957,35719,0,31541,0,67260,697,54606
5,울산,울주군,2010,광역단체장,보수정당,그외정당,1,0.6437,5,0.2581,...,0,147060,82374,52056,0,28815,0,80871,1503,64686


### v4.1 ~ v4.3

In [51]:
# 1. 전체 데이터 저장
df_ulsan.to_csv("temp4_1_governor_ulsan_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_ulsan2 = df_ulsan.query("구시군 != '합계'")
df_ulsan2.to_csv("temp4_2_governor_ulsan_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_ulsan3 = df_ulsan.query("구시군 == '합계'").drop(columns="구시군")
df_ulsan3.to_csv("temp4_3_governor_ulsan_5.csv", index=False, encoding="utf-8-sig")

## Gyeonggi

In [52]:
df_gyeonggi = election_results['df_gyeonggi']

In [53]:
df_gyeonggi

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경기도,합계,8761840,4534771,2271492,0,2079892,0,4351384,183387,...,1,0.522,8,0.478,보수정당,그외정당,1,0,1,0
1,경기도,수원시장안구,218041,118498,56126,0,58386,0,114512,3986,...,8,0.5099,1,0.4901,그외정당,보수정당,1,0,1,0
2,경기도,수원시권선구,229747,113478,53993,0,55330,0,109323,4155,...,8,0.5061,1,0.4939,그외정당,보수정당,1,0,1,0
3,경기도,수원시팔달구,174518,85692,43066,0,39357,0,82423,3269,...,1,0.5225,8,0.4775,보수정당,그외정당,1,0,1,0
4,경기도,수원시영통구,183747,101911,46917,0,52842,0,99759,2152,...,8,0.5297,1,0.4703,그외정당,보수정당,1,0,1,0
5,경기도,성남시수정구,194233,92330,38249,0,48655,0,86904,5426,...,8,0.5599,1,0.4401,그외정당,보수정당,1,0,1,0
6,경기도,성남시중원구,205070,97065,41382,0,50010,0,91392,5673,...,8,0.5472,1,0.4528,그외정당,보수정당,1,0,1,0
7,경기도,성남시분당구,360189,208482,117016,0,87384,0,204400,4082,...,1,0.5725,8,0.4275,보수정당,그외정당,1,0,1,0
8,경기도,의정부시,328299,161722,80574,0,73877,0,154451,7271,...,1,0.5217,8,0.4783,보수정당,그외정당,1,0,1,0
9,경기도,안양시만안구,208176,111311,52386,0,54447,0,106833,4478,...,8,0.5096,1,0.4904,그외정당,보수정당,1,0,1,0


### preprocessing

In [54]:
df_gyeonggi = (
    df_gyeonggi
    .assign(시도=lambda df: df['시도'].replace('경기도', '경기'))
)

In [55]:
df_gyeonggi.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [56]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_gyeonggi.columns if col not in fixed_cols]

# 메소드 체이닝
df_gyeonggi = (
    df_gyeonggi
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [57]:
df_gyeonggi

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,경기,합계,2010,광역단체장,보수정당,그외정당,1,0.522,8,0.478,...,0,8761840,4534771,2271492,0,2079892,0,4351384,183387,4227069
1,경기,수원시장안구,2010,광역단체장,그외정당,보수정당,8,0.5099,1,0.4901,...,0,218041,118498,56126,0,58386,0,114512,3986,99543
2,경기,수원시권선구,2010,광역단체장,그외정당,보수정당,8,0.5061,1,0.4939,...,0,229747,113478,53993,0,55330,0,109323,4155,116269
3,경기,수원시팔달구,2010,광역단체장,보수정당,그외정당,1,0.5225,8,0.4775,...,0,174518,85692,43066,0,39357,0,82423,3269,88826
4,경기,수원시영통구,2010,광역단체장,그외정당,보수정당,8,0.5297,1,0.4703,...,0,183747,101911,46917,0,52842,0,99759,2152,81836
5,경기,성남시수정구,2010,광역단체장,그외정당,보수정당,8,0.5599,1,0.4401,...,0,194233,92330,38249,0,48655,0,86904,5426,101903
6,경기,성남시중원구,2010,광역단체장,그외정당,보수정당,8,0.5472,1,0.4528,...,0,205070,97065,41382,0,50010,0,91392,5673,108005
7,경기,성남시분당구,2010,광역단체장,보수정당,그외정당,1,0.5725,8,0.4275,...,0,360189,208482,117016,0,87384,0,204400,4082,151707
8,경기,의정부시,2010,광역단체장,보수정당,그외정당,1,0.5217,8,0.4783,...,0,328299,161722,80574,0,73877,0,154451,7271,166577
9,경기,안양시만안구,2010,광역단체장,그외정당,보수정당,8,0.5096,1,0.4904,...,0,208176,111311,52386,0,54447,0,106833,4478,96865


### v4.1 ~ v4.3

In [58]:
# 1. 전체 데이터 저장
df_gyeonggi.to_csv("temp4_1_governor_gyeonggi_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_gyeonggi2 = df_gyeonggi.query("구시군 != '합계'")
df_gyeonggi2.to_csv("temp4_2_governor_gyeonggi_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_gyeonggi3 = df_gyeonggi.query("구시군 == '합계'").drop(columns="구시군")
df_gyeonggi3.to_csv("temp4_3_governor_gyeonggi_5.csv", index=False, encoding="utf-8-sig")

## Gangwon

In [59]:
df_gangwon = election_results['df_gangwon']

In [60]:
df_gangwon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,강원도,합계,1190509,741724,326111,388443,0,0,714554,27170,...,2,0.5436,1,0.4564,진보정당,보수정당,1,1,0,0
1,강원도,춘천시,206172,124839,47183,73858,0,0,121041,3798,...,2,0.6102,1,0.3898,진보정당,보수정당,1,1,0,0
2,강원도,원주시,234095,133502,58889,70966,0,0,129855,3647,...,2,0.5465,1,0.4535,진보정당,보수정당,1,1,0,0
3,강원도,강릉시,170703,99246,45759,50120,0,0,95879,3367,...,2,0.5227,1,0.4773,진보정당,보수정당,1,1,0,0
4,강원도,동해시,73611,44933,21132,22150,0,0,43282,1651,...,2,0.5118,1,0.4882,진보정당,보수정당,1,1,0,0
5,강원도,삼척시,58602,40390,17971,20511,0,0,38482,1908,...,2,0.533,1,0.467,진보정당,보수정당,1,1,0,0
6,강원도,태백시,40415,26819,9958,15946,0,0,25904,915,...,2,0.6156,1,0.3844,진보정당,보수정당,1,1,0,0
7,강원도,정선군,33811,24529,8311,15190,0,0,23501,1028,...,2,0.6464,1,0.3536,진보정당,보수정당,1,1,0,0
8,강원도,속초시,64606,37582,19112,17209,0,0,36321,1261,...,1,0.5262,2,0.4738,보수정당,진보정당,1,1,0,0
9,강원도,고성군,25324,18943,10352,7584,0,0,17936,1007,...,1,0.5772,2,0.4228,보수정당,진보정당,1,1,0,0


### preprocessing

In [61]:
df_gangwon = (
    df_gangwon
    .assign(시도=lambda df: df['시도'].replace('강원도', '강원'))
)

In [62]:
df_gangwon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [63]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_gangwon.columns if col not in fixed_cols]

# 메소드 체이닝
df_gangwon = (
    df_gangwon
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [64]:
df_gangwon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,강원,합계,2010,광역단체장,진보정당,보수정당,2,0.5436,1,0.4564,...,0,1190509,741724,326111,388443,0,0,714554,27170,448785
1,강원,춘천시,2010,광역단체장,진보정당,보수정당,2,0.6102,1,0.3898,...,0,206172,124839,47183,73858,0,0,121041,3798,81333
2,강원,원주시,2010,광역단체장,진보정당,보수정당,2,0.5465,1,0.4535,...,0,234095,133502,58889,70966,0,0,129855,3647,100593
3,강원,강릉시,2010,광역단체장,진보정당,보수정당,2,0.5227,1,0.4773,...,0,170703,99246,45759,50120,0,0,95879,3367,71457
4,강원,동해시,2010,광역단체장,진보정당,보수정당,2,0.5118,1,0.4882,...,0,73611,44933,21132,22150,0,0,43282,1651,28678
5,강원,삼척시,2010,광역단체장,진보정당,보수정당,2,0.533,1,0.467,...,0,58602,40390,17971,20511,0,0,38482,1908,18212
6,강원,태백시,2010,광역단체장,진보정당,보수정당,2,0.6156,1,0.3844,...,0,40415,26819,9958,15946,0,0,25904,915,13596
7,강원,정선군,2010,광역단체장,진보정당,보수정당,2,0.6464,1,0.3536,...,0,33811,24529,8311,15190,0,0,23501,1028,9282
8,강원,속초시,2010,광역단체장,보수정당,진보정당,1,0.5262,2,0.4738,...,0,64606,37582,19112,17209,0,0,36321,1261,27024
9,강원,고성군,2010,광역단체장,보수정당,진보정당,1,0.5772,2,0.4228,...,0,25324,18943,10352,7584,0,0,17936,1007,6381


### v4.1 ~ v4.3

In [65]:
# 1. 전체 데이터 저장
df_gangwon.to_csv("temp4_1_governor_gangwon_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_gangwon2 = df_gangwon.query("구시군 != '합계'")
df_gangwon2.to_csv("temp4_2_governor_gangwon_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_gangwon3 = df_gangwon.query("구시군 == '합계'").drop(columns="구시군")
df_gangwon3.to_csv("temp4_3_governor_gangwon_5.csv", index=False, encoding="utf-8-sig")

## Chungbuk

In [66]:
df_chungbuk = election_results['df_chungbuk']

In [67]:
df_chungbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,충청북도,합계,1183811,696393,313646,349913,19551,0,683110,13283,...,2,0.5122,1,0.4591,진보정당,보수정당,1,1,1,0
1,충청북도,청주시상당구,182613,100216,46172,50754,2289,0,99215,1001,...,2,0.5116,1,0.4654,진보정당,보수정당,1,1,1,0
2,충청북도,청주시흥덕구,296977,159875,69434,85301,3916,0,158651,1224,...,2,0.5377,1,0.4377,진보정당,보수정당,1,1,1,0
3,충청북도,충주시,161522,94688,33714,57677,1753,0,93144,1544,...,2,0.6192,1,0.362,진보정당,보수정당,1,1,1,0
4,충청북도,제천시,107034,63341,31942,28593,1661,0,62196,1145,...,1,0.5136,2,0.4597,보수정당,진보정당,1,1,1,0
5,충청북도,단양군,26512,18918,9662,8005,614,0,18281,637,...,1,0.5285,2,0.4379,보수정당,진보정당,1,1,1,0
6,충청북도,청원군,117270,69613,28394,37551,2145,0,68090,1523,...,2,0.5515,1,0.417,진보정당,보수정당,1,1,1,0
7,충청북도,영동군,41711,28879,14834,11227,1459,0,27520,1359,...,1,0.539,2,0.408,보수정당,진보정당,1,1,1,0
8,충청북도,보은군,29345,21759,10039,9811,1026,0,20876,883,...,1,0.4809,2,0.47,보수정당,진보정당,1,1,1,0
9,충청북도,옥천군,44298,30568,14162,13583,1628,0,29373,1195,...,1,0.4821,2,0.4624,보수정당,진보정당,1,1,1,0


### preprocessing

In [68]:
df_chungbuk = (
    df_chungbuk
    .assign(시도=lambda df: df['시도'].replace('충청북도', '충북'))
)

In [69]:
df_chungbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [70]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_chungbuk.columns if col not in fixed_cols]

# 메소드 체이닝
df_chungbuk = (
    df_chungbuk
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [71]:
df_chungbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,충북,합계,2010,광역단체장,진보정당,보수정당,2,0.5122,1,0.4591,...,0,1183811,696393,313646,349913,19551,0,683110,13283,487418
1,충북,청주시상당구,2010,광역단체장,진보정당,보수정당,2,0.5116,1,0.4654,...,0,182613,100216,46172,50754,2289,0,99215,1001,82397
2,충북,청주시흥덕구,2010,광역단체장,진보정당,보수정당,2,0.5377,1,0.4377,...,0,296977,159875,69434,85301,3916,0,158651,1224,137102
3,충북,충주시,2010,광역단체장,진보정당,보수정당,2,0.6192,1,0.362,...,0,161522,94688,33714,57677,1753,0,93144,1544,66834
4,충북,제천시,2010,광역단체장,보수정당,진보정당,1,0.5136,2,0.4597,...,0,107034,63341,31942,28593,1661,0,62196,1145,43693
5,충북,단양군,2010,광역단체장,보수정당,진보정당,1,0.5285,2,0.4379,...,0,26512,18918,9662,8005,614,0,18281,637,7594
6,충북,청원군,2010,광역단체장,진보정당,보수정당,2,0.5515,1,0.417,...,0,117270,69613,28394,37551,2145,0,68090,1523,47657
7,충북,영동군,2010,광역단체장,보수정당,진보정당,1,0.539,2,0.408,...,0,41711,28879,14834,11227,1459,0,27520,1359,12832
8,충북,보은군,2010,광역단체장,보수정당,진보정당,1,0.4809,2,0.47,...,0,29345,21759,10039,9811,1026,0,20876,883,7586
9,충북,옥천군,2010,광역단체장,보수정당,진보정당,1,0.4821,2,0.4624,...,0,44298,30568,14162,13583,1628,0,29373,1195,13730


### v4.1 ~ v4.3

In [72]:
# 1. 전체 데이터 저장
df_chungbuk.to_csv("temp4_1_governor_chungbuk_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_chungbuk2 = df_chungbuk.query("구시군 != '합계'")
df_chungbuk2.to_csv("temp4_2_governor_chungbuk_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_chungbuk3 = df_chungbuk.query("구시군 == '합계'").drop(columns="구시군")
df_chungbuk3.to_csv("temp4_3_governor_chungbuk_5.csv", index=False, encoding="utf-8-sig")

## Chungnam

In [73]:
df_chungnam = election_results['df_chungnam']

In [74]:
df_chungnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,충청남도,합계,1595587,901863,154723,367288,347265,0,869271,32587,...,2,0.4225,3,0.3995,진보정당,그외정당,1,1,1,0
1,충청남도,천안시서북구,221580,104685,16178,44605,42683,0,103466,1219,...,2,0.4311,3,0.4125,진보정당,그외정당,1,1,1,0
2,충청남도,천안시동남구,183142,89013,14851,36201,36603,0,87655,1358,...,3,0.4176,2,0.413,그외정당,진보정당,1,1,1,0
3,충청남도,공주시,99709,61209,6888,27856,24127,0,58871,2338,...,2,0.4732,3,0.4098,진보정당,그외정당,1,1,1,0
4,충청남도,보령시,85687,53693,8829,18112,24168,0,51109,2584,...,3,0.4729,2,0.3544,그외정당,진보정당,1,1,1,0
5,충청남도,아산시,195443,99872,18577,44329,34579,0,97485,2387,...,2,0.4547,3,0.3547,진보정당,그외정당,1,1,1,0
6,충청남도,서산시,121199,66626,12333,24599,27427,0,64359,2267,...,3,0.4262,2,0.3822,그외정당,진보정당,1,1,1,0
7,충청남도,태안군,52517,36024,6315,12754,14216,0,33285,2739,...,3,0.4271,2,0.3832,그외정당,진보정당,1,1,1,0
8,충청남도,금산군,46562,28884,6328,11927,9118,0,27373,1511,...,2,0.4357,3,0.3331,진보정당,그외정당,1,1,1,0
9,충청남도,연기군,64534,37808,4265,18037,14244,0,36546,1262,...,2,0.4935,3,0.3898,진보정당,그외정당,1,1,1,0


### preprocessing

In [75]:
df_chungnam = (
    df_chungnam
    .assign(시도=lambda df: df['시도'].replace('충청남도', '충남'))
)

In [76]:
df_chungnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [77]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_chungnam.columns if col not in fixed_cols]

# 메소드 체이닝
df_chungnam = (
    df_chungnam
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [78]:
df_chungnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,충남,합계,2010,광역단체장,진보정당,그외정당,2,0.4225,3,0.3995,...,0,1595587,901863,154723,367288,347265,0,869271,32587,693724
1,충남,천안시서북구,2010,광역단체장,진보정당,그외정당,2,0.4311,3,0.4125,...,0,221580,104685,16178,44605,42683,0,103466,1219,116895
2,충남,천안시동남구,2010,광역단체장,그외정당,진보정당,3,0.4176,2,0.413,...,0,183142,89013,14851,36201,36603,0,87655,1358,94129
3,충남,공주시,2010,광역단체장,진보정당,그외정당,2,0.4732,3,0.4098,...,0,99709,61209,6888,27856,24127,0,58871,2338,38500
4,충남,보령시,2010,광역단체장,그외정당,진보정당,3,0.4729,2,0.3544,...,0,85687,53693,8829,18112,24168,0,51109,2584,31994
5,충남,아산시,2010,광역단체장,진보정당,그외정당,2,0.4547,3,0.3547,...,0,195443,99872,18577,44329,34579,0,97485,2387,95571
6,충남,서산시,2010,광역단체장,그외정당,진보정당,3,0.4262,2,0.3822,...,0,121199,66626,12333,24599,27427,0,64359,2267,54573
7,충남,태안군,2010,광역단체장,그외정당,진보정당,3,0.4271,2,0.3832,...,0,52517,36024,6315,12754,14216,0,33285,2739,16493
8,충남,금산군,2010,광역단체장,진보정당,그외정당,2,0.4357,3,0.3331,...,0,46562,28884,6328,11927,9118,0,27373,1511,17678
9,충남,연기군,2010,광역단체장,진보정당,그외정당,2,0.4935,3,0.3898,...,0,64534,37808,4265,18037,14244,0,36546,1262,26726


### v4.1 ~ v4.3

In [79]:
# 1. 전체 데이터 저장
df_chungnam.to_csv("temp4_1_governor_chungnam_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_chungnam2 = df_chungnam.query("구시군 != '합계'")
df_chungnam2.to_csv("temp4_2_governor_chungnam_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_chungnam3 = df_chungnam.query("구시군 == '합계'").drop(columns="구시군")
df_chungnam3.to_csv("temp4_3_governor_chungnam_5.csv", index=False, encoding="utf-8-sig")

## Jeonbuk

In [80]:
df_jeonbuk = election_results['df_jeonbuk']

In [81]:
df_jeonbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,전라북도,합계,1442805,856111,151064,569980,108886,0,829930,26181,...,2,0.6868,1,0.182,진보정당,보수정당,1,1,3,0
1,전라북도,전주시완산구,262926,140319,32656,86435,19326,0,138417,1902,...,2,0.6245,1,0.2359,진보정당,보수정당,1,1,3,0
2,전라북도,전주시덕진구,210631,107769,22696,66511,17152,0,106359,1410,...,2,0.6253,1,0.2134,진보정당,보수정당,1,1,3,0
3,전라북도,군산시,207328,113614,17270,81040,12250,0,110560,3054,...,2,0.733,1,0.1562,진보정당,보수정당,1,1,3,0
4,전라북도,익산시,234104,126680,23394,84786,14869,0,123049,3631,...,2,0.689,1,0.1901,진보정당,보수정당,1,1,3,0
5,전라북도,정읍시,96698,64655,8977,46746,6299,0,62022,2633,...,2,0.7537,1,0.1447,진보정당,보수정당,1,1,3,0
6,전라북도,남원시,69420,47635,5343,35825,4767,0,45935,1700,...,2,0.7799,1,0.1163,진보정당,보수정당,1,1,3,0
7,전라북도,김제시,78217,51879,8034,34695,6729,0,49458,2421,...,2,0.7015,1,0.1624,진보정당,보수정당,1,1,3,0
8,전라북도,완주군,67270,41784,6653,26880,6856,0,40389,1395,...,2,0.6655,1,0.1647,진보정당,보수정당,1,1,3,0
9,전라북도,진안군,23443,17962,2874,11212,3067,0,17153,809,...,2,0.6536,1,0.1676,진보정당,보수정당,1,1,3,0


### preprocessing

In [82]:
df_jeonbuk = (
    df_jeonbuk
    .assign(시도=lambda df: df['시도'].replace('전라북도', '전북'))
)

In [83]:
df_jeonbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [84]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_jeonbuk.columns if col not in fixed_cols]

# 메소드 체이닝
df_jeonbuk = (
    df_jeonbuk
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [85]:
df_jeonbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,전북,합계,2010,광역단체장,진보정당,보수정당,2,0.6868,1,0.182,...,0,1442805,856111,151064,569980,108886,0,829930,26181,586694
1,전북,전주시완산구,2010,광역단체장,진보정당,보수정당,2,0.6245,1,0.2359,...,0,262926,140319,32656,86435,19326,0,138417,1902,122607
2,전북,전주시덕진구,2010,광역단체장,진보정당,보수정당,2,0.6253,1,0.2134,...,0,210631,107769,22696,66511,17152,0,106359,1410,102862
3,전북,군산시,2010,광역단체장,진보정당,보수정당,2,0.733,1,0.1562,...,0,207328,113614,17270,81040,12250,0,110560,3054,93714
4,전북,익산시,2010,광역단체장,진보정당,보수정당,2,0.689,1,0.1901,...,0,234104,126680,23394,84786,14869,0,123049,3631,107424
5,전북,정읍시,2010,광역단체장,진보정당,보수정당,2,0.7537,1,0.1447,...,0,96698,64655,8977,46746,6299,0,62022,2633,32043
6,전북,남원시,2010,광역단체장,진보정당,보수정당,2,0.7799,1,0.1163,...,0,69420,47635,5343,35825,4767,0,45935,1700,21785
7,전북,김제시,2010,광역단체장,진보정당,보수정당,2,0.7015,1,0.1624,...,0,78217,51879,8034,34695,6729,0,49458,2421,26338
8,전북,완주군,2010,광역단체장,진보정당,보수정당,2,0.6655,1,0.1647,...,0,67270,41784,6653,26880,6856,0,40389,1395,25486
9,전북,진안군,2010,광역단체장,진보정당,보수정당,2,0.6536,1,0.1676,...,0,23443,17962,2874,11212,3067,0,17153,809,5481


### v4.1 ~ v4.3

In [86]:
# 1. 전체 데이터 저장
df_jeonbuk.to_csv("temp4_1_governor_jeonbuk_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_jeonbuk2 = df_jeonbuk.query("구시군 != '합계'")
df_jeonbuk2.to_csv("temp4_2_governor_jeonbuk_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_jeonbuk3 = df_jeonbuk.query("구시군 == '합계'").drop(columns="구시군")
df_jeonbuk3.to_csv("temp4_3_governor_jeonbuk_5.csv", index=False, encoding="utf-8-sig")

## Jeonnam

In [87]:
df_jeonnam = election_results['df_jeonnam']

In [88]:
df_jeonnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,전라남도,합계,1504902,967338,123548,629984,168801,0,922333,45005,...,2,0.683,1,0.134,진보정당,보수정당,1,1,2,0
1,전라남도,목포시,181872,91633,11266,60315,17624,0,89205,2428,...,2,0.6761,5,0.1322,진보정당,그외정당,1,1,2,0
2,전라남도,여수시,225147,130630,14500,89520,22611,0,126631,3999,...,2,0.7069,1,0.1145,진보정당,보수정당,1,1,2,0
3,전라남도,순천시,200321,122961,15447,79796,23444,0,118687,4274,...,2,0.6723,1,0.1301,진보정당,보수정당,1,1,2,0
4,전라남도,나주시,75246,52381,5423,34572,9589,0,49584,2797,...,2,0.6972,5,0.1327,진보정당,그외정당,1,1,2,0
5,전라남도,광양시,104440,65132,9687,40665,12906,0,63258,1874,...,2,0.6428,1,0.1531,진보정당,보수정당,1,1,2,0
6,전라남도,담양군,39981,26959,3363,17679,4525,0,25567,1392,...,2,0.6915,1,0.1315,진보정당,보수정당,1,1,2,0
7,전라남도,장성군,38299,27488,3596,18042,4440,0,26078,1410,...,2,0.6918,1,0.1379,진보정당,보수정당,1,1,2,0
8,전라남도,곡성군,26917,20586,2123,12527,4627,0,19277,1309,...,2,0.6498,5,0.1652,진보정당,그외정당,1,1,2,0
9,전라남도,구례군,22984,18290,2213,11513,3283,0,17009,1281,...,2,0.6769,1,0.1301,진보정당,보수정당,1,1,2,0


### preprocessing

In [89]:
df_jeonnam = (
    df_jeonnam
    .assign(시도=lambda df: df['시도'].replace('전라남도', '전남'))
)

In [90]:
df_jeonnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [91]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_jeonnam.columns if col not in fixed_cols]

# 메소드 체이닝
df_jeonnam = (
    df_jeonnam
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [92]:
df_jeonnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,전남,합계,2010,광역단체장,진보정당,보수정당,2,0.683,1,0.134,...,0,1504902,967338,123548,629984,168801,0,922333,45005,537564
1,전남,목포시,2010,광역단체장,진보정당,그외정당,2,0.6761,5,0.1322,...,0,181872,91633,11266,60315,17624,0,89205,2428,90239
2,전남,여수시,2010,광역단체장,진보정당,보수정당,2,0.7069,1,0.1145,...,0,225147,130630,14500,89520,22611,0,126631,3999,94517
3,전남,순천시,2010,광역단체장,진보정당,보수정당,2,0.6723,1,0.1301,...,0,200321,122961,15447,79796,23444,0,118687,4274,77360
4,전남,나주시,2010,광역단체장,진보정당,그외정당,2,0.6972,5,0.1327,...,0,75246,52381,5423,34572,9589,0,49584,2797,22865
5,전남,광양시,2010,광역단체장,진보정당,보수정당,2,0.6428,1,0.1531,...,0,104440,65132,9687,40665,12906,0,63258,1874,39308
6,전남,담양군,2010,광역단체장,진보정당,보수정당,2,0.6915,1,0.1315,...,0,39981,26959,3363,17679,4525,0,25567,1392,13022
7,전남,장성군,2010,광역단체장,진보정당,보수정당,2,0.6918,1,0.1379,...,0,38299,27488,3596,18042,4440,0,26078,1410,10811
8,전남,곡성군,2010,광역단체장,진보정당,그외정당,2,0.6498,5,0.1652,...,0,26917,20586,2123,12527,4627,0,19277,1309,6331
9,전남,구례군,2010,광역단체장,진보정당,보수정당,2,0.6769,1,0.1301,...,0,22984,18290,2213,11513,3283,0,17009,1281,4694


### v4.1 ~ v4.3

In [93]:
# 1. 전체 데이터 저장
df_jeonnam.to_csv("temp4_1_governor_jeonnam_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_jeonnam2 = df_jeonnam.query("구시군 != '합계'")
df_jeonnam2.to_csv("temp4_2_governor_jeonnam_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_jeonnam3 = df_jeonnam.query("구시군 == '합계'").drop(columns="구시군")
df_jeonnam3.to_csv("temp4_3_governor_jeonnam_5.csv", index=False, encoding="utf-8-sig")

## Gyeongbuk

In [94]:
df_gyeongbuk = election_results['df_gyeongbuk']

In [95]:
df_gyeongbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경상북도,합계,2122905,1260591,913812,143347,155361,0,1212520,48071,...,1,0.7536,2,0.1182,보수정당,진보정당,1,1,2,0
1,경상북도,포항시북구,199281,107009,81746,10702,12386,0,104834,2175,...,1,0.7798,2,0.1021,보수정당,진보정당,1,1,2,0
2,경상북도,포항시남구,197630,105544,77479,12764,12925,0,103168,2376,...,1,0.751,2,0.1237,보수정당,진보정당,1,1,2,0
3,경상북도,울릉군,9072,7404,5825,648,614,0,7087,317,...,1,0.8219,2,0.0914,보수정당,진보정당,1,1,2,0
4,경상북도,경주시,211517,124949,91802,12039,16768,0,120609,4340,...,1,0.7612,2,0.0998,보수정당,진보정당,1,1,2,0
5,경상북도,김천시,109593,67692,51237,6967,6933,0,65137,2555,...,1,0.7866,2,0.107,보수정당,진보정당,1,1,2,0
6,경상북도,안동시,134118,84290,63228,8297,9433,0,80958,3332,...,1,0.781,2,0.1025,보수정당,진보정당,1,1,2,0
7,경상북도,구미시,291865,138048,90050,21610,23037,0,134697,3351,...,1,0.6685,2,0.1604,보수정당,진보정당,1,1,2,0
8,경상북도,영주시,90908,60339,43016,7707,6775,0,57498,2841,...,1,0.7481,2,0.134,보수정당,진보정당,1,1,2,0
9,경상북도,영천시,85134,52224,37647,5719,6384,0,49750,2474,...,1,0.7567,2,0.115,보수정당,진보정당,1,1,2,0


### preprocessing

In [96]:
df_gyeongbuk = (
    df_gyeongbuk
    .assign(시도=lambda df: df['시도'].replace('경상북도', '경북'))
)

In [97]:
df_gyeongbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [98]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_gyeongbuk.columns if col not in fixed_cols]

# 메소드 체이닝
df_gyeongbuk = (
    df_gyeongbuk
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [99]:
df_gyeongbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,경북,합계,2010,광역단체장,보수정당,진보정당,1,0.7536,2,0.1182,...,0,2122905,1260591,913812,143347,155361,0,1212520,48071,862314
1,경북,포항시북구,2010,광역단체장,보수정당,진보정당,1,0.7798,2,0.1021,...,0,199281,107009,81746,10702,12386,0,104834,2175,92272
2,경북,포항시남구,2010,광역단체장,보수정당,진보정당,1,0.751,2,0.1237,...,0,197630,105544,77479,12764,12925,0,103168,2376,92086
3,경북,울릉군,2010,광역단체장,보수정당,진보정당,1,0.8219,2,0.0914,...,0,9072,7404,5825,648,614,0,7087,317,1668
4,경북,경주시,2010,광역단체장,보수정당,진보정당,1,0.7612,2,0.0998,...,0,211517,124949,91802,12039,16768,0,120609,4340,86568
5,경북,김천시,2010,광역단체장,보수정당,진보정당,1,0.7866,2,0.107,...,0,109593,67692,51237,6967,6933,0,65137,2555,41901
6,경북,안동시,2010,광역단체장,보수정당,진보정당,1,0.781,2,0.1025,...,0,134118,84290,63228,8297,9433,0,80958,3332,49828
7,경북,구미시,2010,광역단체장,보수정당,진보정당,1,0.6685,2,0.1604,...,0,291865,138048,90050,21610,23037,0,134697,3351,153817
8,경북,영주시,2010,광역단체장,보수정당,진보정당,1,0.7481,2,0.134,...,0,90908,60339,43016,7707,6775,0,57498,2841,30569
9,경북,영천시,2010,광역단체장,보수정당,진보정당,1,0.7567,2,0.115,...,0,85134,52224,37647,5719,6384,0,49750,2474,32910


### v4.1 ~ v4.3

In [100]:
# 1. 전체 데이터 저장
df_gyeongbuk.to_csv("temp4_1_governor_gyeongbuk_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_gyeongbuk2 = df_gyeongbuk.query("구시군 != '합계'")
df_gyeongbuk2.to_csv("temp4_2_governor_gyeongbuk_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_gyeongbuk3 = df_gyeongbuk.query("구시군 == '합계'").drop(columns="구시군")
df_gyeongbuk3.to_csv("temp4_3_governor_gyeongbuk_5.csv", index=False, encoding="utf-8-sig")

## Gyeongnam

In [101]:
df_gyeongnam = election_results['df_gyeongnam']

In [102]:
df_gyeongnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경상남도,합계,2506393,1549690,705986,0,0,812336,1518322,31368,...,7,0.535,1,0.465,무소속,보수정당,1,0,0,1
1,경상남도,창원시,371540,224868,97364,0,0,125343,222707,2161,...,7,0.5628,1,0.4372,무소속,보수정당,1,0,0,1
2,경상남도,마산시,322188,191127,97429,0,0,91410,188839,2288,...,1,0.5159,7,0.4841,보수정당,무소속,1,0,0,1
3,경상남도,진주시,254687,164337,72465,0,0,89377,161842,2495,...,7,0.5522,1,0.4478,무소속,보수정당,1,0,0,1
4,경상남도,진해시,130168,77361,38717,0,0,37384,76101,1260,...,1,0.5088,7,0.4912,보수정당,무소속,1,0,0,1
5,경상남도,통영시,108127,67402,35601,0,0,30323,65924,1478,...,1,0.54,7,0.46,보수정당,무소속,1,0,0,1
6,경상남도,고성군,47716,32473,15614,0,0,15784,31398,1075,...,7,0.5027,1,0.4973,무소속,보수정당,1,0,0,1
7,경상남도,사천시,89018,61019,29234,0,0,30079,59313,1706,...,7,0.5071,1,0.4929,무소속,보수정당,1,0,0,1
8,경상남도,김해시,359474,195079,73316,0,0,119063,192379,2700,...,7,0.6189,1,0.3811,무소속,보수정당,1,0,0,1
9,경상남도,밀양시,90189,58178,28365,0,0,28100,56465,1713,...,1,0.5023,7,0.4977,보수정당,무소속,1,0,0,1


### preprocessing

In [103]:
df_gyeongnam = (
    df_gyeongnam
    .assign(시도=lambda df: df['시도'].replace('경상남도', '경남'))
)

In [104]:
df_gyeongnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [105]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_gyeongnam.columns if col not in fixed_cols]

# 메소드 체이닝
df_gyeongnam = (
    df_gyeongnam
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [106]:
df_gyeongnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,경남,합계,2010,광역단체장,무소속,보수정당,7,0.535,1,0.465,...,1,2506393,1549690,705986,0,0,812336,1518322,31368,956703
1,경남,창원시,2010,광역단체장,무소속,보수정당,7,0.5628,1,0.4372,...,1,371540,224868,97364,0,0,125343,222707,2161,146672
2,경남,마산시,2010,광역단체장,보수정당,무소속,1,0.5159,7,0.4841,...,1,322188,191127,97429,0,0,91410,188839,2288,131061
3,경남,진주시,2010,광역단체장,무소속,보수정당,7,0.5522,1,0.4478,...,1,254687,164337,72465,0,0,89377,161842,2495,90350
4,경남,진해시,2010,광역단체장,보수정당,무소속,1,0.5088,7,0.4912,...,1,130168,77361,38717,0,0,37384,76101,1260,52807
5,경남,통영시,2010,광역단체장,보수정당,무소속,1,0.54,7,0.46,...,1,108127,67402,35601,0,0,30323,65924,1478,40725
6,경남,고성군,2010,광역단체장,무소속,보수정당,7,0.5027,1,0.4973,...,1,47716,32473,15614,0,0,15784,31398,1075,15243
7,경남,사천시,2010,광역단체장,무소속,보수정당,7,0.5071,1,0.4929,...,1,89018,61019,29234,0,0,30079,59313,1706,27999
8,경남,김해시,2010,광역단체장,무소속,보수정당,7,0.6189,1,0.3811,...,1,359474,195079,73316,0,0,119063,192379,2700,164395
9,경남,밀양시,2010,광역단체장,보수정당,무소속,1,0.5023,7,0.4977,...,1,90189,58178,28365,0,0,28100,56465,1713,32011


### v4.1 ~ v4.3

In [107]:
# 1. 전체 데이터 저장
df_gyeongnam.to_csv("temp4_1_governor_gyeongnam_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_gyeongnam2 = df_gyeongnam.query("구시군 != '합계'")
df_gyeongnam2.to_csv("temp4_2_governor_gyeongnam_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_gyeongnam3 = df_gyeongnam.query("구시군 == '합계'").drop(columns="구시군")
df_gyeongnam3.to_csv("temp4_3_governor_gyeongnam_5.csv", index=False, encoding="utf-8-sig")

## Jeju

In [108]:
df_jeju = election_results['df_jeju']

In [109]:
df_jeju

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,제주특별자치도,합계,424098,276056,0,48186,0,218947,267133,8923,...,9,0.414,8,0.4056,무소속,무소속,0,1,0,2
1,제주특별자치도,제주시,305765,195450,0,36447,0,153419,189866,5584,...,9,0.4135,8,0.3945,무소속,무소속,0,1,0,2
2,제주특별자치도,서귀포시,118333,80606,0,11739,0,65528,77267,3339,...,8,0.4328,9,0.4153,무소속,무소속,0,1,0,2


### preprocessing

In [110]:
df_jeju = (
    df_jeju
    .assign(시도=lambda df: df['시도'].replace('제주도', '제주'))
)

In [111]:
df_jeju.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [112]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_jeju.columns if col not in fixed_cols]

# 메소드 체이닝
df_jeju = (
    df_jeju
    .assign(선거종류='광역단체장', 선거년도='2010')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [113]:
df_jeju

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,제주특별자치도,합계,2010,광역단체장,무소속,무소속,9,0.414,8,0.4056,...,2,424098,276056,0,48186,0,218947,267133,8923,148042
1,제주특별자치도,제주시,2010,광역단체장,무소속,무소속,9,0.4135,8,0.3945,...,2,305765,195450,0,36447,0,153419,189866,5584,110315
2,제주특별자치도,서귀포시,2010,광역단체장,무소속,무소속,8,0.4328,9,0.4153,...,2,118333,80606,0,11739,0,65528,77267,3339,37727


### v4.1 ~ v4.3

In [114]:
# 1. 전체 데이터 저장
df_jeju.to_csv("temp4_1_governor_jeju_5.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_jeju2 = df_jeju.query("구시군 != '합계'")
df_jeju2.to_csv("temp4_2_governor_jeju_5.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_jeju3 = df_jeju.query("구시군 == '합계'").drop(columns="구시군")
df_jeju3.to_csv("temp4_3_governor_jeju_5.csv", index=False, encoding="utf-8-sig")

## Merge

### v4.1

In [115]:
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

df_combined = (
    pd.concat([globals()[f'df_{region}'] for region in AVAILABLE_REGIONS], ignore_index=True)
)

In [116]:
df_combined

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,합계,2010,광역단체장,보수정당,진보정당,1,0.4744,2,0.4684,...,0,8211461,4426182,2086127,2059715,251830,0,4397672,28510,3785279
1,서울,종로구,2010,광역단체장,진보정당,보수정당,2,0.4791,1,0.4605,...,0,138917,77812,35476,36910,4660,0,77046,766,61105
2,서울,중구,2010,광역단체장,보수정당,진보정당,1,0.4756,2,0.4716,...,0,109000,60763,28648,28410,3179,0,60237,526,48237
3,서울,용산구,2010,광역단체장,보수정당,진보정당,1,0.5115,2,0.4292,...,0,198044,105005,53285,44706,6173,0,104164,841,93039
4,서울,성동구,2010,광역단체장,진보정당,보수정당,2,0.4761,1,0.4722,...,0,250316,135277,63448,63966,6951,0,134365,912,115039
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
260,경남,거창군,2010,광역단체장,보수정당,무소속,1,0.5147,7,0.4853,...,1,50832,37039,18256,0,0,17216,35472,1567,13793
261,경남,합천군,2010,광역단체장,보수정당,무소속,1,0.5272,7,0.4728,...,1,44353,34161,16976,0,0,15222,32198,1963,10192
262,제주특별자치도,합계,2010,광역단체장,무소속,무소속,9,0.4140,8,0.4056,...,2,424098,276056,0,48186,0,218947,267133,8923,148042
263,제주특별자치도,제주시,2010,광역단체장,무소속,무소속,9,0.4135,8,0.3945,...,2,305765,195450,0,36447,0,153419,189866,5584,110315


In [117]:
df_combined.to_csv("temp4_1_governor_5.csv", index=False, encoding="utf-8-sig")

### v4.2

In [118]:
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

df_combined2 = (
    pd.concat([globals()[f'df_{region}2'] for region in AVAILABLE_REGIONS], ignore_index=True)
)

In [119]:
df_combined2

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,종로구,2010,광역단체장,진보정당,보수정당,2,0.4791,1,0.4605,...,0,138917,77812,35476,36910,4660,0,77046,766,61105
1,서울,중구,2010,광역단체장,보수정당,진보정당,1,0.4756,2,0.4716,...,0,109000,60763,28648,28410,3179,0,60237,526,48237
2,서울,용산구,2010,광역단체장,보수정당,진보정당,1,0.5115,2,0.4292,...,0,198044,105005,53285,44706,6173,0,104164,841,93039
3,서울,성동구,2010,광역단체장,진보정당,보수정당,2,0.4761,1,0.4722,...,0,250316,135277,63448,63966,6951,0,134365,912,115039
4,서울,광진구,2010,광역단체장,진보정당,보수정당,2,0.4842,1,0.4609,...,0,301603,158963,72749,76420,8674,0,157843,1120,142640
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244,경남,산청군,2010,광역단체장,보수정당,무소속,1,0.5129,7,0.4871,...,1,30305,22967,11403,0,0,10830,22233,734,7338
245,경남,거창군,2010,광역단체장,보수정당,무소속,1,0.5147,7,0.4853,...,1,50832,37039,18256,0,0,17216,35472,1567,13793
246,경남,합천군,2010,광역단체장,보수정당,무소속,1,0.5272,7,0.4728,...,1,44353,34161,16976,0,0,15222,32198,1963,10192
247,제주특별자치도,제주시,2010,광역단체장,무소속,무소속,9,0.4135,8,0.3945,...,2,305765,195450,0,36447,0,153419,189866,5584,110315


In [120]:
df_combined2.to_csv("temp4_2_governor_5.csv", index=False, encoding="utf-8-sig")

### v4.3

In [121]:
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

df_combined3 = (
    pd.concat([globals()[f'df_{region}3'] for region in AVAILABLE_REGIONS], ignore_index=True)
)

In [122]:
df_combined3

Unnamed: 0,지역,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,보수정당_후보자수,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,2010,광역단체장,보수정당,진보정당,1,0.4744,2,0.4684,1,...,0,8211461,4426182,2086127,2059715,251830,0,4397672,28510,3785279
1,부산,2010,광역단체장,보수정당,진보정당,1,0.5543,2,0.4457,1,...,0,2849895,1410126,770507,619565,0,0,1390072,20054,1439769
2,대구,2010,광역단체장,보수정당,진보정당,1,0.7293,2,0.1687,1,...,0,1928835,886035,633118,146458,88599,0,868175,17860,1042800
3,인천,2010,광역단체장,진보정당,보수정당,2,0.527,1,0.4438,1,...,0,2096853,1067431,469040,556902,30838,0,1056780,10651,1029422
4,광주,2010,광역단체장,진보정당,그외정당,2,0.5674,8,0.1449,1,...,0,1064913,529901,74490,297003,151990,0,523483,6418,535012
5,대전,2010,광역단체장,그외정당,보수정당,3,0.4668,1,0.285,1,...,0,1127547,596683,168616,137751,285196,0,591563,5120,530864
6,울산,2010,광역단체장,보수정당,그외정당,1,0.6126,5,0.2926,1,...,0,838805,462103,279421,0,176693,0,456114,5989,376702
7,경기,2010,광역단체장,보수정당,그외정당,1,0.522,8,0.478,1,...,0,8761840,4534771,2271492,0,2079892,0,4351384,183387,4227069
8,강원,2010,광역단체장,진보정당,보수정당,2,0.5436,1,0.4564,1,...,0,1190509,741724,326111,388443,0,0,714554,27170,448785
9,충북,2010,광역단체장,진보정당,보수정당,2,0.5122,1,0.4591,1,...,0,1183811,696393,313646,349913,19551,0,683110,13283,487418


In [123]:
df_combined3.to_csv("temp4_3_governor_5.csv", index=False, encoding="utf-8-sig")

# Batch CSV Files to ZIP

In [124]:
import zipfile
import glob

# Find all CSV files in current directory
csv_files = glob.glob('*.csv')

# Create ZIP file
with zipfile.ZipFile('all_csv_files.zip', 'w') as zipf:
   for file in csv_files:
       zipf.write(file)
       print(f"Added: {file}")  # Show progress

print(f"Total {len(csv_files)} files compressed.")

Added: temp4_2_governor_chungbuk_5.csv
Added: temp4_2_governor_5.csv
Added: temp4_1_governor_jeonnam_5.csv
Added: temp4_3_governor_chungnam_5.csv
Added: temp4_2_governor_gwangju_5.csv
Added: temp4_2_governor_jeju_5.csv
Added: temp4_1_governor_gyeongnam_5.csv
Added: temp4_3_governor_gyeongnam_5.csv
Added: temp4_3_governor_5.csv
Added: temp4_3_governor_jeonbuk_5.csv
Added: temp4_2_governor_incheon_5.csv
Added: temp4_2_governor_jeonnam_5.csv
Added: temp4_2_governor_gangwon_5.csv
Added: temp4_1_governor_daegu_5.csv
Added: temp4_3_governor_incheon_5.csv
Added: temp4_1_governor_seoul_5.csv
Added: temp4_2_governor_jeonbuk_5.csv
Added: temp4_1_governor_incheon_5.csv
Added: temp4_2_governor_chungnam_5.csv
Added: temp4_3_governor_chungbuk_5.csv
Added: temp4_3_governor_ulsan_5.csv
Added: temp4_3_governor_gwangju_5.csv
Added: temp4_1_governor_gangwon_5.csv
Added: temp4_1_governor_jeonbuk_5.csv
Added: temp4_1_governor_chungnam_5.csv
Added: temp4_3_governor_gyeonggi_5.csv
Added: temp4_2_governor_dae