# Functions

In [1]:
import re
from typing import Dict, Optional, Tuple

import numpy as np
import pandas as pd

def _top_two_candidates(row, candidate_columns):
    """Return the two highest vote tallies in *row* as ``(votes, candidate_number)`` pairs.

    Args:
        row: One row of the detailed vote table.
        candidate_columns: ``(column_name, candidate_number)`` pairs to rank.

    Returns:
        ``(first, second)``. A missing place is reported as ``(0, None)``.
    """
    # Missing vote counts are treated as zero votes.
    tallies = [
        (row[column] if pd.notna(row[column]) else 0, number)
        for column, number in candidate_columns
    ]
    # list.sort is stable, so on a tie the candidate whose column comes first wins.
    tallies.sort(key=lambda tally: tally[0], reverse=True)
    # Pad so that indexing is safe when fewer than two candidates exist.
    tallies.extend([(0, None), (0, None)])
    return tallies[0], tallies[1]


def _summarise_top_two(row, candidate_columns):
    """Return ``(first_number, first_rate, second_number, second_rate)`` for *row*.

    Rates are the candidate's votes divided by the row's '득표수_계' value,
    rounded to four decimals; they are 0 when that total is missing or not positive.
    """
    first, second = _top_two_candidates(row, candidate_columns)
    total_votes = row['득표수_계'] if pd.notna(row['득표수_계']) else 0

    if total_votes > 0:
        first_rate = round(first[0] / total_votes, 4)
        second_rate = round(second[0] / total_votes, 4)
    else:
        first_rate = second_rate = 0

    return first[1], first_rate, second[1], second_rate


def process_governor_election_data(region_name: str) -> Optional[pd.DataFrame]:
    """Build the merged governor-election table for one region.

    Downloads the detailed per-candidate vote CSV and the summary CSV for
    ``region_name``, derives the first/second place candidate number, vote
    share and party category for every row of the detailed table, adds the
    number of candidates per party category, and left-joins those columns
    onto the summary table using the ('시도', '구시군') key.

    Progress and warnings are reported with ``print``.

    Args:
        region_name: Region key used in the CSV file names
            (e.g. 'busan', 'seoul', 'gyeonggi').

    Returns:
        The merged DataFrame, or ``None`` when any step raised an exception
        (the error is printed). Returning ``None`` instead of raising lets a
        batch caller continue with the remaining regions.
    """

    # Build the source URLs.
    df1_url = f"https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v1_g/4th_2006/temp1_governor_{region_name}_4.csv"
    df2_url = f"https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_g/4th_2006/temp2_2_governor_{region_name}_4.csv"

    print(f"=== {region_name} 지사 선거 데이터 처리 시작 ===")
    print(f"상세 데이터 URL: {df1_url}")
    print(f"요약 데이터 URL: {df2_url}")

    try:
        # First CSV: detailed per-candidate vote counts.
        df1 = pd.read_csv(df1_url)
        print(f"상세 데이터 로드 완료: {df1.shape}")

        # Per-candidate columns are named 득표수_{number}_{party}_{name};
        # '득표수_계' is the total and is excluded.
        vote_columns = [col for col in df1.columns if col.startswith('득표수_') and col != '득표수_계']
        print(f"득표수 관련 컬럼 수: {len(vote_columns)}")

        # Parse the candidate number out of each column name once, instead of
        # once per row per derived column.
        number_pattern = re.compile(r'득표수_(\d+)_')
        candidate_columns = []
        for col in vote_columns:
            match = number_pattern.search(col)
            if match:
                candidate_columns.append((col, int(match.group(1))))

        # Rank every row a single time and fan the result out into four columns.
        summaries = [
            _summarise_top_two(row, candidate_columns)
            for _, row in df1.iterrows()
        ]
        summary_columns = ['득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율']
        for position, column_name in enumerate(summary_columns):
            df1[column_name] = pd.Series(
                [summary[position] for summary in summaries],
                index=df1.index,
            )

        # Candidate number -> party category (differs per region).
        category_mapping = get_governor_category_mapping(region_name, vote_columns)
        print(f"생성된 카테고리 매핑: {category_mapping}")

        # Candidate numbers without a mapping entry fall into '기타'.
        df1['득표_1위_정당'] = df1['득표_1위_후보번호'].map(category_mapping).fillna('기타')
        df1['득표_2위_정당'] = df1['득표_2위_후보번호'].map(category_mapping).fillna('기타')

        # Show the mapping result.
        print("매핑 후 1위 정당 분포 (처리 중):")
        print(df1['득표_1위_정당'].value_counts())

        # Report candidate numbers that ended up unmapped.
        unmapped_1st = df1[df1['득표_1위_정당'] == '기타']['득표_1위_후보번호'].unique()
        unmapped_2nd = df1[df1['득표_2위_정당'] == '기타']['득표_2위_후보번호'].unique()
        if len(unmapped_1st) > 0:
            print(f"경고: 1위에서 매핑되지 않은 후보번호: {unmapped_1st}")
        if len(unmapped_2nd) > 0:
            print(f"경고: 2위에서 매핑되지 않은 후보번호: {unmapped_2nd}")

        # Candidates per category; every known category is present even when 0.
        all_categories = ['보수정당', '진보정당', '그외정당', '무소속']
        candidate_counts = dict.fromkeys(all_categories, 0)
        for category in category_mapping.values():
            candidate_counts[category] = candidate_counts.get(category, 0) + 1

        print(f"카테고리별 후보자 수: {candidate_counts}")

        # One constant column per known category.
        for category in all_categories:
            candidate_count = candidate_counts.get(category, 0)
            df1[f'{category}_후보자수'] = candidate_count
            print(f"  {category}_후보자수: {candidate_count}")

        # Keep only the join key and the derived columns for the merge.
        merge_columns = ['시도', '구시군'] + summary_columns + [
            '득표_1위_정당', '득표_2위_정당'
        ] + [f'{cat}_후보자수' for cat in all_categories]

        df1_for_merge = df1[merge_columns].copy()

        # Second CSV: summary data.
        df2 = pd.read_csv(df2_url)
        print(f"요약 데이터 로드 완료: {df2.shape}")

        # Left join so every summary row is kept.
        merged_df = pd.merge(df2, df1_for_merge, on=['시도', '구시군'], how='left')

        # Summary rows without a match have NaN in the derived columns.
        missing_data = merged_df[merged_df['득표_1위_후보번호'].isna()]
        if len(missing_data) > 0:
            print(f"경고: 병합되지 않은 데이터가 {len(missing_data)}개 있습니다")
        else:
            print("모든 데이터가 성공적으로 병합되었습니다!")

        print(f"최종 데이터 형태: {merged_df.shape}")
        print("1위 정당 분포:")
        print(merged_df['득표_1위_정당'].value_counts())
        print(f"=== {region_name} 지사 선거 데이터 처리 완료 ===\n")

        return merged_df

    except Exception as e:
        # Deliberate best-effort boundary: report and signal failure with None.
        print(f"오류 발생: {e}")
        return None

def get_governor_category_mapping(region_name: str, vote_columns: list) -> Dict[int, str]:
    """Return the candidate-number -> party-category mapping for one region.

    The per-region tables are maintained by hand (4th local elections, 2006).
    Only candidate numbers that actually occur in ``vote_columns`` are kept in
    the result. The candidate columns, the numbers found, any numbers without
    a table entry, and the applied mapping are printed.

    Args:
        region_name: Region key such as 'seoul' or 'jeju'. An unknown key
            triggers a printed warning and a generic fallback table.
        vote_columns: Vote-count column names; candidate numbers are parsed
            from the ``득표수_<number>_`` part of each name.

    Returns:
        Mapping of candidate number to category for the candidates present.
    """
    print(f"\n=== {region_name} 지사 선거 후보 정보 ===")
    print("실제 후보 컬럼들:")
    for column_name in vote_columns:
        print(f"  {column_name}")

    # Category labels, named once so the tables below stay compact.
    progressive = '진보정당'
    conservative = '보수정당'
    other = '그외정당'
    independent = '무소속'

    # Hand-maintained tables, one per region.
    region_tables = {
        'seoul': {1: progressive, 2: conservative, 3: other, 4: other,
                  5: other, 6: other, 7: other, 8: independent},
        'busan': {1: progressive, 2: conservative, 4: other},
        'daegu': {1: progressive, 2: conservative, 4: other, 5: other, 6: independent},
        'incheon': {1: progressive, 2: conservative, 3: other, 4: other},
        'gwangju': {1: progressive, 2: conservative, 3: other, 4: other},
        'daejeon': {1: progressive, 2: conservative, 3: other, 4: other, 5: other, 7: other},
        'ulsan': {1: progressive, 2: conservative, 4: other},
        'gyeonggi': {1: progressive, 2: conservative, 3: other, 4: other},
        'gangwon': {1: progressive, 2: conservative, 3: other, 5: other},
        'chungbuk': {1: progressive, 2: conservative, 4: other, 5: other},
        'chungnam': {1: progressive, 2: conservative, 4: other, 5: other},
        'jeonbuk': {1: progressive, 2: conservative, 3: other, 4: other},
        'jeonnam': {1: progressive, 2: conservative, 3: other, 4: other},
        'gyeongbuk': {1: progressive, 2: conservative},
        'gyeongnam': {1: progressive, 2: conservative, 4: other, 5: other},
        'jeju': {1: progressive, 2: conservative, 6: independent},
    }

    if region_name in region_tables:
        mapping = region_tables[region_name]
    else:
        print(f"경고: {region_name} 지역에 대한 매핑이 정의되지 않았습니다.")
        print("기본 매핑을 사용합니다. 수동으로 매핑을 추가해주세요.")
        # Generic fallback; meant to be replaced by a real table.
        mapping = {1: progressive, 2: conservative, 3: other, 4: other, 5: independent}

    # Candidate numbers that really appear in the data.
    number_pattern = re.compile(r'득표수_(\d+)_')
    found = (number_pattern.search(column_name) for column_name in vote_columns)
    existing_candidates = {int(hit.group(1)) for hit in found if hit}

    print(f"실제 존재하는 후보번호: {sorted(existing_candidates)}")

    # Drop table entries for candidates that are not in the data.
    filtered_mapping = {
        number: category
        for number, category in mapping.items()
        if number in existing_candidates
    }

    # Candidates in the data that the table does not cover.
    unmapped_candidates = existing_candidates.difference(mapping)
    if unmapped_candidates:
        print(f"경고: 매핑되지 않은 후보번호들: {sorted(unmapped_candidates)}")
        print("이 후보들은 '기타' 카테고리로 분류됩니다.")

    print(f"적용된 매핑: {filtered_mapping}")
    print("=" * 50)

    return filtered_mapping

def process_multiple_governor_elections(region_names: list) -> Dict[str, pd.DataFrame]:
    """Process several regions and collect the successful results.

    Each region is handed to ``process_governor_election_data``. Regions for
    which that call returns ``None`` are reported and left out.

    Args:
        region_names (list): Region keys to process, in order.

    Returns:
        Dict[str, pd.DataFrame]: Processed tables keyed as ``'df_<region>'``
        (e.g. ``'df_busan'``).
    """
    results = {}
    separator = '=' * 50

    for region_name in region_names:
        print(f"\n{separator}")
        result_df = process_governor_election_data(region_name)

        # Failed regions are reported and skipped.
        if result_df is None:
            print(f"{region_name} 지사 선거 데이터 처리 실패")
            continue

        # Store under a key shaped like df_busan, df_seoul, ...
        var_name = f'df_{region_name}'
        results[var_name] = result_df
        print(f"데이터프레임 저장: {var_name} (shape: {result_df.shape})")

    return results

# Region keys; each one has an explicit table in get_governor_category_mapping.
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

# # 사용 예시
# if __name__ == "__main__":
#     # 방법 1: 특정 지역들만 처리
#     selected_regions = ['busan', 'seoul', 'gyeonggi', 'incheon']
#     governor_results = process_multiple_governor_elections(selected_regions)

#     # 개별 접근 예시:
#     # df_busan = governor_results['df_busan']
#     # df_seoul = governor_results['df_seoul']

#     # 방법 2: 모든 지역 일괄 처리
#     # all_governor_results = process_multiple_governor_elections(AVAILABLE_REGIONS)

#     print(f"\n사용 가능한 지역들: {AVAILABLE_REGIONS}")

# Preprocessing & Merge

In [2]:
# Region keys to process in this run.
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

# Process every region; results are keyed as 'df_<region>'.
election_results = process_multiple_governor_elections(AVAILABLE_REGIONS)


=== seoul 지사 선거 데이터 처리 시작 ===
상세 데이터 URL: https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v1_g/4th_2006/temp1_governor_seoul_4.csv
요약 데이터 URL: https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_g/4th_2006/temp2_2_governor_seoul_4.csv
상세 데이터 로드 완료: (26, 15)
득표수 관련 컬럼 수: 8

=== seoul 지사 선거 후보 정보 ===
실제 후보 컬럼들:
  득표수_1_열린우리당_강금실
  득표수_2_한나라당_오세훈
  득표수_3_민주당_박주선
  득표수_4_민주노동당_김종철
  득표수_5_국민중심당_임웅균
  득표수_6_시민당_이귀선
  득표수_7_한국의미래를준비하는당_이태희
  득표수_8_무소속_백승원
실제 존재하는 후보번호: [1, 2, 3, 4, 5, 6, 7, 8]
적용된 매핑: {1: '진보정당', 2: '보수정당', 3: '그외정당', 4: '그외정당', 5: '그외정당', 6: '그외정당', 7: '그외정당', 8: '무소속'}
생성된 카테고리 매핑: {1: '진보정당', 2: '보수정당', 3: '그외정당', 4: '그외정당', 5: '그외정당', 6: '그외정당', 7: '그외정당', 8: '무소속'}
매핑 후 1위 정당 분포 (처리 중):
득표_1위_정당
보수정당    26
Name: count, dtype: int64
카테고리별 후보자 수: {'보수정당': 1, '진보정당': 1, '그외정당': 5, '무소속': 1}
  보수정당_후보자수: 1
  진보정당_후보자수: 1
  그외정당_후보자수: 5
  무소속_후보자수: 1
요약 데이터 로드 완료: (26, 11)
모든 데이터가 성공적으로 병합되었습니다!
최종 데이터 형태: (26

# Governor Election 4th

## Seoul

In [3]:
df_seoul = election_results['df_seoul']

In [4]:
df_seoul

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,서울특별시,합계,7983648,3977842,2409760,1077890,445368,13808,3946826,31016,...,2,0.6106,1,0.2731,보수정당,진보정당,1,1,5,1
1,서울특별시,종로구,134603,71240,41992,19369,8840,265,70466,774,...,2,0.5959,1,0.2749,보수정당,진보정당,1,1,5,1
2,서울특별시,중구,107164,55369,32708,15382,6556,212,54858,511,...,2,0.5962,1,0.2804,보수정당,진보정당,1,1,5,1
3,서울특별시,용산구,187461,95809,58427,26087,10196,319,95029,780,...,2,0.6148,1,0.2745,보수정당,진보정당,1,1,5,1
4,서울특별시,성동구,266956,131927,77631,35842,16725,488,130686,1241,...,2,0.594,1,0.2743,보수정당,진보정당,1,1,5,1
5,서울특별시,광진구,293308,141079,83209,39716,16644,443,140012,1067,...,2,0.5943,1,0.2837,보수정당,진보정당,1,1,5,1
6,서울특별시,동대문구,305097,153571,92583,40952,18045,509,152089,1482,...,2,0.6087,1,0.2693,보수정당,진보정당,1,1,5,1
7,서울특별시,중랑구,332783,155626,94016,41950,17745,656,154367,1259,...,2,0.609,1,0.2718,보수정당,진보정당,1,1,5,1
8,서울특별시,성북구,369617,183552,107844,52218,21456,620,182138,1414,...,2,0.5921,1,0.2867,보수정당,진보정당,1,1,5,1
9,서울특별시,강북구,280448,133939,75788,37446,18965,577,132776,1163,...,2,0.5708,1,0.282,보수정당,진보정당,1,1,5,1


### preprocessing

In [5]:
# Shorten the province label '서울특별시' to '서울'.
df_seoul = df_seoul.assign(
    시도=df_seoul['시도'].replace('서울특별시', '서울')
)

In [6]:
df_seoul.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [7]:
# Columns that lead the table, in output order.
# '선거년도' and '선거종류' are created by the assign step below.
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# Every other existing column, in its current order.
other_cols = df_seoul.columns[~df_seoul.columns.isin(fixed_cols)].tolist()

# Add the constant columns, reorder, then rename '시도' to '지역'.
df_seoul = (
    df_seoul
    .assign(**{'선거년도': '2006', '선거종류': '광역단체장'})
    .loc[:, [*fixed_cols, *other_cols]]
    .rename({'시도': '지역'}, axis='columns')
)

In [8]:
df_seoul

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,합계,2006,광역단체장,보수정당,진보정당,2,0.6106,1,0.2731,...,1,7983648,3977842,2409760,1077890,445368,13808,3946826,31016,4005806
1,서울,종로구,2006,광역단체장,보수정당,진보정당,2,0.5959,1,0.2749,...,1,134603,71240,41992,19369,8840,265,70466,774,63363
2,서울,중구,2006,광역단체장,보수정당,진보정당,2,0.5962,1,0.2804,...,1,107164,55369,32708,15382,6556,212,54858,511,51795
3,서울,용산구,2006,광역단체장,보수정당,진보정당,2,0.6148,1,0.2745,...,1,187461,95809,58427,26087,10196,319,95029,780,91652
4,서울,성동구,2006,광역단체장,보수정당,진보정당,2,0.594,1,0.2743,...,1,266956,131927,77631,35842,16725,488,130686,1241,135029
5,서울,광진구,2006,광역단체장,보수정당,진보정당,2,0.5943,1,0.2837,...,1,293308,141079,83209,39716,16644,443,140012,1067,152229
6,서울,동대문구,2006,광역단체장,보수정당,진보정당,2,0.6087,1,0.2693,...,1,305097,153571,92583,40952,18045,509,152089,1482,151526
7,서울,중랑구,2006,광역단체장,보수정당,진보정당,2,0.609,1,0.2718,...,1,332783,155626,94016,41950,17745,656,154367,1259,177157
8,서울,성북구,2006,광역단체장,보수정당,진보정당,2,0.5921,1,0.2867,...,1,369617,183552,107844,52218,21456,620,182138,1414,186065
9,서울,강북구,2006,광역단체장,보수정당,진보정당,2,0.5708,1,0.282,...,1,280448,133939,75788,37446,18965,577,132776,1163,146509


### v4.1 ~ v4.3

In [9]:
# v4.1: the complete table, '합계' (total) row included.
df_seoul.to_csv("temp4_1_governor_seoul_4.csv", index=False, encoding="utf-8-sig")

# v4.2: every row except the '합계' row.
df_seoul2 = df_seoul[df_seoul['구시군'] != '합계']
df_seoul2.to_csv("temp4_2_governor_seoul_4.csv", index=False, encoding="utf-8-sig")

# v4.3: only the '합계' row, without the '구시군' column.
df_seoul3 = df_seoul[df_seoul['구시군'] == '합계'].drop(columns=['구시군'])
df_seoul3.to_csv("temp4_3_governor_seoul_4.csv", index=False, encoding="utf-8-sig")

## Busan

In [10]:
df_busan = election_results['df_busan']

In [11]:
df_busan

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,부산광역시,합계,2845104,1378618,895214,329470,141061,0,1365745,12873,...,2,0.6555,1,0.2412,보수정당,진보정당,1,1,1,0
1,부산광역시,중구,43062,22768,15325,5155,2047,0,22527,241,...,2,0.6803,1,0.2288,보수정당,진보정당,1,1,1,0
2,부산광역시,서구,112683,55046,37607,11782,5038,0,54427,619,...,2,0.691,1,0.2165,보수정당,진보정당,1,1,1,0
3,부산광역시,동구,89864,46773,30690,10749,4592,0,46031,742,...,2,0.6667,1,0.2335,보수정당,진보정당,1,1,1,0
4,부산광역시,영도구,131447,63268,38017,17638,6857,0,62512,756,...,2,0.6082,1,0.2822,보수정당,진보정당,1,1,1,0
5,부산광역시,부산진구,325963,157499,103671,35746,16643,0,156060,1439,...,2,0.6643,1,0.2291,보수정당,진보정당,1,1,1,0
6,부산광역시,동래구,216997,107497,73580,23133,10026,0,106739,758,...,2,0.6893,1,0.2167,보수정당,진보정당,1,1,1,0
7,부산광역시,남구,238368,116840,75331,28203,12390,0,115924,916,...,2,0.6498,1,0.2433,보수정당,진보정당,1,1,1,0
8,부산광역시,북구,248396,117428,73619,29620,13223,0,116462,966,...,2,0.6321,1,0.2543,보수정당,진보정당,1,1,1,0
9,부산광역시,해운대구,309668,145642,93928,35920,14637,0,144485,1157,...,2,0.6501,1,0.2486,보수정당,진보정당,1,1,1,0


### preprocessing

In [12]:
# Shorten the province label '부산광역시' to '부산'.
df_busan = df_busan.assign(
    시도=df_busan['시도'].replace('부산광역시', '부산')
)

In [13]:
df_busan.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [14]:
# Columns that lead the table, in output order.
# '선거년도' and '선거종류' are created by the assign step below.
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# Every other existing column, in its current order.
other_cols = df_busan.columns[~df_busan.columns.isin(fixed_cols)].tolist()

# Add the constant columns, reorder, then rename '시도' to '지역'.
df_busan = (
    df_busan
    .assign(**{'선거년도': '2006', '선거종류': '광역단체장'})
    .loc[:, [*fixed_cols, *other_cols]]
    .rename({'시도': '지역'}, axis='columns')
)

In [15]:
df_busan

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,부산,합계,2006,광역단체장,보수정당,진보정당,2,0.6555,1,0.2412,...,0,2845104,1378618,895214,329470,141061,0,1365745,12873,1466486
1,부산,중구,2006,광역단체장,보수정당,진보정당,2,0.6803,1,0.2288,...,0,43062,22768,15325,5155,2047,0,22527,241,20294
2,부산,서구,2006,광역단체장,보수정당,진보정당,2,0.691,1,0.2165,...,0,112683,55046,37607,11782,5038,0,54427,619,57637
3,부산,동구,2006,광역단체장,보수정당,진보정당,2,0.6667,1,0.2335,...,0,89864,46773,30690,10749,4592,0,46031,742,43091
4,부산,영도구,2006,광역단체장,보수정당,진보정당,2,0.6082,1,0.2822,...,0,131447,63268,38017,17638,6857,0,62512,756,68179
5,부산,부산진구,2006,광역단체장,보수정당,진보정당,2,0.6643,1,0.2291,...,0,325963,157499,103671,35746,16643,0,156060,1439,168464
6,부산,동래구,2006,광역단체장,보수정당,진보정당,2,0.6893,1,0.2167,...,0,216997,107497,73580,23133,10026,0,106739,758,109500
7,부산,남구,2006,광역단체장,보수정당,진보정당,2,0.6498,1,0.2433,...,0,238368,116840,75331,28203,12390,0,115924,916,121528
8,부산,북구,2006,광역단체장,보수정당,진보정당,2,0.6321,1,0.2543,...,0,248396,117428,73619,29620,13223,0,116462,966,130968
9,부산,해운대구,2006,광역단체장,보수정당,진보정당,2,0.6501,1,0.2486,...,0,309668,145642,93928,35920,14637,0,144485,1157,164026


### v4.1 ~ v4.3

In [16]:
# v4.1: the complete table, '합계' (total) row included.
df_busan.to_csv("temp4_1_governor_busan_4.csv", index=False, encoding="utf-8-sig")

# v4.2: every row except the '합계' row.
df_busan2 = df_busan[df_busan['구시군'] != '합계']
df_busan2.to_csv("temp4_2_governor_busan_4.csv", index=False, encoding="utf-8-sig")

# v4.3: only the '합계' row, without the '구시군' column.
df_busan3 = df_busan[df_busan['구시군'] == '합계'].drop(columns=['구시군'])
df_busan3.to_csv("temp4_3_governor_busan_4.csv", index=False, encoding="utf-8-sig")

## Daegu

In [17]:
df_daegu = election_results['df_daegu']

In [18]:
df_daegu

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,대구광역시,합계,1885043,915060,636057,191131,44261,35232,906681,8379,...,2,0.7015,1,0.2108,보수정당,진보정당,1,1,2,1
1,대구광역시,중구,66126,35274,24056,7753,1188,1870,34867,407,...,2,0.6899,1,0.2224,보수정당,진보정당,1,1,2,1
2,대구광역시,동구,267710,129221,90128,27773,6226,3714,127841,1380,...,2,0.705,1,0.2172,보수정당,진보정당,1,1,2,1
3,대구광역시,서구,194812,92272,63241,16137,4111,7757,91246,1026,...,2,0.6931,1,0.1769,보수정당,진보정당,1,1,2,1
4,대구광역시,남구,145954,70026,47545,17919,2263,1706,69433,593,...,2,0.6848,1,0.2581,보수정당,진보정당,1,1,2,1
5,대구광역시,북구,341531,159678,110959,32906,9418,5025,158308,1370,...,2,0.7009,1,0.2079,보수정당,진보정당,1,1,2,1
6,대구광역시,수성구,320246,157451,114184,31146,6653,4375,156358,1093,...,2,0.7303,1,0.1992,보수정당,진보정당,1,1,2,1
7,대구광역시,달서구,432607,206965,145236,43630,9287,7388,205541,1424,...,2,0.7066,1,0.2123,보수정당,진보정당,1,1,2,1
8,대구광역시,달성군,116057,64173,40708,13867,5115,3397,63087,1086,...,2,0.6453,1,0.2198,보수정당,진보정당,1,1,2,1


### preprocessing

In [19]:
# Shorten the province label '대구광역시' to '대구'.
df_daegu = df_daegu.assign(
    시도=df_daegu['시도'].replace('대구광역시', '대구')
)

In [20]:
df_daegu.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [21]:
# Columns that lead the table, in output order.
# '선거년도' and '선거종류' are created by the assign step below.
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# Every other existing column, in its current order.
other_cols = df_daegu.columns[~df_daegu.columns.isin(fixed_cols)].tolist()

# Add the constant columns, reorder, then rename '시도' to '지역'.
df_daegu = (
    df_daegu
    .assign(**{'선거년도': '2006', '선거종류': '광역단체장'})
    .loc[:, [*fixed_cols, *other_cols]]
    .rename({'시도': '지역'}, axis='columns')
)

In [22]:
df_daegu

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,대구,합계,2006,광역단체장,보수정당,진보정당,2,0.7015,1,0.2108,...,1,1885043,915060,636057,191131,44261,35232,906681,8379,969983
1,대구,중구,2006,광역단체장,보수정당,진보정당,2,0.6899,1,0.2224,...,1,66126,35274,24056,7753,1188,1870,34867,407,30852
2,대구,동구,2006,광역단체장,보수정당,진보정당,2,0.705,1,0.2172,...,1,267710,129221,90128,27773,6226,3714,127841,1380,138489
3,대구,서구,2006,광역단체장,보수정당,진보정당,2,0.6931,1,0.1769,...,1,194812,92272,63241,16137,4111,7757,91246,1026,102540
4,대구,남구,2006,광역단체장,보수정당,진보정당,2,0.6848,1,0.2581,...,1,145954,70026,47545,17919,2263,1706,69433,593,75928
5,대구,북구,2006,광역단체장,보수정당,진보정당,2,0.7009,1,0.2079,...,1,341531,159678,110959,32906,9418,5025,158308,1370,181853
6,대구,수성구,2006,광역단체장,보수정당,진보정당,2,0.7303,1,0.1992,...,1,320246,157451,114184,31146,6653,4375,156358,1093,162795
7,대구,달서구,2006,광역단체장,보수정당,진보정당,2,0.7066,1,0.2123,...,1,432607,206965,145236,43630,9287,7388,205541,1424,225642
8,대구,달성군,2006,광역단체장,보수정당,진보정당,2,0.6453,1,0.2198,...,1,116057,64173,40708,13867,5115,3397,63087,1086,51884


### v4.1 ~ v4.3

In [23]:
# v4.1: the complete table, '합계' (total) row included.
df_daegu.to_csv("temp4_1_governor_daegu_4.csv", index=False, encoding="utf-8-sig")

# v4.2: every row except the '합계' row.
df_daegu2 = df_daegu[df_daegu['구시군'] != '합계']
df_daegu2.to_csv("temp4_2_governor_daegu_4.csv", index=False, encoding="utf-8-sig")

# v4.3: only the '합계' row, without the '구시군' column.
df_daegu3 = df_daegu[df_daegu['구시군'] == '합계'].drop(columns=['구시군'])
df_daegu3.to_csv("temp4_3_governor_daegu_4.csv", index=False, encoding="utf-8-sig")

## Incheon

In [24]:
df_incheon = election_results['df_incheon']

In [25]:
df_incheon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,인천광역시,합계,1940403,859506,526932,200650,123237,0,850819,8687,...,2,0.6193,1,0.2358,보수정당,진보정당,1,1,2,0
1,인천광역시,중구,72876,34796,21570,8119,4641,0,34330,466,...,2,0.6283,1,0.2365,보수정당,진보정당,1,1,2,0
2,인천광역시,동구,61515,30794,18707,6663,4997,0,30367,427,...,2,0.616,1,0.2194,보수정당,진보정당,1,1,2,0
3,인천광역시,남구,327614,140581,90299,31036,17805,0,139140,1441,...,2,0.649,1,0.2231,보수정당,진보정당,1,1,2,0
4,인천광역시,연수구,191076,88010,57235,19370,10765,0,87370,640,...,2,0.6551,1,0.2217,보수정당,진보정당,1,1,2,0
5,인천광역시,남동구,284121,123423,73284,28961,20079,0,122324,1099,...,2,0.5991,1,0.2368,보수정당,진보정당,1,1,2,0
6,인천광역시,부평구,425004,181447,106584,42156,31187,0,179927,1520,...,2,0.5924,1,0.2343,보수정당,진보정당,1,1,2,0
7,인천광역시,계양구,239585,100411,58806,26480,14326,0,99612,799,...,2,0.5904,1,0.2658,보수정당,진보정당,1,1,2,0
8,인천광역시,서구,271645,114473,70169,27258,15908,0,113335,1138,...,2,0.6191,1,0.2405,보수정당,진보정당,1,1,2,0
9,인천광역시,강화군,53699,35405,23919,8071,2560,0,34550,855,...,2,0.6923,1,0.2336,보수정당,진보정당,1,1,2,0


### preprocessing

In [26]:
# Shorten the province label '인천광역시' to '인천'.
df_incheon = df_incheon.assign(
    시도=df_incheon['시도'].replace('인천광역시', '인천')
)

In [27]:
df_incheon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [28]:
# Columns that lead the table, in output order.
# '선거년도' and '선거종류' are created by the assign step below.
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# Every other existing column, in its current order.
other_cols = df_incheon.columns[~df_incheon.columns.isin(fixed_cols)].tolist()

# Add the constant columns, reorder, then rename '시도' to '지역'.
df_incheon = (
    df_incheon
    .assign(**{'선거년도': '2006', '선거종류': '광역단체장'})
    .loc[:, [*fixed_cols, *other_cols]]
    .rename({'시도': '지역'}, axis='columns')
)

In [29]:
df_incheon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,인천,합계,2006,광역단체장,보수정당,진보정당,2,0.6193,1,0.2358,...,0,1940403,859506,526932,200650,123237,0,850819,8687,1080897
1,인천,중구,2006,광역단체장,보수정당,진보정당,2,0.6283,1,0.2365,...,0,72876,34796,21570,8119,4641,0,34330,466,38080
2,인천,동구,2006,광역단체장,보수정당,진보정당,2,0.616,1,0.2194,...,0,61515,30794,18707,6663,4997,0,30367,427,30721
3,인천,남구,2006,광역단체장,보수정당,진보정당,2,0.649,1,0.2231,...,0,327614,140581,90299,31036,17805,0,139140,1441,187033
4,인천,연수구,2006,광역단체장,보수정당,진보정당,2,0.6551,1,0.2217,...,0,191076,88010,57235,19370,10765,0,87370,640,103066
5,인천,남동구,2006,광역단체장,보수정당,진보정당,2,0.5991,1,0.2368,...,0,284121,123423,73284,28961,20079,0,122324,1099,160698
6,인천,부평구,2006,광역단체장,보수정당,진보정당,2,0.5924,1,0.2343,...,0,425004,181447,106584,42156,31187,0,179927,1520,243557
7,인천,계양구,2006,광역단체장,보수정당,진보정당,2,0.5904,1,0.2658,...,0,239585,100411,58806,26480,14326,0,99612,799,139174
8,인천,서구,2006,광역단체장,보수정당,진보정당,2,0.6191,1,0.2405,...,0,271645,114473,70169,27258,15908,0,113335,1138,157172
9,인천,강화군,2006,광역단체장,보수정당,진보정당,2,0.6923,1,0.2336,...,0,53699,35405,23919,8071,2560,0,34550,855,18294


### v4.1 ~ v4.3

In [30]:
# v4.1: the complete table, '합계' (total) row included.
df_incheon.to_csv("temp4_1_governor_incheon_4.csv", index=False, encoding="utf-8-sig")

# v4.2: every row except the '합계' row.
df_incheon2 = df_incheon[df_incheon['구시군'] != '합계']
df_incheon2.to_csv("temp4_2_governor_incheon_4.csv", index=False, encoding="utf-8-sig")

# v4.3: only the '합계' row, without the '구시군' column.
df_incheon3 = df_incheon[df_incheon['구시군'] == '합계'].drop(columns=['구시군'])
df_incheon3.to_csv("temp4_3_governor_incheon_4.csv", index=False, encoding="utf-8-sig")

## Gwangju

In [31]:
df_gwangju = election_results['df_gwangju']

In [32]:
df_gwangju

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,광주광역시,합계,1014620,469316,18461,157756,288501,0,464718,4598,...,3,0.5162,1,0.3395,그외정당,진보정당,1,1,2,0
1,광주광역시,동구,90767,43236,2201,16085,24444,0,42730,506,...,3,0.4962,1,0.3764,그외정당,진보정당,1,1,2,0
2,광주광역시,서구,221034,102566,3911,34849,62942,0,101702,864,...,3,0.5069,1,0.3427,그외정당,진보정당,1,1,2,0
3,광주광역시,남구,161406,78309,3011,26536,47939,0,77486,823,...,3,0.543,1,0.3425,그외정당,진보정당,1,1,2,0
4,광주광역시,북구,333413,147702,5905,53265,87216,0,146386,1316,...,3,0.4958,1,0.3639,그외정당,진보정당,1,1,2,0
5,광주광역시,광산구,208000,97503,3433,27021,65960,0,96414,1089,...,3,0.5442,1,0.2803,그외정당,진보정당,1,1,2,0


### preprocessing

In [33]:
# Shorten the province label '광주광역시' to '광주'.
df_gwangju = df_gwangju.assign(
    시도=df_gwangju['시도'].replace('광주광역시', '광주')
)

In [34]:
df_gwangju.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [35]:
# Columns that lead the table, in output order.
# '선거년도' and '선거종류' are created by the assign step below.
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# Every other existing column, in its current order.
other_cols = df_gwangju.columns[~df_gwangju.columns.isin(fixed_cols)].tolist()

# Add the constant columns, reorder, then rename '시도' to '지역'.
df_gwangju = (
    df_gwangju
    .assign(**{'선거년도': '2006', '선거종류': '광역단체장'})
    .loc[:, [*fixed_cols, *other_cols]]
    .rename({'시도': '지역'}, axis='columns')
)

In [36]:
df_gwangju

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,광주,합계,2006,광역단체장,그외정당,진보정당,3,0.5162,1,0.3395,...,0,1014620,469316,18461,157756,288501,0,464718,4598,545304
1,광주,동구,2006,광역단체장,그외정당,진보정당,3,0.4962,1,0.3764,...,0,90767,43236,2201,16085,24444,0,42730,506,47531
2,광주,서구,2006,광역단체장,그외정당,진보정당,3,0.5069,1,0.3427,...,0,221034,102566,3911,34849,62942,0,101702,864,118468
3,광주,남구,2006,광역단체장,그외정당,진보정당,3,0.543,1,0.3425,...,0,161406,78309,3011,26536,47939,0,77486,823,83097
4,광주,북구,2006,광역단체장,그외정당,진보정당,3,0.4958,1,0.3639,...,0,333413,147702,5905,53265,87216,0,146386,1316,185711
5,광주,광산구,2006,광역단체장,그외정당,진보정당,3,0.5442,1,0.2803,...,0,208000,97503,3433,27021,65960,0,96414,1089,110497


### v4.1 ~ v4.3

In [37]:
# v4.1: the complete table, '합계' (total) row included.
df_gwangju.to_csv("temp4_1_governor_gwangju_4.csv", index=False, encoding="utf-8-sig")

# v4.2: every row except the '합계' row.
df_gwangju2 = df_gwangju[df_gwangju['구시군'] != '합계']
df_gwangju2.to_csv("temp4_2_governor_gwangju_4.csv", index=False, encoding="utf-8-sig")

# v4.3: only the '합계' row, without the '구시군' column.
df_gwangju3 = df_gwangju[df_gwangju['구시군'] == '합계'].drop(columns=['구시군'])
df_gwangju3.to_csv("temp4_3_governor_gwangju_4.csv", index=False, encoding="utf-8-sig")

## Daejeon

In [38]:
df_daejeon = election_results['df_daejeon']

In [39]:
df_daejeon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,대전광역시,합계,1077468,532568,231489,217273,79279,0,528041,4527,...,2,0.4384,1,0.4115,보수정당,진보정당,1,1,4,0
1,대전광역시,동구,183206,87967,35745,36078,15084,0,86907,1060,...,1,0.4151,2,0.4113,진보정당,보수정당,1,1,4,0
2,대전광역시,중구,203076,100902,44780,39961,15272,0,100013,889,...,2,0.4477,1,0.3996,보수정당,진보정당,1,1,4,0
3,대전광역시,서구,367265,182493,83617,73194,24446,0,181257,1236,...,2,0.4613,1,0.4038,보수정당,진보정당,1,1,4,0
4,대전광역시,유성구,164867,84058,36053,36081,11410,0,83544,514,...,1,0.4319,2,0.4315,진보정당,보수정당,1,1,4,0
5,대전광역시,대덕구,159054,77148,31294,31959,13067,0,76320,828,...,1,0.4188,2,0.41,진보정당,보수정당,1,1,4,0


### preprocessing

In [40]:
# Shorten the province label '대전광역시' to '대전'.
df_daejeon = df_daejeon.assign(
    시도=df_daejeon['시도'].replace('대전광역시', '대전')
)

In [41]:
df_daejeon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [42]:
# Leading columns of the final layout, in output order.
# '선거년도' and '선거종류' do not exist yet; they are created by assign() below.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# Every remaining column follows, keeping its current relative order.
other_cols = df_daejeon.columns[~df_daejeon.columns.isin(fixed_cols)].tolist()

# Add the election metadata as constant columns (the year is stored as the
# string '2006'), apply the new column order, then expose '시도' as '지역'.
df_daejeon = (
    df_daejeon.assign(선거종류='광역단체장', 선거년도='2006')[fixed_cols + other_cols]
    .rename({'시도': '지역'}, axis='columns')
)

In [43]:
# Notebook preview after reordering ('시도' is now named '지역').
df_daejeon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,대전,합계,2006,광역단체장,보수정당,진보정당,2,0.4384,1,0.4115,...,0,1077468,532568,231489,217273,79279,0,528041,4527,544900
1,대전,동구,2006,광역단체장,진보정당,보수정당,1,0.4151,2,0.4113,...,0,183206,87967,35745,36078,15084,0,86907,1060,95239
2,대전,중구,2006,광역단체장,보수정당,진보정당,2,0.4477,1,0.3996,...,0,203076,100902,44780,39961,15272,0,100013,889,102174
3,대전,서구,2006,광역단체장,보수정당,진보정당,2,0.4613,1,0.4038,...,0,367265,182493,83617,73194,24446,0,181257,1236,184772
4,대전,유성구,2006,광역단체장,진보정당,보수정당,1,0.4319,2,0.4315,...,0,164867,84058,36053,36081,11410,0,83544,514,80809
5,대전,대덕구,2006,광역단체장,진보정당,보수정당,1,0.4188,2,0.41,...,0,159054,77148,31294,31959,13067,0,76320,828,81906


### v4.1 ~ v4.3

In [44]:
# v4.1 - the complete table (the '합계' total row plus every district row).
# NOTE: 'utf-8-sig' writes a UTF-8 byte-order mark at the start of each file.
df_daejeon.to_csv("temp4_1_governor_daejeon_4.csv", index=False, encoding="utf-8-sig")

# v4.2 - district rows only: everything except the '합계' (total) row.
df_daejeon2 = df_daejeon.loc[df_daejeon['구시군'] != '합계']
df_daejeon2.to_csv("temp4_2_governor_daejeon_4.csv", index=False, encoding="utf-8-sig")

# v4.3 - the '합계' (total) row only, with the '구시군' column removed.
df_daejeon3 = df_daejeon.loc[df_daejeon['구시군'] == '합계'].drop(columns="구시군")
df_daejeon3.to_csv("temp4_3_governor_daejeon_4.csv", index=False, encoding="utf-8-sig")

## Ulsan

In [45]:
# Fetch the Ulsan results stored under the 'df_ulsan' key of election_results.
df_ulsan = election_results['df_ulsan']

In [46]:
# Notebook preview of the Ulsan frame before preprocessing.
df_ulsan

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,울산광역시,합계,790289,417200,261361,47579,104384,0,413324,3876,...,2,0.6323,4,0.2525,보수정당,그외정당,1,1,1,0
1,울산광역시,중구,175337,88549,59499,9023,19390,0,87912,637,...,2,0.6768,4,0.2206,보수정당,그외정당,1,1,1,0
2,울산광역시,남구,250864,123837,85037,14533,23446,0,123016,821,...,2,0.6913,4,0.1906,보수정당,그외정당,1,1,1,0
3,울산광역시,동구,136957,75733,40077,8675,26290,0,75042,691,...,2,0.5341,4,0.3503,보수정당,그외정당,1,1,1,0
4,울산광역시,북구,99489,55385,28677,5176,21047,0,54900,485,...,2,0.5223,4,0.3834,보수정당,그외정당,1,1,1,0
5,울산광역시,울주군,127642,73696,48071,10172,14211,0,72454,1242,...,2,0.6635,4,0.1961,보수정당,그외정당,1,1,1,0


### preprocessing

In [47]:
# Abbreviate the province name in '시도' ('울산광역시' -> '울산').
# assign() returns a new DataFrame rather than modifying the one it is called on.
df_ulsan = df_ulsan.assign(시도=df_ulsan['시도'].replace('울산광역시', '울산'))

In [48]:
# Inspect the column names available before reordering.
df_ulsan.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [49]:
# Leading columns of the final layout, in output order.
# '선거년도' and '선거종류' do not exist yet; they are created by assign() below.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# Every remaining column follows, keeping its current relative order.
other_cols = df_ulsan.columns[~df_ulsan.columns.isin(fixed_cols)].tolist()

# Add the election metadata as constant columns (the year is stored as the
# string '2006'), apply the new column order, then expose '시도' as '지역'.
df_ulsan = (
    df_ulsan.assign(선거종류='광역단체장', 선거년도='2006')[fixed_cols + other_cols]
    .rename({'시도': '지역'}, axis='columns')
)

In [50]:
# Notebook preview after reordering ('시도' is now named '지역').
df_ulsan

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,울산,합계,2006,광역단체장,보수정당,그외정당,2,0.6323,4,0.2525,...,0,790289,417200,261361,47579,104384,0,413324,3876,373089
1,울산,중구,2006,광역단체장,보수정당,그외정당,2,0.6768,4,0.2206,...,0,175337,88549,59499,9023,19390,0,87912,637,86788
2,울산,남구,2006,광역단체장,보수정당,그외정당,2,0.6913,4,0.1906,...,0,250864,123837,85037,14533,23446,0,123016,821,127027
3,울산,동구,2006,광역단체장,보수정당,그외정당,2,0.5341,4,0.3503,...,0,136957,75733,40077,8675,26290,0,75042,691,61224
4,울산,북구,2006,광역단체장,보수정당,그외정당,2,0.5223,4,0.3834,...,0,99489,55385,28677,5176,21047,0,54900,485,44104
5,울산,울주군,2006,광역단체장,보수정당,그외정당,2,0.6635,4,0.1961,...,0,127642,73696,48071,10172,14211,0,72454,1242,53946


### v4.1 ~ v4.3

In [51]:
# v4.1 - the complete table (the '합계' total row plus every district row).
# NOTE: 'utf-8-sig' writes a UTF-8 byte-order mark at the start of each file.
df_ulsan.to_csv("temp4_1_governor_ulsan_4.csv", index=False, encoding="utf-8-sig")

# v4.2 - district rows only: everything except the '합계' (total) row.
df_ulsan2 = df_ulsan.loc[df_ulsan['구시군'] != '합계']
df_ulsan2.to_csv("temp4_2_governor_ulsan_4.csv", index=False, encoding="utf-8-sig")

# v4.3 - the '합계' (total) row only, with the '구시군' column removed.
df_ulsan3 = df_ulsan.loc[df_ulsan['구시군'] == '합계'].drop(columns="구시군")
df_ulsan3.to_csv("temp4_3_governor_ulsan_4.csv", index=False, encoding="utf-8-sig")

## Gyeonggi

In [52]:
# Fetch the Gyeonggi results stored under the 'df_gyeonggi' key of election_results.
df_gyeonggi = election_results['df_gyeonggi']

In [53]:
# Notebook preview of the Gyeonggi frame before preprocessing.
df_gyeonggi

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경기도,합계,7918828,3695552,2181677,1124317,349515,0,3655509,40043,...,2,0.5968,1,0.3076,보수정당,진보정당,1,1,2,0
1,경기도,수원시장안구,206015,98094,56361,31861,9161,0,97383,711,...,2,0.5788,1,0.3272,보수정당,진보정당,1,1,2,0
2,경기도,수원시권선구,217455,97485,56267,31707,8660,0,96634,851,...,2,0.5823,1,0.3281,보수정당,진보정당,1,1,2,0
3,경기도,수원시팔달구,166662,72030,42829,22480,6021,0,71330,700,...,2,0.6004,1,0.3152,보수정당,진보정당,1,1,2,0
4,경기도,수원시영통구,173454,82006,45246,31247,5095,0,81588,418,...,2,0.5546,1,0.383,보수정당,진보정당,1,1,2,0
5,경기도,성남시수정구,206678,84853,41683,28638,13484,0,83805,1048,...,2,0.4974,1,0.3417,보수정당,진보정당,1,1,2,0
6,경기도,성남시중원구,207205,82650,41203,26147,14478,0,81828,822,...,2,0.5035,1,0.3195,보수정당,진보정당,1,1,2,0
7,경기도,성남시분당구,330404,164190,107353,46797,9169,0,163319,871,...,2,0.6573,1,0.2865,보수정당,진보정당,1,1,2,0
8,경기도,의정부시,299556,130381,77107,41367,10643,0,129117,1264,...,2,0.5972,1,0.3204,보수정당,진보정당,1,1,2,0
9,경기도,안양시만안구,204688,95595,53699,31436,9555,0,94690,905,...,2,0.5671,1,0.332,보수정당,진보정당,1,1,2,0


### preprocessing

In [54]:
# Abbreviate the province name in '시도' ('경기도' -> '경기').
# assign() returns a new DataFrame rather than modifying the one it is called on.
df_gyeonggi = df_gyeonggi.assign(시도=df_gyeonggi['시도'].replace('경기도', '경기'))

In [55]:
# Inspect the column names available before reordering.
df_gyeonggi.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [56]:
# Leading columns of the final layout, in output order.
# '선거년도' and '선거종류' do not exist yet; they are created by assign() below.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# Every remaining column follows, keeping its current relative order.
other_cols = df_gyeonggi.columns[~df_gyeonggi.columns.isin(fixed_cols)].tolist()

# Add the election metadata as constant columns (the year is stored as the
# string '2006'), apply the new column order, then expose '시도' as '지역'.
df_gyeonggi = (
    df_gyeonggi.assign(선거종류='광역단체장', 선거년도='2006')[fixed_cols + other_cols]
    .rename({'시도': '지역'}, axis='columns')
)

In [57]:
# Notebook preview after reordering ('시도' is now named '지역').
df_gyeonggi

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,경기,합계,2006,광역단체장,보수정당,진보정당,2,0.5968,1,0.3076,...,0,7918828,3695552,2181677,1124317,349515,0,3655509,40043,4223276
1,경기,수원시장안구,2006,광역단체장,보수정당,진보정당,2,0.5788,1,0.3272,...,0,206015,98094,56361,31861,9161,0,97383,711,107921
2,경기,수원시권선구,2006,광역단체장,보수정당,진보정당,2,0.5823,1,0.3281,...,0,217455,97485,56267,31707,8660,0,96634,851,119970
3,경기,수원시팔달구,2006,광역단체장,보수정당,진보정당,2,0.6004,1,0.3152,...,0,166662,72030,42829,22480,6021,0,71330,700,94632
4,경기,수원시영통구,2006,광역단체장,보수정당,진보정당,2,0.5546,1,0.383,...,0,173454,82006,45246,31247,5095,0,81588,418,91448
5,경기,성남시수정구,2006,광역단체장,보수정당,진보정당,2,0.4974,1,0.3417,...,0,206678,84853,41683,28638,13484,0,83805,1048,121825
6,경기,성남시중원구,2006,광역단체장,보수정당,진보정당,2,0.5035,1,0.3195,...,0,207205,82650,41203,26147,14478,0,81828,822,124555
7,경기,성남시분당구,2006,광역단체장,보수정당,진보정당,2,0.6573,1,0.2865,...,0,330404,164190,107353,46797,9169,0,163319,871,166214
8,경기,의정부시,2006,광역단체장,보수정당,진보정당,2,0.5972,1,0.3204,...,0,299556,130381,77107,41367,10643,0,129117,1264,169175
9,경기,안양시만안구,2006,광역단체장,보수정당,진보정당,2,0.5671,1,0.332,...,0,204688,95595,53699,31436,9555,0,94690,905,109093


### v4.1 ~ v4.3

In [58]:
# v4.1 - the complete table (the '합계' total row plus every district row).
# NOTE: 'utf-8-sig' writes a UTF-8 byte-order mark at the start of each file.
df_gyeonggi.to_csv("temp4_1_governor_gyeonggi_4.csv", index=False, encoding="utf-8-sig")

# v4.2 - district rows only: everything except the '합계' (total) row.
df_gyeonggi2 = df_gyeonggi.loc[df_gyeonggi['구시군'] != '합계']
df_gyeonggi2.to_csv("temp4_2_governor_gyeonggi_4.csv", index=False, encoding="utf-8-sig")

# v4.3 - the '합계' (total) row only, with the '구시군' column removed.
df_gyeonggi3 = df_gyeonggi.loc[df_gyeonggi['구시군'] == '합계'].drop(columns="구시군")
df_gyeonggi3.to_csv("temp4_3_governor_gyeonggi_4.csv", index=False, encoding="utf-8-sig")

## Gangwon

In [59]:
# Fetch the Gangwon results stored under the 'df_gangwon' key of election_results.
df_gangwon = election_results['df_gangwon']

In [60]:
# Notebook preview of the Gangwon frame before preprocessing.
df_gangwon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,강원도,합계,1160977,681633,471613,148302,48411,0,668326,13307,...,2,0.7057,1,0.2219,보수정당,진보정당,1,1,2,0
1,강원도,춘천시,192644,104177,45125,36231,19333,0,100689,3488,...,2,0.4482,1,0.3598,보수정당,진보정당,1,1,2,0
2,강원도,원주시,215770,106902,70598,31345,3718,0,105661,1241,...,2,0.6682,1,0.2967,보수정당,진보정당,1,1,2,0
3,강원도,강릉시,171138,92512,74805,12467,3773,0,91045,1467,...,2,0.8216,1,0.1369,보수정당,진보정당,1,1,2,0
4,강원도,동해시,74884,43269,36395,4903,1389,0,42687,582,...,2,0.8526,1,0.1149,보수정당,진보정당,1,1,2,0
5,강원도,삼척시,57751,39784,31894,4897,2167,0,38958,826,...,2,0.8187,1,0.1257,보수정당,진보정당,1,1,2,0
6,강원도,태백시,41345,27807,21005,4148,2245,0,27398,409,...,2,0.7667,1,0.1514,보수정당,진보정당,1,1,2,0
7,강원도,정선군,35948,25156,18270,4982,1412,0,24664,492,...,2,0.7408,1,0.202,보수정당,진보정당,1,1,2,0
8,강원도,속초시,64750,34125,26288,6072,1297,0,33657,468,...,2,0.7811,1,0.1804,보수정당,진보정당,1,1,2,0
9,강원도,고성군,25708,18721,14514,3010,792,0,18316,405,...,2,0.7924,1,0.1643,보수정당,진보정당,1,1,2,0


### preprocessing

In [61]:
# Abbreviate the province name in '시도' ('강원도' -> '강원').
# assign() returns a new DataFrame rather than modifying the one it is called on.
df_gangwon = df_gangwon.assign(시도=df_gangwon['시도'].replace('강원도', '강원'))

In [62]:
# Inspect the column names available before reordering.
df_gangwon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [63]:
# Leading columns of the final layout, in output order.
# '선거년도' and '선거종류' do not exist yet; they are created by assign() below.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# Every remaining column follows, keeping its current relative order.
other_cols = df_gangwon.columns[~df_gangwon.columns.isin(fixed_cols)].tolist()

# Add the election metadata as constant columns (the year is stored as the
# string '2006'), apply the new column order, then expose '시도' as '지역'.
df_gangwon = (
    df_gangwon.assign(선거종류='광역단체장', 선거년도='2006')[fixed_cols + other_cols]
    .rename({'시도': '지역'}, axis='columns')
)

In [64]:
# Notebook preview after reordering ('시도' is now named '지역').
df_gangwon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,강원,합계,2006,광역단체장,보수정당,진보정당,2,0.7057,1,0.2219,...,0,1160977,681633,471613,148302,48411,0,668326,13307,479344
1,강원,춘천시,2006,광역단체장,보수정당,진보정당,2,0.4482,1,0.3598,...,0,192644,104177,45125,36231,19333,0,100689,3488,88467
2,강원,원주시,2006,광역단체장,보수정당,진보정당,2,0.6682,1,0.2967,...,0,215770,106902,70598,31345,3718,0,105661,1241,108868
3,강원,강릉시,2006,광역단체장,보수정당,진보정당,2,0.8216,1,0.1369,...,0,171138,92512,74805,12467,3773,0,91045,1467,78626
4,강원,동해시,2006,광역단체장,보수정당,진보정당,2,0.8526,1,0.1149,...,0,74884,43269,36395,4903,1389,0,42687,582,31615
5,강원,삼척시,2006,광역단체장,보수정당,진보정당,2,0.8187,1,0.1257,...,0,57751,39784,31894,4897,2167,0,38958,826,17967
6,강원,태백시,2006,광역단체장,보수정당,진보정당,2,0.7667,1,0.1514,...,0,41345,27807,21005,4148,2245,0,27398,409,13538
7,강원,정선군,2006,광역단체장,보수정당,진보정당,2,0.7408,1,0.202,...,0,35948,25156,18270,4982,1412,0,24664,492,10792
8,강원,속초시,2006,광역단체장,보수정당,진보정당,2,0.7811,1,0.1804,...,0,64750,34125,26288,6072,1297,0,33657,468,30625
9,강원,고성군,2006,광역단체장,보수정당,진보정당,2,0.7924,1,0.1643,...,0,25708,18721,14514,3010,792,0,18316,405,6987


### v4.1 ~ v4.3

In [65]:
# v4.1 - the complete table (the '합계' total row plus every district row).
# NOTE: 'utf-8-sig' writes a UTF-8 byte-order mark at the start of each file.
df_gangwon.to_csv("temp4_1_governor_gangwon_4.csv", index=False, encoding="utf-8-sig")

# v4.2 - district rows only: everything except the '합계' (total) row.
df_gangwon2 = df_gangwon.loc[df_gangwon['구시군'] != '합계']
df_gangwon2.to_csv("temp4_2_governor_gangwon_4.csv", index=False, encoding="utf-8-sig")

# v4.3 - the '합계' (total) row only, with the '구시군' column removed.
df_gangwon3 = df_gangwon.loc[df_gangwon['구시군'] == '합계'].drop(columns="구시군")
df_gangwon3.to_csv("temp4_3_governor_gangwon_4.csv", index=False, encoding="utf-8-sig")

## Chungbuk

In [66]:
# Fetch the Chungbuk results stored under the 'df_chungbuk' key of election_results.
df_chungbuk = election_results['df_chungbuk']

In [67]:
# Notebook preview of the Chungbuk frame before preprocessing.
df_chungbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,충청북도,합계,1126282,616053,361157,185426,58741,0,605324,10729,...,2,0.5966,1,0.3063,보수정당,진보정당,1,1,2,0
1,충청북도,청주시상당구,188493,89032,52776,28260,7129,0,88165,867,...,2,0.5986,1,0.3205,보수정당,진보정당,1,1,2,0
2,충청북도,청주시흥덕구,269249,121820,70472,38846,11363,0,120681,1139,...,2,0.584,1,0.3219,보수정당,진보정당,1,1,2,0
3,충청북도,충주시,155769,83466,50932,24287,6841,0,82060,1406,...,2,0.6207,1,0.296,보수정당,진보정당,1,1,2,0
4,충청북도,제천시,105920,60200,37174,16169,5826,0,59169,1031,...,2,0.6283,1,0.2733,보수정당,진보정당,1,1,2,0
5,충청북도,단양군,27458,19533,11497,5643,1877,0,19017,516,...,2,0.6046,1,0.2967,보수정당,진보정당,1,1,2,0
6,충청북도,청원군,95259,53191,27833,18015,6236,0,52084,1107,...,2,0.5344,1,0.3459,보수정당,진보정당,1,1,2,0
7,충청북도,영동군,41700,29569,15383,9031,4354,0,28768,801,...,2,0.5347,1,0.3139,보수정당,진보정당,1,1,2,0
8,충청북도,보은군,30273,22890,11107,8833,2360,0,22300,590,...,2,0.4981,1,0.3961,보수정당,진보정당,1,1,2,0
9,충청북도,옥천군,44454,31353,15423,11522,3376,0,30321,1032,...,2,0.5087,1,0.38,보수정당,진보정당,1,1,2,0


### preprocessing

In [68]:
# Abbreviate the province name in '시도' ('충청북도' -> '충북').
# assign() returns a new DataFrame rather than modifying the one it is called on.
df_chungbuk = df_chungbuk.assign(시도=df_chungbuk['시도'].replace('충청북도', '충북'))

In [69]:
# Inspect the column names available before reordering.
df_chungbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [70]:
# Leading columns of the final layout, in output order.
# '선거년도' and '선거종류' do not exist yet; they are created by assign() below.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# Every remaining column follows, keeping its current relative order.
other_cols = df_chungbuk.columns[~df_chungbuk.columns.isin(fixed_cols)].tolist()

# Add the election metadata as constant columns (the year is stored as the
# string '2006'), apply the new column order, then expose '시도' as '지역'.
df_chungbuk = (
    df_chungbuk.assign(선거종류='광역단체장', 선거년도='2006')[fixed_cols + other_cols]
    .rename({'시도': '지역'}, axis='columns')
)

In [71]:
# Notebook preview after reordering ('시도' is now named '지역').
df_chungbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,충북,합계,2006,광역단체장,보수정당,진보정당,2,0.5966,1,0.3063,...,0,1126282,616053,361157,185426,58741,0,605324,10729,510229
1,충북,청주시상당구,2006,광역단체장,보수정당,진보정당,2,0.5986,1,0.3205,...,0,188493,89032,52776,28260,7129,0,88165,867,99461
2,충북,청주시흥덕구,2006,광역단체장,보수정당,진보정당,2,0.584,1,0.3219,...,0,269249,121820,70472,38846,11363,0,120681,1139,147429
3,충북,충주시,2006,광역단체장,보수정당,진보정당,2,0.6207,1,0.296,...,0,155769,83466,50932,24287,6841,0,82060,1406,72303
4,충북,제천시,2006,광역단체장,보수정당,진보정당,2,0.6283,1,0.2733,...,0,105920,60200,37174,16169,5826,0,59169,1031,45720
5,충북,단양군,2006,광역단체장,보수정당,진보정당,2,0.6046,1,0.2967,...,0,27458,19533,11497,5643,1877,0,19017,516,7925
6,충북,청원군,2006,광역단체장,보수정당,진보정당,2,0.5344,1,0.3459,...,0,95259,53191,27833,18015,6236,0,52084,1107,42068
7,충북,영동군,2006,광역단체장,보수정당,진보정당,2,0.5347,1,0.3139,...,0,41700,29569,15383,9031,4354,0,28768,801,12131
8,충북,보은군,2006,광역단체장,보수정당,진보정당,2,0.4981,1,0.3961,...,0,30273,22890,11107,8833,2360,0,22300,590,7383
9,충북,옥천군,2006,광역단체장,보수정당,진보정당,2,0.5087,1,0.38,...,0,44454,31353,15423,11522,3376,0,30321,1032,13101


### v4.1 ~ v4.3

In [72]:
# v4.1 - the complete table (the '합계' total row plus every district row).
# NOTE: 'utf-8-sig' writes a UTF-8 byte-order mark at the start of each file.
df_chungbuk.to_csv("temp4_1_governor_chungbuk_4.csv", index=False, encoding="utf-8-sig")

# v4.2 - district rows only: everything except the '합계' (total) row.
df_chungbuk2 = df_chungbuk.loc[df_chungbuk['구시군'] != '합계']
df_chungbuk2.to_csv("temp4_2_governor_chungbuk_4.csv", index=False, encoding="utf-8-sig")

# v4.3 - the '합계' (total) row only, with the '구시군' column removed.
df_chungbuk3 = df_chungbuk.loc[df_chungbuk['구시군'] == '합계'].drop(columns="구시군")
df_chungbuk3.to_csv("temp4_3_governor_chungbuk_4.csv", index=False, encoding="utf-8-sig")

## Chungnam

In [73]:
# Fetch the Chungnam results stored under the 'df_chungnam' key of election_results.
df_chungnam = election_results['df_chungnam']

In [74]:
# Notebook preview of the Chungnam frame before preprocessing.
df_chungnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,충청남도,합계,1503240,838462,379420,178169,261671,0,819260,19202,...,2,0.4631,5,0.2554,보수정당,그외정당,1,1,2,0
1,충청남도,천안시,372195,162726,84436,36426,39914,0,160776,1950,...,2,0.5252,1,0.2266,보수정당,진보정당,1,1,2,0
2,충청남도,공주시,100960,61082,21948,13868,23783,0,59599,1483,...,2,0.3683,5,0.347,보수정당,그외정당,1,1,2,0
3,충청남도,보령시,84236,52539,20818,16826,13674,0,51318,1221,...,2,0.4057,1,0.3279,보수정당,진보정당,1,1,2,0
4,충청남도,아산시,156817,74771,28561,10839,34057,0,73457,1314,...,2,0.3888,5,0.3739,보수정당,그외정당,1,1,2,0
5,충청남도,서산시,112204,61310,27317,13144,19336,0,59797,1513,...,2,0.4568,5,0.2566,보수정당,그외정당,1,1,2,0
6,충청남도,태안군,51770,35084,13756,7572,12515,0,33843,1241,...,2,0.4065,5,0.3162,보수정당,그외정당,1,1,2,0
7,충청남도,금산군,47627,32263,12045,6017,13184,0,31246,1017,...,2,0.3855,5,0.3729,보수정당,그외정당,1,1,2,0
8,충청남도,연기군,65548,39859,12927,13698,12208,0,38833,1026,...,1,0.3527,2,0.3329,진보정당,보수정당,1,1,2,0
9,충청남도,논산시,104305,57835,20497,13044,22625,0,56166,1669,...,2,0.3649,5,0.3507,보수정당,그외정당,1,1,2,0


### preprocessing

In [75]:
# Abbreviate the province name in '시도' ('충청남도' -> '충남').
# assign() returns a new DataFrame rather than modifying the one it is called on.
df_chungnam = df_chungnam.assign(시도=df_chungnam['시도'].replace('충청남도', '충남'))

In [76]:
# Inspect the column names available before reordering.
df_chungnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [77]:
# Leading columns of the final layout, in output order.
# '선거년도' and '선거종류' do not exist yet; they are created by assign() below.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# Every remaining column follows, keeping its current relative order.
other_cols = df_chungnam.columns[~df_chungnam.columns.isin(fixed_cols)].tolist()

# Add the election metadata as constant columns (the year is stored as the
# string '2006'), apply the new column order, then expose '시도' as '지역'.
df_chungnam = (
    df_chungnam.assign(선거종류='광역단체장', 선거년도='2006')[fixed_cols + other_cols]
    .rename({'시도': '지역'}, axis='columns')
)

In [78]:
# Notebook preview after reordering ('시도' is now named '지역').
df_chungnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,충남,합계,2006,광역단체장,보수정당,그외정당,2,0.4631,5,0.2554,...,0,1503240,838462,379420,178169,261671,0,819260,19202,664778
1,충남,천안시,2006,광역단체장,보수정당,진보정당,2,0.5252,1,0.2266,...,0,372195,162726,84436,36426,39914,0,160776,1950,209469
2,충남,공주시,2006,광역단체장,보수정당,그외정당,2,0.3683,5,0.347,...,0,100960,61082,21948,13868,23783,0,59599,1483,39878
3,충남,보령시,2006,광역단체장,보수정당,진보정당,2,0.4057,1,0.3279,...,0,84236,52539,20818,16826,13674,0,51318,1221,31697
4,충남,아산시,2006,광역단체장,보수정당,그외정당,2,0.3888,5,0.3739,...,0,156817,74771,28561,10839,34057,0,73457,1314,82046
5,충남,서산시,2006,광역단체장,보수정당,그외정당,2,0.4568,5,0.2566,...,0,112204,61310,27317,13144,19336,0,59797,1513,50894
6,충남,태안군,2006,광역단체장,보수정당,그외정당,2,0.4065,5,0.3162,...,0,51770,35084,13756,7572,12515,0,33843,1241,16686
7,충남,금산군,2006,광역단체장,보수정당,그외정당,2,0.3855,5,0.3729,...,0,47627,32263,12045,6017,13184,0,31246,1017,15364
8,충남,연기군,2006,광역단체장,진보정당,보수정당,1,0.3527,2,0.3329,...,0,65548,39859,12927,13698,12208,0,38833,1026,25689
9,충남,논산시,2006,광역단체장,보수정당,그외정당,2,0.3649,5,0.3507,...,0,104305,57835,20497,13044,22625,0,56166,1669,46470


### v4.1 ~ v4.3

In [79]:
# v4.1 - the complete table (the '합계' total row plus every district row).
# NOTE: 'utf-8-sig' writes a UTF-8 byte-order mark at the start of each file.
df_chungnam.to_csv("temp4_1_governor_chungnam_4.csv", index=False, encoding="utf-8-sig")

# v4.2 - district rows only: everything except the '합계' (total) row.
df_chungnam2 = df_chungnam.loc[df_chungnam['구시군'] != '합계']
df_chungnam2.to_csv("temp4_2_governor_chungnam_4.csv", index=False, encoding="utf-8-sig")

# v4.3 - the '합계' (total) row only, with the '구시군' column removed.
df_chungnam3 = df_chungnam.loc[df_chungnam['구시군'] == '합계'].drop(columns="구시군")
df_chungnam3.to_csv("temp4_3_governor_chungnam_4.csv", index=False, encoding="utf-8-sig")

## Jeonbuk

In [80]:
# Fetch the Jeonbuk results stored under the 'df_jeonbuk' key of election_results.
df_jeonbuk = election_results['df_jeonbuk']

In [81]:
# Notebook preview of the Jeonbuk frame before preprocessing.
df_jeonbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,전라북도,합계,1429632,827387,62922,389436,357563,0,809921,17466,...,1,0.4808,3,0.3653,진보정당,그외정당,1,1,2,0
1,전라북도,전주시완산구,252082,121291,6322,59401,54379,0,120102,1189,...,1,0.4946,3,0.3828,진보정당,그외정당,1,1,2,0
2,전라북도,전주시덕진구,198612,94137,5075,45562,42690,0,93327,810,...,1,0.4882,3,0.3657,진보정당,그외정당,1,1,2,0
3,전라북도,군산시,198467,111760,19593,44385,45208,0,109186,2574,...,1,0.4065,3,0.3308,진보정당,그외정당,1,1,2,0
4,전라북도,익산시,235847,123052,8189,55008,57461,0,120658,2394,...,1,0.4559,3,0.398,진보정당,그외정당,1,1,2,0
5,전라북도,정읍시,99727,62082,3181,30801,26641,0,60623,1459,...,1,0.5081,3,0.3528,진보정당,그외정당,1,1,2,0
6,전라북도,남원시,72197,50215,2944,27923,18139,0,49006,1209,...,1,0.5698,3,0.3238,진보정당,그외정당,1,1,2,0
7,전라북도,김제시,82928,55599,3576,27773,22594,0,53943,1656,...,1,0.5149,3,0.3582,진보정당,그외정당,1,1,2,0
8,전라북도,완주군,66720,41559,2019,19070,19524,0,40613,946,...,1,0.4696,3,0.3801,진보정당,그외정당,1,1,2,0
9,전라북도,진안군,23432,17528,1437,9415,6173,0,17025,503,...,1,0.553,3,0.2929,진보정당,그외정당,1,1,2,0


### preprocessing

In [82]:
# Abbreviate the province name in '시도' ('전라북도' -> '전북').
# assign() returns a new DataFrame rather than modifying the one it is called on.
df_jeonbuk = df_jeonbuk.assign(시도=df_jeonbuk['시도'].replace('전라북도', '전북'))

In [83]:
# Inspect the column names available before reordering.
df_jeonbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [84]:
# Leading columns of the final layout, in output order.
# '선거년도' and '선거종류' do not exist yet; they are created by assign() below.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# Every remaining column follows, keeping its current relative order.
other_cols = df_jeonbuk.columns[~df_jeonbuk.columns.isin(fixed_cols)].tolist()

# Add the election metadata as constant columns (the year is stored as the
# string '2006'), apply the new column order, then expose '시도' as '지역'.
df_jeonbuk = (
    df_jeonbuk.assign(선거종류='광역단체장', 선거년도='2006')[fixed_cols + other_cols]
    .rename({'시도': '지역'}, axis='columns')
)

In [85]:
# Notebook preview after reordering ('시도' is now named '지역').
df_jeonbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,전북,합계,2006,광역단체장,진보정당,그외정당,1,0.4808,3,0.3653,...,0,1429632,827387,62922,389436,357563,0,809921,17466,602245
1,전북,전주시완산구,2006,광역단체장,진보정당,그외정당,1,0.4946,3,0.3828,...,0,252082,121291,6322,59401,54379,0,120102,1189,130791
2,전북,전주시덕진구,2006,광역단체장,진보정당,그외정당,1,0.4882,3,0.3657,...,0,198612,94137,5075,45562,42690,0,93327,810,104475
3,전북,군산시,2006,광역단체장,진보정당,그외정당,1,0.4065,3,0.3308,...,0,198467,111760,19593,44385,45208,0,109186,2574,86707
4,전북,익산시,2006,광역단체장,진보정당,그외정당,1,0.4559,3,0.398,...,0,235847,123052,8189,55008,57461,0,120658,2394,112795
5,전북,정읍시,2006,광역단체장,진보정당,그외정당,1,0.5081,3,0.3528,...,0,99727,62082,3181,30801,26641,0,60623,1459,37645
6,전북,남원시,2006,광역단체장,진보정당,그외정당,1,0.5698,3,0.3238,...,0,72197,50215,2944,27923,18139,0,49006,1209,21982
7,전북,김제시,2006,광역단체장,진보정당,그외정당,1,0.5149,3,0.3582,...,0,82928,55599,3576,27773,22594,0,53943,1656,27329
8,전북,완주군,2006,광역단체장,진보정당,그외정당,1,0.4696,3,0.3801,...,0,66720,41559,2019,19070,19524,0,40613,946,25161
9,전북,진안군,2006,광역단체장,진보정당,그외정당,1,0.553,3,0.2929,...,0,23432,17528,1437,9415,6173,0,17025,503,5904


### v4.1 ~ v4.3

In [86]:
# v4.1 - the complete table (the '합계' total row plus every district row).
# NOTE: 'utf-8-sig' writes a UTF-8 byte-order mark at the start of each file.
df_jeonbuk.to_csv("temp4_1_governor_jeonbuk_4.csv", index=False, encoding="utf-8-sig")

# v4.2 - district rows only: everything except the '합계' (total) row.
df_jeonbuk2 = df_jeonbuk.loc[df_jeonbuk['구시군'] != '합계']
df_jeonbuk2.to_csv("temp4_2_governor_jeonbuk_4.csv", index=False, encoding="utf-8-sig")

# v4.3 - the '합계' (total) row only, with the '구시군' column removed.
df_jeonbuk3 = df_jeonbuk.loc[df_jeonbuk['구시군'] == '합계'].drop(columns="구시군")
df_jeonbuk3.to_csv("temp4_3_governor_jeonbuk_4.csv", index=False, encoding="utf-8-sig")

## Jeonnam

In [87]:
# Fetch the Jeonnam results stored under the 'df_jeonnam' key of election_results.
df_jeonnam = election_results['df_jeonnam']

In [88]:
# Notebook preview of the Jeonnam frame before preprocessing.
df_jeonnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,전라남도,합계,1513912,973820,55444,181756,709596,0,946796,27024,...,3,0.6769,1,0.192,그외정당,진보정당,1,1,2,0
1,전라남도,목포시,176932,88358,2667,12717,71692,0,87076,1282,...,3,0.76,1,0.146,그외정당,진보정당,1,1,2,0
2,전라남도,여수시,224001,122812,4973,32665,83156,0,120794,2018,...,3,0.6329,1,0.2704,그외정당,진보정당,1,1,2,0
3,전라남도,순천시,194227,105389,4311,27461,71696,0,103468,1921,...,3,0.6175,1,0.2654,그외정당,진보정당,1,1,2,0
4,전라남도,나주시,79340,55195,2484,5955,44952,0,53391,1804,...,3,0.7654,1,0.1115,그외정당,진보정당,1,1,2,0
5,전라남도,광양시,96951,58745,4266,20925,32639,0,57830,915,...,3,0.4756,1,0.3618,그외정당,진보정당,1,1,2,0
6,전라남도,담양군,41974,29475,1643,5756,21246,0,28645,830,...,3,0.6851,1,0.2009,그외정당,진보정당,1,1,2,0
7,전라남도,장성군,39626,27752,1948,3757,21321,0,27026,726,...,3,0.7172,1,0.139,그외정당,진보정당,1,1,2,0
8,전라남도,곡성군,28138,21815,1439,4152,15498,0,21089,726,...,3,0.6054,1,0.1969,그외정당,진보정당,1,1,2,0
9,전라남도,구례군,24281,19123,1118,4412,13039,0,18569,554,...,3,0.627,1,0.2376,그외정당,진보정당,1,1,2,0


### preprocessing

In [89]:
# Abbreviate the province name in '시도' ('전라남도' -> '전남').
# assign() returns a new DataFrame rather than modifying the one it is called on.
df_jeonnam = df_jeonnam.assign(시도=df_jeonnam['시도'].replace('전라남도', '전남'))

In [90]:
# Inspect the column names available before reordering.
df_jeonnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [91]:
# Leading columns of the final layout, in output order.
# '선거년도' and '선거종류' do not exist yet; they are created by assign() below.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# Every remaining column follows, keeping its current relative order.
other_cols = df_jeonnam.columns[~df_jeonnam.columns.isin(fixed_cols)].tolist()

# Add the election metadata as constant columns (the year is stored as the
# string '2006'), apply the new column order, then expose '시도' as '지역'.
df_jeonnam = (
    df_jeonnam.assign(선거종류='광역단체장', 선거년도='2006')[fixed_cols + other_cols]
    .rename({'시도': '지역'}, axis='columns')
)

In [92]:
# Notebook preview after reordering ('시도' is now named '지역').
df_jeonnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,전남,합계,2006,광역단체장,그외정당,진보정당,3,0.6769,1,0.192,...,0,1513912,973820,55444,181756,709596,0,946796,27024,540092
1,전남,목포시,2006,광역단체장,그외정당,진보정당,3,0.76,1,0.146,...,0,176932,88358,2667,12717,71692,0,87076,1282,88574
2,전남,여수시,2006,광역단체장,그외정당,진보정당,3,0.6329,1,0.2704,...,0,224001,122812,4973,32665,83156,0,120794,2018,101189
3,전남,순천시,2006,광역단체장,그외정당,진보정당,3,0.6175,1,0.2654,...,0,194227,105389,4311,27461,71696,0,103468,1921,88838
4,전남,나주시,2006,광역단체장,그외정당,진보정당,3,0.7654,1,0.1115,...,0,79340,55195,2484,5955,44952,0,53391,1804,24145
5,전남,광양시,2006,광역단체장,그외정당,진보정당,3,0.4756,1,0.3618,...,0,96951,58745,4266,20925,32639,0,57830,915,38206
6,전남,담양군,2006,광역단체장,그외정당,진보정당,3,0.6851,1,0.2009,...,0,41974,29475,1643,5756,21246,0,28645,830,12499
7,전남,장성군,2006,광역단체장,그외정당,진보정당,3,0.7172,1,0.139,...,0,39626,27752,1948,3757,21321,0,27026,726,11874
8,전남,곡성군,2006,광역단체장,그외정당,진보정당,3,0.6054,1,0.1969,...,0,28138,21815,1439,4152,15498,0,21089,726,6323
9,전남,구례군,2006,광역단체장,그외정당,진보정당,3,0.627,1,0.2376,...,0,24281,19123,1118,4412,13039,0,18569,554,5158


### v4.1 ~ v4.3

In [93]:
# v4.1 - the complete table (the '합계' total row plus every district row).
# NOTE: 'utf-8-sig' writes a UTF-8 byte-order mark at the start of each file.
df_jeonnam.to_csv("temp4_1_governor_jeonnam_4.csv", index=False, encoding="utf-8-sig")

# v4.2 - district rows only: everything except the '합계' (total) row.
df_jeonnam2 = df_jeonnam.loc[df_jeonnam['구시군'] != '합계']
df_jeonnam2.to_csv("temp4_2_governor_jeonnam_4.csv", index=False, encoding="utf-8-sig")

# v4.3 - the '합계' (total) row only, with the '구시군' column removed.
df_jeonnam3 = df_jeonnam.loc[df_jeonnam['구시군'] == '합계'].drop(columns="구시군")
df_jeonnam3.to_csv("temp4_3_governor_jeonnam_4.csv", index=False, encoding="utf-8-sig")

## Gyeongbuk

In [94]:
# Fetch the Gyeongbuk results stored under the 'df_gyeongbuk' key of election_results.
df_gyeongbuk = election_results['df_gyeongbuk']

In [95]:
# Notebook preview of the Gyeongbuk frame before preprocessing.
df_gyeongbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경상북도,합계,2087709,1284342,961363,290358,0,0,1251721,32621,...,2,0.768,1,0.232,보수정당,진보정당,1,1,0,0
1,경상북도,포항시북구,190449,105458,78490,25052,0,0,103542,1916,...,2,0.758,1,0.242,보수정당,진보정당,1,1,0,0
2,경상북도,포항시남구,191369,106552,76190,28285,0,0,104475,2077,...,2,0.7293,1,0.2707,보수정당,진보정당,1,1,0,0
3,경상북도,울릉군,8429,7197,5034,1978,0,0,7012,185,...,2,0.7179,1,0.2821,보수정당,진보정당,1,1,0,0
4,경상북도,경주시,210980,126368,99188,24020,0,0,123208,3160,...,2,0.805,1,0.195,보수정당,진보정당,1,1,0,0
5,경상북도,김천시,110274,75345,59700,13759,0,0,73459,1886,...,2,0.8127,1,0.1873,보수정당,진보정당,1,1,0,0
6,경상북도,안동시,134448,86104,63435,20188,0,0,83623,2481,...,2,0.7586,1,0.2414,보수정당,진보정당,1,1,0,0
7,경상북도,구미시,269226,132377,98851,31617,0,0,130468,1909,...,2,0.7577,1,0.2423,보수정당,진보정당,1,1,0,0
8,경상북도,영주시,93247,63459,48518,13339,0,0,61857,1602,...,2,0.7844,1,0.2156,보수정당,진보정당,1,1,0,0
9,경상북도,영천시,86235,55405,37956,15816,0,0,53772,1633,...,2,0.7059,1,0.2941,보수정당,진보정당,1,1,0,0


### preprocessing

In [96]:
# Abbreviate the province name in '시도' ('경상북도' -> '경북').
# assign() returns a new DataFrame rather than modifying the one it is called on.
df_gyeongbuk = df_gyeongbuk.assign(시도=df_gyeongbuk['시도'].replace('경상북도', '경북'))

In [97]:
# Inspect the column names available before reordering.
df_gyeongbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [98]:
# Leading columns of the final layout, in output order.
# '선거년도' and '선거종류' do not exist yet; they are created by assign() below.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# Every remaining column follows, keeping its current relative order.
other_cols = df_gyeongbuk.columns[~df_gyeongbuk.columns.isin(fixed_cols)].tolist()

# Add the election metadata as constant columns (the year is stored as the
# string '2006'), apply the new column order, then expose '시도' as '지역'.
df_gyeongbuk = (
    df_gyeongbuk.assign(선거종류='광역단체장', 선거년도='2006')[fixed_cols + other_cols]
    .rename({'시도': '지역'}, axis='columns')
)

In [99]:
# Notebook preview after reordering ('시도' is now named '지역').
df_gyeongbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,경북,합계,2006,광역단체장,보수정당,진보정당,2,0.768,1,0.232,...,0,2087709,1284342,961363,290358,0,0,1251721,32621,803367
1,경북,포항시북구,2006,광역단체장,보수정당,진보정당,2,0.758,1,0.242,...,0,190449,105458,78490,25052,0,0,103542,1916,84991
2,경북,포항시남구,2006,광역단체장,보수정당,진보정당,2,0.7293,1,0.2707,...,0,191369,106552,76190,28285,0,0,104475,2077,84817
3,경북,울릉군,2006,광역단체장,보수정당,진보정당,2,0.7179,1,0.2821,...,0,8429,7197,5034,1978,0,0,7012,185,1232
4,경북,경주시,2006,광역단체장,보수정당,진보정당,2,0.805,1,0.195,...,0,210980,126368,99188,24020,0,0,123208,3160,84612
5,경북,김천시,2006,광역단체장,보수정당,진보정당,2,0.8127,1,0.1873,...,0,110274,75345,59700,13759,0,0,73459,1886,34929
6,경북,안동시,2006,광역단체장,보수정당,진보정당,2,0.7586,1,0.2414,...,0,134448,86104,63435,20188,0,0,83623,2481,48344
7,경북,구미시,2006,광역단체장,보수정당,진보정당,2,0.7577,1,0.2423,...,0,269226,132377,98851,31617,0,0,130468,1909,136849
8,경북,영주시,2006,광역단체장,보수정당,진보정당,2,0.7844,1,0.2156,...,0,93247,63459,48518,13339,0,0,61857,1602,29788
9,경북,영천시,2006,광역단체장,보수정당,진보정당,2,0.7059,1,0.2941,...,0,86235,55405,37956,15816,0,0,53772,1633,30830


### v4.1 ~ v4.3

In [100]:
# v4.1 - the complete table (the '합계' total row plus every district row).
# NOTE: 'utf-8-sig' writes a UTF-8 byte-order mark at the start of each file.
df_gyeongbuk.to_csv("temp4_1_governor_gyeongbuk_4.csv", index=False, encoding="utf-8-sig")

# v4.2 - district rows only: everything except the '합계' (total) row.
df_gyeongbuk2 = df_gyeongbuk.loc[df_gyeongbuk['구시군'] != '합계']
df_gyeongbuk2.to_csv("temp4_2_governor_gyeongbuk_4.csv", index=False, encoding="utf-8-sig")

# v4.3 - the '합계' (total) row only, with the '구시군' column removed.
df_gyeongbuk3 = df_gyeongbuk.loc[df_gyeongbuk['구시군'] == '합계'].drop(columns="구시군")
df_gyeongbuk3.to_csv("temp4_3_governor_gyeongbuk_4.csv", index=False, encoding="utf-8-sig")

## Gyeongnam

In [101]:
# Fetch the Gyeongnam results stored under the 'df_gyeongnam' key of election_results.
df_gyeongnam = election_results['df_gyeongnam']

In [102]:
# Notebook preview of the Gyeongnam frame before preprocessing.
df_gyeongnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경상남도,합계,2375265,1373815,852377,343137,154729,0,1350243,23572,...,2,0.6313,1,0.2541,보수정당,진보정당,1,1,2,0
1,경상남도,창원시,356493,184115,112369,38627,31624,0,182620,1495,...,2,0.6153,1,0.2115,보수정당,진보정당,1,1,2,0
2,경상남도,마산시,326663,171791,122814,31539,15542,0,169895,1896,...,2,0.7229,1,0.1856,보수정당,진보정당,1,1,2,0
3,경상남도,진주시,252537,145232,77772,50737,14603,0,143112,2120,...,2,0.5434,1,0.3545,보수정당,진보정당,1,1,2,0
4,경상남도,진해시,118111,70223,46643,15379,7095,0,69117,1106,...,2,0.6748,1,0.2225,보수정당,진보정당,1,1,2,0
5,경상남도,통영시,102469,60728,41786,13684,4250,0,59720,1008,...,2,0.6997,1,0.2291,보수정당,진보정당,1,1,2,0
6,경상남도,고성군,46167,33469,21513,7661,3440,0,32614,855,...,2,0.6596,1,0.2349,보수정당,진보정당,1,1,2,0
7,경상남도,사천시,86136,57392,35061,15408,5680,0,56149,1243,...,2,0.6244,1,0.2744,보수정당,진보정당,1,1,2,0
8,경상남도,김해시,315263,150966,87632,47354,14142,0,149128,1838,...,2,0.5876,1,0.3175,보수정당,진보정당,1,1,2,0
9,경상남도,밀양시,90362,58412,36650,15879,4669,0,57198,1214,...,2,0.6408,1,0.2776,보수정당,진보정당,1,1,2,0


### preprocessing

In [103]:
# Replace the full province name in '시도' with its short form.
df_gyeongnam = df_gyeongnam.assign(
    시도=df_gyeongnam['시도'].replace({'경상남도': '경남'})
)

In [104]:
# List the column labels present before the reorder step.
df_gyeongnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [105]:
# Columns that lead the output, in their final order.
# '선거년도' and '선거종류' do not exist yet; they are added by assign() below.
fixed_cols = [
    '시도',
    '구시군',
    '선거년도',
    '선거종류',
    '득표_1위_정당',
    '득표_2위_정당',
    '득표_1위_후보번호',
    '득표_1위_득표율',
    '득표_2위_후보번호',
    '득표_2위_득표율',
    '보수정당_후보자수',
    '진보정당_후보자수',
    '그외정당_후보자수',
    '무소속_후보자수',
]

# Everything not named above follows, keeping its existing relative order.
other_cols = [name for name in df_gyeongnam.columns if name not in fixed_cols]

# Add the constant election columns, apply the column order, then rename
# '시도' to '지역'.
df_gyeongnam = (
    df_gyeongnam
    .assign(선거종류='광역단체장', 선거년도='2006')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [106]:
# Display the frame after the columns were added, reordered and renamed.
df_gyeongnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,경남,합계,2006,광역단체장,보수정당,진보정당,2,0.6313,1,0.2541,...,0,2375265,1373815,852377,343137,154729,0,1350243,23572,1001450
1,경남,창원시,2006,광역단체장,보수정당,진보정당,2,0.6153,1,0.2115,...,0,356493,184115,112369,38627,31624,0,182620,1495,172378
2,경남,마산시,2006,광역단체장,보수정당,진보정당,2,0.7229,1,0.1856,...,0,326663,171791,122814,31539,15542,0,169895,1896,154872
3,경남,진주시,2006,광역단체장,보수정당,진보정당,2,0.5434,1,0.3545,...,0,252537,145232,77772,50737,14603,0,143112,2120,107305
4,경남,진해시,2006,광역단체장,보수정당,진보정당,2,0.6748,1,0.2225,...,0,118111,70223,46643,15379,7095,0,69117,1106,47888
5,경남,통영시,2006,광역단체장,보수정당,진보정당,2,0.6997,1,0.2291,...,0,102469,60728,41786,13684,4250,0,59720,1008,41741
6,경남,고성군,2006,광역단체장,보수정당,진보정당,2,0.6596,1,0.2349,...,0,46167,33469,21513,7661,3440,0,32614,855,12698
7,경남,사천시,2006,광역단체장,보수정당,진보정당,2,0.6244,1,0.2744,...,0,86136,57392,35061,15408,5680,0,56149,1243,28744
8,경남,김해시,2006,광역단체장,보수정당,진보정당,2,0.5876,1,0.3175,...,0,315263,150966,87632,47354,14142,0,149128,1838,164297
9,경남,밀양시,2006,광역단체장,보수정당,진보정당,2,0.6408,1,0.2776,...,0,90362,58412,36650,15879,4669,0,57198,1214,31950


### v4.1 ~ v4.3

In [107]:
# Derive the two row subsets first, then write all three variants.
# v4.2 keeps every row except the '합계' (total) row.
df_gyeongnam2 = df_gyeongnam.loc[df_gyeongnam["구시군"] != "합계"]
# v4.3 keeps only the '합계' row; its '구시군' column is dropped.
df_gyeongnam3 = df_gyeongnam.loc[df_gyeongnam["구시군"] == "합계"].drop(columns="구시군")

# v4.1: the full table
df_gyeongnam.to_csv(
    "temp4_1_governor_gyeongnam_4.csv", index=False, encoding="utf-8-sig"
)
# v4.2: rows other than the total
df_gyeongnam2.to_csv(
    "temp4_2_governor_gyeongnam_4.csv", index=False, encoding="utf-8-sig"
)
# v4.3: the total row only
df_gyeongnam3.to_csv(
    "temp4_3_governor_gyeongnam_4.csv", index=False, encoding="utf-8-sig"
)

## Jeju

In [108]:
# Look up the Jeju frame stored under 'df_jeju' in election_results.
df_jeju = election_results['df_jeju']

In [109]:
# Display the frame as loaded, before any preprocessing.
df_jeju

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,제주특별자치도,합계,411862,277003,112774,44334,0,117244,274352,2651,...,6,0.4273,2,0.4111,무소속,보수정당,1,1,0,1
1,제주특별자치도,제주시,218768,138717,56423,22748,0,58722,137893,824,...,6,0.4259,2,0.4092,무소속,보수정당,1,1,0,1
2,제주특별자치도,북제주군,74387,51650,16607,10793,0,23455,50855,795,...,6,0.4612,2,0.3266,무소속,보수정당,1,1,0,1
3,제주특별자치도,서귀포시,62209,43561,19930,5087,0,18098,43115,446,...,2,0.4623,6,0.4198,보수정당,무소속,1,1,0,1
4,제주특별자치도,남제주군,56498,43075,19814,5706,0,16969,42489,586,...,2,0.4663,6,0.3994,보수정당,무소속,1,1,0,1


### preprocessing

In [110]:
# Replace the full province name in '시도' with its short form.
df_jeju = df_jeju.assign(
    시도=df_jeju['시도'].replace({'제주특별자치도': '제주'})
)

In [111]:
# List the column labels present before the reorder step.
df_jeju.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [112]:
# Columns that lead the output, in their final order.
# '선거년도' and '선거종류' do not exist yet; they are added by assign() below.
fixed_cols = [
    '시도',
    '구시군',
    '선거년도',
    '선거종류',
    '득표_1위_정당',
    '득표_2위_정당',
    '득표_1위_후보번호',
    '득표_1위_득표율',
    '득표_2위_후보번호',
    '득표_2위_득표율',
    '보수정당_후보자수',
    '진보정당_후보자수',
    '그외정당_후보자수',
    '무소속_후보자수',
]

# Everything not named above follows, keeping its existing relative order.
other_cols = [name for name in df_jeju.columns if name not in fixed_cols]

# Add the constant election columns, apply the column order, then rename
# '시도' to '지역'.
df_jeju = (
    df_jeju
    .assign(선거종류='광역단체장', 선거년도='2006')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [113]:
# Display the frame after the columns were added, reordered and renamed.
df_jeju

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,제주,합계,2006,광역단체장,무소속,보수정당,6,0.4273,2,0.4111,...,1,411862,277003,112774,44334,0,117244,274352,2651,134859
1,제주,제주시,2006,광역단체장,무소속,보수정당,6,0.4259,2,0.4092,...,1,218768,138717,56423,22748,0,58722,137893,824,80051
2,제주,북제주군,2006,광역단체장,무소속,보수정당,6,0.4612,2,0.3266,...,1,74387,51650,16607,10793,0,23455,50855,795,22737
3,제주,서귀포시,2006,광역단체장,보수정당,무소속,2,0.4623,6,0.4198,...,1,62209,43561,19930,5087,0,18098,43115,446,18648
4,제주,남제주군,2006,광역단체장,보수정당,무소속,2,0.4663,6,0.3994,...,1,56498,43075,19814,5706,0,16969,42489,586,13423


### v4.1 ~ v4.3

In [114]:
# Derive the two row subsets first, then write all three variants.
# v4.2 keeps every row except the '합계' (total) row.
df_jeju2 = df_jeju.loc[df_jeju["구시군"] != "합계"]
# v4.3 keeps only the '합계' row; its '구시군' column is dropped.
df_jeju3 = df_jeju.loc[df_jeju["구시군"] == "합계"].drop(columns="구시군")

# v4.1: the full table
df_jeju.to_csv(
    "temp4_1_governor_jeju_4.csv", index=False, encoding="utf-8-sig"
)
# v4.2: rows other than the total
df_jeju2.to_csv(
    "temp4_2_governor_jeju_4.csv", index=False, encoding="utf-8-sig"
)
# v4.3: the total row only
df_jeju3.to_csv(
    "temp4_3_governor_jeju_4.csv", index=False, encoding="utf-8-sig"
)

## Merge

### v4.1

In [115]:
# Region keys, in the order their frames are stacked.
AVAILABLE_REGIONS = [
    'seoul',
    'busan',
    'daegu',
    'incheon',
    'gwangju',
    'daejeon',
    'ulsan',
    'gyeonggi',
    'gangwon',
    'chungbuk',
    'chungnam',
    'jeonbuk',
    'jeonnam',
    'gyeongbuk',
    'gyeongnam',
    'jeju',
]

# Fetch each notebook-level df_<region> frame by name and stack them under a
# fresh 0..n-1 index.
df_combined = pd.concat(
    (globals()[f'df_{region}'] for region in AVAILABLE_REGIONS),
    ignore_index=True,
)

In [116]:
# Display the stacked v4.1 frame built from the df_<region> frames.
df_combined

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,합계,2006,광역단체장,보수정당,진보정당,2,0.6106,1,0.2731,...,1,7983648,3977842,2409760,1077890,445368,13808,3946826,31016,4005806
1,서울,종로구,2006,광역단체장,보수정당,진보정당,2,0.5959,1,0.2749,...,1,134603,71240,41992,19369,8840,265,70466,774,63363
2,서울,중구,2006,광역단체장,보수정당,진보정당,2,0.5962,1,0.2804,...,1,107164,55369,32708,15382,6556,212,54858,511,51795
3,서울,용산구,2006,광역단체장,보수정당,진보정당,2,0.6148,1,0.2745,...,1,187461,95809,58427,26087,10196,319,95029,780,91652
4,서울,성동구,2006,광역단체장,보수정당,진보정당,2,0.5940,1,0.2743,...,1,266956,131927,77631,35842,16725,488,130686,1241,135029
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,제주,합계,2006,광역단체장,무소속,보수정당,6,0.4273,2,0.4111,...,1,411862,277003,112774,44334,0,117244,274352,2651,134859
262,제주,제주시,2006,광역단체장,무소속,보수정당,6,0.4259,2,0.4092,...,1,218768,138717,56423,22748,0,58722,137893,824,80051
263,제주,북제주군,2006,광역단체장,무소속,보수정당,6,0.4612,2,0.3266,...,1,74387,51650,16607,10793,0,23455,50855,795,22737
264,제주,서귀포시,2006,광역단체장,보수정당,무소속,2,0.4623,6,0.4198,...,1,62209,43561,19930,5087,0,18098,43115,446,18648


In [117]:
# Write the combined v4.1 table without the index column; "utf-8-sig" prepends a BOM.
df_combined.to_csv("temp4_1_governor_4.csv", index=False, encoding="utf-8-sig")

### v4.2

In [118]:
# Region keys, in the order their frames are stacked.
AVAILABLE_REGIONS = [
    'seoul',
    'busan',
    'daegu',
    'incheon',
    'gwangju',
    'daejeon',
    'ulsan',
    'gyeonggi',
    'gangwon',
    'chungbuk',
    'chungnam',
    'jeonbuk',
    'jeonnam',
    'gyeongbuk',
    'gyeongnam',
    'jeju',
]

# Fetch each notebook-level df_<region>2 frame by name and stack them under a
# fresh 0..n-1 index.
df_combined2 = pd.concat(
    (globals()[f'df_{region}2'] for region in AVAILABLE_REGIONS),
    ignore_index=True,
)

In [119]:
# Display the stacked v4.2 frame built from the df_<region>2 frames.
df_combined2

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,종로구,2006,광역단체장,보수정당,진보정당,2,0.5959,1,0.2749,...,1,134603,71240,41992,19369,8840,265,70466,774,63363
1,서울,중구,2006,광역단체장,보수정당,진보정당,2,0.5962,1,0.2804,...,1,107164,55369,32708,15382,6556,212,54858,511,51795
2,서울,용산구,2006,광역단체장,보수정당,진보정당,2,0.6148,1,0.2745,...,1,187461,95809,58427,26087,10196,319,95029,780,91652
3,서울,성동구,2006,광역단체장,보수정당,진보정당,2,0.5940,1,0.2743,...,1,266956,131927,77631,35842,16725,488,130686,1241,135029
4,서울,광진구,2006,광역단체장,보수정당,진보정당,2,0.5943,1,0.2837,...,1,293308,141079,83209,39716,16644,443,140012,1067,152229
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,경남,합천군,2006,광역단체장,보수정당,진보정당,2,0.6992,1,0.1696,...,0,46207,34995,23711,5752,4450,0,33913,1082,11212
246,제주,제주시,2006,광역단체장,무소속,보수정당,6,0.4259,2,0.4092,...,1,218768,138717,56423,22748,0,58722,137893,824,80051
247,제주,북제주군,2006,광역단체장,무소속,보수정당,6,0.4612,2,0.3266,...,1,74387,51650,16607,10793,0,23455,50855,795,22737
248,제주,서귀포시,2006,광역단체장,보수정당,무소속,2,0.4623,6,0.4198,...,1,62209,43561,19930,5087,0,18098,43115,446,18648


In [120]:
# Write the combined v4.2 table without the index column; "utf-8-sig" prepends a BOM.
df_combined2.to_csv("temp4_2_governor_4.csv", index=False, encoding="utf-8-sig")

### v4.3

In [121]:
# Region keys, in the order their frames are stacked.
AVAILABLE_REGIONS = [
    'seoul',
    'busan',
    'daegu',
    'incheon',
    'gwangju',
    'daejeon',
    'ulsan',
    'gyeonggi',
    'gangwon',
    'chungbuk',
    'chungnam',
    'jeonbuk',
    'jeonnam',
    'gyeongbuk',
    'gyeongnam',
    'jeju',
]

# Fetch each notebook-level df_<region>3 frame by name and stack them under a
# fresh 0..n-1 index.
df_combined3 = pd.concat(
    (globals()[f'df_{region}3'] for region in AVAILABLE_REGIONS),
    ignore_index=True,
)

In [122]:
# Display the stacked v4.3 frame built from the df_<region>3 frames.
df_combined3

Unnamed: 0,지역,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,보수정당_후보자수,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,2006,광역단체장,보수정당,진보정당,2,0.6106,1,0.2731,1,...,1,7983648,3977842,2409760,1077890,445368,13808,3946826,31016,4005806
1,부산,2006,광역단체장,보수정당,진보정당,2,0.6555,1,0.2412,1,...,0,2845104,1378618,895214,329470,141061,0,1365745,12873,1466486
2,대구,2006,광역단체장,보수정당,진보정당,2,0.7015,1,0.2108,1,...,1,1885043,915060,636057,191131,44261,35232,906681,8379,969983
3,인천,2006,광역단체장,보수정당,진보정당,2,0.6193,1,0.2358,1,...,0,1940403,859506,526932,200650,123237,0,850819,8687,1080897
4,광주,2006,광역단체장,그외정당,진보정당,3,0.5162,1,0.3395,1,...,0,1014620,469316,18461,157756,288501,0,464718,4598,545304
5,대전,2006,광역단체장,보수정당,진보정당,2,0.4384,1,0.4115,1,...,0,1077468,532568,231489,217273,79279,0,528041,4527,544900
6,울산,2006,광역단체장,보수정당,그외정당,2,0.6323,4,0.2525,1,...,0,790289,417200,261361,47579,104384,0,413324,3876,373089
7,경기,2006,광역단체장,보수정당,진보정당,2,0.5968,1,0.3076,1,...,0,7918828,3695552,2181677,1124317,349515,0,3655509,40043,4223276
8,강원,2006,광역단체장,보수정당,진보정당,2,0.7057,1,0.2219,1,...,0,1160977,681633,471613,148302,48411,0,668326,13307,479344
9,충북,2006,광역단체장,보수정당,진보정당,2,0.5966,1,0.3063,1,...,0,1126282,616053,361157,185426,58741,0,605324,10729,510229


In [123]:
# Write the combined v4.3 table without the index column; "utf-8-sig" prepends a BOM.
df_combined3.to_csv("temp4_3_governor_4.csv", index=False, encoding="utf-8-sig")

# Batch CSV Files to ZIP

In [124]:
import zipfile
import glob

# Collect every CSV in the current directory. glob returns names in arbitrary
# order, so sort them to make the archive layout and the progress log
# reproducible between runs.
csv_files = sorted(glob.glob('*.csv'))

# ZipFile defaults to ZIP_STORED, which bundles members without compressing
# them; request ZIP_DEFLATED so the files really are compressed, as the final
# message states.
with zipfile.ZipFile('all_csv_files.zip', 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
    for file in csv_files:
        zipf.write(file)
        print(f"Added: {file}")  # Show progress

print(f"Total {len(csv_files)} files compressed.")

Added: temp4_2_governor_jeju_4.csv
Added: temp4_3_governor_chungbuk_4.csv
Added: temp4_1_governor_incheon_4.csv
Added: temp4_1_governor_chungnam_4.csv
Added: temp4_1_governor_jeju_4.csv
Added: temp4_1_governor_gangwon_4.csv
Added: temp4_3_governor_seoul_4.csv
Added: temp4_1_governor_gyeonggi_4.csv
Added: temp4_3_governor_jeonbuk_4.csv
Added: temp4_3_governor_gyeonggi_4.csv
Added: temp4_1_governor_seoul_4.csv
Added: temp4_3_governor_gwangju_4.csv
Added: temp4_3_governor_chungnam_4.csv
Added: temp4_2_governor_gwangju_4.csv
Added: temp4_1_governor_4.csv
Added: temp4_1_governor_gwangju_4.csv
Added: temp4_1_governor_daegu_4.csv
Added: temp4_1_governor_jeonnam_4.csv
Added: temp4_3_governor_4.csv
Added: temp4_3_governor_gyeongbuk_4.csv
Added: temp4_3_governor_jeju_4.csv
Added: temp4_3_governor_gangwon_4.csv
Added: temp4_1_governor_busan_4.csv
Added: temp4_1_governor_ulsan_4.csv
Added: temp4_1_governor_chungbuk_4.csv
Added: temp4_2_governor_jeonnam_4.csv
Added: temp4_1_governor_gyeongnam_4.csv