# Functions

In [1]:
import re
from typing import Dict, Optional, Tuple

import numpy as np
import pandas as pd

def process_governor_election_data(region_name: str) -> Optional[pd.DataFrame]:
    """
    Build the merged governor-election table for one region.

    Downloads the region's detailed per-candidate vote CSV and its summary CSV,
    derives the first- and second-place candidate (number, vote share, party
    category) for every row of the detailed data, adds one candidate-count
    column per party category, and left-joins those derived columns onto the
    summary data on the ('시도', '구시군') key.

    Args:
        region_name (str): Region slug used in the CSV file names
            (e.g. 'busan', 'seoul', 'gyeonggi').

    Returns:
        Optional[pd.DataFrame]: The merged frame, or None when any step fails.
        Failures are printed rather than raised so that a batch run over many
        regions can continue past a bad one.
    """

    # Both CSVs live in the same repository; only the region slug varies.
    df1_url = f"https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v1_g/3rd_2002/temp1_governor_{region_name}_3.csv"
    df2_url = f"https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_1_g/3rd_2002/temp2_1_governor_{region_name}_3.csv"

    print(f"=== {region_name} 지사 선거 데이터 처리 시작 ===")
    print(f"상세 데이터 URL: {df1_url}")
    print(f"요약 데이터 URL: {df2_url}")

    try:
        # Detailed data: one vote-count column per candidate.
        df1 = pd.read_csv(df1_url)
        print(f"상세 데이터 로드 완료: {df1.shape}")

        # Candidate columns start with '득표수_'; '득표수_계' is the total and is excluded.
        vote_columns = [col for col in df1.columns if col.startswith('득표수_') and col != '득표수_계']
        print(f"득표수 관련 컬럼 수: {len(vote_columns)}")

        # Parse the candidate number out of each column name once, up front,
        # instead of re-running the regex for every row.
        candidate_pattern = re.compile(r'득표수_(\d+)_')
        numbered_columns = []
        for col in vote_columns:
            match = candidate_pattern.search(col)
            if match:
                numbered_columns.append((int(match.group(1)), col))

        def find_top_two_candidates(row):
            """Return the (votes, candidate_number, column) tuples of the top two candidates."""
            # Missing vote counts are treated as 0.
            vote_data = [
                (row[col] if pd.notna(row[col]) else 0, candidate_number, col)
                for candidate_number, col in numbered_columns
            ]

            # Stable descending sort: tied candidates keep their column order.
            vote_data.sort(key=lambda item: item[0], reverse=True)

            first_place = vote_data[0] if len(vote_data) > 0 else (0, None, None)
            second_place = vote_data[1] if len(vote_data) > 1 else (0, None, None)
            return first_place, second_place

        def vote_share(votes, total_votes):
            """Return votes / total_votes rounded to 4 places, or 0 without a positive total."""
            if total_votes > 0:
                return round(votes / total_votes, 4)
            return 0

        # Rank every row exactly once and reuse the result for all four
        # derived columns (previously the ranking was recomputed per column).
        winner_numbers, winner_rates = [], []
        runner_up_numbers, runner_up_rates = [], []
        for _, row in df1.iterrows():
            first_place, second_place = find_top_two_candidates(row)
            total_votes = row['득표수_계'] if pd.notna(row['득표수_계']) else 0

            winner_numbers.append(first_place[1])
            winner_rates.append(vote_share(first_place[0], total_votes))
            runner_up_numbers.append(second_place[1])
            runner_up_rates.append(vote_share(second_place[0], total_votes))

        df1['득표_1위_후보번호'] = winner_numbers
        df1['득표_1위_득표율'] = winner_rates
        df1['득표_2위_후보번호'] = runner_up_numbers
        df1['득표_2위_득표율'] = runner_up_rates

        # Candidate number -> party category; the mapping differs per region.
        category_mapping = get_governor_category_mapping(region_name, vote_columns)
        print(f"생성된 카테고리 매핑: {category_mapping}")

        # Candidates without a mapping entry fall back to '기타'.
        df1['득표_1위_정당'] = df1['득표_1위_후보번호'].map(category_mapping).fillna('기타')
        df1['득표_2위_정당'] = df1['득표_2위_후보번호'].map(category_mapping).fillna('기타')

        print("매핑 후 1위 정당 분포 (처리 중):")
        print(df1['득표_1위_정당'].value_counts())

        # Report which candidate numbers ended up in the '기타' fallback.
        unmapped_1st = df1[df1['득표_1위_정당'] == '기타']['득표_1위_후보번호'].unique()
        unmapped_2nd = df1[df1['득표_2위_정당'] == '기타']['득표_2위_후보번호'].unique()
        if len(unmapped_1st) > 0:
            print(f"경고: 1위에서 매핑되지 않은 후보번호: {unmapped_1st}")
        if len(unmapped_2nd) > 0:
            print(f"경고: 2위에서 매핑되지 않은 후보번호: {unmapped_2nd}")

        # Count candidates per category; the four standard categories are
        # always present, starting at 0, so every region gets the same columns.
        all_categories = ['보수정당', '진보정당', '그외정당', '무소속']
        candidate_counts = {category: 0 for category in all_categories}
        for category in category_mapping.values():
            candidate_counts[category] = candidate_counts.get(category, 0) + 1

        print(f"카테고리별 후보자 수: {candidate_counts}")

        # One constant column per standard category.
        for category in all_categories:
            candidate_count = candidate_counts.get(category, 0)
            df1[f'{category}_후보자수'] = candidate_count
            print(f"  {category}_후보자수: {candidate_count}")

        # Keep only the join key and the derived columns for the merge.
        merge_columns = ['시도', '구시군', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
                         '득표_1위_정당', '득표_2위_정당'] + [f'{cat}_후보자수' for cat in all_categories]

        df1_for_merge = df1[merge_columns].copy()

        # Summary data is the left side, so every summary row is preserved.
        df2 = pd.read_csv(df2_url)
        print(f"요약 데이터 로드 완료: {df2.shape}")

        merged_df = pd.merge(df2, df1_for_merge, on=['시도', '구시군'], how='left')

        # Summary rows with no matching detailed row have NaN in the derived columns.
        missing_data = merged_df[merged_df['득표_1위_후보번호'].isna()]
        if len(missing_data) > 0:
            print(f"경고: 병합되지 않은 데이터가 {len(missing_data)}개 있습니다")
        else:
            print("모든 데이터가 성공적으로 병합되었습니다!")

        print(f"최종 데이터 형태: {merged_df.shape}")
        print("1위 정당 분포:")
        print(merged_df['득표_1위_정당'].value_counts())
        print(f"=== {region_name} 지사 선거 데이터 처리 완료 ===\n")

        return merged_df

    except Exception as e:
        # Deliberate best-effort boundary: report the failure and signal it
        # with None so the batch caller can skip this region.
        print(f"오류 발생: {e}")
        return None

def get_governor_category_mapping(region_name: str, vote_columns: list) -> Dict[int, str]:
    """
    Return the candidate-number -> party-category mapping for one region.

    The per-region tables are maintained by hand (2002 3rd local election).
    Only candidate numbers that actually appear in ``vote_columns`` are kept
    in the result. Diagnostic information is printed along the way.

    Args:
        region_name: Region slug (e.g. 'seoul'). Unknown regions fall back to
            a default table and print a warning.
        vote_columns: Vote-count column names; the candidate number is read
            from the '득표수_<number>_' prefix of each name.

    Returns:
        Mapping from candidate number to category, restricted to the
        candidates present in ``vote_columns``.
    """
    conservative = '보수정당'
    progressive = '진보정당'
    minor_party = '그외정당'
    independent = '무소속'

    # Hand-maintained tables, one per region (2002 3rd local election).
    mappings_by_region = {
        'seoul': {1: conservative, 2: progressive, 3: minor_party,
                  4: minor_party, 5: minor_party, 6: independent},
        'busan': {1: conservative, 2: progressive, 3: minor_party},
        'daegu': {1: conservative, 3: independent},
        'incheon': {1: conservative, 2: progressive, 3: minor_party,
                    4: minor_party, 5: minor_party},
        'gwangju': {1: conservative, 2: progressive, 3: minor_party,
                    4: independent, 5: independent, 6: independent},
        'daejeon': {1: conservative, 3: minor_party, 4: independent, 5: independent},
        'ulsan': {1: conservative, 3: minor_party, 4: minor_party},
        'gyeonggi': {1: conservative, 2: progressive, 3: minor_party},
        'gangwon': {1: conservative, 2: progressive},
        'chungbuk': {1: conservative, 3: minor_party, 4: independent},
        'chungnam': {1: conservative, 3: minor_party},
        'jeonbuk': {1: conservative, 2: progressive, 3: independent},
        'jeonnam': {1: conservative, 2: progressive, 3: independent,
                    4: independent, 5: independent},
        'gyeongbuk': {1: conservative, 3: independent},
        'gyeongnam': {1: conservative, 2: progressive, 3: minor_party},
        'jeju': {1: conservative, 2: progressive, 3: minor_party},
    }

    print(f"\n=== {region_name} 지사 선거 후보 정보 ===")
    print("실제 후보 컬럼들:")
    for column_name in vote_columns:
        print(f"  {column_name}")

    mapping = mappings_by_region.get(region_name)
    if mapping is None:
        # No table for this region: warn and use a placeholder that needs editing.
        print(f"경고: {region_name} 지역에 대한 매핑이 정의되지 않았습니다.")
        print("기본 매핑을 사용합니다. 수동으로 매핑을 추가해주세요.")
        mapping = {1: conservative, 2: progressive, 3: minor_party,
                   4: minor_party, 5: independent}

    # Candidate numbers that really occur in the data.
    number_pattern = re.compile(r'득표수_(\d+)_')
    hits = (number_pattern.search(column_name) for column_name in vote_columns)
    existing_candidates = {int(hit.group(1)) for hit in hits if hit}

    print(f"실제 존재하는 후보번호: {sorted(existing_candidates)}")

    # Drop table entries for candidates that are not in the data.
    filtered_mapping = {
        number: category
        for number, category in mapping.items()
        if number in existing_candidates
    }

    # Candidates in the data that the table does not cover.
    unmapped_candidates = existing_candidates.difference(mapping)
    if unmapped_candidates:
        print(f"경고: 매핑되지 않은 후보번호들: {sorted(unmapped_candidates)}")
        print("이 후보들은 '기타' 카테고리로 분류됩니다.")

    print(f"적용된 매핑: {filtered_mapping}")
    print("=" * 50)

    return filtered_mapping

def process_multiple_governor_elections(region_names: list) -> Dict[str, pd.DataFrame]:
    """
    Run the single-region pipeline for each region and collect the successes.

    Args:
        region_names (list): Region slugs to process, in order.

    Returns:
        Dict[str, pd.DataFrame]: Frames keyed as 'df_<region>' (e.g. 'df_busan').
        Regions whose processing returned None are reported and left out.
    """
    frames = {}

    for region in region_names:
        print(f"\n{'='*50}")
        frame = process_governor_election_data(region)

        # A None result means the region failed; report it and move on.
        if frame is None:
            print(f"{region} 지사 선거 데이터 처리 실패")
            continue

        key = f'df_{region}'
        frames[key] = frame
        print(f"데이터프레임 저장: {key} (shape: {frame.shape})")

    return frames

# Region slugs that have a hand-written table in get_governor_category_mapping.
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

# # 사용 예시
# if __name__ == "__main__":
#     # 방법 1: 특정 지역들만 처리
#     selected_regions = ['busan', 'seoul', 'gyeonggi', 'incheon']
#     governor_results = process_multiple_governor_elections(selected_regions)

#     # 개별 접근 예시:
#     # df_busan = governor_results['df_busan']
#     # df_seoul = governor_results['df_seoul']

#     # 방법 2: 모든 지역 일괄 처리
#     # all_governor_results = process_multiple_governor_elections(AVAILABLE_REGIONS)

#     print(f"\n사용 가능한 지역들: {AVAILABLE_REGIONS}")

# Preprocessing & Merge

In [2]:
# Region slugs to process in this run.
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

# Process every region; successful results are keyed as 'df_<region>'.
election_results = process_multiple_governor_elections(AVAILABLE_REGIONS)


=== seoul 지사 선거 데이터 처리 시작 ===
상세 데이터 URL: https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v1_g/3rd_2002/temp1_governor_seoul_3.csv
요약 데이터 URL: https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_1_g/3rd_2002/temp2_1_governor_seoul_3.csv
상세 데이터 로드 완료: (26, 13)
득표수 관련 컬럼 수: 6

=== seoul 지사 선거 후보 정보 ===
실제 후보 컬럼들:
  득표수_1_한나라당_이명박
  득표수_2_새천년민주당_김민석
  득표수_3_녹색평화당_임삼진
  득표수_4_민주노동당_이문옥
  득표수_5_사회당_원용수
  득표수_6_무소속_이경희
실제 존재하는 후보번호: [1, 2, 3, 4, 5, 6]
적용된 매핑: {1: '보수정당', 2: '진보정당', 3: '그외정당', 4: '그외정당', 5: '그외정당', 6: '무소속'}
생성된 카테고리 매핑: {1: '보수정당', 2: '진보정당', 3: '그외정당', 4: '그외정당', 5: '그외정당', 6: '무소속'}
매핑 후 1위 정당 분포 (처리 중):
득표_1위_정당
보수정당    25
진보정당     1
Name: count, dtype: int64
카테고리별 후보자 수: {'보수정당': 1, '진보정당': 1, '그외정당': 3, '무소속': 1}
  보수정당_후보자수: 1
  진보정당_후보자수: 1
  그외정당_후보자수: 3
  무소속_후보자수: 1
요약 데이터 로드 완료: (26, 10)
모든 데이터가 성공적으로 병합되었습니다!
최종 데이터 형태: (26, 20)
1위 정당 분포:
득표_1위_정당
보수정당    25
진보정당     1
Name: count, dtype: int64
=== s

# Governor Election 3rd

## Seoul

In [3]:
# Pull Seoul's merged frame out of the batch results.
df_seoul = election_results['df_seoul']

In [4]:
# Display the frame as returned by the pipeline.
df_seoul

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,서울특별시,합계,7665343,3510898,1819057,1496754,163294,3479105,31793,4154445,1,0.5229,2,0.4302,보수정당,진보정당,1,1,3,1
1,서울특별시,종로구,141157,72241,37592,30353,3506,71451,790,68916,1,0.5261,2,0.4248,보수정당,진보정당,1,1,3,1
2,서울특별시,중구,110996,56999,28220,25799,2283,56302,697,53997,1,0.5012,2,0.4582,보수정당,진보정당,1,1,3,1
3,서울특별시,용산구,186438,89215,47878,36435,4001,88314,901,97223,1,0.5421,2,0.4126,보수정당,진보정당,1,1,3,1
4,서울특별시,성동구,259275,122030,60188,54742,5884,120814,1216,137245,1,0.4982,2,0.4531,보수정당,진보정당,1,1,3,1
5,서울특별시,광진구,288387,128061,65409,56266,5346,127021,1040,160326,1,0.5149,2,0.443,보수정당,진보정당,1,1,3,1
6,서울특별시,동대문구,293982,139856,71954,59919,6574,138447,1409,154126,1,0.5197,2,0.4328,보수정당,진보정당,1,1,3,1
7,서울특별시,중랑구,328660,141503,72168,60936,6853,139957,1546,187157,1,0.5156,2,0.4354,보수정당,진보정당,1,1,3,1
8,서울특별시,성북구,344694,159058,78071,70160,9248,157479,1579,185636,1,0.4958,2,0.4455,보수정당,진보정당,1,1,3,1
9,서울특별시,강북구,272559,119468,56515,55030,6683,118228,1240,153091,1,0.478,2,0.4655,보수정당,진보정당,1,1,3,1


### preprocessing

In [5]:
# Shorten the province label: '서울특별시' becomes '서울'.
df_seoul = df_seoul.assign(시도=df_seoul['시도'].replace('서울특별시', '서울'))

In [6]:
# List the current column names ahead of the reordering step.
df_seoul.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [7]:
# Columns that lead the output, in their final order. '선거년도' and '선거종류'
# are not in the frame yet; they are added in the expression below.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수',
]

# Every remaining column follows, keeping its existing relative order.
other_cols = [name for name in df_seoul.columns if name not in fixed_cols]

# Tag election type and year, reorder the columns, then rename '시도' to '지역'.
df_seoul = df_seoul.assign(선거종류='광역단체장', 선거년도='2002').loc[
    :, fixed_cols + other_cols
].rename(columns={'시도': '지역'})

In [8]:
# Display the reordered and renamed frame.
df_seoul

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,서울,합계,2002,광역단체장,보수정당,진보정당,1,0.5229,2,0.4302,...,3,1,7665343,3510898,1819057,1496754,163294,3479105,31793,4154445
1,서울,종로구,2002,광역단체장,보수정당,진보정당,1,0.5261,2,0.4248,...,3,1,141157,72241,37592,30353,3506,71451,790,68916
2,서울,중구,2002,광역단체장,보수정당,진보정당,1,0.5012,2,0.4582,...,3,1,110996,56999,28220,25799,2283,56302,697,53997
3,서울,용산구,2002,광역단체장,보수정당,진보정당,1,0.5421,2,0.4126,...,3,1,186438,89215,47878,36435,4001,88314,901,97223
4,서울,성동구,2002,광역단체장,보수정당,진보정당,1,0.4982,2,0.4531,...,3,1,259275,122030,60188,54742,5884,120814,1216,137245
5,서울,광진구,2002,광역단체장,보수정당,진보정당,1,0.5149,2,0.443,...,3,1,288387,128061,65409,56266,5346,127021,1040,160326
6,서울,동대문구,2002,광역단체장,보수정당,진보정당,1,0.5197,2,0.4328,...,3,1,293982,139856,71954,59919,6574,138447,1409,154126
7,서울,중랑구,2002,광역단체장,보수정당,진보정당,1,0.5156,2,0.4354,...,3,1,328660,141503,72168,60936,6853,139957,1546,187157
8,서울,성북구,2002,광역단체장,보수정당,진보정당,1,0.4958,2,0.4455,...,3,1,344694,159058,78071,70160,9248,157479,1579,185636
9,서울,강북구,2002,광역단체장,보수정당,진보정당,1,0.478,2,0.4655,...,3,1,272559,119468,56515,55030,6683,118228,1240,153091


### v4.1 ~ v4.3

In [9]:
# 1. Save every row.
df_seoul.to_csv("temp4_1_governor_seoul_3.csv", index=False, encoding="utf-8-sig")

# 2. Save only the rows whose '구시군' is not '합계'.
df_seoul2 = df_seoul[df_seoul['구시군'] != '합계']
df_seoul2.to_csv("temp4_2_governor_seoul_3.csv", index=False, encoding="utf-8-sig")

# 3. Save only the '합계' rows, without the '구시군' column.
df_seoul3 = df_seoul[df_seoul['구시군'] == '합계'].drop(columns="구시군")
df_seoul3.to_csv("temp4_3_governor_seoul_3.csv", index=False, encoding="utf-8-sig")

## Busan

In [10]:
# Pull Busan's merged frame out of the batch results.
df_busan = election_results['df_busan']

In [11]:
# Display the frame as returned by the pipeline.
df_busan

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,부산광역시,합계,2784721,1163040,729589,221938,192594,1144121,18919,1621681,1,0.6377,2,0.194,보수정당,진보정당,1,1,1,0
1,부산광역시,중구,45490,23688,15669,4039,3487,23195,493,21802,1,0.6755,2,0.1741,보수정당,진보정당,1,1,1,0
2,부산광역시,서구,116858,47313,31336,8397,6594,46327,986,69545,1,0.6764,2,0.1813,보수정당,진보정당,1,1,1,0
3,부산광역시,동구,97322,44749,29853,7808,6247,43908,841,52573,1,0.6799,2,0.1778,보수정당,진보정당,1,1,1,0
4,부산광역시,영도구,136484,59273,37299,12417,8545,58261,1012,77211,1,0.6402,2,0.2131,보수정당,진보정당,1,1,1,0
5,부산광역시,부산진구,320729,152469,95573,28774,25497,149844,2625,168260,1,0.6378,2,0.192,보수정당,진보정당,1,1,1,0
6,부산광역시,동래구,219090,92495,59940,15482,15783,91205,1290,126595,1,0.6572,3,0.173,보수정당,그외정당,1,1,1,0
7,부산광역시,남구,230494,99050,62976,17767,16906,97649,1401,131444,1,0.6449,2,0.1819,보수정당,진보정당,1,1,1,0
8,부산광역시,북구,221411,82584,49151,18109,14128,81388,1196,138827,1,0.6039,2,0.2225,보수정당,진보정당,1,1,1,0
9,부산광역시,해운대구,289845,114749,69300,22401,21305,113006,1743,175096,1,0.6132,2,0.1982,보수정당,진보정당,1,1,1,0


### preprocessing

In [12]:
# Shorten the province label: '부산광역시' becomes '부산'.
df_busan = df_busan.assign(시도=df_busan['시도'].replace('부산광역시', '부산'))

In [13]:
# List the current column names ahead of the reordering step.
df_busan.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [14]:
# Columns that lead the output, in their final order. '선거년도' and '선거종류'
# are not in the frame yet; they are added in the expression below.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수',
]

# Every remaining column follows, keeping its existing relative order.
other_cols = [name for name in df_busan.columns if name not in fixed_cols]

# Tag election type and year, reorder the columns, then rename '시도' to '지역'.
df_busan = df_busan.assign(선거종류='광역단체장', 선거년도='2002').loc[
    :, fixed_cols + other_cols
].rename(columns={'시도': '지역'})

In [15]:
# Display the reordered and renamed frame.
df_busan

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,부산,합계,2002,광역단체장,보수정당,진보정당,1,0.6377,2,0.194,...,1,0,2784721,1163040,729589,221938,192594,1144121,18919,1621681
1,부산,중구,2002,광역단체장,보수정당,진보정당,1,0.6755,2,0.1741,...,1,0,45490,23688,15669,4039,3487,23195,493,21802
2,부산,서구,2002,광역단체장,보수정당,진보정당,1,0.6764,2,0.1813,...,1,0,116858,47313,31336,8397,6594,46327,986,69545
3,부산,동구,2002,광역단체장,보수정당,진보정당,1,0.6799,2,0.1778,...,1,0,97322,44749,29853,7808,6247,43908,841,52573
4,부산,영도구,2002,광역단체장,보수정당,진보정당,1,0.6402,2,0.2131,...,1,0,136484,59273,37299,12417,8545,58261,1012,77211
5,부산,부산진구,2002,광역단체장,보수정당,진보정당,1,0.6378,2,0.192,...,1,0,320729,152469,95573,28774,25497,149844,2625,168260
6,부산,동래구,2002,광역단체장,보수정당,그외정당,1,0.6572,3,0.173,...,1,0,219090,92495,59940,15482,15783,91205,1290,126595
7,부산,남구,2002,광역단체장,보수정당,진보정당,1,0.6449,2,0.1819,...,1,0,230494,99050,62976,17767,16906,97649,1401,131444
8,부산,북구,2002,광역단체장,보수정당,진보정당,1,0.6039,2,0.2225,...,1,0,221411,82584,49151,18109,14128,81388,1196,138827
9,부산,해운대구,2002,광역단체장,보수정당,진보정당,1,0.6132,2,0.1982,...,1,0,289845,114749,69300,22401,21305,113006,1743,175096


### v4.1 ~ v4.3

In [16]:
# 1. Save every row.
df_busan.to_csv("temp4_1_governor_busan_3.csv", index=False, encoding="utf-8-sig")

# 2. Save only the rows whose '구시군' is not '합계'.
df_busan2 = df_busan[df_busan['구시군'] != '합계']
df_busan2.to_csv("temp4_2_governor_busan_3.csv", index=False, encoding="utf-8-sig")

# 3. Save only the '합계' rows, without the '구시군' column.
df_busan3 = df_busan[df_busan['구시군'] == '합계'].drop(columns="구시군")
df_busan3.to_csv("temp4_3_governor_busan_3.csv", index=False, encoding="utf-8-sig")

## Daegu

In [17]:
# Pull Daegu's merged frame out of the batch results.
df_daegu = election_results['df_daegu']

In [18]:
# Display the frame as returned by the pipeline.
df_daegu

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,대구광역시,합계,1814278,751994,452943,0,287396,740339,11655,1062284,1,0.6118,3,0.3882,보수정당,무소속,1,0,0,1
1,대구광역시,중구,70409,34423,20807,0,13020,33827,596,35986,1,0.6151,3,0.3849,보수정당,무소속,1,0,0,1
2,대구광역시,동구,250562,105543,66098,0,37617,103715,1828,145019,1,0.6373,3,0.3627,보수정당,무소속,1,0,0,1
3,대구광역시,서구,206981,84479,53305,0,29549,82854,1625,122502,1,0.6434,3,0.3566,보수정당,무소속,1,0,0,1
4,대구광역시,남구,148146,64865,31249,0,32866,64115,750,83281,3,0.5126,1,0.4874,무소속,보수정당,1,0,0,1
5,대구광역시,북구,294071,117993,74706,0,41585,116291,1702,176078,1,0.6424,3,0.3576,보수정당,무소속,1,0,0,1
6,대구광역시,수성구,320476,134338,81824,0,50820,132644,1694,186138,1,0.6169,3,0.3831,보수정당,무소속,1,0,0,1
7,대구광역시,달서구,414225,155487,91557,0,61725,153282,2205,258738,1,0.5973,3,0.4027,보수정당,무소속,1,0,0,1
8,대구광역시,달성군,109408,54866,33397,0,20214,53611,1255,54542,1,0.623,3,0.377,보수정당,무소속,1,0,0,1


### preprocessing

In [19]:
# Shorten the province label: '대구광역시' becomes '대구'.
df_daegu = df_daegu.assign(시도=df_daegu['시도'].replace('대구광역시', '대구'))

In [20]:
# List the current column names ahead of the reordering step.
df_daegu.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [21]:
# Columns that lead the output, in their final order. '선거년도' and '선거종류'
# are not in the frame yet; they are added in the expression below.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수',
]

# Every remaining column follows, keeping its existing relative order.
other_cols = [name for name in df_daegu.columns if name not in fixed_cols]

# Tag election type and year, reorder the columns, then rename '시도' to '지역'.
df_daegu = df_daegu.assign(선거종류='광역단체장', 선거년도='2002').loc[
    :, fixed_cols + other_cols
].rename(columns={'시도': '지역'})

In [22]:
# Display the reordered and renamed frame.
df_daegu

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,대구,합계,2002,광역단체장,보수정당,무소속,1,0.6118,3,0.3882,...,0,1,1814278,751994,452943,0,287396,740339,11655,1062284
1,대구,중구,2002,광역단체장,보수정당,무소속,1,0.6151,3,0.3849,...,0,1,70409,34423,20807,0,13020,33827,596,35986
2,대구,동구,2002,광역단체장,보수정당,무소속,1,0.6373,3,0.3627,...,0,1,250562,105543,66098,0,37617,103715,1828,145019
3,대구,서구,2002,광역단체장,보수정당,무소속,1,0.6434,3,0.3566,...,0,1,206981,84479,53305,0,29549,82854,1625,122502
4,대구,남구,2002,광역단체장,무소속,보수정당,3,0.5126,1,0.4874,...,0,1,148146,64865,31249,0,32866,64115,750,83281
5,대구,북구,2002,광역단체장,보수정당,무소속,1,0.6424,3,0.3576,...,0,1,294071,117993,74706,0,41585,116291,1702,176078
6,대구,수성구,2002,광역단체장,보수정당,무소속,1,0.6169,3,0.3831,...,0,1,320476,134338,81824,0,50820,132644,1694,186138
7,대구,달서구,2002,광역단체장,보수정당,무소속,1,0.5973,3,0.4027,...,0,1,414225,155487,91557,0,61725,153282,2205,258738
8,대구,달성군,2002,광역단체장,보수정당,무소속,1,0.623,3,0.377,...,0,1,109408,54866,33397,0,20214,53611,1255,54542


### v4.1 ~ v4.3

In [23]:
# 1. Save every row.
df_daegu.to_csv("temp4_1_governor_daegu_3.csv", index=False, encoding="utf-8-sig")

# 2. Save only the rows whose '구시군' is not '합계'.
df_daegu2 = df_daegu[df_daegu['구시군'] != '합계']
df_daegu2.to_csv("temp4_2_governor_daegu_3.csv", index=False, encoding="utf-8-sig")

# 3. Save only the '합계' rows, without the '구시군' column.
df_daegu3 = df_daegu[df_daegu['구시군'] == '합계'].drop(columns="구시군")
df_daegu3.to_csv("temp4_3_governor_daegu_3.csv", index=False, encoding="utf-8-sig")

## Incheon

In [24]:
# Pull Incheon's merged frame out of the batch results.
df_incheon = election_results['df_incheon']

In [25]:
# Display the frame as returned by the pipeline.
df_incheon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,인천광역시,합계,1809907,711602,393932,225210,82111,701253,10349,1098305,1,0.5618,2,0.3212,보수정당,진보정당,1,1,3,0
1,인천광역시,중구,56476,27197,15627,8295,2740,26662,535,29279,1,0.5861,2,0.3111,보수정당,진보정당,1,1,3,0
2,인천광역시,동구,57370,28452,16071,8484,3261,27816,636,28918,1,0.5778,2,0.305,보수정당,진보정당,1,1,3,0
3,인천광역시,남구,317337,124830,71928,37404,13645,122977,1853,192507,1,0.5849,2,0.3042,보수정당,진보정당,1,1,3,0
4,인천광역시,연수구,174043,67745,40332,19621,6982,66935,810,106298,1,0.6026,2,0.2931,보수정당,진보정당,1,1,3,0
5,인천광역시,남동구,286397,105918,56823,33644,14082,104549,1369,180479,1,0.5435,2,0.3218,보수정당,진보정당,1,1,3,0
6,인천광역시,부평구,391526,148406,78925,47685,19995,146605,1801,243120,1,0.5384,2,0.3253,보수정당,진보정당,1,1,3,0
7,인천광역시,계양구,226315,79673,45748,25930,7157,78835,838,146642,1,0.5803,2,0.3289,보수정당,진보정당,1,1,3,0
8,인천광역시,서구,237302,85958,46138,27657,10928,84723,1235,151344,1,0.5446,2,0.3264,보수정당,진보정당,1,1,3,0
9,인천광역시,강화군,51913,34626,17134,14150,2370,33654,972,17287,1,0.5091,2,0.4205,보수정당,진보정당,1,1,3,0


### preprocessing

In [26]:
# Shorten the province label: '인천광역시' becomes '인천'.
df_incheon = df_incheon.assign(시도=df_incheon['시도'].replace('인천광역시', '인천'))

In [27]:
# List the current column names ahead of the reordering step.
df_incheon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [28]:
# Columns that lead the output, in their final order. '선거년도' and '선거종류'
# are not in the frame yet; they are added in the expression below.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수',
]

# Every remaining column follows, keeping its existing relative order.
other_cols = [name for name in df_incheon.columns if name not in fixed_cols]

# Tag election type and year, reorder the columns, then rename '시도' to '지역'.
df_incheon = df_incheon.assign(선거종류='광역단체장', 선거년도='2002').loc[
    :, fixed_cols + other_cols
].rename(columns={'시도': '지역'})

In [29]:
# Display the reordered and renamed frame.
df_incheon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,인천,합계,2002,광역단체장,보수정당,진보정당,1,0.5618,2,0.3212,...,3,0,1809907,711602,393932,225210,82111,701253,10349,1098305
1,인천,중구,2002,광역단체장,보수정당,진보정당,1,0.5861,2,0.3111,...,3,0,56476,27197,15627,8295,2740,26662,535,29279
2,인천,동구,2002,광역단체장,보수정당,진보정당,1,0.5778,2,0.305,...,3,0,57370,28452,16071,8484,3261,27816,636,28918
3,인천,남구,2002,광역단체장,보수정당,진보정당,1,0.5849,2,0.3042,...,3,0,317337,124830,71928,37404,13645,122977,1853,192507
4,인천,연수구,2002,광역단체장,보수정당,진보정당,1,0.6026,2,0.2931,...,3,0,174043,67745,40332,19621,6982,66935,810,106298
5,인천,남동구,2002,광역단체장,보수정당,진보정당,1,0.5435,2,0.3218,...,3,0,286397,105918,56823,33644,14082,104549,1369,180479
6,인천,부평구,2002,광역단체장,보수정당,진보정당,1,0.5384,2,0.3253,...,3,0,391526,148406,78925,47685,19995,146605,1801,243120
7,인천,계양구,2002,광역단체장,보수정당,진보정당,1,0.5803,2,0.3289,...,3,0,226315,79673,45748,25930,7157,78835,838,146642
8,인천,서구,2002,광역단체장,보수정당,진보정당,1,0.5446,2,0.3264,...,3,0,237302,85958,46138,27657,10928,84723,1235,151344
9,인천,강화군,2002,광역단체장,보수정당,진보정당,1,0.5091,2,0.4205,...,3,0,51913,34626,17134,14150,2370,33654,972,17287


### v4.1 ~ v4.3

In [30]:
# 1. Save every row.
df_incheon.to_csv("temp4_1_governor_incheon_3.csv", index=False, encoding="utf-8-sig")

# 2. Save only the rows whose '구시군' is not '합계'.
df_incheon2 = df_incheon[df_incheon['구시군'] != '합계']
df_incheon2.to_csv("temp4_2_governor_incheon_3.csv", index=False, encoding="utf-8-sig")

# 3. Save only the '합계' rows, without the '구시군' column.
df_incheon3 = df_incheon[df_incheon['구시군'] == '합계'].drop(columns="구시군")
df_incheon3.to_csv("temp4_3_governor_incheon_3.csv", index=False, encoding="utf-8-sig")

## Gwangju

In [31]:
# Pull Gwangju's merged frame out of the batch results.
df_gwangju = election_results['df_gwangju']

In [32]:
# Display the frame as returned by the pipeline.
df_gwangju

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,광주광역시,합계,954481,403821,43695,185938,167515,397148,6673,550660,2,0.4682,5,0.2705,진보정당,무소속,1,1,1,3
1,광주광역시,동구,89770,42924,6436,20363,15353,42152,772,46846,2,0.4831,5,0.2241,진보정당,무소속,1,1,1,3
2,광주광역시,서구,204779,85482,10740,39589,33997,84326,1156,119297,2,0.4695,5,0.2661,진보정당,무소속,1,1,1,3
3,광주광역시,남구,159676,71438,7523,34966,27833,70322,1116,88238,2,0.4972,5,0.2681,진보정당,무소속,1,1,1,3
4,광주광역시,북구,324452,128528,12760,55377,58527,126664,1864,195924,2,0.4372,5,0.3054,진보정당,무소속,1,1,1,3
5,광주광역시,광산구,175804,75449,6236,35643,31805,73684,1765,100355,2,0.4837,5,0.2443,진보정당,무소속,1,1,1,3


### preprocessing

In [33]:
# Shorten the province label: '광주광역시' becomes '광주'.
df_gwangju = df_gwangju.assign(시도=df_gwangju['시도'].replace('광주광역시', '광주'))

In [34]:
# List the current column names ahead of the reordering step.
df_gwangju.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [35]:
# Columns that lead the output, in their final order. '선거년도' and '선거종류'
# are not in the frame yet; they are added in the expression below.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수',
]

# Every remaining column follows, keeping its existing relative order.
other_cols = [name for name in df_gwangju.columns if name not in fixed_cols]

# Tag election type and year, reorder the columns, then rename '시도' to '지역'.
df_gwangju = df_gwangju.assign(선거종류='광역단체장', 선거년도='2002').loc[
    :, fixed_cols + other_cols
].rename(columns={'시도': '지역'})

In [36]:
# Display the reordered and renamed frame.
df_gwangju

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,광주,합계,2002,광역단체장,진보정당,무소속,2,0.4682,5,0.2705,...,1,3,954481,403821,43695,185938,167515,397148,6673,550660
1,광주,동구,2002,광역단체장,진보정당,무소속,2,0.4831,5,0.2241,...,1,3,89770,42924,6436,20363,15353,42152,772,46846
2,광주,서구,2002,광역단체장,진보정당,무소속,2,0.4695,5,0.2661,...,1,3,204779,85482,10740,39589,33997,84326,1156,119297
3,광주,남구,2002,광역단체장,진보정당,무소속,2,0.4972,5,0.2681,...,1,3,159676,71438,7523,34966,27833,70322,1116,88238
4,광주,북구,2002,광역단체장,진보정당,무소속,2,0.4372,5,0.3054,...,1,3,324452,128528,12760,55377,58527,126664,1864,195924
5,광주,광산구,2002,광역단체장,진보정당,무소속,2,0.4837,5,0.2443,...,1,3,175804,75449,6236,35643,31805,73684,1765,100355


### v4.1 ~ v4.3

In [37]:
# 1. Save every row.
df_gwangju.to_csv("temp4_1_governor_gwangju_3.csv", index=False, encoding="utf-8-sig")

# 2. Save only the rows whose '구시군' is not '합계'.
df_gwangju2 = df_gwangju[df_gwangju['구시군'] != '합계']
df_gwangju2.to_csv("temp4_2_governor_gwangju_3.csv", index=False, encoding="utf-8-sig")

# 3. Save only the '합계' rows, without the '구시군' column.
df_gwangju3 = df_gwangju[df_gwangju['구시군'] == '합계'].drop(columns="구시군")
df_gwangju3.to_csv("temp4_3_governor_gwangju_3.csv", index=False, encoding="utf-8-sig")

## Daejeon

In [38]:
# Pull Daejeon's merged frame out of the batch results.
df_daejeon = election_results['df_daejeon']

In [39]:
# Display the frame as returned by the pipeline.
df_daejeon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,대전광역시,합계,987180,417809,191832,0,219651,411483,6326,569371,1,0.4662,3,0.402,보수정당,그외정당,1,0,1,2
1,대전광역시,동구,181210,76220,32969,0,41724,74693,1527,104990,1,0.4414,3,0.4289,보수정당,그외정당,1,0,1,2
2,대전광역시,중구,195396,83818,37980,0,44530,82510,1308,111578,1,0.4603,3,0.4082,보수정당,그외정당,1,0,1,2
3,대전광역시,서구,333834,140783,67939,0,71268,139207,1576,193051,1,0.488,3,0.3838,보수정당,그외정당,1,0,1,2
4,대전광역시,유성구,119056,52548,24855,0,26857,51712,836,66508,1,0.4806,3,0.3799,보수정당,그외정당,1,0,1,2
5,대전광역시,대덕구,157684,64440,28089,0,35272,63361,1079,93244,1,0.4433,3,0.4204,보수정당,그외정당,1,0,1,2


### preprocessing

In [40]:
# Shorten the province label: '대전광역시' becomes '대전'.
df_daejeon = df_daejeon.assign(시도=df_daejeon['시도'].replace('대전광역시', '대전'))

In [41]:
# List the current column names ahead of the reordering step.
df_daejeon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [42]:
# Columns that lead the output, in their final order. '선거년도' and '선거종류'
# are not in the frame yet; they are added in the expression below.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수',
]

# Every remaining column follows, keeping its existing relative order.
other_cols = [name for name in df_daejeon.columns if name not in fixed_cols]

# Tag election type and year, reorder the columns, then rename '시도' to '지역'.
df_daejeon = df_daejeon.assign(선거종류='광역단체장', 선거년도='2002').loc[
    :, fixed_cols + other_cols
].rename(columns={'시도': '지역'})

In [43]:
df_daejeon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,대전,합계,2002,광역단체장,보수정당,그외정당,1,0.4662,3,0.402,...,1,2,987180,417809,191832,0,219651,411483,6326,569371
1,대전,동구,2002,광역단체장,보수정당,그외정당,1,0.4414,3,0.4289,...,1,2,181210,76220,32969,0,41724,74693,1527,104990
2,대전,중구,2002,광역단체장,보수정당,그외정당,1,0.4603,3,0.4082,...,1,2,195396,83818,37980,0,44530,82510,1308,111578
3,대전,서구,2002,광역단체장,보수정당,그외정당,1,0.488,3,0.3838,...,1,2,333834,140783,67939,0,71268,139207,1576,193051
4,대전,유성구,2002,광역단체장,보수정당,그외정당,1,0.4806,3,0.3799,...,1,2,119056,52548,24855,0,26857,51712,836,66508
5,대전,대덕구,2002,광역단체장,보수정당,그외정당,1,0.4433,3,0.4204,...,1,2,157684,64440,28089,0,35272,63361,1079,93244


### v4.1 ~ v4.3

In [44]:
# 1. Write every row of the table.
df_daejeon.to_csv("temp4_1_governor_daejeon_3.csv", encoding="utf-8-sig", index=False)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_daejeon2 = df_daejeon.loc[df_daejeon["구시군"] != "합계"]
df_daejeon2.to_csv("temp4_2_governor_daejeon_3.csv", encoding="utf-8-sig", index=False)

# 3. Keep only the '합계' row(s), drop the '구시군' column, then write them.
df_daejeon3 = df_daejeon.loc[df_daejeon["구시군"] == "합계"].drop("구시군", axis=1)
df_daejeon3.to_csv("temp4_3_governor_daejeon_3.csv", encoding="utf-8-sig", index=False)

## Ulsan

In [45]:
# Fetch the Ulsan frame stored under the 'df_ulsan' key of election_results
# (election_results is defined outside this section of the notebook).
df_ulsan = election_results['df_ulsan']

In [46]:
df_ulsan

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,울산광역시,합계,722806,378121,197772,0,174875,372647,5474,344685,1,0.5307,3,0.4362,보수정당,그외정당,1,0,2,0
1,울산광역시,중구,162972,81207,45410,0,34682,80092,1115,81765,1,0.567,3,0.4066,보수정당,그외정당,1,0,2,0
2,울산광역시,남구,234702,108730,63805,0,43804,107609,1121,125972,1,0.5929,3,0.3792,보수정당,그외정당,1,0,2,0
3,울산광역시,동구,125305,70935,28200,0,41764,69964,971,54370,3,0.5584,1,0.4031,그외정당,보수정당,1,0,2,0
4,울산광역시,북구,79943,47783,20174,0,27017,47191,592,32160,3,0.5423,1,0.4275,그외정당,보수정당,1,0,2,0
5,울산광역시,울주군,119884,69466,40183,0,27608,67791,1675,50418,1,0.5927,3,0.3615,보수정당,그외정당,1,0,2,0


### preprocessing

In [47]:
# Abbreviate the province label: exact-match '울산광역시' values in '시도' become '울산'.
# assign() builds a new frame, which is then bound back to df_ulsan.
df_ulsan = df_ulsan.assign(
    시도=df_ulsan['시도'].replace(to_replace='울산광역시', value='울산')
)

In [48]:
df_ulsan.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [49]:
# Leading columns of the final layout, in output order. '선거년도' and '선거종류'
# are not in the frame yet; the assign() call below creates them.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수',
]

# Every column not named above, kept in its current relative order.
other_cols = df_ulsan.columns[~df_ulsan.columns.isin(fixed_cols)].tolist()

# Add the constant election metadata (the year is stored as the string '2002'),
# apply the column order, then rename '시도' to '지역'.
df_ulsan = (
    df_ulsan.assign(선거년도='2002', 선거종류='광역단체장')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [50]:
df_ulsan

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,울산,합계,2002,광역단체장,보수정당,그외정당,1,0.5307,3,0.4362,...,2,0,722806,378121,197772,0,174875,372647,5474,344685
1,울산,중구,2002,광역단체장,보수정당,그외정당,1,0.567,3,0.4066,...,2,0,162972,81207,45410,0,34682,80092,1115,81765
2,울산,남구,2002,광역단체장,보수정당,그외정당,1,0.5929,3,0.3792,...,2,0,234702,108730,63805,0,43804,107609,1121,125972
3,울산,동구,2002,광역단체장,그외정당,보수정당,3,0.5584,1,0.4031,...,2,0,125305,70935,28200,0,41764,69964,971,54370
4,울산,북구,2002,광역단체장,그외정당,보수정당,3,0.5423,1,0.4275,...,2,0,79943,47783,20174,0,27017,47191,592,32160
5,울산,울주군,2002,광역단체장,보수정당,그외정당,1,0.5927,3,0.3615,...,2,0,119884,69466,40183,0,27608,67791,1675,50418


### v4.1 ~ v4.3

In [51]:
# 1. Write every row of the table.
df_ulsan.to_csv("temp4_1_governor_ulsan_3.csv", encoding="utf-8-sig", index=False)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_ulsan2 = df_ulsan.loc[df_ulsan["구시군"] != "합계"]
df_ulsan2.to_csv("temp4_2_governor_ulsan_3.csv", encoding="utf-8-sig", index=False)

# 3. Keep only the '합계' row(s), drop the '구시군' column, then write them.
df_ulsan3 = df_ulsan.loc[df_ulsan["구시군"] == "합계"].drop("구시군", axis=1)
df_ulsan3.to_csv("temp4_3_governor_ulsan_3.csv", encoding="utf-8-sig", index=False)

## Gyeonggi

In [52]:
# Fetch the Gyeonggi frame stored under the 'df_gyeonggi' key of election_results
# (election_results is defined outside this section of the notebook).
df_gyeonggi = election_results['df_gyeonggi']

In [53]:
df_gyeonggi

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경기도,합계,6777575,3024844,1744291,1075243,168357,2987891,36953,3752731,1,0.5838,2,0.3599,보수정당,진보정당,1,1,1,0
1,경기도,수원시장안구,233829,99253,59603,33422,5251,98276,977,134576,1,0.6065,2,0.3401,보수정당,진보정당,1,1,1,0
2,경기도,수원시권선구,225950,92598,55604,31178,4856,91638,960,133352,1,0.6068,2,0.3402,보수정당,진보정당,1,1,1,0
3,경기도,수원시팔달구,224122,88058,53550,29845,3859,87254,804,136064,1,0.6137,2,0.342,보수정당,진보정당,1,1,1,0
4,경기도,성남시수정구,192566,78431,37715,34523,5176,77414,1017,114135,1,0.4872,2,0.446,보수정당,진보정당,1,1,1,0
5,경기도,성남시중원구,199094,76103,36501,33577,4923,75001,1102,122991,1,0.4867,2,0.4477,보수정당,진보정당,1,1,1,0
6,경기도,성남시분당구,281390,135540,86423,44447,3846,134716,824,145850,1,0.6415,2,0.3299,보수정당,진보정당,1,1,1,0
7,경기도,의정부시,260157,110516,61852,40860,6429,109141,1375,149641,1,0.5667,2,0.3744,보수정당,진보정당,1,1,1,0
8,경기도,안양시만안구,191427,78621,44315,29802,3736,77853,768,112806,1,0.5692,2,0.3828,보수정당,진보정당,1,1,1,0
9,경기도,안양시동안구,224569,104102,60990,38365,3936,103291,811,120467,1,0.5905,2,0.3714,보수정당,진보정당,1,1,1,0


### preprocessing

In [54]:
# Abbreviate the province label: exact-match '경기도' values in '시도' become '경기'.
# assign() builds a new frame, which is then bound back to df_gyeonggi.
df_gyeonggi = df_gyeonggi.assign(
    시도=df_gyeonggi['시도'].replace(to_replace='경기도', value='경기')
)

In [55]:
df_gyeonggi.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [56]:
# Leading columns of the final layout, in output order. '선거년도' and '선거종류'
# are not in the frame yet; the assign() call below creates them.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수',
]

# Every column not named above, kept in its current relative order.
other_cols = df_gyeonggi.columns[~df_gyeonggi.columns.isin(fixed_cols)].tolist()

# Add the constant election metadata (the year is stored as the string '2002'),
# apply the column order, then rename '시도' to '지역'.
df_gyeonggi = (
    df_gyeonggi.assign(선거년도='2002', 선거종류='광역단체장')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [57]:
df_gyeonggi

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,경기,합계,2002,광역단체장,보수정당,진보정당,1,0.5838,2,0.3599,...,1,0,6777575,3024844,1744291,1075243,168357,2987891,36953,3752731
1,경기,수원시장안구,2002,광역단체장,보수정당,진보정당,1,0.6065,2,0.3401,...,1,0,233829,99253,59603,33422,5251,98276,977,134576
2,경기,수원시권선구,2002,광역단체장,보수정당,진보정당,1,0.6068,2,0.3402,...,1,0,225950,92598,55604,31178,4856,91638,960,133352
3,경기,수원시팔달구,2002,광역단체장,보수정당,진보정당,1,0.6137,2,0.342,...,1,0,224122,88058,53550,29845,3859,87254,804,136064
4,경기,성남시수정구,2002,광역단체장,보수정당,진보정당,1,0.4872,2,0.446,...,1,0,192566,78431,37715,34523,5176,77414,1017,114135
5,경기,성남시중원구,2002,광역단체장,보수정당,진보정당,1,0.4867,2,0.4477,...,1,0,199094,76103,36501,33577,4923,75001,1102,122991
6,경기,성남시분당구,2002,광역단체장,보수정당,진보정당,1,0.6415,2,0.3299,...,1,0,281390,135540,86423,44447,3846,134716,824,145850
7,경기,의정부시,2002,광역단체장,보수정당,진보정당,1,0.5667,2,0.3744,...,1,0,260157,110516,61852,40860,6429,109141,1375,149641
8,경기,안양시만안구,2002,광역단체장,보수정당,진보정당,1,0.5692,2,0.3828,...,1,0,191427,78621,44315,29802,3736,77853,768,112806
9,경기,안양시동안구,2002,광역단체장,보수정당,진보정당,1,0.5905,2,0.3714,...,1,0,224569,104102,60990,38365,3936,103291,811,120467


### v4.1 ~ v4.3

In [58]:
# 1. Write every row of the table.
df_gyeonggi.to_csv("temp4_1_governor_gyeonggi_3.csv", encoding="utf-8-sig", index=False)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_gyeonggi2 = df_gyeonggi.loc[df_gyeonggi["구시군"] != "합계"]
df_gyeonggi2.to_csv("temp4_2_governor_gyeonggi_3.csv", encoding="utf-8-sig", index=False)

# 3. Keep only the '합계' row(s), drop the '구시군' column, then write them.
df_gyeonggi3 = df_gyeonggi.loc[df_gyeonggi["구시군"] == "합계"].drop("구시군", axis=1)
df_gyeonggi3.to_csv("temp4_3_governor_gyeonggi_3.csv", encoding="utf-8-sig", index=False)

## Gangwon

In [59]:
# Fetch the Gangwon frame stored under the 'df_gangwon' key of election_results
# (election_results is defined outside this section of the notebook).
df_gangwon = election_results['df_gangwon']

In [60]:
df_gangwon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,강원도,합계,1129859,671242,468987,190451,0,659438,11804,458617,1,0.7112,2,0.2888,보수정당,진보정당,1,1,0,0
1,강원도,춘천시,181938,95277,52497,41426,0,93923,1354,86661,1,0.5589,2,0.4411,보수정당,진보정당,1,1,0,0
2,강원도,원주시,194677,97205,62405,32931,0,95336,1869,97472,1,0.6546,2,0.3454,보수정당,진보정당,1,1,0,0
3,강원도,강릉시,169090,91290,72430,17571,0,90001,1289,77800,1,0.8048,2,0.1952,보수정당,진보정당,1,1,0,0
4,강원도,동해시,74930,42530,35744,6107,0,41851,679,32400,1,0.8541,2,0.1459,보수정당,진보정당,1,1,0,0
5,강원도,삼척시,59541,41665,33307,7553,0,40860,805,17876,1,0.8151,2,0.1849,보수정당,진보정당,1,1,0,0
6,강원도,태백시,41864,25876,20400,5075,0,25475,401,15988,1,0.8008,2,0.1992,보수정당,진보정당,1,1,0,0
7,강원도,정선군,37031,26016,19524,5873,0,25397,619,11015,1,0.7688,2,0.2312,보수정당,진보정당,1,1,0,0
8,강원도,속초시,64193,36439,26621,9175,0,35796,643,27754,1,0.7437,2,0.2563,보수정당,진보정당,1,1,0,0
9,강원도,고성군,26309,19349,14401,4490,0,18891,458,6960,1,0.7623,2,0.2377,보수정당,진보정당,1,1,0,0


### preprocessing

In [61]:
# Abbreviate the province label: exact-match '강원도' values in '시도' become '강원'.
# assign() builds a new frame, which is then bound back to df_gangwon.
df_gangwon = df_gangwon.assign(
    시도=df_gangwon['시도'].replace(to_replace='강원도', value='강원')
)

In [62]:
df_gangwon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [63]:
# Leading columns of the final layout, in output order. '선거년도' and '선거종류'
# are not in the frame yet; the assign() call below creates them.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수',
]

# Every column not named above, kept in its current relative order.
other_cols = df_gangwon.columns[~df_gangwon.columns.isin(fixed_cols)].tolist()

# Add the constant election metadata (the year is stored as the string '2002'),
# apply the column order, then rename '시도' to '지역'.
df_gangwon = (
    df_gangwon.assign(선거년도='2002', 선거종류='광역단체장')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [64]:
df_gangwon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,강원,합계,2002,광역단체장,보수정당,진보정당,1,0.7112,2,0.2888,...,0,0,1129859,671242,468987,190451,0,659438,11804,458617
1,강원,춘천시,2002,광역단체장,보수정당,진보정당,1,0.5589,2,0.4411,...,0,0,181938,95277,52497,41426,0,93923,1354,86661
2,강원,원주시,2002,광역단체장,보수정당,진보정당,1,0.6546,2,0.3454,...,0,0,194677,97205,62405,32931,0,95336,1869,97472
3,강원,강릉시,2002,광역단체장,보수정당,진보정당,1,0.8048,2,0.1952,...,0,0,169090,91290,72430,17571,0,90001,1289,77800
4,강원,동해시,2002,광역단체장,보수정당,진보정당,1,0.8541,2,0.1459,...,0,0,74930,42530,35744,6107,0,41851,679,32400
5,강원,삼척시,2002,광역단체장,보수정당,진보정당,1,0.8151,2,0.1849,...,0,0,59541,41665,33307,7553,0,40860,805,17876
6,강원,태백시,2002,광역단체장,보수정당,진보정당,1,0.8008,2,0.1992,...,0,0,41864,25876,20400,5075,0,25475,401,15988
7,강원,정선군,2002,광역단체장,보수정당,진보정당,1,0.7688,2,0.2312,...,0,0,37031,26016,19524,5873,0,25397,619,11015
8,강원,속초시,2002,광역단체장,보수정당,진보정당,1,0.7437,2,0.2563,...,0,0,64193,36439,26621,9175,0,35796,643,27754
9,강원,고성군,2002,광역단체장,보수정당,진보정당,1,0.7623,2,0.2377,...,0,0,26309,19349,14401,4490,0,18891,458,6960


### v4.1 ~ v4.3

In [65]:
# 1. Write every row of the table.
df_gangwon.to_csv("temp4_1_governor_gangwon_3.csv", encoding="utf-8-sig", index=False)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_gangwon2 = df_gangwon.loc[df_gangwon["구시군"] != "합계"]
df_gangwon2.to_csv("temp4_2_governor_gangwon_3.csv", encoding="utf-8-sig", index=False)

# 3. Keep only the '합계' row(s), drop the '구시군' column, then write them.
df_gangwon3 = df_gangwon.loc[df_gangwon["구시군"] == "합계"].drop("구시군", axis=1)
df_gangwon3.to_csv("temp4_3_governor_gangwon_3.csv", encoding="utf-8-sig", index=False)

## Chungbuk

In [66]:
# Fetch the Chungbuk frame stored under the 'df_chungbuk' key of election_results
# (election_results is defined outside this section of the notebook).
df_chungbuk = election_results['df_chungbuk']

In [67]:
df_chungbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,충청북도,합계,1076451,600550,343546,0,242712,586258,14292,475901,1,0.586,3,0.3348,보수정당,그외정당,1,0,1,1
1,충청북도,청주시상당구,163686,76240,46565,0,28568,75133,1107,87446,1,0.6198,3,0.2912,보수정당,그외정당,1,0,1,1
2,충청북도,청주시흥덕구,246912,109497,66724,0,41132,107856,1641,137415,1,0.6186,3,0.3013,보수정당,그외정당,1,0,1,1
3,충청북도,충주시,154380,83817,49029,0,32703,81732,2085,70563,1,0.5999,3,0.3271,보수정당,그외정당,1,0,1,1
4,충청북도,제천시,105294,62841,46428,0,15244,61672,1169,42453,1,0.7528,3,0.1908,보수정당,그외정당,1,0,1,1
5,충청북도,단양군,29210,20330,13104,0,6682,19786,544,8880,1,0.6623,3,0.2715,보수정당,그외정당,1,0,1,1
6,충청북도,청원군,92158,55498,28133,0,25811,53944,1554,36660,1,0.5215,3,0.3886,보수정당,그외정당,1,0,1,1
7,충청북도,영동군,43630,31206,18517,0,11740,30257,949,12424,1,0.612,3,0.2988,보수정당,그외정당,1,0,1,1
8,충청북도,보은군,32369,24757,8565,0,15443,24008,749,7612,3,0.578,1,0.3568,그외정당,보수정당,1,0,1,1
9,충청북도,옥천군,45404,31654,12864,0,17590,30454,1200,13750,3,0.4917,1,0.4224,그외정당,보수정당,1,0,1,1


### preprocessing

In [68]:
# Abbreviate the province label: exact-match '충청북도' values in '시도' become '충북'.
# assign() builds a new frame, which is then bound back to df_chungbuk.
df_chungbuk = df_chungbuk.assign(
    시도=df_chungbuk['시도'].replace(to_replace='충청북도', value='충북')
)

In [69]:
df_chungbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [70]:
# Leading columns of the final layout, in output order. '선거년도' and '선거종류'
# are not in the frame yet; the assign() call below creates them.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수',
]

# Every column not named above, kept in its current relative order.
other_cols = df_chungbuk.columns[~df_chungbuk.columns.isin(fixed_cols)].tolist()

# Add the constant election metadata (the year is stored as the string '2002'),
# apply the column order, then rename '시도' to '지역'.
df_chungbuk = (
    df_chungbuk.assign(선거년도='2002', 선거종류='광역단체장')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [71]:
df_chungbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,충북,합계,2002,광역단체장,보수정당,그외정당,1,0.586,3,0.3348,...,1,1,1076451,600550,343546,0,242712,586258,14292,475901
1,충북,청주시상당구,2002,광역단체장,보수정당,그외정당,1,0.6198,3,0.2912,...,1,1,163686,76240,46565,0,28568,75133,1107,87446
2,충북,청주시흥덕구,2002,광역단체장,보수정당,그외정당,1,0.6186,3,0.3013,...,1,1,246912,109497,66724,0,41132,107856,1641,137415
3,충북,충주시,2002,광역단체장,보수정당,그외정당,1,0.5999,3,0.3271,...,1,1,154380,83817,49029,0,32703,81732,2085,70563
4,충북,제천시,2002,광역단체장,보수정당,그외정당,1,0.7528,3,0.1908,...,1,1,105294,62841,46428,0,15244,61672,1169,42453
5,충북,단양군,2002,광역단체장,보수정당,그외정당,1,0.6623,3,0.2715,...,1,1,29210,20330,13104,0,6682,19786,544,8880
6,충북,청원군,2002,광역단체장,보수정당,그외정당,1,0.5215,3,0.3886,...,1,1,92158,55498,28133,0,25811,53944,1554,36660
7,충북,영동군,2002,광역단체장,보수정당,그외정당,1,0.612,3,0.2988,...,1,1,43630,31206,18517,0,11740,30257,949,12424
8,충북,보은군,2002,광역단체장,그외정당,보수정당,3,0.578,1,0.3568,...,1,1,32369,24757,8565,0,15443,24008,749,7612
9,충북,옥천군,2002,광역단체장,그외정당,보수정당,3,0.4917,1,0.4224,...,1,1,45404,31654,12864,0,17590,30454,1200,13750


### v4.1 ~ v4.3

In [72]:
# 1. Write every row of the table.
df_chungbuk.to_csv("temp4_1_governor_chungbuk_3.csv", encoding="utf-8-sig", index=False)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_chungbuk2 = df_chungbuk.loc[df_chungbuk["구시군"] != "합계"]
df_chungbuk2.to_csv("temp4_2_governor_chungbuk_3.csv", encoding="utf-8-sig", index=False)

# 3. Keep only the '합계' row(s), drop the '구시군' column, then write them.
df_chungbuk3 = df_chungbuk.loc[df_chungbuk["구시군"] == "합계"].drop("구시군", axis=1)
df_chungbuk3.to_csv("temp4_3_governor_chungbuk_3.csv", encoding="utf-8-sig", index=False)

## Chungnam

In [73]:
# Fetch the Chungnam frame stored under the 'df_chungnam' key of election_results
# (election_results is defined outside this section of the notebook).
df_chungnam = election_results['df_chungnam']

In [74]:
df_chungnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,충청남도,합계,1397105,785742,251005,0,508796,759801,25941,611363,3,0.6696,1,0.3304,그외정당,보수정당,1,0,1,0
1,충청남도,천안시,299432,126601,48169,0,74741,122910,3691,172831,3,0.6081,1,0.3919,그외정당,보수정당,1,0,1,0
2,충청남도,공주시,98876,63126,13237,0,48033,61270,1856,35750,3,0.784,1,0.216,그외정당,보수정당,1,0,1,0
3,충청남도,보령시,85349,54816,14094,0,39142,53236,1580,30533,3,0.7353,1,0.2647,그외정당,보수정당,1,0,1,0
4,충청남도,아산시,135721,67878,22147,0,43398,65545,2333,67843,3,0.6621,1,0.3379,그외정당,보수정당,1,0,1,0
5,충청남도,금산군,48654,31078,7740,0,21909,29649,1429,17576,3,0.7389,1,0.2611,그외정당,보수정당,1,0,1,0
6,충청남도,연기군,60903,36324,9793,0,25493,35286,1038,24579,3,0.7225,1,0.2775,그외정당,보수정당,1,0,1,0
7,충청남도,논산시,122118,60890,15016,0,43755,58771,2119,61228,3,0.7445,1,0.2555,그외정당,보수정당,1,0,1,0
8,충청남도,부여군,68327,46428,9130,0,35761,44891,1537,21899,3,0.7966,1,0.2034,그외정당,보수정당,1,0,1,0
9,충청남도,서천군,55805,37802,10536,0,25308,35844,1958,18003,3,0.7061,1,0.2939,그외정당,보수정당,1,0,1,0


### preprocessing

In [75]:
# Abbreviate the province label: exact-match '충청남도' values in '시도' become '충남'.
# assign() builds a new frame, which is then bound back to df_chungnam.
df_chungnam = df_chungnam.assign(
    시도=df_chungnam['시도'].replace(to_replace='충청남도', value='충남')
)

In [76]:
df_chungnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [77]:
# Leading columns of the final layout, in output order. '선거년도' and '선거종류'
# are not in the frame yet; the assign() call below creates them.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수',
]

# Every column not named above, kept in its current relative order.
other_cols = df_chungnam.columns[~df_chungnam.columns.isin(fixed_cols)].tolist()

# Add the constant election metadata (the year is stored as the string '2002'),
# apply the column order, then rename '시도' to '지역'.
df_chungnam = (
    df_chungnam.assign(선거년도='2002', 선거종류='광역단체장')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [78]:
df_chungnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,충남,합계,2002,광역단체장,그외정당,보수정당,3,0.6696,1,0.3304,...,1,0,1397105,785742,251005,0,508796,759801,25941,611363
1,충남,천안시,2002,광역단체장,그외정당,보수정당,3,0.6081,1,0.3919,...,1,0,299432,126601,48169,0,74741,122910,3691,172831
2,충남,공주시,2002,광역단체장,그외정당,보수정당,3,0.784,1,0.216,...,1,0,98876,63126,13237,0,48033,61270,1856,35750
3,충남,보령시,2002,광역단체장,그외정당,보수정당,3,0.7353,1,0.2647,...,1,0,85349,54816,14094,0,39142,53236,1580,30533
4,충남,아산시,2002,광역단체장,그외정당,보수정당,3,0.6621,1,0.3379,...,1,0,135721,67878,22147,0,43398,65545,2333,67843
5,충남,금산군,2002,광역단체장,그외정당,보수정당,3,0.7389,1,0.2611,...,1,0,48654,31078,7740,0,21909,29649,1429,17576
6,충남,연기군,2002,광역단체장,그외정당,보수정당,3,0.7225,1,0.2775,...,1,0,60903,36324,9793,0,25493,35286,1038,24579
7,충남,논산시,2002,광역단체장,그외정당,보수정당,3,0.7445,1,0.2555,...,1,0,122118,60890,15016,0,43755,58771,2119,61228
8,충남,부여군,2002,광역단체장,그외정당,보수정당,3,0.7966,1,0.2034,...,1,0,68327,46428,9130,0,35761,44891,1537,21899
9,충남,서천군,2002,광역단체장,그외정당,보수정당,3,0.7061,1,0.2939,...,1,0,55805,37802,10536,0,25308,35844,1958,18003


### v4.1 ~ v4.3

In [79]:
# 1. Write every row of the table.
df_chungnam.to_csv("temp4_1_governor_chungnam_3.csv", encoding="utf-8-sig", index=False)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_chungnam2 = df_chungnam.loc[df_chungnam["구시군"] != "합계"]
df_chungnam2.to_csv("temp4_2_governor_chungnam_3.csv", encoding="utf-8-sig", index=False)

# 3. Keep only the '합계' row(s), drop the '구시군' column, then write them.
df_chungnam3 = df_chungnam.loc[df_chungnam["구시군"] == "합계"].drop("구시군", axis=1)
df_chungnam3.to_csv("temp4_3_governor_chungnam_3.csv", encoding="utf-8-sig", index=False)

## Jeonbuk

In [80]:
# Fetch the Jeonbuk frame stored under the 'df_jeonbuk' key of election_results
# (election_results is defined outside this section of the notebook).
df_jeonbuk = election_results['df_jeonbuk']

In [81]:
df_jeonbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,전라북도,합계,1431722,787135,63661,571650,131320,766631,20504,644587,2,0.7457,3,0.1713,진보정당,무소속,1,1,0,1
1,전라북도,전주시완산구,231776,100204,8191,74023,15785,97999,2205,131572,2,0.7553,3,0.1611,진보정당,무소속,1,1,0,1
2,전라북도,전주시덕진구,197458,79884,6453,59409,12470,78332,1552,117574,2,0.7584,3,0.1592,진보정당,무소속,1,1,0,1
3,전라북도,군산시,197267,97041,5055,79646,10182,94883,2158,100226,2,0.8394,3,0.1073,진보정당,무소속,1,1,0,1
4,전라북도,익산시,234964,112465,9121,84169,16164,109454,3011,122499,2,0.769,3,0.1477,진보정당,무소속,1,1,0,1
5,전라북도,정읍시,105973,66356,4358,48685,11592,64635,1721,39617,2,0.7532,3,0.1793,진보정당,무소속,1,1,0,1
6,전라북도,남원시,75505,52567,3673,36854,10521,51048,1519,22938,2,0.7219,3,0.2061,진보정당,무소속,1,1,0,1
7,전라북도,김제시,87628,58806,8025,40039,8971,57035,1771,28822,2,0.702,3,0.1573,진보정당,무소속,1,1,0,1
8,전라북도,완주군,65838,41700,3306,29454,7671,40431,1269,24138,2,0.7285,3,0.1897,진보정당,무소속,1,1,0,1
9,전라북도,진안군,24876,19517,1905,11812,5194,18911,606,5359,2,0.6246,3,0.2747,진보정당,무소속,1,1,0,1


### preprocessing

In [82]:
# Abbreviate the province label: exact-match '전라북도' values in '시도' become '전북'.
# assign() builds a new frame, which is then bound back to df_jeonbuk.
df_jeonbuk = df_jeonbuk.assign(
    시도=df_jeonbuk['시도'].replace(to_replace='전라북도', value='전북')
)

In [83]:
df_jeonbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [84]:
# Leading columns of the final layout, in output order. '선거년도' and '선거종류'
# are not in the frame yet; the assign() call below creates them.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수',
]

# Every column not named above, kept in its current relative order.
other_cols = df_jeonbuk.columns[~df_jeonbuk.columns.isin(fixed_cols)].tolist()

# Add the constant election metadata (the year is stored as the string '2002'),
# apply the column order, then rename '시도' to '지역'.
df_jeonbuk = (
    df_jeonbuk.assign(선거년도='2002', 선거종류='광역단체장')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [85]:
df_jeonbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,전북,합계,2002,광역단체장,진보정당,무소속,2,0.7457,3,0.1713,...,0,1,1431722,787135,63661,571650,131320,766631,20504,644587
1,전북,전주시완산구,2002,광역단체장,진보정당,무소속,2,0.7553,3,0.1611,...,0,1,231776,100204,8191,74023,15785,97999,2205,131572
2,전북,전주시덕진구,2002,광역단체장,진보정당,무소속,2,0.7584,3,0.1592,...,0,1,197458,79884,6453,59409,12470,78332,1552,117574
3,전북,군산시,2002,광역단체장,진보정당,무소속,2,0.8394,3,0.1073,...,0,1,197267,97041,5055,79646,10182,94883,2158,100226
4,전북,익산시,2002,광역단체장,진보정당,무소속,2,0.769,3,0.1477,...,0,1,234964,112465,9121,84169,16164,109454,3011,122499
5,전북,정읍시,2002,광역단체장,진보정당,무소속,2,0.7532,3,0.1793,...,0,1,105973,66356,4358,48685,11592,64635,1721,39617
6,전북,남원시,2002,광역단체장,진보정당,무소속,2,0.7219,3,0.2061,...,0,1,75505,52567,3673,36854,10521,51048,1519,22938
7,전북,김제시,2002,광역단체장,진보정당,무소속,2,0.702,3,0.1573,...,0,1,87628,58806,8025,40039,8971,57035,1771,28822
8,전북,완주군,2002,광역단체장,진보정당,무소속,2,0.7285,3,0.1897,...,0,1,65838,41700,3306,29454,7671,40431,1269,24138
9,전북,진안군,2002,광역단체장,진보정당,무소속,2,0.6246,3,0.2747,...,0,1,24876,19517,1905,11812,5194,18911,606,5359


### v4.1 ~ v4.3

In [86]:
# 1. Write every row of the table.
df_jeonbuk.to_csv("temp4_1_governor_jeonbuk_3.csv", encoding="utf-8-sig", index=False)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_jeonbuk2 = df_jeonbuk.loc[df_jeonbuk["구시군"] != "합계"]
df_jeonbuk2.to_csv("temp4_2_governor_jeonbuk_3.csv", encoding="utf-8-sig", index=False)

# 3. Keep only the '합계' row(s), drop the '구시군' column, then write them.
df_jeonbuk3 = df_jeonbuk.loc[df_jeonbuk["구시군"] == "합계"].drop("구시군", axis=1)
df_jeonbuk3.to_csv("temp4_3_governor_jeonbuk_3.csv", encoding="utf-8-sig", index=False)

## Jeonnam

In [87]:
# Fetch the Jeonnam frame stored under the 'df_jeonnam' key of election_results
# (election_results is defined outside this section of the notebook).
df_jeonnam = election_results['df_jeonnam']

In [88]:
df_jeonnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,전라남도,합계,1533059,1006148,51504,563545,360484,975533,30615,526911,2,0.5777,3,0.2425,진보정당,무소속,1,1,0,3
1,전라남도,목포시,169428,81949,2234,48314,29595,80143,1806,87479,2,0.6028,3,0.301,진보정당,무소속,1,1,0,3
2,전라남도,여수시,224680,128464,5123,73429,46766,125318,3146,96216,2,0.5859,3,0.2572,진보정당,무소속,1,1,0,3
3,전라남도,순천시,185581,102005,4062,55386,39700,99148,2857,83576,2,0.5586,3,0.2249,진보정당,무소속,1,1,0,3
4,전라남도,나주시,80868,56762,2914,33315,18628,54857,1905,24106,2,0.6073,3,0.1821,진보정당,무소속,1,1,0,3
5,전라남도,광양시,91773,54956,4620,31800,17035,53455,1501,36817,2,0.5949,3,0.1909,진보정당,무소속,1,1,0,3
6,전라남도,담양군,42009,31362,1201,20837,8453,30491,871,10647,2,0.6834,3,0.1916,진보정당,무소속,1,1,0,3
7,전라남도,장성군,41952,30339,1402,22468,5727,29597,742,11613,2,0.7591,3,0.1089,진보정당,무소속,1,1,0,3
8,전라남도,곡성군,30366,24338,1182,14162,8182,23526,812,6028,2,0.602,3,0.2333,진보정당,무소속,1,1,0,3
9,전라남도,구례군,25437,20009,976,11588,6891,19455,554,5428,2,0.5956,3,0.227,진보정당,무소속,1,1,0,3


### preprocessing

In [89]:
# Abbreviate the province label: exact-match '전라남도' values in '시도' become '전남'.
# assign() builds a new frame, which is then bound back to df_jeonnam.
df_jeonnam = df_jeonnam.assign(
    시도=df_jeonnam['시도'].replace(to_replace='전라남도', value='전남')
)

In [90]:
df_jeonnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [91]:
# Leading columns of the final layout, in output order. '선거년도' and '선거종류'
# are not in the frame yet; the assign() call below creates them.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수',
]

# Every column not named above, kept in its current relative order.
other_cols = df_jeonnam.columns[~df_jeonnam.columns.isin(fixed_cols)].tolist()

# Add the constant election metadata (the year is stored as the string '2002'),
# apply the column order, then rename '시도' to '지역'.
df_jeonnam = (
    df_jeonnam.assign(선거년도='2002', 선거종류='광역단체장')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [92]:
df_jeonnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,전남,합계,2002,광역단체장,진보정당,무소속,2,0.5777,3,0.2425,...,0,3,1533059,1006148,51504,563545,360484,975533,30615,526911
1,전남,목포시,2002,광역단체장,진보정당,무소속,2,0.6028,3,0.301,...,0,3,169428,81949,2234,48314,29595,80143,1806,87479
2,전남,여수시,2002,광역단체장,진보정당,무소속,2,0.5859,3,0.2572,...,0,3,224680,128464,5123,73429,46766,125318,3146,96216
3,전남,순천시,2002,광역단체장,진보정당,무소속,2,0.5586,3,0.2249,...,0,3,185581,102005,4062,55386,39700,99148,2857,83576
4,전남,나주시,2002,광역단체장,진보정당,무소속,2,0.6073,3,0.1821,...,0,3,80868,56762,2914,33315,18628,54857,1905,24106
5,전남,광양시,2002,광역단체장,진보정당,무소속,2,0.5949,3,0.1909,...,0,3,91773,54956,4620,31800,17035,53455,1501,36817
6,전남,담양군,2002,광역단체장,진보정당,무소속,2,0.6834,3,0.1916,...,0,3,42009,31362,1201,20837,8453,30491,871,10647
7,전남,장성군,2002,광역단체장,진보정당,무소속,2,0.7591,3,0.1089,...,0,3,41952,30339,1402,22468,5727,29597,742,11613
8,전남,곡성군,2002,광역단체장,진보정당,무소속,2,0.602,3,0.2333,...,0,3,30366,24338,1182,14162,8182,23526,812,6028
9,전남,구례군,2002,광역단체장,진보정당,무소속,2,0.5956,3,0.227,...,0,3,25437,20009,976,11588,6891,19455,554,5428


### v4.1 ~ v4.3

In [93]:
# 1. Write every row of the table.
df_jeonnam.to_csv("temp4_1_governor_jeonnam_3.csv", encoding="utf-8-sig", index=False)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_jeonnam2 = df_jeonnam.loc[df_jeonnam["구시군"] != "합계"]
df_jeonnam2.to_csv("temp4_2_governor_jeonnam_3.csv", encoding="utf-8-sig", index=False)

# 3. Keep only the '합계' row(s), drop the '구시군' column, then write them.
df_jeonnam3 = df_jeonnam.loc[df_jeonnam["구시군"] == "합계"].drop("구시군", axis=1)
df_jeonnam3.to_csv("temp4_3_governor_jeonnam_3.csv", encoding="utf-8-sig", index=False)

## Gyeongbuk

In [94]:
# Fetch the Gyeongbuk frame stored under the 'df_gyeongbuk' key of election_results
# (election_results is defined outside this section of the notebook).
df_gyeongbuk = election_results['df_gyeongbuk']

In [95]:
df_gyeongbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경상북도,합계,2044215,1234095,1028080,0,174472,1202552,31543,810120,1,0.8549,3,0.1451,보수정당,무소속,1,0,0,1
1,경상북도,포항시북구,178954,95784,82300,0,11615,93915,1869,83170,1,0.8763,3,0.1237,보수정당,무소속,1,0,0,1
2,경상북도,포항시남구,186347,97807,82806,0,13093,95899,1908,88540,1,0.8635,3,0.1365,보수정당,무소속,1,0,0,1
3,경상북도,울릉군,7630,6638,5598,0,917,6515,123,992,1,0.8592,3,0.1408,보수정당,무소속,1,0,0,1
4,경상북도,경주시,210873,125634,107442,0,15084,122526,3108,85239,1,0.8769,3,0.1231,보수정당,무소속,1,0,0,1
5,경상북도,김천시,110853,71347,59870,0,9741,69611,1736,39506,1,0.8601,3,0.1399,보수정당,무소속,1,0,0,1
6,경상북도,안동시,136401,91196,74904,0,13742,88646,2550,45205,1,0.845,3,0.155,보수정당,무소속,1,0,0,1
7,경상북도,구미시,231648,100996,83514,0,15633,99147,1849,130652,1,0.8423,3,0.1577,보수정당,무소속,1,0,0,1
8,경상북도,영주시,95784,65654,55417,0,8555,63972,1682,30130,1,0.8663,3,0.1337,보수정당,무소속,1,0,0,1
9,경상북도,영천시,90206,52940,41805,0,9466,51271,1669,37266,1,0.8154,3,0.1846,보수정당,무소속,1,0,0,1


### preprocessing

In [96]:
# Abbreviate the province label: exact-match '경상북도' values in '시도' become '경북'.
# assign() builds a new frame, which is then bound back to df_gyeongbuk.
df_gyeongbuk = df_gyeongbuk.assign(
    시도=df_gyeongbuk['시도'].replace(to_replace='경상북도', value='경북')
)

In [97]:
df_gyeongbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [98]:
# Leading columns of the final layout, in output order. '선거년도' and '선거종류'
# are not in the frame yet; the assign() call below creates them.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수',
]

# Every column not named above, kept in its current relative order.
other_cols = df_gyeongbuk.columns[~df_gyeongbuk.columns.isin(fixed_cols)].tolist()

# Add the constant election metadata (the year is stored as the string '2002'),
# apply the column order, then rename '시도' to '지역'.
df_gyeongbuk = (
    df_gyeongbuk.assign(선거년도='2002', 선거종류='광역단체장')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [99]:
df_gyeongbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,경북,합계,2002,광역단체장,보수정당,무소속,1,0.8549,3,0.1451,...,0,1,2044215,1234095,1028080,0,174472,1202552,31543,810120
1,경북,포항시북구,2002,광역단체장,보수정당,무소속,1,0.8763,3,0.1237,...,0,1,178954,95784,82300,0,11615,93915,1869,83170
2,경북,포항시남구,2002,광역단체장,보수정당,무소속,1,0.8635,3,0.1365,...,0,1,186347,97807,82806,0,13093,95899,1908,88540
3,경북,울릉군,2002,광역단체장,보수정당,무소속,1,0.8592,3,0.1408,...,0,1,7630,6638,5598,0,917,6515,123,992
4,경북,경주시,2002,광역단체장,보수정당,무소속,1,0.8769,3,0.1231,...,0,1,210873,125634,107442,0,15084,122526,3108,85239
5,경북,김천시,2002,광역단체장,보수정당,무소속,1,0.8601,3,0.1399,...,0,1,110853,71347,59870,0,9741,69611,1736,39506
6,경북,안동시,2002,광역단체장,보수정당,무소속,1,0.845,3,0.155,...,0,1,136401,91196,74904,0,13742,88646,2550,45205
7,경북,구미시,2002,광역단체장,보수정당,무소속,1,0.8423,3,0.1577,...,0,1,231648,100996,83514,0,15633,99147,1849,130652
8,경북,영주시,2002,광역단체장,보수정당,무소속,1,0.8663,3,0.1337,...,0,1,95784,65654,55417,0,8555,63972,1682,30130
9,경북,영천시,2002,광역단체장,보수정당,무소속,1,0.8154,3,0.1846,...,0,1,90206,52940,41805,0,9466,51271,1669,37266


### v4.1 ~ v4.3

In [100]:
# 1. Write every row of the table.
df_gyeongbuk.to_csv("temp4_1_governor_gyeongbuk_3.csv", encoding="utf-8-sig", index=False)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_gyeongbuk2 = df_gyeongbuk.loc[df_gyeongbuk["구시군"] != "합계"]
df_gyeongbuk2.to_csv("temp4_2_governor_gyeongbuk_3.csv", encoding="utf-8-sig", index=False)

# 3. Keep only the '합계' row(s), drop the '구시군' column, then write them.
df_gyeongbuk3 = df_gyeongbuk.loc[df_gyeongbuk["구시군"] == "합계"].drop("구시군", axis=1)
df_gyeongbuk3.to_csv("temp4_3_governor_gyeongbuk_3.csv", encoding="utf-8-sig", index=False)

## Gyeongnam

In [101]:
# Fetch the Gyeongnam frame stored under the 'df_gyeongnam' key of election_results
# (election_results is defined outside this section of the notebook).
df_gyeongnam = election_results['df_gyeongnam']

In [102]:
df_gyeongnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경상남도,합계,2227548,1259142,920706,208641,106367,1235714,23428,968406,1,0.7451,2,0.1688,보수정당,진보정당,1,1,1,0
1,경상남도,창원시,343825,166981,111414,29630,24139,165183,1798,176844,1,0.6745,2,0.1794,보수정당,진보정당,1,1,1,0
2,경상남도,마산시,316267,153345,118410,20247,12621,151278,2067,162922,1,0.7827,2,0.1338,보수정당,진보정당,1,1,1,0
3,경상남도,진주시,242187,143459,108529,23928,8679,141136,2323,98728,1,0.769,2,0.1695,보수정당,진보정당,1,1,1,0
4,경상남도,진해시,99953,60998,47057,8519,4555,60131,867,38955,1,0.7826,2,0.1417,보수정당,진보정당,1,1,1,0
5,경상남도,통영시,98533,56062,44164,7425,3347,54936,1126,42471,1,0.8039,2,0.1352,보수정당,진보정당,1,1,1,0
6,경상남도,고성군,48212,35870,25987,5946,3085,35018,852,12342,1,0.7421,2,0.1698,보수정당,진보정당,1,1,1,0
7,경상남도,사천시,88130,52361,40529,7984,2807,51320,1041,35769,1,0.7897,2,0.1556,보수정당,진보정당,1,1,1,0
8,경상남도,김해시,245332,104406,71692,25517,5753,102962,1444,140926,1,0.6963,2,0.2478,보수정당,진보정당,1,1,1,0
9,경상남도,밀양시,92677,56158,43482,7519,3947,54948,1210,36519,1,0.7913,2,0.1368,보수정당,진보정당,1,1,1,0


### preprocessing

In [103]:
# Abbreviate the province label: exact-match '경상남도' values in '시도' become '경남'.
# assign() builds a new frame, which is then bound back to df_gyeongnam.
df_gyeongnam = df_gyeongnam.assign(
    시도=df_gyeongnam['시도'].replace(to_replace='경상남도', value='경남')
)

In [104]:
df_gyeongnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [105]:
# Leading columns of the final layout, in output order. '선거년도' and '선거종류'
# are not in the frame yet; the assign() call below creates them.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수',
]

# Every column not named above, kept in its current relative order.
other_cols = df_gyeongnam.columns[~df_gyeongnam.columns.isin(fixed_cols)].tolist()

# Add the constant election metadata (the year is stored as the string '2002'),
# apply the column order, then rename '시도' to '지역'.
df_gyeongnam = (
    df_gyeongnam.assign(선거년도='2002', 선거종류='광역단체장')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [106]:
# Confirm the new column order and the added election columns.
df_gyeongnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,경남,합계,2002,광역단체장,보수정당,진보정당,1,0.7451,2,0.1688,...,1,0,2227548,1259142,920706,208641,106367,1235714,23428,968406
1,경남,창원시,2002,광역단체장,보수정당,진보정당,1,0.6745,2,0.1794,...,1,0,343825,166981,111414,29630,24139,165183,1798,176844
2,경남,마산시,2002,광역단체장,보수정당,진보정당,1,0.7827,2,0.1338,...,1,0,316267,153345,118410,20247,12621,151278,2067,162922
3,경남,진주시,2002,광역단체장,보수정당,진보정당,1,0.769,2,0.1695,...,1,0,242187,143459,108529,23928,8679,141136,2323,98728
4,경남,진해시,2002,광역단체장,보수정당,진보정당,1,0.7826,2,0.1417,...,1,0,99953,60998,47057,8519,4555,60131,867,38955
5,경남,통영시,2002,광역단체장,보수정당,진보정당,1,0.8039,2,0.1352,...,1,0,98533,56062,44164,7425,3347,54936,1126,42471
6,경남,고성군,2002,광역단체장,보수정당,진보정당,1,0.7421,2,0.1698,...,1,0,48212,35870,25987,5946,3085,35018,852,12342
7,경남,사천시,2002,광역단체장,보수정당,진보정당,1,0.7897,2,0.1556,...,1,0,88130,52361,40529,7984,2807,51320,1041,35769
8,경남,김해시,2002,광역단체장,보수정당,진보정당,1,0.6963,2,0.2478,...,1,0,245332,104406,71692,25517,5753,102962,1444,140926
9,경남,밀양시,2002,광역단체장,보수정당,진보정당,1,0.7913,2,0.1368,...,1,0,92677,56158,43482,7519,3947,54948,1210,36519


### v4.1 ~ v4.3

In [107]:
# All three files are written without the index; utf-8-sig prepends a UTF-8 BOM.

# v4.1: every row ('합계' total row plus each 구시군 row).
df_gyeongnam.to_csv("temp4_1_governor_gyeongnam_3.csv", index=False, encoding="utf-8-sig")

# v4.2: only the rows whose '구시군' is not '합계'.
df_gyeongnam2 = df_gyeongnam[df_gyeongnam['구시군'] != '합계']
df_gyeongnam2.to_csv("temp4_2_governor_gyeongnam_3.csv", index=False, encoding="utf-8-sig")

# v4.3: only the '합계' row, with the '구시군' column removed.
df_gyeongnam3 = df_gyeongnam[df_gyeongnam['구시군'] == '합계'].drop(columns="구시군")
df_gyeongnam3.to_csv("temp4_3_governor_gyeongnam_3.csv", index=False, encoding="utf-8-sig")

## Jeju

In [108]:
# Fetch the Jeju frame stored under the 'df_jeju' key of election_results.
df_jeju = election_results['df_jeju']

In [109]:
# Preview the frame as loaded, before any preprocessing.
df_jeju

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,제주도,합계,387982,267479,119502,135283,8373,263158,4321,120503,2,0.5141,1,0.4541,진보정당,보수정당,1,1,1,0
1,제주도,제주시,196356,124337,57387,62036,3402,122825,1512,72019,2,0.5051,1,0.4672,진보정당,보수정당,1,1,1,0
2,제주도,북제주군,74211,55474,24939,27484,1734,54157,1317,18737,2,0.5075,1,0.4605,진보정당,보수정당,1,1,1,0
3,제주도,서귀포시,60910,44602,19471,22759,1690,43920,682,16308,2,0.5182,1,0.4433,진보정당,보수정당,1,1,1,0
4,제주도,남제주군,56505,43066,17705,23004,1547,42256,810,13439,2,0.5444,1,0.419,진보정당,보수정당,1,1,1,0


### preprocessing

In [110]:
# Shorten the province label in '시도' ('제주도' -> '제주'); every other
# column is carried over unchanged.
df_jeju = df_jeju.assign(
    **{'시도': df_jeju['시도'].replace({'제주도': '제주'})}
)

In [111]:
# Check which columns are present and in what order.
df_jeju.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '득표수_계', '무효투표수',
       '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율', '득표_1위_정당',
       '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수'],
      dtype='object')

In [112]:
# Columns that must lead the output, in exactly this order.
# '선거년도' and '선거종류' do not exist yet; they are added in the chain below.
fixed_cols = [
    '시도',
    '구시군',
    '선거년도',
    '선거종류',
    '득표_1위_정당',
    '득표_2위_정당',
    '득표_1위_후보번호',
    '득표_1위_득표율',
    '득표_2위_후보번호',
    '득표_2위_득표율',
    '보수정당_후보자수',
    '진보정당_후보자수',
    '그외정당_후보자수',
    '무소속_후보자수',
]

# Every remaining column, kept in its current relative order.
other_cols = df_jeju.columns[~df_jeju.columns.isin(fixed_cols)].tolist()

# Add the constant election metadata, reorder (fixed columns first), and only
# then rename '시도' -> '지역' so the selection can still use the old name.
df_jeju = (
    df_jeju
    .assign(**{'선거종류': '광역단체장', '선거년도': '2002'})
    [fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [113]:
# Confirm the new column order and the added election columns.
df_jeju

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,제주,합계,2002,광역단체장,진보정당,보수정당,2,0.5141,1,0.4541,...,1,0,387982,267479,119502,135283,8373,263158,4321,120503
1,제주,제주시,2002,광역단체장,진보정당,보수정당,2,0.5051,1,0.4672,...,1,0,196356,124337,57387,62036,3402,122825,1512,72019
2,제주,북제주군,2002,광역단체장,진보정당,보수정당,2,0.5075,1,0.4605,...,1,0,74211,55474,24939,27484,1734,54157,1317,18737
3,제주,서귀포시,2002,광역단체장,진보정당,보수정당,2,0.5182,1,0.4433,...,1,0,60910,44602,19471,22759,1690,43920,682,16308
4,제주,남제주군,2002,광역단체장,진보정당,보수정당,2,0.5444,1,0.419,...,1,0,56505,43066,17705,23004,1547,42256,810,13439


### v4.1 ~ v4.3

In [114]:
# All three files are written without the index; utf-8-sig prepends a UTF-8 BOM.

# v4.1: every row ('합계' total row plus each 구시군 row).
df_jeju.to_csv("temp4_1_governor_jeju_3.csv", index=False, encoding="utf-8-sig")

# v4.2: only the rows whose '구시군' is not '합계'.
df_jeju2 = df_jeju[df_jeju['구시군'] != '합계']
df_jeju2.to_csv("temp4_2_governor_jeju_3.csv", index=False, encoding="utf-8-sig")

# v4.3: only the '합계' row, with the '구시군' column removed.
df_jeju3 = df_jeju[df_jeju['구시군'] == '합계'].drop(columns="구시군")
df_jeju3.to_csv("temp4_3_governor_jeju_3.csv", index=False, encoding="utf-8-sig")

## Merge

### v4.1

In [115]:
# Region keys, listed in the order their frames are stacked.
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon',
    'gwangju', 'daejeon', 'ulsan', 'gyeonggi',
    'gangwon', 'chungbuk', 'chungnam', 'jeonbuk',
    'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju',
]

# The per-region frames exist only as global variables named df_<region>,
# so resolve each one by name, then stack them with a fresh 0..n-1 index.
df_combined = pd.concat(
    list(map(globals().__getitem__, (f'df_{region}' for region in AVAILABLE_REGIONS))),
    ignore_index=True,
)

In [116]:
# Preview the stacked v4.1 frame.
df_combined

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,서울,합계,2002,광역단체장,보수정당,진보정당,1,0.5229,2,0.4302,...,3,1,7665343,3510898,1819057,1496754,163294,3479105,31793,4154445
1,서울,종로구,2002,광역단체장,보수정당,진보정당,1,0.5261,2,0.4248,...,3,1,141157,72241,37592,30353,3506,71451,790,68916
2,서울,중구,2002,광역단체장,보수정당,진보정당,1,0.5012,2,0.4582,...,3,1,110996,56999,28220,25799,2283,56302,697,53997
3,서울,용산구,2002,광역단체장,보수정당,진보정당,1,0.5421,2,0.4126,...,3,1,186438,89215,47878,36435,4001,88314,901,97223
4,서울,성동구,2002,광역단체장,보수정당,진보정당,1,0.4982,2,0.4531,...,3,1,259275,122030,60188,54742,5884,120814,1216,137245
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
254,제주,합계,2002,광역단체장,진보정당,보수정당,2,0.5141,1,0.4541,...,1,0,387982,267479,119502,135283,8373,263158,4321,120503
255,제주,제주시,2002,광역단체장,진보정당,보수정당,2,0.5051,1,0.4672,...,1,0,196356,124337,57387,62036,3402,122825,1512,72019
256,제주,북제주군,2002,광역단체장,진보정당,보수정당,2,0.5075,1,0.4605,...,1,0,74211,55474,24939,27484,1734,54157,1317,18737
257,제주,서귀포시,2002,광역단체장,진보정당,보수정당,2,0.5182,1,0.4433,...,1,0,60910,44602,19471,22759,1690,43920,682,16308


In [117]:
# Write the combined v4.1 table (no index column; utf-8-sig writes a UTF-8 BOM).
df_combined.to_csv("temp4_1_governor_3.csv", index=False, encoding="utf-8-sig")

### v4.2

In [118]:
# Region keys, listed in the order their frames are stacked.
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon',
    'gwangju', 'daejeon', 'ulsan', 'gyeonggi',
    'gangwon', 'chungbuk', 'chungnam', 'jeonbuk',
    'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju',
]

# The per-region frames exist only as global variables named df_<region>2,
# so resolve each one by name, then stack them with a fresh 0..n-1 index.
df_combined2 = pd.concat(
    list(map(globals().__getitem__, (f'df_{region}2' for region in AVAILABLE_REGIONS))),
    ignore_index=True,
)

In [119]:
# Preview the stacked v4.2 frame.
df_combined2

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,서울,종로구,2002,광역단체장,보수정당,진보정당,1,0.5261,2,0.4248,...,3,1,141157,72241,37592,30353,3506,71451,790,68916
1,서울,중구,2002,광역단체장,보수정당,진보정당,1,0.5012,2,0.4582,...,3,1,110996,56999,28220,25799,2283,56302,697,53997
2,서울,용산구,2002,광역단체장,보수정당,진보정당,1,0.5421,2,0.4126,...,3,1,186438,89215,47878,36435,4001,88314,901,97223
3,서울,성동구,2002,광역단체장,보수정당,진보정당,1,0.4982,2,0.4531,...,3,1,259275,122030,60188,54742,5884,120814,1216,137245
4,서울,광진구,2002,광역단체장,보수정당,진보정당,1,0.5149,2,0.4430,...,3,1,288387,128061,65409,56266,5346,127021,1040,160326
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,경남,합천군,2002,광역단체장,보수정당,진보정당,1,0.8380,2,0.0911,...,1,0,47511,37724,30721,3339,2601,36661,1063,9787
239,제주,제주시,2002,광역단체장,진보정당,보수정당,2,0.5051,1,0.4672,...,1,0,196356,124337,57387,62036,3402,122825,1512,72019
240,제주,북제주군,2002,광역단체장,진보정당,보수정당,2,0.5075,1,0.4605,...,1,0,74211,55474,24939,27484,1734,54157,1317,18737
241,제주,서귀포시,2002,광역단체장,진보정당,보수정당,2,0.5182,1,0.4433,...,1,0,60910,44602,19471,22759,1690,43920,682,16308


In [120]:
# Write the combined v4.2 table (no index column; utf-8-sig writes a UTF-8 BOM).
df_combined2.to_csv("temp4_2_governor_3.csv", index=False, encoding="utf-8-sig")

### v4.3

In [121]:
# Region keys, listed in the order their frames are stacked.
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon',
    'gwangju', 'daejeon', 'ulsan', 'gyeonggi',
    'gangwon', 'chungbuk', 'chungnam', 'jeonbuk',
    'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju',
]

# The per-region frames exist only as global variables named df_<region>3,
# so resolve each one by name, then stack them with a fresh 0..n-1 index.
df_combined3 = pd.concat(
    list(map(globals().__getitem__, (f'df_{region}3' for region in AVAILABLE_REGIONS))),
    ignore_index=True,
)

In [122]:
# Preview the stacked v4.3 frame.
df_combined3

Unnamed: 0,지역,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,보수정당_후보자수,...,그외정당_후보자수,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수
0,서울,2002,광역단체장,보수정당,진보정당,1,0.5229,2,0.4302,1,...,3,1,7665343,3510898,1819057,1496754,163294,3479105,31793,4154445
1,부산,2002,광역단체장,보수정당,진보정당,1,0.6377,2,0.194,1,...,1,0,2784721,1163040,729589,221938,192594,1144121,18919,1621681
2,대구,2002,광역단체장,보수정당,무소속,1,0.6118,3,0.3882,1,...,0,1,1814278,751994,452943,0,287396,740339,11655,1062284
3,인천,2002,광역단체장,보수정당,진보정당,1,0.5618,2,0.3212,1,...,3,0,1809907,711602,393932,225210,82111,701253,10349,1098305
4,광주,2002,광역단체장,진보정당,무소속,2,0.4682,5,0.2705,1,...,1,3,954481,403821,43695,185938,167515,397148,6673,550660
5,대전,2002,광역단체장,보수정당,그외정당,1,0.4662,3,0.402,1,...,1,2,987180,417809,191832,0,219651,411483,6326,569371
6,울산,2002,광역단체장,보수정당,그외정당,1,0.5307,3,0.4362,1,...,2,0,722806,378121,197772,0,174875,372647,5474,344685
7,경기,2002,광역단체장,보수정당,진보정당,1,0.5838,2,0.3599,1,...,1,0,6777575,3024844,1744291,1075243,168357,2987891,36953,3752731
8,강원,2002,광역단체장,보수정당,진보정당,1,0.7112,2,0.2888,1,...,0,0,1129859,671242,468987,190451,0,659438,11804,458617
9,충북,2002,광역단체장,보수정당,그외정당,1,0.586,3,0.3348,1,...,1,1,1076451,600550,343546,0,242712,586258,14292,475901


In [123]:
# Write the combined v4.3 table (no index column; utf-8-sig writes a UTF-8 BOM).
df_combined3.to_csv("temp4_3_governor_3.csv", index=False, encoding="utf-8-sig")

# Batch CSV Files to ZIP

In [124]:
import glob
import zipfile


def zip_csv_files(pattern='*.csv', archive_name='all_csv_files.zip'):
    """Bundle every file matching *pattern* into a compressed ZIP archive.

    Args:
        pattern: Glob pattern selecting the files to add (default: all CSV
            files in the current directory).
        archive_name: Path of the ZIP file to create; an existing file is
            overwritten.

    Returns:
        The sorted list of file paths that were added to the archive.
    """
    # glob returns matches in arbitrary order; sort so the archive layout and
    # the progress log are reproducible between runs.
    matched = sorted(glob.glob(pattern))

    # ZipFile defaults to ZIP_STORED (no compression), so request DEFLATE
    # explicitly -- otherwise the files are merely bundled, not compressed.
    with zipfile.ZipFile(archive_name, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
        for file in matched:
            zipf.write(file)
            print(f"Added: {file}")  # Show progress

    print(f"Total {len(matched)} files compressed.")
    return matched


# Find all CSV files in the current directory and create the ZIP file.
csv_files = zip_csv_files()

Added: temp4_3_governor_daegu_3.csv
Added: temp4_3_governor_gyeongbuk_3.csv
Added: temp4_2_governor_daejeon_3.csv
Added: temp4_1_governor_incheon_3.csv
Added: temp4_2_governor_gyeongnam_3.csv
Added: temp4_1_governor_daejeon_3.csv
Added: temp4_3_governor_gangwon_3.csv
Added: temp4_1_governor_3.csv
Added: temp4_1_governor_busan_3.csv
Added: temp4_2_governor_3.csv
Added: temp4_1_governor_daegu_3.csv
Added: temp4_3_governor_3.csv
Added: temp4_1_governor_jeonnam_3.csv
Added: temp4_3_governor_daejeon_3.csv
Added: temp4_3_governor_seoul_3.csv
Added: temp4_3_governor_jeonbuk_3.csv
Added: temp4_1_governor_chungnam_3.csv
Added: temp4_1_governor_gyeonggi_3.csv
Added: temp4_3_governor_busan_3.csv
Added: temp4_3_governor_gyeongnam_3.csv
Added: temp4_2_governor_gwangju_3.csv
Added: temp4_3_governor_jeju_3.csv
Added: temp4_2_governor_busan_3.csv
Added: temp4_2_governor_gyeongbuk_3.csv
Added: temp4_3_governor_ulsan_3.csv
Added: temp4_2_governor_daegu_3.csv
Added: temp4_3_governor_chungnam_3.csv
Added: