# Functions

In [1]:
import re
from typing import Dict, Optional, Tuple

import numpy as np
import pandas as pd

def _parse_candidate_number(column: str) -> Optional[int]:
    """Return the ballot number embedded in a '득표수_<number>_...' column name, or None."""
    found = re.search(r'득표수_(\d+)_', column)
    return int(found.group(1)) if found else None


def _rank_top_two(row: pd.Series, numbered_columns: list) -> Tuple[tuple, tuple]:
    """Rank one row's candidates by votes; return (votes, number, column) for 1st and 2nd place."""
    tallies = [
        (row[column] if pd.notna(row[column]) else 0, number, column)
        for number, column in numbered_columns
    ]
    # list.sort is stable even with reverse=True, so tied candidates keep column order.
    tallies.sort(key=lambda tally: tally[0], reverse=True)

    placeholder = (0, None, None)
    first = tallies[0] if len(tallies) > 0 else placeholder
    second = tallies[1] if len(tallies) > 1 else placeholder
    return first, second


def _vote_share(votes, total_votes):
    """Return votes / total_votes rounded to 4 places, or 0 if the total is missing or not positive."""
    total = total_votes if pd.notna(total_votes) else 0
    if total > 0:
        return round(votes / total, 4)
    return 0


def process_governor_election_data(region_name: str) -> Optional[pd.DataFrame]:
    """
    Load, enrich, and merge the 2014 (6th) governor-election CSVs for one region.

    Two CSV files are read from GitHub: a detailed file with one
    '득표수_<number>_<party>_<name>' column per candidate, and a summary file.
    For every row of the detailed file the first- and second-place candidates
    are determined, their vote shares and party categories are attached, and
    the per-category candidate counts are added. The result is left-joined onto
    the summary file on ('시도', '구시군').

    Args:
        region_name (str): Region slug used in the file names
            (e.g. 'busan', 'seoul', 'gyeonggi').

    Returns:
        Optional[pd.DataFrame]: The merged frame, or None when anything fails
        (download, missing column, merge, ...). The error is printed rather
        than raised because the batch caller treats None as "skip this region".
    """
    base_url = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp"
    df1_url = f"{base_url}/v1_g/6th_2014/temp1_governor_{region_name}_6.csv"
    df2_url = f"{base_url}/v2_2_g/6th_2014/temp2_2_governor_{region_name}_6.csv"

    print(f"=== {region_name} 지사 선거 데이터 처리 시작 ===")
    print(f"상세 데이터 URL: {df1_url}")
    print(f"요약 데이터 URL: {df2_url}")

    try:
        # Detailed per-candidate vote data.
        df1 = pd.read_csv(df1_url)
        print(f"상세 데이터 로드 완료: {df1.shape}")

        # Candidate columns: everything starting with '득표수_' except the total column.
        vote_columns = [col for col in df1.columns if col.startswith('득표수_') and col != '득표수_계']
        print(f"득표수 관련 컬럼 수: {len(vote_columns)}")

        # Parse the ballot number out of each column name once, not once per row.
        numbered_columns = []
        for col in vote_columns:
            number = _parse_candidate_number(col)
            if number is not None:
                numbered_columns.append((number, col))

        # Rank each row a single time and derive all four columns from that ranking.
        winner_numbers, winner_shares = [], []
        runner_up_numbers, runner_up_shares = [], []
        for _, row in df1.iterrows():
            first_place, second_place = _rank_top_two(row, numbered_columns)
            total_votes = row['득표수_계']
            winner_numbers.append(first_place[1])
            winner_shares.append(_vote_share(first_place[0], total_votes))
            runner_up_numbers.append(second_place[1])
            runner_up_shares.append(_vote_share(second_place[0], total_votes))

        df1['득표_1위_후보번호'] = winner_numbers
        df1['득표_1위_득표율'] = winner_shares
        df1['득표_2위_후보번호'] = runner_up_numbers
        df1['득표_2위_득표율'] = runner_up_shares

        # Ballot number -> party category; the table is maintained per region.
        category_mapping = get_governor_category_mapping(region_name, vote_columns)
        print(f"생성된 카테고리 매핑: {category_mapping}")

        # Candidates without a mapping entry fall into the '기타' bucket.
        df1['득표_1위_정당'] = df1['득표_1위_후보번호'].map(category_mapping).fillna('기타')
        df1['득표_2위_정당'] = df1['득표_2위_후보번호'].map(category_mapping).fillna('기타')

        print("매핑 후 1위 정당 분포 (처리 중):")
        print(df1['득표_1위_정당'].value_counts())

        # Surface any ballot numbers that ended up unmapped.
        unmapped_1st = df1[df1['득표_1위_정당'] == '기타']['득표_1위_후보번호'].unique()
        unmapped_2nd = df1[df1['득표_2위_정당'] == '기타']['득표_2위_후보번호'].unique()
        if len(unmapped_1st) > 0:
            print(f"경고: 1위에서 매핑되지 않은 후보번호: {unmapped_1st}")
        if len(unmapped_2nd) > 0:
            print(f"경고: 2위에서 매핑되지 않은 후보번호: {unmapped_2nd}")

        # Candidates per category; the four standard categories are always present (0 if unused).
        all_categories = ['보수정당', '진보정당', '그외정당', '무소속']
        candidate_counts = dict.fromkeys(all_categories, 0)
        for category in category_mapping.values():
            candidate_counts[category] = candidate_counts.get(category, 0) + 1

        print(f"카테고리별 후보자 수: {candidate_counts}")

        # One constant column per standard category.
        for category in all_categories:
            candidate_count = candidate_counts.get(category, 0)
            df1[f'{category}_후보자수'] = candidate_count
            print(f"  {category}_후보자수: {candidate_count}")

        # Keep only the join keys plus the derived columns.
        merge_columns = ['시도', '구시군', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
                         '득표_1위_정당', '득표_2위_정당'] + [f'{cat}_후보자수' for cat in all_categories]
        df1_for_merge = df1[merge_columns].copy()

        # Summary data.
        df2 = pd.read_csv(df2_url)
        print(f"요약 데이터 로드 완료: {df2.shape}")

        # Left join keeps every summary row even when no detail row matches.
        merged_df = pd.merge(df2, df1_for_merge, on=['시도', '구시군'], how='left')

        # Rows without a winner number did not find a partner in the detailed data.
        missing_data = merged_df[merged_df['득표_1위_후보번호'].isna()]
        if len(missing_data) > 0:
            print(f"경고: 병합되지 않은 데이터가 {len(missing_data)}개 있습니다")
        else:
            print("모든 데이터가 성공적으로 병합되었습니다!")

        print(f"최종 데이터 형태: {merged_df.shape}")
        print("1위 정당 분포:")
        print(merged_df['득표_1위_정당'].value_counts())
        print(f"=== {region_name} 지사 선거 데이터 처리 완료 ===\n")

        return merged_df

    except Exception as e:
        # Deliberate best-effort boundary: report and let the caller skip this region.
        print(f"오류 발생: {e}")
        return None

# Party-category labels used throughout the 2014 governor-election tables.
_CONSERVATIVE = '보수정당'
_PROGRESSIVE = '진보정당'
_MINOR = '그외정당'
_INDEPENDENT = '무소속'

# Ballot number -> category, per region (6th local elections, 2014).
# Maintained by hand because candidates and parties differ per region.
_GOVERNOR_2014_CATEGORIES = {
    'seoul': {1: _CONSERVATIVE, 2: _PROGRESSIVE, 3: _MINOR, 4: _MINOR},
    'busan': {1: _CONSERVATIVE, 4: _INDEPENDENT},
    'daegu': {1: _CONSERVATIVE, 2: _PROGRESSIVE, 3: _MINOR, 4: _MINOR, 5: _INDEPENDENT},
    'incheon': {1: _CONSERVATIVE, 2: _PROGRESSIVE, 3: _MINOR},
    'gwangju': {1: _CONSERVATIVE, 2: _PROGRESSIVE, 3: _MINOR, 4: _MINOR,
                5: _INDEPENDENT, 7: _INDEPENDENT},
    'daejeon': {1: _CONSERVATIVE, 2: _PROGRESSIVE, 3: _MINOR, 4: _MINOR},
    'ulsan': {1: _CONSERVATIVE, 4: _MINOR, 5: _MINOR},
    'sejong': {1: _CONSERVATIVE, 2: _PROGRESSIVE},
    'gyeonggi': {1: _CONSERVATIVE, 2: _PROGRESSIVE},
    'gangwon': {1: _CONSERVATIVE, 2: _PROGRESSIVE, 3: _MINOR},
    'chungbuk': {1: _CONSERVATIVE, 2: _PROGRESSIVE, 3: _MINOR},
    'chungnam': {1: _CONSERVATIVE, 2: _PROGRESSIVE, 4: _INDEPENDENT},
    'jeonbuk': {1: _CONSERVATIVE, 2: _PROGRESSIVE, 3: _MINOR},
    'jeonnam': {1: _CONSERVATIVE, 2: _PROGRESSIVE, 3: _MINOR},
    'gyeongbuk': {1: _CONSERVATIVE, 2: _PROGRESSIVE, 3: _MINOR, 4: _MINOR},
    'gyeongnam': {1: _CONSERVATIVE, 2: _PROGRESSIVE, 3: _MINOR},
    'jeju': {1: _CONSERVATIVE, 2: _PROGRESSIVE, 3: _MINOR, 4: _MINOR},
}

# Placeholder used for regions without an entry above (needs manual review).
_GOVERNOR_FALLBACK_CATEGORIES = {
    1: _CONSERVATIVE, 2: _PROGRESSIVE, 3: _MINOR, 4: _MINOR, 5: _INDEPENDENT,
}


def get_governor_category_mapping(region_name: str, vote_columns: list) -> Dict[int, str]:
    """
    Return the ballot-number -> party-category mapping for one region.

    The region's hand-maintained table is looked up (a generic fallback is
    used, with a printed warning, for unknown regions) and then restricted to
    the ballot numbers that actually occur in ``vote_columns``. Progress and
    warnings are printed along the way.

    Args:
        region_name: Region slug, e.g. 'seoul'.
        vote_columns: Vote-count column names of the form
            '득표수_<number>_...'; names that do not match are ignored.

    Returns:
        Mapping from ballot number to category for the candidates present.
        Present candidates that have no table entry are left out (the caller
        labels them '기타').
    """
    print(f"\n=== {region_name} 지사 선거 후보 정보 ===")
    print("실제 후보 컬럼들:")
    for column in vote_columns:
        print(f"  {column}")

    region_table = _GOVERNOR_2014_CATEGORIES.get(region_name)
    if region_table is None:
        print(f"경고: {region_name} 지역에 대한 매핑이 정의되지 않았습니다.")
        print("기본 매핑을 사용합니다. 수동으로 매핑을 추가해주세요.")
        region_table = _GOVERNOR_FALLBACK_CATEGORIES

    # Ballot numbers that really appear among the vote columns.
    parsed = (re.search(r'득표수_(\d+)_', column) for column in vote_columns)
    ballot_numbers = {int(found.group(1)) for found in parsed if found}
    print(f"실제 존재하는 후보번호: {sorted(ballot_numbers)}")

    # Keep table order, drop entries for candidates that are not on the ballot.
    applied = {
        number: label
        for number, label in region_table.items()
        if number in ballot_numbers
    }

    not_covered = ballot_numbers.difference(region_table)
    if not_covered:
        print(f"경고: 매핑되지 않은 후보번호들: {sorted(not_covered)}")
        print("이 후보들은 '기타' 카테고리로 분류됩니다.")

    print(f"적용된 매핑: {applied}")
    print("=" * 50)

    return applied

def process_multiple_governor_elections(region_names: list) -> Dict[str, pd.DataFrame]:
    """
    Run the single-region pipeline for every region and collect the results.

    Args:
        region_names (list): Region slugs to process, in order.

    Returns:
        Dict[str, pd.DataFrame]: Successful results keyed as 'df_<region>'
        (e.g. 'df_busan'). Regions whose processing returned None are
        reported on stdout and left out of the dictionary.
    """
    frames_by_key = {}
    separator = '=' * 50

    for region_name in region_names:
        print(f"\n{separator}")
        frame = process_governor_election_data(region_name)

        # A None result means the region failed; report it and move on.
        if frame is None:
            print(f"{region_name} 지사 선거 데이터 처리 실패")
            continue

        key = f'df_{region_name}'
        frames_by_key[key] = frame
        print(f"데이터프레임 저장: {key} (shape: {frame.shape})")

    return frames_by_key

# Region slugs of the 17 governor races; each slug appears in the CSV file
# names and is the key of the per-region category table.
AVAILABLE_REGIONS = [
    'seoul',
    'busan',
    'daegu',
    'incheon',
    'gwangju',
    'daejeon',
    'ulsan',
    'sejong',
    'gyeonggi',
    'gangwon',
    'chungbuk',
    'chungnam',
    'jeonbuk',
    'jeonnam',
    'gyeongbuk',
    'gyeongnam',
    'jeju',
]

# # 사용 예시
# if __name__ == "__main__":
#     # 방법 1: 특정 지역들만 처리
#     selected_regions = ['busan', 'seoul', 'gyeonggi', 'incheon']
#     governor_results = process_multiple_governor_elections(selected_regions)

#     # 개별 접근 예시:
#     # df_busan = governor_results['df_busan']
#     # df_seoul = governor_results['df_seoul']

#     # 방법 2: 모든 지역 일괄 처리
#     # all_governor_results = process_multiple_governor_elections(AVAILABLE_REGIONS)

#     print(f"\n사용 가능한 지역들: {AVAILABLE_REGIONS}")

# Preprocessing & Merge

In [2]:
# AVAILABLE_REGIONS is already defined next to the helper functions; reuse it
# instead of keeping a second copy of the list that could drift out of sync.
election_results = process_multiple_governor_elections(AVAILABLE_REGIONS)


=== seoul 지사 선거 데이터 처리 시작 ===
상세 데이터 URL: https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v1_g/6th_2014/temp1_governor_seoul_6.csv
요약 데이터 URL: https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_g/6th_2014/temp2_2_governor_seoul_6.csv
상세 데이터 로드 완료: (26, 11)
득표수 관련 컬럼 수: 4

=== seoul 지사 선거 후보 정보 ===
실제 후보 컬럼들:
  득표수_1_새누리당_정몽준
  득표수_2_새정치민주연합_박원순
  득표수_3_통합진보당_정태흥
  득표수_4_새정치국민의당_홍정식
실제 존재하는 후보번호: [1, 2, 3, 4]
적용된 매핑: {1: '보수정당', 2: '진보정당', 3: '그외정당', 4: '그외정당'}
생성된 카테고리 매핑: {1: '보수정당', 2: '진보정당', 3: '그외정당', 4: '그외정당'}
매핑 후 1위 정당 분포 (처리 중):
득표_1위_정당
진보정당    23
보수정당     3
Name: count, dtype: int64
카테고리별 후보자 수: {'보수정당': 1, '진보정당': 1, '그외정당': 2, '무소속': 0}
  보수정당_후보자수: 1
  진보정당_후보자수: 1
  그외정당_후보자수: 2
  무소속_후보자수: 0
요약 데이터 로드 완료: (26, 11)
모든 데이터가 성공적으로 병합되었습니다!
최종 데이터 형태: (26, 21)
1위 정당 분포:
득표_1위_정당
진보정당    23
보수정당     3
Name: count, dtype: int64
=== seoul 지사 선거 데이터 처리 완료 ===

데이터프레임 저장: df_seoul (shape: (26, 21))

=== busan 지사

# Governor Election 6th

## Seoul

In [3]:
# Pull the Seoul frame out of the batch results (keys follow the 'df_<region>' pattern).
df_seoul = election_results['df_seoul']

In [4]:
df_seoul

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,서울특별시,합계,8441594,4948897,2109869,2752171,41241,0,4903281,45616,...,2,0.5613,1,0.4303,진보정당,보수정당,1,1,2,0
1,서울특별시,종로구,136925,81113,34599,45020,568,0,80187,926,...,2,0.5614,1,0.4315,진보정당,보수정당,1,1,2,0
2,서울특별시,중구,113235,65393,28273,35662,553,0,64488,905,...,2,0.553,1,0.4384,진보정당,보수정당,1,1,2,0
3,서울특별시,용산구,204529,118198,58479,57807,827,0,117113,1085,...,1,0.4993,2,0.4936,보수정당,진보정당,1,1,2,0
4,서울특별시,성동구,252308,146873,62376,81645,1261,0,145282,1591,...,2,0.562,1,0.4293,진보정당,보수정당,1,1,2,0
5,서울특별시,광진구,309269,177484,72160,102449,1424,0,176033,1451,...,2,0.582,1,0.4099,진보정당,보수정당,1,1,2,0
6,서울특별시,동대문구,308318,179384,76325,99568,1594,0,177487,1897,...,2,0.561,1,0.43,진보정당,보수정당,1,1,2,0
7,서울특별시,중랑구,351965,189523,80700,104905,1836,0,187441,2082,...,2,0.5597,1,0.4305,진보정당,보수정당,1,1,2,0
8,서울특별시,성북구,394293,229201,92897,131919,2031,0,226847,2354,...,2,0.5815,1,0.4095,진보정당,보수정당,1,1,2,0
9,서울특별시,강북구,284048,155350,64944,87128,1620,0,153692,1658,...,2,0.5669,1,0.4226,진보정당,보수정당,1,1,2,0


### preprocessing

In [5]:
# Shorten the province label from '서울특별시' to '서울'.
df_seoul = df_seoul.assign(시도=df_seoul['시도'].replace({'서울특별시': '서울'}))

In [6]:
df_seoul.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [7]:
# Columns pinned to the front of the frame, in this order. '선거년도' and
# '선거종류' do not exist yet; they are created by assign() below.
fixed_cols = (
    ['시도', '구시군', '선거년도', '선거종류']
    + ['득표_1위_정당', '득표_2위_정당']
    + ['득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율']
    + ['보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수']
)

# All remaining columns follow, keeping their current relative order.
other_cols = df_seoul.columns[~df_seoul.columns.isin(fixed_cols)].tolist()

# Tag the election, reorder the columns, then rename '시도' to '지역'.
df_seoul = (
    df_seoul
    .assign(**{'선거종류': '광역단체장', '선거년도': '2014'})
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [8]:
df_seoul

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,합계,2014,광역단체장,진보정당,보수정당,2,0.5613,1,0.4303,...,0,8441594,4948897,2109869,2752171,41241,0,4903281,45616,3492697
1,서울,종로구,2014,광역단체장,진보정당,보수정당,2,0.5614,1,0.4315,...,0,136925,81113,34599,45020,568,0,80187,926,55812
2,서울,중구,2014,광역단체장,진보정당,보수정당,2,0.553,1,0.4384,...,0,113235,65393,28273,35662,553,0,64488,905,47842
3,서울,용산구,2014,광역단체장,보수정당,진보정당,1,0.4993,2,0.4936,...,0,204529,118198,58479,57807,827,0,117113,1085,86331
4,서울,성동구,2014,광역단체장,진보정당,보수정당,2,0.562,1,0.4293,...,0,252308,146873,62376,81645,1261,0,145282,1591,105435
5,서울,광진구,2014,광역단체장,진보정당,보수정당,2,0.582,1,0.4099,...,0,309269,177484,72160,102449,1424,0,176033,1451,131785
6,서울,동대문구,2014,광역단체장,진보정당,보수정당,2,0.561,1,0.43,...,0,308318,179384,76325,99568,1594,0,177487,1897,128934
7,서울,중랑구,2014,광역단체장,진보정당,보수정당,2,0.5597,1,0.4305,...,0,351965,189523,80700,104905,1836,0,187441,2082,162442
8,서울,성북구,2014,광역단체장,진보정당,보수정당,2,0.5815,1,0.4095,...,0,394293,229201,92897,131919,2031,0,226847,2354,165092
9,서울,강북구,2014,광역단체장,진보정당,보수정당,2,0.5669,1,0.4226,...,0,284048,155350,64944,87128,1620,0,153692,1658,128698


### v4.1 ~ v4.3

In [9]:
# v4.1: every row.
df_seoul.to_csv("temp4_1_governor_seoul_6.csv", encoding="utf-8-sig", index=False)

# v4.2: district rows only (everything except the '합계' total row).
df_seoul2 = df_seoul[df_seoul['구시군'] != '합계']
df_seoul2.to_csv("temp4_2_governor_seoul_6.csv", encoding="utf-8-sig", index=False)

# v4.3: the '합계' total row only, without the now-redundant '구시군' column.
df_seoul3 = df_seoul[df_seoul['구시군'] == '합계'].drop(columns=['구시군'])
df_seoul3.to_csv("temp4_3_governor_seoul_6.csv", encoding="utf-8-sig", index=False)

## Busan

In [10]:
# Pull the Busan frame out of the batch results (keys follow the 'df_<region>' pattern).
df_busan = election_results['df_busan']

In [11]:
df_busan

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,부산광역시,합계,2932179,1629167,797926,0,0,777225,1575151,54016,...,1,0.5066,4,0.4934,보수정당,무소속,1,0,0,1
1,부산광역시,중구,41868,23404,12576,0,0,10057,22633,771,...,1,0.5556,4,0.4444,보수정당,무소속,1,0,0,1
2,부산광역시,서구,102694,55778,29893,0,0,24088,53981,1797,...,1,0.5538,4,0.4462,보수정당,무소속,1,0,0,1
3,부산광역시,동구,83350,47730,25288,0,0,20693,45981,1749,...,1,0.55,4,0.45,보수정당,무소속,1,0,0,1
4,부산광역시,영도구,115944,60321,30436,0,0,27462,57898,2423,...,1,0.5257,4,0.4743,보수정당,무소속,1,0,0,1
5,부산광역시,부산진구,329022,180227,88196,0,0,85731,173927,6300,...,1,0.5071,4,0.4929,보수정당,무소속,1,0,0,1
6,부산광역시,동래구,229691,130592,64409,0,0,62544,126953,3639,...,1,0.5073,4,0.4927,보수정당,무소속,1,0,0,1
7,부산광역시,남구,241177,137349,66903,0,0,66081,132984,4365,...,1,0.5031,4,0.4969,보수정당,무소속,1,0,0,1
8,부산광역시,북구,251760,139860,65060,0,0,69774,134834,5026,...,4,0.5175,1,0.4825,무소속,보수정당,1,0,0,1
9,부산광역시,해운대구,342080,188267,91704,0,0,91013,182717,5550,...,1,0.5019,4,0.4981,보수정당,무소속,1,0,0,1


### preprocessing

In [12]:
# Shorten the province label from '부산광역시' to '부산'.
df_busan = df_busan.assign(시도=df_busan['시도'].replace({'부산광역시': '부산'}))

In [13]:
df_busan.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [14]:
# Columns pinned to the front of the frame, in this order. '선거년도' and
# '선거종류' do not exist yet; they are created by assign() below.
fixed_cols = (
    ['시도', '구시군', '선거년도', '선거종류']
    + ['득표_1위_정당', '득표_2위_정당']
    + ['득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율']
    + ['보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수']
)

# All remaining columns follow, keeping their current relative order.
other_cols = df_busan.columns[~df_busan.columns.isin(fixed_cols)].tolist()

# Tag the election, reorder the columns, then rename '시도' to '지역'.
df_busan = (
    df_busan
    .assign(**{'선거종류': '광역단체장', '선거년도': '2014'})
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [15]:
df_busan

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,부산,합계,2014,광역단체장,보수정당,무소속,1,0.5066,4,0.4934,...,1,2932179,1629167,797926,0,0,777225,1575151,54016,1303012
1,부산,중구,2014,광역단체장,보수정당,무소속,1,0.5556,4,0.4444,...,1,41868,23404,12576,0,0,10057,22633,771,18464
2,부산,서구,2014,광역단체장,보수정당,무소속,1,0.5538,4,0.4462,...,1,102694,55778,29893,0,0,24088,53981,1797,46916
3,부산,동구,2014,광역단체장,보수정당,무소속,1,0.55,4,0.45,...,1,83350,47730,25288,0,0,20693,45981,1749,35620
4,부산,영도구,2014,광역단체장,보수정당,무소속,1,0.5257,4,0.4743,...,1,115944,60321,30436,0,0,27462,57898,2423,55623
5,부산,부산진구,2014,광역단체장,보수정당,무소속,1,0.5071,4,0.4929,...,1,329022,180227,88196,0,0,85731,173927,6300,148795
6,부산,동래구,2014,광역단체장,보수정당,무소속,1,0.5073,4,0.4927,...,1,229691,130592,64409,0,0,62544,126953,3639,99099
7,부산,남구,2014,광역단체장,보수정당,무소속,1,0.5031,4,0.4969,...,1,241177,137349,66903,0,0,66081,132984,4365,103828
8,부산,북구,2014,광역단체장,무소속,보수정당,4,0.5175,1,0.4825,...,1,251760,139860,65060,0,0,69774,134834,5026,111900
9,부산,해운대구,2014,광역단체장,보수정당,무소속,1,0.5019,4,0.4981,...,1,342080,188267,91704,0,0,91013,182717,5550,153813


### v4.1 ~ v4.3

In [16]:
# v4.1: every row.
df_busan.to_csv("temp4_1_governor_busan_6.csv", encoding="utf-8-sig", index=False)

# v4.2: district rows only (everything except the '합계' total row).
df_busan2 = df_busan[df_busan['구시군'] != '합계']
df_busan2.to_csv("temp4_2_governor_busan_6.csv", encoding="utf-8-sig", index=False)

# v4.3: the '합계' total row only, without the now-redundant '구시군' column.
df_busan3 = df_busan[df_busan['구시군'] == '합계'].drop(columns=['구시군'])
df_busan3.to_csv("temp4_3_governor_busan_6.csv", encoding="utf-8-sig", index=False)

## Daegu

In [17]:
# Pull the Daegu frame out of the batch results (keys follow the 'df_<region>' pattern).
df_daegu = election_results['df_daegu']

In [18]:
df_daegu

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,대구광역시,합계,2012579,1052638,581175,418891,23779,14774,1038619,14019,...,1,0.5596,2,0.4033,보수정당,진보정당,1,1,2,1
1,대구광역시,중구,65996,35719,20419,13410,777,485,35091,628,...,1,0.5819,2,0.3821,보수정당,진보정당,1,1,2,1
2,대구광역시,동구,286747,151471,86217,56920,3789,2214,149140,2331,...,1,0.5781,2,0.3817,보수정당,진보정당,1,1,2,1
3,대구광역시,서구,183116,91230,56272,29458,2190,1609,89529,1701,...,1,0.6285,2,0.329,보수정당,진보정당,1,1,2,1
4,대구광역시,남구,142345,70076,42488,24071,1448,1015,69022,1054,...,1,0.6156,2,0.3487,보수정당,진보정당,1,1,2,1
5,대구광역시,북구,348883,179341,99100,70646,4548,2758,177052,2289,...,1,0.5597,2,0.399,보수정당,진보정당,1,1,2,1
6,대구광역시,수성구,359016,201364,99577,94715,3190,1950,199432,1932,...,1,0.4993,2,0.4749,보수정당,진보정당,1,1,2,1
7,대구광역시,달서구,481475,249724,134637,103302,5678,3230,246847,2877,...,1,0.5454,2,0.4185,보수정당,진보정당,1,1,2,1
8,대구광역시,달성군,145001,73713,42465,26369,2159,1513,72506,1207,...,1,0.5857,2,0.3637,보수정당,진보정당,1,1,2,1


### preprocessing

In [19]:
# Shorten the province label from '대구광역시' to '대구'.
df_daegu = df_daegu.assign(시도=df_daegu['시도'].replace({'대구광역시': '대구'}))

In [20]:
df_daegu.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [21]:
# Columns pinned to the front of the frame, in this order. '선거년도' and
# '선거종류' do not exist yet; they are created by assign() below.
fixed_cols = (
    ['시도', '구시군', '선거년도', '선거종류']
    + ['득표_1위_정당', '득표_2위_정당']
    + ['득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율']
    + ['보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수']
)

# All remaining columns follow, keeping their current relative order.
other_cols = df_daegu.columns[~df_daegu.columns.isin(fixed_cols)].tolist()

# Tag the election, reorder the columns, then rename '시도' to '지역'.
df_daegu = (
    df_daegu
    .assign(**{'선거종류': '광역단체장', '선거년도': '2014'})
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [22]:
df_daegu

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,대구,합계,2014,광역단체장,보수정당,진보정당,1,0.5596,2,0.4033,...,1,2012579,1052638,581175,418891,23779,14774,1038619,14019,959941
1,대구,중구,2014,광역단체장,보수정당,진보정당,1,0.5819,2,0.3821,...,1,65996,35719,20419,13410,777,485,35091,628,30277
2,대구,동구,2014,광역단체장,보수정당,진보정당,1,0.5781,2,0.3817,...,1,286747,151471,86217,56920,3789,2214,149140,2331,135276
3,대구,서구,2014,광역단체장,보수정당,진보정당,1,0.6285,2,0.329,...,1,183116,91230,56272,29458,2190,1609,89529,1701,91886
4,대구,남구,2014,광역단체장,보수정당,진보정당,1,0.6156,2,0.3487,...,1,142345,70076,42488,24071,1448,1015,69022,1054,72269
5,대구,북구,2014,광역단체장,보수정당,진보정당,1,0.5597,2,0.399,...,1,348883,179341,99100,70646,4548,2758,177052,2289,169542
6,대구,수성구,2014,광역단체장,보수정당,진보정당,1,0.4993,2,0.4749,...,1,359016,201364,99577,94715,3190,1950,199432,1932,157652
7,대구,달서구,2014,광역단체장,보수정당,진보정당,1,0.5454,2,0.4185,...,1,481475,249724,134637,103302,5678,3230,246847,2877,231751
8,대구,달성군,2014,광역단체장,보수정당,진보정당,1,0.5857,2,0.3637,...,1,145001,73713,42465,26369,2159,1513,72506,1207,71288


### v4.1 ~ v4.3

In [23]:
# v4.1: every row.
df_daegu.to_csv("temp4_1_governor_daegu_6.csv", encoding="utf-8-sig", index=False)

# v4.2: district rows only (everything except the '합계' total row).
df_daegu2 = df_daegu[df_daegu['구시군'] != '합계']
df_daegu2.to_csv("temp4_2_governor_daegu_6.csv", encoding="utf-8-sig", index=False)

# v4.3: the '합계' total row only, without the now-redundant '구시군' column.
df_daegu3 = df_daegu[df_daegu['구시군'] == '합계'].drop(columns=['구시군'])
df_daegu3.to_csv("temp4_3_governor_daegu_6.csv", encoding="utf-8-sig", index=False)

## Incheon

In [24]:
# Pull the Incheon frame out of the batch results (keys follow the 'df_<region>' pattern).
df_incheon = election_results['df_incheon']

In [25]:
df_incheon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,인천광역시,합계,2319198,1244502,615077,593555,22651,0,1231283,13219,...,1,0.4995,2,0.4821,보수정당,진보정당,1,1,1,0
1,인천광역시,중구,89767,48565,26324,20788,845,0,47957,608,...,1,0.5489,2,0.4335,보수정당,진보정당,1,1,1,0
2,인천광역시,동구,61693,36534,19573,15605,711,0,35889,645,...,1,0.5454,2,0.4348,보수정당,진보정당,1,1,1,0
3,인천광역시,남구,340427,174258,92490,76984,2854,0,172328,1930,...,1,0.5367,2,0.4467,보수정당,진보정당,1,1,1,0
4,인천광역시,연수구,238453,136706,71942,61389,2154,0,135485,1221,...,1,0.531,2,0.4531,보수정당,진보정당,1,1,1,0
5,인천광역시,남동구,405404,215359,102688,106433,4308,0,213429,1930,...,2,0.4987,1,0.4811,진보정당,보수정당,1,1,1,0
6,인천광역시,부평구,451971,237759,108912,121970,4670,0,235552,2207,...,2,0.5178,1,0.4624,진보정당,보수정당,1,1,1,0
7,인천광역시,계양구,274659,143387,61275,78343,2562,0,142180,1207,...,2,0.551,1,0.431,진보정당,보수정당,1,1,1,0
8,인천광역시,서구,380611,199733,98389,96162,3413,0,197964,1769,...,1,0.497,2,0.4858,보수정당,진보정당,1,1,1,0
9,인천광역시,강화군,58089,38611,24762,11854,815,0,37431,1180,...,1,0.6615,2,0.3167,보수정당,진보정당,1,1,1,0


### preprocessing

In [26]:
# Shorten the province label from '인천광역시' to '인천'.
df_incheon = df_incheon.assign(시도=df_incheon['시도'].replace({'인천광역시': '인천'}))

In [27]:
df_incheon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [28]:
# Columns pinned to the front of the frame, in this order. '선거년도' and
# '선거종류' do not exist yet; they are created by assign() below.
fixed_cols = (
    ['시도', '구시군', '선거년도', '선거종류']
    + ['득표_1위_정당', '득표_2위_정당']
    + ['득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율']
    + ['보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수']
)

# All remaining columns follow, keeping their current relative order.
other_cols = df_incheon.columns[~df_incheon.columns.isin(fixed_cols)].tolist()

# Tag the election, reorder the columns, then rename '시도' to '지역'.
df_incheon = (
    df_incheon
    .assign(**{'선거종류': '광역단체장', '선거년도': '2014'})
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [29]:
df_incheon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,인천,합계,2014,광역단체장,보수정당,진보정당,1,0.4995,2,0.4821,...,0,2319198,1244502,615077,593555,22651,0,1231283,13219,1074696
1,인천,중구,2014,광역단체장,보수정당,진보정당,1,0.5489,2,0.4335,...,0,89767,48565,26324,20788,845,0,47957,608,41202
2,인천,동구,2014,광역단체장,보수정당,진보정당,1,0.5454,2,0.4348,...,0,61693,36534,19573,15605,711,0,35889,645,25159
3,인천,남구,2014,광역단체장,보수정당,진보정당,1,0.5367,2,0.4467,...,0,340427,174258,92490,76984,2854,0,172328,1930,166169
4,인천,연수구,2014,광역단체장,보수정당,진보정당,1,0.531,2,0.4531,...,0,238453,136706,71942,61389,2154,0,135485,1221,101747
5,인천,남동구,2014,광역단체장,진보정당,보수정당,2,0.4987,1,0.4811,...,0,405404,215359,102688,106433,4308,0,213429,1930,190045
6,인천,부평구,2014,광역단체장,진보정당,보수정당,2,0.5178,1,0.4624,...,0,451971,237759,108912,121970,4670,0,235552,2207,214212
7,인천,계양구,2014,광역단체장,진보정당,보수정당,2,0.551,1,0.431,...,0,274659,143387,61275,78343,2562,0,142180,1207,131272
8,인천,서구,2014,광역단체장,보수정당,진보정당,1,0.497,2,0.4858,...,0,380611,199733,98389,96162,3413,0,197964,1769,180878
9,인천,강화군,2014,광역단체장,보수정당,진보정당,1,0.6615,2,0.3167,...,0,58089,38611,24762,11854,815,0,37431,1180,19478


### v4.1 ~ v4.3

In [30]:
# v4.1: every row.
df_incheon.to_csv("temp4_1_governor_incheon_6.csv", encoding="utf-8-sig", index=False)

# v4.2: district rows only (everything except the '합계' total row).
df_incheon2 = df_incheon[df_incheon['구시군'] != '합계']
df_incheon2.to_csv("temp4_2_governor_incheon_6.csv", encoding="utf-8-sig", index=False)

# v4.3: the '합계' total row only, without the now-redundant '구시군' column.
df_incheon3 = df_incheon[df_incheon['구시군'] == '합계'].drop(columns=['구시군'])
df_incheon3.to_csv("temp4_3_governor_incheon_6.csv", encoding="utf-8-sig", index=False)

## Gwangju

In [31]:
# Pull the Gwangju frame out of the batch results (keys follow the 'df_<region>' pattern).
df_gwangju = election_results['df_gwangju']

In [32]:
df_gwangju

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,광주광역시,합계,1138418,650008,21614,367203,27985,217915,634717,15291,...,2,0.5785,5,0.3177,진보정당,무소속,1,1,2,2
1,광주광역시,동구,86465,51253,1889,28605,1829,17642,49965,1288,...,2,0.5725,5,0.3313,진보정당,무소속,1,1,2,2
2,광주광역시,서구,242201,142219,4481,82754,4925,47376,139536,2683,...,2,0.5931,5,0.3113,진보정당,무소속,1,1,2,2
3,광주광역시,남구,171997,101811,3160,57010,3173,36187,99530,2281,...,2,0.5728,5,0.3396,진보정당,무소속,1,1,2,2
4,광주광역시,북구,351880,198547,6840,113138,9113,64873,193964,4583,...,2,0.5833,5,0.3087,진보정당,무소속,1,1,2,2
5,광주광역시,광산구,285875,156178,5244,85696,8945,51837,151722,4456,...,2,0.5648,5,0.3163,진보정당,무소속,1,1,2,2


### preprocessing

In [33]:
# Shorten the province label from '광주광역시' to '광주'.
df_gwangju = df_gwangju.assign(시도=df_gwangju['시도'].replace({'광주광역시': '광주'}))

In [34]:
df_gwangju.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [35]:
# Columns pinned to the front of the frame, in this order. '선거년도' and
# '선거종류' do not exist yet; they are created by assign() below.
fixed_cols = (
    ['시도', '구시군', '선거년도', '선거종류']
    + ['득표_1위_정당', '득표_2위_정당']
    + ['득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율']
    + ['보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수']
)

# All remaining columns follow, keeping their current relative order.
other_cols = df_gwangju.columns[~df_gwangju.columns.isin(fixed_cols)].tolist()

# Tag the election, reorder the columns, then rename '시도' to '지역'.
df_gwangju = (
    df_gwangju
    .assign(**{'선거종류': '광역단체장', '선거년도': '2014'})
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [36]:
df_gwangju

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,광주,합계,2014,광역단체장,진보정당,무소속,2,0.5785,5,0.3177,...,2,1138418,650008,21614,367203,27985,217915,634717,15291,488410
1,광주,동구,2014,광역단체장,진보정당,무소속,2,0.5725,5,0.3313,...,2,86465,51253,1889,28605,1829,17642,49965,1288,35212
2,광주,서구,2014,광역단체장,진보정당,무소속,2,0.5931,5,0.3113,...,2,242201,142219,4481,82754,4925,47376,139536,2683,99982
3,광주,남구,2014,광역단체장,진보정당,무소속,2,0.5728,5,0.3396,...,2,171997,101811,3160,57010,3173,36187,99530,2281,70186
4,광주,북구,2014,광역단체장,진보정당,무소속,2,0.5833,5,0.3087,...,2,351880,198547,6840,113138,9113,64873,193964,4583,153333
5,광주,광산구,2014,광역단체장,진보정당,무소속,2,0.5648,5,0.3163,...,2,285875,156178,5244,85696,8945,51837,151722,4456,129697


### v4.1 ~ v4.3

In [37]:
# v4.1: every row.
df_gwangju.to_csv("temp4_1_governor_gwangju_6.csv", encoding="utf-8-sig", index=False)

# v4.2: district rows only (everything except the '합계' total row).
df_gwangju2 = df_gwangju[df_gwangju['구시군'] != '합계']
df_gwangju2.to_csv("temp4_2_governor_gwangju_6.csv", encoding="utf-8-sig", index=False)

# v4.3: the '합계' total row only, without the now-redundant '구시군' column.
df_gwangju3 = df_gwangju[df_gwangju['구시군'] == '합계'].drop(columns=['구시군'])
df_gwangju3.to_csv("temp4_3_governor_gwangju_6.csv", encoding="utf-8-sig", index=False)

## Daejeon

In [38]:
# Pull the Daejeon frame out of the batch results (keys follow the 'df_<region>' pattern).
df_daejeon = election_results['df_daejeon']

In [39]:
df_daejeon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,대전광역시,합계,1207972,652671,301389,322762,20355,0,644506,8165,...,2,0.5008,1,0.4676,진보정당,보수정당,1,1,2,0
1,대전광역시,동구,202671,104754,51063,48607,3335,0,103005,1749,...,1,0.4957,2,0.4719,보수정당,진보정당,1,1,2,0
2,대전광역시,중구,213239,116220,55956,55596,2963,0,114515,1705,...,1,0.4886,2,0.4855,보수정당,진보정당,1,1,2,0
3,대전광역시,서구,389886,209058,96154,104428,6256,0,206838,2220,...,2,0.5049,1,0.4649,진보정당,보수정당,1,1,2,0
4,대전광역시,유성구,240489,138723,57099,75806,4482,0,137387,1336,...,2,0.5518,1,0.4156,진보정당,보수정당,1,1,2,0
5,대전광역시,대덕구,161687,83916,41117,38325,3319,0,82761,1155,...,1,0.4968,2,0.4631,보수정당,진보정당,1,1,2,0


### preprocessing

In [40]:
# Shorten the province label from '대전광역시' to '대전'.
df_daejeon = df_daejeon.assign(시도=df_daejeon['시도'].replace({'대전광역시': '대전'}))

In [41]:
df_daejeon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [42]:
# Columns pinned to the front of the frame, in this order. '선거년도' and
# '선거종류' do not exist yet; they are created by assign() below.
fixed_cols = (
    ['시도', '구시군', '선거년도', '선거종류']
    + ['득표_1위_정당', '득표_2위_정당']
    + ['득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율']
    + ['보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수', '무소속_후보자수']
)

# All remaining columns follow, keeping their current relative order.
other_cols = df_daejeon.columns[~df_daejeon.columns.isin(fixed_cols)].tolist()

# Tag the election, reorder the columns, then rename '시도' to '지역'.
df_daejeon = (
    df_daejeon
    .assign(**{'선거종류': '광역단체장', '선거년도': '2014'})
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [43]:
df_daejeon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,대전,합계,2014,광역단체장,진보정당,보수정당,2,0.5008,1,0.4676,...,0,1207972,652671,301389,322762,20355,0,644506,8165,555301
1,대전,동구,2014,광역단체장,보수정당,진보정당,1,0.4957,2,0.4719,...,0,202671,104754,51063,48607,3335,0,103005,1749,97917
2,대전,중구,2014,광역단체장,보수정당,진보정당,1,0.4886,2,0.4855,...,0,213239,116220,55956,55596,2963,0,114515,1705,97019
3,대전,서구,2014,광역단체장,진보정당,보수정당,2,0.5049,1,0.4649,...,0,389886,209058,96154,104428,6256,0,206838,2220,180828
4,대전,유성구,2014,광역단체장,진보정당,보수정당,2,0.5518,1,0.4156,...,0,240489,138723,57099,75806,4482,0,137387,1336,101766
5,대전,대덕구,2014,광역단체장,보수정당,진보정당,1,0.4968,2,0.4631,...,0,161687,83916,41117,38325,3319,0,82761,1155,77771


### v4.1 ~ v4.3

In [44]:
# 1. Write the complete table, '합계' (total) row included.
df_daejeon.to_csv(
    "temp4_1_governor_daejeon_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_daejeon2 = df_daejeon.query("구시군 != '합계'")
df_daejeon2.to_csv(
    "temp4_2_governor_daejeon_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 3. Keep only the '합계' row(s); the '구시군' column is dropped before writing.
df_daejeon3 = df_daejeon.query("구시군 == '합계'").drop("구시군", axis="columns")
df_daejeon3.to_csv(
    "temp4_3_governor_daejeon_6.csv",
    encoding="utf-8-sig",
    index=False,
)

## Ulsan

In [45]:
# Fetch the Ulsan table stored under the 'df_ulsan' key of election_results
# (election_results is defined outside this cell).
df_ulsan = election_results['df_ulsan']

In [46]:
df_ulsan

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,울산광역시,합계,912325,511881,306311,0,161843,0,468154,43727,...,1,0.6543,4,0.2643,보수정당,그외정당,1,0,2,0
1,울산광역시,중구,189218,106699,66129,0,31825,0,97954,8745,...,1,0.6751,4,0.2543,보수정당,그외정당,1,0,2,0
2,울산광역시,남구,277480,151643,95192,0,44919,0,140111,11532,...,1,0.6794,4,0.2633,보수정당,그외정당,1,0,2,0
3,울산광역시,동구,141266,81696,44961,0,28605,0,73566,8130,...,1,0.6112,4,0.2419,보수정당,그외정당,1,0,2,0
4,울산광역시,북구,139347,79777,42353,0,31078,0,73431,6346,...,1,0.5768,4,0.332,보수정당,그외정당,1,0,2,0
5,울산광역시,울주군,165014,92066,57676,0,25416,0,83092,8974,...,1,0.6941,4,0.2378,보수정당,그외정당,1,0,2,0


### preprocessing

In [47]:
# Use the short province name: '시도' values equal to '울산광역시' become '울산'.
# assign() builds a new frame, which is then bound back to df_ulsan.
df_ulsan = df_ulsan.assign(
    시도=df_ulsan['시도'].replace('울산광역시', '울산')
)

In [48]:
df_ulsan.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [49]:
# Columns pinned to the front of the table, in exactly this order.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# All remaining columns follow, keeping their current relative order.
other_cols = list(filter(lambda name: name not in fixed_cols, df_ulsan.columns))

# Add the election type and year (the year is stored as the string '2014'),
# apply the column order above, then rename '시도' to '지역'.
df_ulsan = (
    df_ulsan.assign(선거종류='광역단체장', 선거년도='2014')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [50]:
df_ulsan

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,울산,합계,2014,광역단체장,보수정당,그외정당,1,0.6543,4,0.2643,...,0,912325,511881,306311,0,161843,0,468154,43727,400444
1,울산,중구,2014,광역단체장,보수정당,그외정당,1,0.6751,4,0.2543,...,0,189218,106699,66129,0,31825,0,97954,8745,82519
2,울산,남구,2014,광역단체장,보수정당,그외정당,1,0.6794,4,0.2633,...,0,277480,151643,95192,0,44919,0,140111,11532,125837
3,울산,동구,2014,광역단체장,보수정당,그외정당,1,0.6112,4,0.2419,...,0,141266,81696,44961,0,28605,0,73566,8130,59570
4,울산,북구,2014,광역단체장,보수정당,그외정당,1,0.5768,4,0.332,...,0,139347,79777,42353,0,31078,0,73431,6346,59570
5,울산,울주군,2014,광역단체장,보수정당,그외정당,1,0.6941,4,0.2378,...,0,165014,92066,57676,0,25416,0,83092,8974,72948


### v4.1 ~ v4.3

In [51]:
# 1. Write the complete table, '합계' (total) row included.
df_ulsan.to_csv(
    "temp4_1_governor_ulsan_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_ulsan2 = df_ulsan.query("구시군 != '합계'")
df_ulsan2.to_csv(
    "temp4_2_governor_ulsan_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 3. Keep only the '합계' row(s); the '구시군' column is dropped before writing.
df_ulsan3 = df_ulsan.query("구시군 == '합계'").drop("구시군", axis="columns")
df_ulsan3.to_csv(
    "temp4_3_governor_ulsan_6.csv",
    encoding="utf-8-sig",
    index=False,
)

## Sejong

In [52]:
# Fetch the Sejong table stored under the 'df_sejong' key of election_results
# (election_results is defined outside this cell).
df_sejong = election_results['df_sejong']

In [53]:
df_sejong

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,세종특별자치시,합계,101559,63629,26451,36203,0,0,62654,975,...,2,0.5778,1,0.4222,진보정당,보수정당,1,1,0,0


### preprocessing

In [54]:
# Use the short province name: '시도' values equal to '세종특별자치시' become '세종'.
# assign() builds a new frame, which is then bound back to df_sejong.
df_sejong = df_sejong.assign(
    시도=df_sejong['시도'].replace('세종특별자치시', '세종')
)

In [55]:
df_sejong.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [56]:
# Columns pinned to the front of the table, in exactly this order.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# All remaining columns follow, keeping their current relative order.
other_cols = list(filter(lambda name: name not in fixed_cols, df_sejong.columns))

# Add the election type and year (the year is stored as the string '2014'),
# apply the column order above, then rename '시도' to '지역'.
df_sejong = (
    df_sejong.assign(선거종류='광역단체장', 선거년도='2014')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [57]:
df_sejong

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,세종,합계,2014,광역단체장,진보정당,보수정당,2,0.5778,1,0.4222,...,0,101559,63629,26451,36203,0,0,62654,975,37930


### v4.1 ~ v4.3

- 세종은 구·시·군이 없고 읍·면만 있어서 df_sejong2에는 행(row)이 없음 (temp4_2 파일은 헤더만 있는 상태로 저장됨)

In [58]:
# 1. Write the complete table, '합계' (total) row included.
df_sejong.to_csv(
    "temp4_1_governor_sejong_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
#    The Sejong table shown above has only the '합계' row, so this selection
#    is expected to be empty and the file should contain just the header.
df_sejong2 = df_sejong.query("구시군 != '합계'")
df_sejong2.to_csv(
    "temp4_2_governor_sejong_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 3. Keep only the '합계' row(s); the '구시군' column is dropped before writing.
df_sejong3 = df_sejong.query("구시군 == '합계'").drop("구시군", axis="columns")
df_sejong3.to_csv(
    "temp4_3_governor_sejong_6.csv",
    encoding="utf-8-sig",
    index=False,
)

## Gyeonggi

In [59]:
# Fetch the Gyeonggi table stored under the 'df_gyeonggi' key of election_results
# (election_results is defined outside this cell).
df_gyeonggi = election_results['df_gyeonggi']

In [60]:
df_gyeonggi

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경기도,합계,9679317,5156691,2524981,2481824,0,0,5006805,149886,...,1,0.5043,2,0.4957,보수정당,진보정당,1,1,0,0
1,경기도,수원시장안구,236664,135841,64235,68152,0,0,132387,3454,...,2,0.5148,1,0.4852,진보정당,보수정당,1,1,0,0
2,경기도,수원시권선구,230904,121183,58441,59901,0,0,118342,2841,...,2,0.5062,1,0.4938,진보정당,보수정당,1,1,0,0
3,경기도,수원시팔달구,203541,105744,54133,48962,0,0,103095,2649,...,1,0.5251,2,0.4749,보수정당,진보정당,1,1,0,0
4,경기도,수원시영통구,240776,142435,58276,81424,0,0,139700,2735,...,2,0.5828,1,0.4172,진보정당,보수정당,1,1,0,0
5,경기도,성남시수정구,191794,97639,43871,50514,0,0,94385,3254,...,2,0.5352,1,0.4648,진보정당,보수정당,1,1,0,0
6,경기도,성남시중원구,211683,109445,49739,55787,0,0,105526,3919,...,2,0.5287,1,0.4713,진보정당,보수정당,1,1,0,0
7,경기도,성남시분당구,389934,241915,123412,114263,0,0,237675,4240,...,1,0.5192,2,0.4808,보수정당,진보정당,1,1,0,0
8,경기도,의정부시,345708,171491,86274,80456,0,0,166730,4761,...,1,0.5174,2,0.4826,보수정당,진보정당,1,1,0,0
9,경기도,안양시만안구,205999,113522,56134,54398,0,0,110532,2990,...,1,0.5079,2,0.4921,보수정당,진보정당,1,1,0,0


### preprocessing

In [61]:
# Use the short province name: '시도' values equal to '경기도' become '경기'.
# assign() builds a new frame, which is then bound back to df_gyeonggi.
df_gyeonggi = df_gyeonggi.assign(
    시도=df_gyeonggi['시도'].replace('경기도', '경기')
)

In [62]:
df_gyeonggi.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [63]:
# Columns pinned to the front of the table, in exactly this order.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# All remaining columns follow, keeping their current relative order.
other_cols = list(filter(lambda name: name not in fixed_cols, df_gyeonggi.columns))

# Add the election type and year (the year is stored as the string '2014'),
# apply the column order above, then rename '시도' to '지역'.
df_gyeonggi = (
    df_gyeonggi.assign(선거종류='광역단체장', 선거년도='2014')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [64]:
df_gyeonggi

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,경기,합계,2014,광역단체장,보수정당,진보정당,1,0.5043,2,0.4957,...,0,9679317,5156691,2524981,2481824,0,0,5006805,149886,4522626
1,경기,수원시장안구,2014,광역단체장,진보정당,보수정당,2,0.5148,1,0.4852,...,0,236664,135841,64235,68152,0,0,132387,3454,100823
2,경기,수원시권선구,2014,광역단체장,진보정당,보수정당,2,0.5062,1,0.4938,...,0,230904,121183,58441,59901,0,0,118342,2841,109721
3,경기,수원시팔달구,2014,광역단체장,보수정당,진보정당,1,0.5251,2,0.4749,...,0,203541,105744,54133,48962,0,0,103095,2649,97797
4,경기,수원시영통구,2014,광역단체장,진보정당,보수정당,2,0.5828,1,0.4172,...,0,240776,142435,58276,81424,0,0,139700,2735,98341
5,경기,성남시수정구,2014,광역단체장,진보정당,보수정당,2,0.5352,1,0.4648,...,0,191794,97639,43871,50514,0,0,94385,3254,94155
6,경기,성남시중원구,2014,광역단체장,진보정당,보수정당,2,0.5287,1,0.4713,...,0,211683,109445,49739,55787,0,0,105526,3919,102238
7,경기,성남시분당구,2014,광역단체장,보수정당,진보정당,1,0.5192,2,0.4808,...,0,389934,241915,123412,114263,0,0,237675,4240,148019
8,경기,의정부시,2014,광역단체장,보수정당,진보정당,1,0.5174,2,0.4826,...,0,345708,171491,86274,80456,0,0,166730,4761,174217
9,경기,안양시만안구,2014,광역단체장,보수정당,진보정당,1,0.5079,2,0.4921,...,0,205999,113522,56134,54398,0,0,110532,2990,92477


### v4.1 ~ v4.3

In [65]:
# 1. Write the complete table, '합계' (total) row included.
df_gyeonggi.to_csv(
    "temp4_1_governor_gyeonggi_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_gyeonggi2 = df_gyeonggi.query("구시군 != '합계'")
df_gyeonggi2.to_csv(
    "temp4_2_governor_gyeonggi_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 3. Keep only the '합계' row(s); the '구시군' column is dropped before writing.
df_gyeonggi3 = df_gyeonggi.query("구시군 == '합계'").drop("구시군", axis="columns")
df_gyeonggi3.to_csv(
    "temp4_3_governor_gyeonggi_6.csv",
    encoding="utf-8-sig",
    index=False,
)

## Gangwon

In [66]:
# Fetch the Gangwon table stored under the 'df_gangwon' key of election_results
# (election_results is defined outside this cell).
df_gangwon = election_results['df_gangwon']

In [67]:
df_gangwon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,강원도,합계,1255469,781359,369201,381338,15774,0,766313,15046,...,2,0.4976,1,0.4818,진보정당,보수정당,1,1,1,0
1,강원도,춘천시,220197,131007,50016,77737,1857,0,129610,1397,...,2,0.5998,1,0.3859,진보정당,보수정당,1,1,1,0
2,강원도,원주시,257088,142997,64877,73995,2442,0,141314,1683,...,2,0.5236,1,0.4591,진보정당,보수정당,1,1,1,0
3,강원도,강릉시,176430,104326,60251,40526,1736,0,102513,1813,...,1,0.5877,2,0.3953,보수정당,진보정당,1,1,1,0
4,강원도,동해시,75796,45629,23041,20786,942,0,44769,860,...,1,0.5147,2,0.4643,보수정당,진보정당,1,1,1,0
5,강원도,삼척시,61597,42406,20875,18782,1514,0,41171,1235,...,1,0.507,2,0.4562,보수정당,진보정당,1,1,1,0
6,강원도,태백시,40070,27089,13097,12746,449,0,26292,797,...,1,0.4981,2,0.4848,보수정당,진보정당,1,1,1,0
7,강원도,속초시,66777,39108,18718,18821,694,0,38233,875,...,2,0.4923,1,0.4896,진보정당,보수정당,1,1,1,0
8,강원도,고성군,26235,19179,9569,8480,596,0,18645,534,...,1,0.5132,2,0.4548,보수정당,진보정당,1,1,1,0
9,강원도,양양군,23787,17549,8761,7967,383,0,17111,438,...,1,0.512,2,0.4656,보수정당,진보정당,1,1,1,0


### preprocessing

In [68]:
# Use the short province name: '시도' values equal to '강원도' become '강원'.
# assign() builds a new frame, which is then bound back to df_gangwon.
df_gangwon = df_gangwon.assign(
    시도=df_gangwon['시도'].replace('강원도', '강원')
)

In [69]:
df_gangwon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [70]:
# Columns pinned to the front of the table, in exactly this order.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# All remaining columns follow, keeping their current relative order.
other_cols = list(filter(lambda name: name not in fixed_cols, df_gangwon.columns))

# Add the election type and year (the year is stored as the string '2014'),
# apply the column order above, then rename '시도' to '지역'.
df_gangwon = (
    df_gangwon.assign(선거종류='광역단체장', 선거년도='2014')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [71]:
df_gangwon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,강원,합계,2014,광역단체장,진보정당,보수정당,2,0.4976,1,0.4818,...,0,1255469,781359,369201,381338,15774,0,766313,15046,474110
1,강원,춘천시,2014,광역단체장,진보정당,보수정당,2,0.5998,1,0.3859,...,0,220197,131007,50016,77737,1857,0,129610,1397,89190
2,강원,원주시,2014,광역단체장,진보정당,보수정당,2,0.5236,1,0.4591,...,0,257088,142997,64877,73995,2442,0,141314,1683,114091
3,강원,강릉시,2014,광역단체장,보수정당,진보정당,1,0.5877,2,0.3953,...,0,176430,104326,60251,40526,1736,0,102513,1813,72104
4,강원,동해시,2014,광역단체장,보수정당,진보정당,1,0.5147,2,0.4643,...,0,75796,45629,23041,20786,942,0,44769,860,30167
5,강원,삼척시,2014,광역단체장,보수정당,진보정당,1,0.507,2,0.4562,...,0,61597,42406,20875,18782,1514,0,41171,1235,19191
6,강원,태백시,2014,광역단체장,보수정당,진보정당,1,0.4981,2,0.4848,...,0,40070,27089,13097,12746,449,0,26292,797,12981
7,강원,속초시,2014,광역단체장,진보정당,보수정당,2,0.4923,1,0.4896,...,0,66777,39108,18718,18821,694,0,38233,875,27669
8,강원,고성군,2014,광역단체장,보수정당,진보정당,1,0.5132,2,0.4548,...,0,26235,19179,9569,8480,596,0,18645,534,7056
9,강원,양양군,2014,광역단체장,보수정당,진보정당,1,0.512,2,0.4656,...,0,23787,17549,8761,7967,383,0,17111,438,6238


### v4.1 ~ v4.3

In [72]:
# 1. Write the complete table, '합계' (total) row included.
df_gangwon.to_csv(
    "temp4_1_governor_gangwon_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_gangwon2 = df_gangwon.query("구시군 != '합계'")
df_gangwon2.to_csv(
    "temp4_2_governor_gangwon_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 3. Keep only the '합계' row(s); the '구시군' column is dropped before writing.
df_gangwon3 = df_gangwon.query("구시군 == '합계'").drop("구시군", axis="columns")
df_gangwon3.to_csv(
    "temp4_3_governor_gangwon_6.csv",
    encoding="utf-8-sig",
    index=False,
)

## Chungbuk

In [73]:
# Fetch the Chungbuk table stored under the 'df_chungbuk' key of election_results
# (election_results is defined outside this cell).
df_chungbuk = election_results['df_chungbuk']

In [74]:
df_chungbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,충청북도,합계,1261119,741049,346152,361115,18590,0,725857,15192,...,2,0.4975,1,0.4769,진보정당,보수정당,1,1,1,0
1,충청북도,청주시상당구,194201,107458,51082,52717,2382,0,106181,1277,...,2,0.4965,1,0.4811,진보정당,보수정당,1,1,1,0
2,충청북도,청주시흥덕구,328448,178070,79126,92795,4259,0,176180,1890,...,2,0.5267,1,0.4491,진보정당,보수정당,1,1,1,0
3,충청북도,충주시,169145,97900,49606,45133,1571,0,96310,1590,...,1,0.5151,2,0.4686,보수정당,진보정당,1,1,1,0
4,충청북도,제천시,112013,67732,31934,32605,1619,0,66158,1574,...,2,0.4928,1,0.4827,진보정당,보수정당,1,1,1,0
5,충청북도,단양군,26692,18685,9313,8268,495,0,18076,609,...,1,0.5152,2,0.4574,보수정당,진보정당,1,1,1,0
6,충청북도,청원군,123457,70847,32671,34629,2005,0,69305,1542,...,2,0.4997,1,0.4714,진보정당,보수정당,1,1,1,0
7,충청북도,영동군,43012,30700,15150,13444,974,0,29568,1132,...,1,0.5124,2,0.4547,보수정당,진보정당,1,1,1,0
8,충청북도,보은군,29809,22561,10416,10369,773,0,21558,1003,...,1,0.4832,2,0.481,보수정당,진보정당,1,1,1,0
9,충청북도,옥천군,44490,30898,13947,14869,872,0,29688,1210,...,2,0.5008,1,0.4698,진보정당,보수정당,1,1,1,0


### preprocessing

In [75]:
# Use the short province name: '시도' values equal to '충청북도' become '충북'.
# assign() builds a new frame, which is then bound back to df_chungbuk.
df_chungbuk = df_chungbuk.assign(
    시도=df_chungbuk['시도'].replace('충청북도', '충북')
)

In [76]:
df_chungbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [77]:
# Columns pinned to the front of the table, in exactly this order.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# All remaining columns follow, keeping their current relative order.
other_cols = list(filter(lambda name: name not in fixed_cols, df_chungbuk.columns))

# Add the election type and year (the year is stored as the string '2014'),
# apply the column order above, then rename '시도' to '지역'.
df_chungbuk = (
    df_chungbuk.assign(선거종류='광역단체장', 선거년도='2014')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [78]:
df_chungbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,충북,합계,2014,광역단체장,진보정당,보수정당,2,0.4975,1,0.4769,...,0,1261119,741049,346152,361115,18590,0,725857,15192,520070
1,충북,청주시상당구,2014,광역단체장,진보정당,보수정당,2,0.4965,1,0.4811,...,0,194201,107458,51082,52717,2382,0,106181,1277,86743
2,충북,청주시흥덕구,2014,광역단체장,진보정당,보수정당,2,0.5267,1,0.4491,...,0,328448,178070,79126,92795,4259,0,176180,1890,150378
3,충북,충주시,2014,광역단체장,보수정당,진보정당,1,0.5151,2,0.4686,...,0,169145,97900,49606,45133,1571,0,96310,1590,71245
4,충북,제천시,2014,광역단체장,진보정당,보수정당,2,0.4928,1,0.4827,...,0,112013,67732,31934,32605,1619,0,66158,1574,44281
5,충북,단양군,2014,광역단체장,보수정당,진보정당,1,0.5152,2,0.4574,...,0,26692,18685,9313,8268,495,0,18076,609,8007
6,충북,청원군,2014,광역단체장,진보정당,보수정당,2,0.4997,1,0.4714,...,0,123457,70847,32671,34629,2005,0,69305,1542,52610
7,충북,영동군,2014,광역단체장,보수정당,진보정당,1,0.5124,2,0.4547,...,0,43012,30700,15150,13444,974,0,29568,1132,12312
8,충북,보은군,2014,광역단체장,보수정당,진보정당,1,0.4832,2,0.481,...,0,29809,22561,10416,10369,773,0,21558,1003,7248
9,충북,옥천군,2014,광역단체장,진보정당,보수정당,2,0.5008,1,0.4698,...,0,44490,30898,13947,14869,872,0,29688,1210,13592


### v4.1 ~ v4.3

In [79]:
# 1. Write the complete table, '합계' (total) row included.
df_chungbuk.to_csv(
    "temp4_1_governor_chungbuk_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_chungbuk2 = df_chungbuk.query("구시군 != '합계'")
df_chungbuk2.to_csv(
    "temp4_2_governor_chungbuk_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 3. Keep only the '합계' row(s); the '구시군' column is dropped before writing.
df_chungbuk3 = df_chungbuk.query("구시군 == '합계'").drop("구시군", axis="columns")
df_chungbuk3.to_csv(
    "temp4_3_governor_chungbuk_6.csv",
    encoding="utf-8-sig",
    index=False,
)

## Chungnam

In [80]:
# Fetch the Chungnam table stored under the 'df_chungnam' key of election_results
# (election_results is defined outside this cell).
df_chungnam = election_results['df_chungnam']

In [81]:
df_chungnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,충청남도,합계,1644554,916206,392315,465994,0,34204,892513,23693,...,2,0.5221,1,0.4396,진보정당,보수정당,1,1,0,1
1,충청남도,천안시서북구,223113,103339,40623,58379,0,3198,102200,1139,...,2,0.5712,1,0.3975,진보정당,보수정당,1,1,0,1
2,충청남도,천안시동남구,236496,117049,49771,62796,0,2987,115554,1495,...,2,0.5434,1,0.4307,진보정당,보수정당,1,1,0,1
3,충청남도,공주시,96439,57596,26855,27730,0,1519,56104,1492,...,2,0.4943,1,0.4787,진보정당,보수정당,1,1,0,1
4,충청남도,보령시,86603,55365,25254,25451,0,2550,53255,2110,...,2,0.4779,1,0.4742,진보정당,보수정당,1,1,0,1
5,충청남도,아산시,222224,113433,47139,60927,0,3356,111422,2011,...,2,0.5468,1,0.4231,진보정당,보수정당,1,1,0,1
6,충청남도,서산시,130906,70754,30125,36161,0,2781,69067,1687,...,2,0.5236,1,0.4362,진보정당,보수정당,1,1,0,1
7,충청남도,태안군,53331,35682,16046,15507,0,2419,33972,1710,...,1,0.4723,2,0.4565,보수정당,진보정당,1,1,0,1
8,충청남도,금산군,46813,30036,12522,14725,0,1612,28859,1177,...,2,0.5102,1,0.4339,진보정당,보수정당,1,1,0,1
9,충청남도,논산시,103962,62061,21888,36205,0,2037,60130,1931,...,2,0.6021,1,0.364,진보정당,보수정당,1,1,0,1


### preprocessing

In [82]:
# Use the short province name: '시도' values equal to '충청남도' become '충남'.
# assign() builds a new frame, which is then bound back to df_chungnam.
df_chungnam = df_chungnam.assign(
    시도=df_chungnam['시도'].replace('충청남도', '충남')
)

In [83]:
df_chungnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [84]:
# Columns pinned to the front of the table, in exactly this order.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# All remaining columns follow, keeping their current relative order.
other_cols = list(filter(lambda name: name not in fixed_cols, df_chungnam.columns))

# Add the election type and year (the year is stored as the string '2014'),
# apply the column order above, then rename '시도' to '지역'.
df_chungnam = (
    df_chungnam.assign(선거종류='광역단체장', 선거년도='2014')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [85]:
df_chungnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,충남,합계,2014,광역단체장,진보정당,보수정당,2,0.5221,1,0.4396,...,1,1644554,916206,392315,465994,0,34204,892513,23693,728348
1,충남,천안시서북구,2014,광역단체장,진보정당,보수정당,2,0.5712,1,0.3975,...,1,223113,103339,40623,58379,0,3198,102200,1139,119774
2,충남,천안시동남구,2014,광역단체장,진보정당,보수정당,2,0.5434,1,0.4307,...,1,236496,117049,49771,62796,0,2987,115554,1495,119447
3,충남,공주시,2014,광역단체장,진보정당,보수정당,2,0.4943,1,0.4787,...,1,96439,57596,26855,27730,0,1519,56104,1492,38843
4,충남,보령시,2014,광역단체장,진보정당,보수정당,2,0.4779,1,0.4742,...,1,86603,55365,25254,25451,0,2550,53255,2110,31238
5,충남,아산시,2014,광역단체장,진보정당,보수정당,2,0.5468,1,0.4231,...,1,222224,113433,47139,60927,0,3356,111422,2011,108791
6,충남,서산시,2014,광역단체장,진보정당,보수정당,2,0.5236,1,0.4362,...,1,130906,70754,30125,36161,0,2781,69067,1687,60152
7,충남,태안군,2014,광역단체장,보수정당,진보정당,1,0.4723,2,0.4565,...,1,53331,35682,16046,15507,0,2419,33972,1710,17649
8,충남,금산군,2014,광역단체장,진보정당,보수정당,2,0.5102,1,0.4339,...,1,46813,30036,12522,14725,0,1612,28859,1177,16777
9,충남,논산시,2014,광역단체장,진보정당,보수정당,2,0.6021,1,0.364,...,1,103962,62061,21888,36205,0,2037,60130,1931,41901


### v4.1 ~ v4.3

In [86]:
# 1. Write the complete table, '합계' (total) row included.
df_chungnam.to_csv(
    "temp4_1_governor_chungnam_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_chungnam2 = df_chungnam.query("구시군 != '합계'")
df_chungnam2.to_csv(
    "temp4_2_governor_chungnam_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 3. Keep only the '합계' row(s); the '구시군' column is dropped before writing.
df_chungnam3 = df_chungnam.query("구시군 == '합계'").drop("구시군", axis="columns")
df_chungnam3.to_csv(
    "temp4_3_governor_chungnam_6.csv",
    encoding="utf-8-sig",
    index=False,
)

## Jeonbuk

In [87]:
# Fetch the Jeonbuk table stored under the 'df_jeonbuk' key of election_results
# (election_results is defined outside this cell).
df_jeonbuk = election_results['df_jeonbuk']

In [88]:
df_jeonbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,전라북도,합계,1503242,900029,177172,599654,89337,0,866163,33866,...,2,0.6923,1,0.2045,진보정당,보수정당,1,1,1,0
1,전라북도,전주시완산구,283872,159841,39423,99971,17298,0,156692,3149,...,2,0.638,1,0.2516,진보정당,보수정당,1,1,1,0
2,전라북도,전주시덕진구,221516,123206,29005,77702,14026,0,120733,2473,...,2,0.6436,1,0.2402,진보정당,보수정당,1,1,1,0
3,전라북도,군산시,220810,117779,19923,83123,10831,0,113877,3902,...,2,0.7299,1,0.175,진보정당,보수정당,1,1,1,0
4,전라북도,익산시,244122,129480,22697,91000,11226,0,124923,4557,...,2,0.7284,1,0.1817,진보정당,보수정당,1,1,1,0
5,전라북도,정읍시,96960,61781,9104,44302,5359,0,58765,3016,...,2,0.7539,1,0.1549,진보정당,보수정당,1,1,1,0
6,전라북도,남원시,70411,47558,8103,32868,4594,0,45565,1993,...,2,0.7213,1,0.1778,진보정당,보수정당,1,1,1,0
7,전라북도,김제시,77336,51495,7317,37675,3766,0,48758,2737,...,2,0.7727,1,0.1501,진보정당,보수정당,1,1,1,0
8,전라북도,완주군,70666,44750,9986,26815,5816,0,42617,2133,...,2,0.6292,1,0.2343,진보정당,보수정당,1,1,1,0
9,전라북도,진안군,23289,18439,6355,9290,1789,0,17434,1005,...,2,0.5329,1,0.3645,진보정당,보수정당,1,1,1,0


### preprocessing

In [89]:
# Use the short province name: '시도' values equal to '전라북도' become '전북'.
# assign() builds a new frame, which is then bound back to df_jeonbuk.
df_jeonbuk = df_jeonbuk.assign(
    시도=df_jeonbuk['시도'].replace('전라북도', '전북')
)

In [90]:
df_jeonbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [91]:
# Columns pinned to the front of the table, in exactly this order.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# All remaining columns follow, keeping their current relative order.
other_cols = list(filter(lambda name: name not in fixed_cols, df_jeonbuk.columns))

# Add the election type and year (the year is stored as the string '2014'),
# apply the column order above, then rename '시도' to '지역'.
df_jeonbuk = (
    df_jeonbuk.assign(선거종류='광역단체장', 선거년도='2014')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [92]:
df_jeonbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,전북,합계,2014,광역단체장,진보정당,보수정당,2,0.6923,1,0.2045,...,0,1503242,900029,177172,599654,89337,0,866163,33866,603213
1,전북,전주시완산구,2014,광역단체장,진보정당,보수정당,2,0.638,1,0.2516,...,0,283872,159841,39423,99971,17298,0,156692,3149,124031
2,전북,전주시덕진구,2014,광역단체장,진보정당,보수정당,2,0.6436,1,0.2402,...,0,221516,123206,29005,77702,14026,0,120733,2473,98310
3,전북,군산시,2014,광역단체장,진보정당,보수정당,2,0.7299,1,0.175,...,0,220810,117779,19923,83123,10831,0,113877,3902,103031
4,전북,익산시,2014,광역단체장,진보정당,보수정당,2,0.7284,1,0.1817,...,0,244122,129480,22697,91000,11226,0,124923,4557,114642
5,전북,정읍시,2014,광역단체장,진보정당,보수정당,2,0.7539,1,0.1549,...,0,96960,61781,9104,44302,5359,0,58765,3016,35179
6,전북,남원시,2014,광역단체장,진보정당,보수정당,2,0.7213,1,0.1778,...,0,70411,47558,8103,32868,4594,0,45565,1993,22853
7,전북,김제시,2014,광역단체장,진보정당,보수정당,2,0.7727,1,0.1501,...,0,77336,51495,7317,37675,3766,0,48758,2737,25841
8,전북,완주군,2014,광역단체장,진보정당,보수정당,2,0.6292,1,0.2343,...,0,70666,44750,9986,26815,5816,0,42617,2133,25916
9,전북,진안군,2014,광역단체장,진보정당,보수정당,2,0.5329,1,0.3645,...,0,23289,18439,6355,9290,1789,0,17434,1005,4850


### v4.1 ~ v4.3

In [93]:
# 1. Write the complete table, '합계' (total) row included.
df_jeonbuk.to_csv(
    "temp4_1_governor_jeonbuk_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_jeonbuk2 = df_jeonbuk.query("구시군 != '합계'")
df_jeonbuk2.to_csv(
    "temp4_2_governor_jeonbuk_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 3. Keep only the '합계' row(s); the '구시군' column is dropped before writing.
df_jeonbuk3 = df_jeonbuk.query("구시군 == '합계'").drop("구시군", axis="columns")
df_jeonbuk3.to_csv(
    "temp4_3_governor_jeonbuk_6.csv",
    encoding="utf-8-sig",
    index=False,
)

## Jeonnam

In [94]:
# Fetch the Jeonnam table stored under the 'df_jeonnam' key of election_results
# (election_results is defined outside this cell).
df_jeonnam = election_results['df_jeonnam']

In [95]:
df_jeonnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,전라남도,합계,1549440,1015688,92549,755233,120868,0,968650,47038,...,2,0.7797,3,0.1248,진보정당,그외정당,1,1,1,0
1,전라남도,목포시,185570,103181,8017,80860,10840,0,99717,3464,...,2,0.8109,3,0.1087,진보정당,그외정당,1,1,1,0
2,전라남도,여수시,232635,135390,11394,104927,15608,0,131929,3461,...,2,0.7953,3,0.1183,진보정당,그외정당,1,1,1,0
3,전라남도,순천시,214889,126320,10695,95005,16777,0,122477,3843,...,2,0.7757,3,0.137,진보정당,그외정당,1,1,1,0
4,전라남도,나주시,76122,51515,4157,38556,6067,0,48780,2735,...,2,0.7904,3,0.1244,진보정당,그외정당,1,1,1,0
5,전라남도,광양시,115763,73537,8669,52699,9775,0,71143,2394,...,2,0.7407,3,0.1374,진보정당,그외정당,1,1,1,0
6,전라남도,담양군,41046,29086,2627,22183,3004,0,27814,1272,...,2,0.7975,3,0.108,진보정당,그외정당,1,1,1,0
7,전라남도,장성군,39166,28305,3097,20673,3135,0,26905,1400,...,2,0.7684,3,0.1165,진보정당,그외정당,1,1,1,0
8,전라남도,곡성군,26872,20627,1976,14497,2835,0,19308,1319,...,2,0.7508,3,0.1468,진보정당,그외정당,1,1,1,0
9,전라남도,구례군,23363,19041,2351,12759,2796,0,17906,1135,...,2,0.7126,3,0.1561,진보정당,그외정당,1,1,1,0


### preprocessing

In [96]:
# Use the short province name: '시도' values equal to '전라남도' become '전남'.
# assign() builds a new frame, which is then bound back to df_jeonnam.
df_jeonnam = df_jeonnam.assign(
    시도=df_jeonnam['시도'].replace('전라남도', '전남')
)

In [97]:
df_jeonnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [98]:
# Columns pinned to the front of the table, in exactly this order.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# All remaining columns follow, keeping their current relative order.
other_cols = list(filter(lambda name: name not in fixed_cols, df_jeonnam.columns))

# Add the election type and year (the year is stored as the string '2014'),
# apply the column order above, then rename '시도' to '지역'.
df_jeonnam = (
    df_jeonnam.assign(선거종류='광역단체장', 선거년도='2014')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [99]:
df_jeonnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,전남,합계,2014,광역단체장,진보정당,그외정당,2,0.7797,3,0.1248,...,0,1549440,1015688,92549,755233,120868,0,968650,47038,533752
1,전남,목포시,2014,광역단체장,진보정당,그외정당,2,0.8109,3,0.1087,...,0,185570,103181,8017,80860,10840,0,99717,3464,82389
2,전남,여수시,2014,광역단체장,진보정당,그외정당,2,0.7953,3,0.1183,...,0,232635,135390,11394,104927,15608,0,131929,3461,97245
3,전남,순천시,2014,광역단체장,진보정당,그외정당,2,0.7757,3,0.137,...,0,214889,126320,10695,95005,16777,0,122477,3843,88569
4,전남,나주시,2014,광역단체장,진보정당,그외정당,2,0.7904,3,0.1244,...,0,76122,51515,4157,38556,6067,0,48780,2735,24607
5,전남,광양시,2014,광역단체장,진보정당,그외정당,2,0.7407,3,0.1374,...,0,115763,73537,8669,52699,9775,0,71143,2394,42226
6,전남,담양군,2014,광역단체장,진보정당,그외정당,2,0.7975,3,0.108,...,0,41046,29086,2627,22183,3004,0,27814,1272,11960
7,전남,장성군,2014,광역단체장,진보정당,그외정당,2,0.7684,3,0.1165,...,0,39166,28305,3097,20673,3135,0,26905,1400,10861
8,전남,곡성군,2014,광역단체장,진보정당,그외정당,2,0.7508,3,0.1468,...,0,26872,20627,1976,14497,2835,0,19308,1319,6245
9,전남,구례군,2014,광역단체장,진보정당,그외정당,2,0.7126,3,0.1561,...,0,23363,19041,2351,12759,2796,0,17906,1135,4322


### v4.1 ~ v4.3

In [100]:
# 1. Write the complete table, '합계' (total) row included.
df_jeonnam.to_csv(
    "temp4_1_governor_jeonnam_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 2. Keep only the rows whose '구시군' is not '합계', then write them.
df_jeonnam2 = df_jeonnam.query("구시군 != '합계'")
df_jeonnam2.to_csv(
    "temp4_2_governor_jeonnam_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# 3. Keep only the '합계' row(s); the '구시군' column is dropped before writing.
df_jeonnam3 = df_jeonnam.query("구시군 == '합계'").drop("구시군", axis="columns")
df_jeonnam3.to_csv(
    "temp4_3_governor_jeonnam_6.csv",
    encoding="utf-8-sig",
    index=False,
)

## Gyeongbuk

In [101]:
# Fetch the Gyeongbuk table stored under the 'df_gyeongbuk' key of election_results
# (election_results is defined outside this cell).
df_gyeongbuk = election_results['df_gyeongbuk']

In [102]:
df_gyeongbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경상북도,합계,2211734,1314925,986989,189603,93067,0,1269659,45266,...,1,0.7774,2,0.1493,보수정당,진보정당,1,1,2,0
1,경상북도,포항시북구,216200,112449,82655,20064,7034,0,109753,2696,...,1,0.7531,2,0.1828,보수정당,진보정당,1,1,2,0
2,경상북도,포항시남구,201120,103003,76273,18395,5878,0,100546,2457,...,1,0.7586,2,0.183,보수정당,진보정당,1,1,2,0
3,경상북도,울릉군,9344,7490,5925,847,400,0,7172,318,...,1,0.8261,2,0.1181,보수정당,진보정당,1,1,2,0
4,경상북도,경주시,216922,126290,94824,16705,10306,0,121835,4455,...,1,0.7783,2,0.1371,보수정당,진보정당,1,1,2,0
5,경상북도,김천시,111552,70244,53436,9389,4631,0,67456,2788,...,1,0.7922,2,0.1392,보수정당,진보정당,1,1,2,0
6,경상북도,안동시,137715,90090,70575,11133,5365,0,87073,3017,...,1,0.8105,2,0.1279,보수정당,진보정당,1,1,2,0
7,경상북도,구미시,321096,160254,112367,32606,11220,0,156193,4061,...,1,0.7194,2,0.2088,보수정당,진보정당,1,1,2,0
8,경상북도,영주시,92496,62153,46595,8960,4456,0,60011,2142,...,1,0.7764,2,0.1493,보수정당,진보정당,1,1,2,0
9,경상북도,영천시,85998,54651,40283,7044,4788,0,52115,2536,...,1,0.773,2,0.1352,보수정당,진보정당,1,1,2,0


### preprocessing

In [103]:
# Use the short province name: '시도' values equal to '경상북도' become '경북'.
# assign() builds a new frame, which is then bound back to df_gyeongbuk.
df_gyeongbuk = df_gyeongbuk.assign(
    시도=df_gyeongbuk['시도'].replace('경상북도', '경북')
)

In [104]:
df_gyeongbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [105]:
# Columns pinned to the front of the table, in exactly this order.
fixed_cols = [
    '시도', '구시군',
    '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수',
]

# All remaining columns follow, keeping their current relative order.
other_cols = list(filter(lambda name: name not in fixed_cols, df_gyeongbuk.columns))

# Add the election type and year (the year is stored as the string '2014'),
# apply the column order above, then rename '시도' to '지역'.
df_gyeongbuk = (
    df_gyeongbuk.assign(선거종류='광역단체장', 선거년도='2014')[fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [106]:
df_gyeongbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,경북,합계,2014,광역단체장,보수정당,진보정당,1,0.7774,2,0.1493,...,0,2211734,1314925,986989,189603,93067,0,1269659,45266,896809
1,경북,포항시북구,2014,광역단체장,보수정당,진보정당,1,0.7531,2,0.1828,...,0,216200,112449,82655,20064,7034,0,109753,2696,103751
2,경북,포항시남구,2014,광역단체장,보수정당,진보정당,1,0.7586,2,0.183,...,0,201120,103003,76273,18395,5878,0,100546,2457,98117
3,경북,울릉군,2014,광역단체장,보수정당,진보정당,1,0.8261,2,0.1181,...,0,9344,7490,5925,847,400,0,7172,318,1854
4,경북,경주시,2014,광역단체장,보수정당,진보정당,1,0.7783,2,0.1371,...,0,216922,126290,94824,16705,10306,0,121835,4455,90632
5,경북,김천시,2014,광역단체장,보수정당,진보정당,1,0.7922,2,0.1392,...,0,111552,70244,53436,9389,4631,0,67456,2788,41308
6,경북,안동시,2014,광역단체장,보수정당,진보정당,1,0.8105,2,0.1279,...,0,137715,90090,70575,11133,5365,0,87073,3017,47625
7,경북,구미시,2014,광역단체장,보수정당,진보정당,1,0.7194,2,0.2088,...,0,321096,160254,112367,32606,11220,0,156193,4061,160842
8,경북,영주시,2014,광역단체장,보수정당,진보정당,1,0.7764,2,0.1493,...,0,92496,62153,46595,8960,4456,0,60011,2142,30343
9,경북,영천시,2014,광역단체장,보수정당,진보정당,1,0.773,2,0.1352,...,0,85998,54651,40283,7044,4788,0,52115,2536,31347


### v4.1 ~ v4.3

In [107]:
# v4.1: the whole table, including the '합계' (total) row.
df_gyeongbuk.to_csv(
    path_or_buf="temp4_1_governor_gyeongbuk_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# v4.2: only the rows whose '구시군' is not '합계'.
df_gyeongbuk2 = df_gyeongbuk[df_gyeongbuk['구시군'] != '합계']
df_gyeongbuk2.to_csv(
    path_or_buf="temp4_2_governor_gyeongbuk_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# v4.3: only the '합계' row(s), with the now-redundant '구시군' column dropped.
df_gyeongbuk3 = df_gyeongbuk[df_gyeongbuk['구시군'] == '합계'].drop(columns=['구시군'])
df_gyeongbuk3.to_csv(
    path_or_buf="temp4_3_governor_gyeongbuk_6.csv",
    encoding="utf-8-sig",
    index=False,
)

## Gyeongnam

In [108]:
# Fetch the entry stored under 'df_gyeongnam' in election_results (built earlier in the notebook).
df_gyeongnam = election_results['df_gyeongnam']

In [109]:
# Display the frame as loaded, before any preprocessing.
df_gyeongnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경상남도,합계,2658347,1589673,913162,559367,79015,0,1551544,38129,...,1,0.5886,2,0.3605,보수정당,진보정당,1,1,1,0
1,경상남도,창원시의창구,204523,114829,59480,47216,6020,0,112716,2113,...,1,0.5277,2,0.4189,보수정당,진보정당,1,1,1,0
2,경상남도,창원시성산구,185723,111830,50723,52430,7033,0,110186,1644,...,2,0.4758,1,0.4603,진보정당,보수정당,1,1,1,0
3,경상남도,창원시마산합포구,151800,87780,58522,24933,2756,0,86211,1569,...,1,0.6788,2,0.2892,보수정당,진보정당,1,1,1,0
4,경상남도,창원시마산회원구,175918,102205,63561,33392,3673,0,100626,1579,...,1,0.6317,2,0.3318,보수정당,진보정당,1,1,1,0
5,경상남도,창원시진해구,139286,77939,45520,27864,3079,0,76463,1476,...,1,0.5953,2,0.3644,보수정당,진보정당,1,1,1,0
6,경상남도,진주시,268027,165372,99871,50316,11916,0,162103,3269,...,1,0.6161,2,0.3104,보수정당,진보정당,1,1,1,0
7,경상남도,통영시,111561,67447,43751,18819,3146,0,65716,1731,...,1,0.6658,2,0.2864,보수정당,진보정당,1,1,1,0
8,경상남도,고성군,47887,32026,19165,10011,1705,0,30881,1145,...,1,0.6206,2,0.3242,보수정당,진보정당,1,1,1,0
9,경상남도,사천시,94250,63316,40192,17624,3469,0,61285,2031,...,1,0.6558,2,0.2876,보수정당,진보정당,1,1,1,0


### preprocessing

In [110]:
# Shorten the province label: '시도' values equal to '경상남도' become '경남'.
df_gyeongnam = df_gyeongnam.assign(
    시도=df_gyeongnam['시도'].replace({'경상남도': '경남'})
)

In [111]:
# Inspect the column labels of df_gyeongnam.
df_gyeongnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [112]:
# Columns that must lead the output table, in exactly this order.
# '선거년도' and '선거종류' are created by the assign() call further down.
fixed_cols = [
    '시도',
    '구시군',
    '선거년도',
    '선거종류',
    '득표_1위_정당',
    '득표_2위_정당',
    '득표_1위_후보번호',
    '득표_1위_득표율',
    '득표_2위_후보번호',
    '득표_2위_득표율',
    '보수정당_후보자수',
    '진보정당_후보자수',
    '그외정당_후보자수',
    '무소속_후보자수',
]

# Everything not listed above follows, keeping its current relative order.
other_cols = [name for name in df_gyeongnam.columns if name not in fixed_cols]

# Add the election metadata (the year is stored as the string '2014'),
# put the columns in their final order, then rename '시도' to '지역'.
df_gyeongnam = (
    df_gyeongnam
    .assign(선거종류='광역단체장', 선거년도='2014')
    [fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [113]:
# Display df_gyeongnam after the column reordering and rename.
df_gyeongnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,경남,합계,2014,광역단체장,보수정당,진보정당,1,0.5886,2,0.3605,...,0,2658347,1589673,913162,559367,79015,0,1551544,38129,1068674
1,경남,창원시의창구,2014,광역단체장,보수정당,진보정당,1,0.5277,2,0.4189,...,0,204523,114829,59480,47216,6020,0,112716,2113,89694
2,경남,창원시성산구,2014,광역단체장,진보정당,보수정당,2,0.4758,1,0.4603,...,0,185723,111830,50723,52430,7033,0,110186,1644,73893
3,경남,창원시마산합포구,2014,광역단체장,보수정당,진보정당,1,0.6788,2,0.2892,...,0,151800,87780,58522,24933,2756,0,86211,1569,64020
4,경남,창원시마산회원구,2014,광역단체장,보수정당,진보정당,1,0.6317,2,0.3318,...,0,175918,102205,63561,33392,3673,0,100626,1579,73713
5,경남,창원시진해구,2014,광역단체장,보수정당,진보정당,1,0.5953,2,0.3644,...,0,139286,77939,45520,27864,3079,0,76463,1476,61347
6,경남,진주시,2014,광역단체장,보수정당,진보정당,1,0.6161,2,0.3104,...,0,268027,165372,99871,50316,11916,0,162103,3269,102655
7,경남,통영시,2014,광역단체장,보수정당,진보정당,1,0.6658,2,0.2864,...,0,111561,67447,43751,18819,3146,0,65716,1731,44114
8,경남,고성군,2014,광역단체장,보수정당,진보정당,1,0.6206,2,0.3242,...,0,47887,32026,19165,10011,1705,0,30881,1145,15861
9,경남,사천시,2014,광역단체장,보수정당,진보정당,1,0.6558,2,0.2876,...,0,94250,63316,40192,17624,3469,0,61285,2031,30934


### v4.1 ~ v4.3

In [114]:
# v4.1: the whole table, including the '합계' (total) row.
df_gyeongnam.to_csv(
    path_or_buf="temp4_1_governor_gyeongnam_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# v4.2: only the rows whose '구시군' is not '합계'.
df_gyeongnam2 = df_gyeongnam[df_gyeongnam['구시군'] != '합계']
df_gyeongnam2.to_csv(
    path_or_buf="temp4_2_governor_gyeongnam_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# v4.3: only the '합계' row(s), with the now-redundant '구시군' column dropped.
df_gyeongnam3 = df_gyeongnam[df_gyeongnam['구시군'] == '합계'].drop(columns=['구시군'])
df_gyeongnam3.to_csv(
    path_or_buf="temp4_3_governor_gyeongnam_6.csv",
    encoding="utf-8-sig",
    index=False,
)

## Jeju

In [115]:
# Fetch the entry stored under 'df_jeju' in election_results (built earlier in the notebook).
df_jeju = election_results['df_jeju']

In [116]:
# Display the frame as loaded, before any preprocessing.
df_jeju

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,제주특별자치도,합계,467182,293323,172793,99493,15846,0,288132,5191,...,1,0.5997,2,0.3453,보수정당,진보정당,1,1,2,0
1,제주특별자치도,제주시,340604,208224,119758,73632,11314,0,204704,3520,...,1,0.585,2,0.3597,보수정당,진보정당,1,1,2,0
2,제주특별자치도,서귀포시,126578,85099,53035,25861,4532,0,83428,1671,...,1,0.6357,2,0.31,보수정당,진보정당,1,1,2,0


### preprocessing

In [117]:
# Shorten the province label: '시도' values equal to '제주특별자치도' become '제주'.
df_jeju = df_jeju.assign(
    시도=df_jeju['시도'].replace({'제주특별자치도': '제주'})
)

In [118]:
# Inspect the column labels of df_jeju.
df_jeju.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [119]:
# Columns that must lead the output table, in exactly this order.
# '선거년도' and '선거종류' are created by the assign() call further down.
fixed_cols = [
    '시도',
    '구시군',
    '선거년도',
    '선거종류',
    '득표_1위_정당',
    '득표_2위_정당',
    '득표_1위_후보번호',
    '득표_1위_득표율',
    '득표_2위_후보번호',
    '득표_2위_득표율',
    '보수정당_후보자수',
    '진보정당_후보자수',
    '그외정당_후보자수',
    '무소속_후보자수',
]

# Everything not listed above follows, keeping its current relative order.
other_cols = [name for name in df_jeju.columns if name not in fixed_cols]

# Add the election metadata (the year is stored as the string '2014'),
# put the columns in their final order, then rename '시도' to '지역'.
df_jeju = (
    df_jeju
    .assign(선거종류='광역단체장', 선거년도='2014')
    [fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [120]:
# Display df_jeju after the column reordering and rename.
df_jeju

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,제주,합계,2014,광역단체장,보수정당,진보정당,1,0.5997,2,0.3453,...,0,467182,293323,172793,99493,15846,0,288132,5191,173859
1,제주,제주시,2014,광역단체장,보수정당,진보정당,1,0.585,2,0.3597,...,0,340604,208224,119758,73632,11314,0,204704,3520,132380
2,제주,서귀포시,2014,광역단체장,보수정당,진보정당,1,0.6357,2,0.31,...,0,126578,85099,53035,25861,4532,0,83428,1671,41479


### v4.1 ~ v4.3

In [121]:
# v4.1: the whole table, including the '합계' (total) row.
df_jeju.to_csv(
    path_or_buf="temp4_1_governor_jeju_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# v4.2: only the rows whose '구시군' is not '합계'.
df_jeju2 = df_jeju[df_jeju['구시군'] != '합계']
df_jeju2.to_csv(
    path_or_buf="temp4_2_governor_jeju_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# v4.3: only the '합계' row(s), with the now-redundant '구시군' column dropped.
df_jeju3 = df_jeju[df_jeju['구시군'] == '합계'].drop(columns=['구시군'])
df_jeju3.to_csv(
    path_or_buf="temp4_3_governor_jeju_6.csv",
    encoding="utf-8-sig",
    index=False,
)

## Merge

### v4.1

In [122]:
# Region keys; each one names a module-level frame `df_<region>`.
# The order here is the row-block order of the merged table.
AVAILABLE_REGIONS = [
    # cities
    'seoul', 'busan', 'daegu', 'incheon',
    'gwangju', 'daejeon', 'ulsan', 'sejong',
    # provinces
    'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju',
]

# Look each per-region frame up by name in the module namespace and stack them;
# ignore_index=True gives the merged table a fresh 0..n-1 row index.
df_combined = pd.concat(
    objs=[globals()[f'df_{region}'] for region in AVAILABLE_REGIONS],
    ignore_index=True,
)

In [123]:
# Display the merged v4.1 table (all regions, '합계' rows included).
df_combined

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,합계,2014,광역단체장,진보정당,보수정당,2,0.5613,1,0.4303,...,0,8441594,4948897,2109869,2752171,41241,0,4903281,45616,3492697
1,서울,종로구,2014,광역단체장,진보정당,보수정당,2,0.5614,1,0.4315,...,0,136925,81113,34599,45020,568,0,80187,926,55812
2,서울,중구,2014,광역단체장,진보정당,보수정당,2,0.5530,1,0.4384,...,0,113235,65393,28273,35662,553,0,64488,905,47842
3,서울,용산구,2014,광역단체장,보수정당,진보정당,1,0.4993,2,0.4936,...,0,204529,118198,58479,57807,827,0,117113,1085,86331
4,서울,성동구,2014,광역단체장,진보정당,보수정당,2,0.5620,1,0.4293,...,0,252308,146873,62376,81645,1261,0,145282,1591,105435
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,경남,거창군,2014,광역단체장,보수정당,진보정당,1,0.6638,2,0.2782,...,0,52077,36296,23031,9653,2012,0,34696,1600,15781
262,경남,합천군,2014,광역단체장,보수정당,진보정당,1,0.7489,2,0.1909,...,0,43908,33386,23729,6048,1910,0,31687,1699,10522
263,제주,합계,2014,광역단체장,보수정당,진보정당,1,0.5997,2,0.3453,...,0,467182,293323,172793,99493,15846,0,288132,5191,173859
264,제주,제주시,2014,광역단체장,보수정당,진보정당,1,0.5850,2,0.3597,...,0,340604,208224,119758,73632,11314,0,204704,3520,132380


In [124]:
# Write the merged v4.1 table without the row index, as UTF-8 with a BOM.
df_combined.to_csv(
    path_or_buf="temp4_1_governor_6.csv",
    encoding="utf-8-sig",
    index=False,
)

### v4.2

In [125]:
# Region keys; each one names a module-level frame `df_<region>2`
# (the per-region table without its '합계' row).
AVAILABLE_REGIONS = [
    # cities
    'seoul', 'busan', 'daegu', 'incheon',
    'gwangju', 'daejeon', 'ulsan', 'sejong',
    # provinces
    'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju',
]

# Look each per-region frame up by name in the module namespace and stack them;
# ignore_index=True gives the merged table a fresh 0..n-1 row index.
df_combined2 = pd.concat(
    objs=[globals()[f'df_{region}2'] for region in AVAILABLE_REGIONS],
    ignore_index=True,
)

In [126]:
# Display the merged v4.2 table (district-level rows only).
df_combined2

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,종로구,2014,광역단체장,진보정당,보수정당,2,0.5614,1,0.4315,...,0,136925,81113,34599,45020,568,0,80187,926,55812
1,서울,중구,2014,광역단체장,진보정당,보수정당,2,0.5530,1,0.4384,...,0,113235,65393,28273,35662,553,0,64488,905,47842
2,서울,용산구,2014,광역단체장,보수정당,진보정당,1,0.4993,2,0.4936,...,0,204529,118198,58479,57807,827,0,117113,1085,86331
3,서울,성동구,2014,광역단체장,진보정당,보수정당,2,0.5620,1,0.4293,...,0,252308,146873,62376,81645,1261,0,145282,1591,105435
4,서울,광진구,2014,광역단체장,진보정당,보수정당,2,0.5820,1,0.4099,...,0,309269,177484,72160,102449,1424,0,176033,1451,131785
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244,경남,산청군,2014,광역단체장,보수정당,진보정당,1,0.6727,2,0.2612,...,0,31442,21553,13918,5404,1368,0,20690,863,9889
245,경남,거창군,2014,광역단체장,보수정당,진보정당,1,0.6638,2,0.2782,...,0,52077,36296,23031,9653,2012,0,34696,1600,15781
246,경남,합천군,2014,광역단체장,보수정당,진보정당,1,0.7489,2,0.1909,...,0,43908,33386,23729,6048,1910,0,31687,1699,10522
247,제주,제주시,2014,광역단체장,보수정당,진보정당,1,0.5850,2,0.3597,...,0,340604,208224,119758,73632,11314,0,204704,3520,132380


In [127]:
# Write the merged v4.2 table without the row index, as UTF-8 with a BOM.
df_combined2.to_csv(
    path_or_buf="temp4_2_governor_6.csv",
    encoding="utf-8-sig",
    index=False,
)

### v4.3

In [128]:
# Region keys; each one names a module-level frame `df_<region>3`
# (the per-region '합계' row with the '구시군' column removed).
AVAILABLE_REGIONS = [
    # cities
    'seoul', 'busan', 'daegu', 'incheon',
    'gwangju', 'daejeon', 'ulsan', 'sejong',
    # provinces
    'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju',
]

# Look each per-region frame up by name in the module namespace and stack them;
# ignore_index=True gives the merged table a fresh 0..n-1 row index.
df_combined3 = pd.concat(
    objs=[globals()[f'df_{region}3'] for region in AVAILABLE_REGIONS],
    ignore_index=True,
)

In [129]:
# Display the merged v4.3 table (one '합계' row per region).
df_combined3

Unnamed: 0,지역,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,보수정당_후보자수,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,2014,광역단체장,진보정당,보수정당,2,0.5613,1,0.4303,1,...,0,8441594,4948897,2109869,2752171,41241,0,4903281,45616,3492697
1,부산,2014,광역단체장,보수정당,무소속,1,0.5066,4,0.4934,1,...,1,2932179,1629167,797926,0,0,777225,1575151,54016,1303012
2,대구,2014,광역단체장,보수정당,진보정당,1,0.5596,2,0.4033,1,...,1,2012579,1052638,581175,418891,23779,14774,1038619,14019,959941
3,인천,2014,광역단체장,보수정당,진보정당,1,0.4995,2,0.4821,1,...,0,2319198,1244502,615077,593555,22651,0,1231283,13219,1074696
4,광주,2014,광역단체장,진보정당,무소속,2,0.5785,5,0.3177,1,...,2,1138418,650008,21614,367203,27985,217915,634717,15291,488410
5,대전,2014,광역단체장,진보정당,보수정당,2,0.5008,1,0.4676,1,...,0,1207972,652671,301389,322762,20355,0,644506,8165,555301
6,울산,2014,광역단체장,보수정당,그외정당,1,0.6543,4,0.2643,1,...,0,912325,511881,306311,0,161843,0,468154,43727,400444
7,세종,2014,광역단체장,진보정당,보수정당,2,0.5778,1,0.4222,1,...,0,101559,63629,26451,36203,0,0,62654,975,37930
8,경기,2014,광역단체장,보수정당,진보정당,1,0.5043,2,0.4957,1,...,0,9679317,5156691,2524981,2481824,0,0,5006805,149886,4522626
9,강원,2014,광역단체장,진보정당,보수정당,2,0.4976,1,0.4818,1,...,0,1255469,781359,369201,381338,15774,0,766313,15046,474110


In [130]:
# Write the merged v4.3 table without the row index, as UTF-8 with a BOM.
df_combined3.to_csv(
    path_or_buf="temp4_3_governor_6.csv",
    encoding="utf-8-sig",
    index=False,
)

# Batch CSV Files to ZIP

In [131]:
import zipfile
import glob

# Collect every CSV in the current working directory. glob returns matches in
# arbitrary order, so sort them to make the archive layout and the progress
# log reproducible from run to run.
csv_files = sorted(glob.glob('*.csv'))

# ZipFile defaults to ZIP_STORED, which archives members without compressing
# them. Request ZIP_DEFLATED so the files really are compressed, as the final
# message claims.
with zipfile.ZipFile('all_csv_files.zip', 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
    for file in csv_files:
        zipf.write(file)
        print(f"Added: {file}")  # Show progress

print(f"Total {len(csv_files)} files compressed.")

Added: temp4_3_governor_jeonnam_6.csv
Added: temp4_1_governor_gyeonggi_6.csv
Added: temp4_1_governor_daejeon_6.csv
Added: temp4_3_governor_jeonbuk_6.csv
Added: temp4_1_governor_jeonnam_6.csv
Added: temp4_3_governor_busan_6.csv
Added: temp4_1_governor_gyeongnam_6.csv
Added: temp4_3_governor_gangwon_6.csv
Added: temp4_1_governor_seoul_6.csv
Added: temp4_2_governor_incheon_6.csv
Added: temp4_2_governor_jeju_6.csv
Added: temp4_2_governor_daejeon_6.csv
Added: temp4_3_governor_ulsan_6.csv
Added: temp4_2_governor_seoul_6.csv
Added: temp4_2_governor_sejong_6.csv
Added: temp4_1_governor_sejong_6.csv
Added: temp4_3_governor_gyeongnam_6.csv
Added: temp4_2_governor_chungnam_6.csv
Added: temp4_2_governor_6.csv
Added: temp4_3_governor_chungnam_6.csv
Added: temp4_2_governor_chungbuk_6.csv
Added: temp4_3_governor_daejeon_6.csv
Added: temp4_3_governor_chungbuk_6.csv
Added: temp4_2_governor_ulsan_6.csv
Added: temp4_1_governor_gyeongbuk_6.csv
Added: temp4_1_governor_daegu_6.csv
Added: temp4_2_governor_je