# Functions

In [1]:
import pandas as pd
import numpy as np
import re
from typing import Dict, Tuple

def process_governor_election_data(region_name: str) -> pd.DataFrame:
    """
    특정 지역의 지사 선거 데이터를 처리하여 병합된 결과를 반환

    Args:
        region_name (str): 지역명 (예: 'busan', 'seoul', 'gyeonggi' 등)

    Returns:
        pd.DataFrame: 병합된 지사 선거 데이터
    """

    # URL 생성
    df1_url = f"https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v1_g/8th_2022/temp1_governor_{region_name}_8.csv"
    df2_url = f"https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_g/8th_2022/temp2_2_governor_{region_name}_8.csv"

    print(f"=== {region_name} 지사 선거 데이터 처리 시작 ===")
    print(f"상세 데이터 URL: {df1_url}")
    print(f"요약 데이터 URL: {df2_url}")

    try:
        # 첫 번째 CSV 파일 처리 (상세 득표 데이터)
        df1 = pd.read_csv(df1_url)
        print(f"상세 데이터 로드 완료: {df1.shape}")

        # 득표수_{숫자}_정당_이름 패턴의 컬럼들 찾기
        vote_columns = [col for col in df1.columns if col.startswith('득표수_') and col != '득표수_계']
        print(f"득표수 관련 컬럼 수: {len(vote_columns)}")

        # 각 행별로 1위와 2위 후보의 번호 찾기
        def find_top_two_candidates(row):
            vote_data = []

            # 모든 후보의 득표수와 번호를 수집
            for col in vote_columns:
                votes = row[col] if pd.notna(row[col]) else 0
                # 컬럼명에서 숫자 추출
                match = re.search(r'득표수_(\d+)_', col)
                if match:
                    candidate_number = int(match.group(1))
                    vote_data.append((votes, candidate_number, col))

            # 득표수 기준으로 내림차순 정렬
            vote_data.sort(key=lambda x: x[0], reverse=True)

            # 1위와 2위 후보 반환
            first_place = vote_data[0] if len(vote_data) > 0 else (0, None, None)
            second_place = vote_data[1] if len(vote_data) > 1 else (0, None, None)

            return first_place, second_place

        # 1위 후보 번호 찾기
        def find_winner_number(row):
            first_place, _ = find_top_two_candidates(row)
            return first_place[1]

        # 2위 후보 번호 찾기
        def find_second_number(row):
            _, second_place = find_top_two_candidates(row)
            return second_place[1]

        # 1위 득표율 계산
        def calculate_vote_rate_1st(row):
            first_place, _ = find_top_two_candidates(row)
            max_votes = first_place[0]
            total_votes = row['득표수_계'] if pd.notna(row['득표수_계']) else 0

            if total_votes > 0:
                return round(max_votes / total_votes, 4)
            else:
                return 0

        # 2위 득표율 계산
        def calculate_vote_rate_2nd(row):
            _, second_place = find_top_two_candidates(row)
            second_votes = second_place[0]
            total_votes = row['득표수_계'] if pd.notna(row['득표수_계']) else 0

            if total_votes > 0:
                return round(second_votes / total_votes, 4)
            else:
                return 0

        # 새로운 컬럼 추가 (1위, 2위)
        df1['득표_1위_후보번호'] = df1.apply(find_winner_number, axis=1)
        df1['득표_1위_득표율'] = df1.apply(calculate_vote_rate_1st, axis=1)
        df1['득표_2위_후보번호'] = df1.apply(find_second_number, axis=1)
        df1['득표_2위_득표율'] = df1.apply(calculate_vote_rate_2nd, axis=1)

        # 정당 카테고리 매핑 딕셔너리 (지역별로 다를 수 있음)
        category_mapping = get_governor_category_mapping(region_name, vote_columns)
        print(f"생성된 카테고리 매핑: {category_mapping}")

        # 1위, 2위 후보번호를 카테고리로 매핑
        # fillna()로 매핑되지 않은 후보는 '기타'로 처리
        df1['득표_1위_정당'] = df1['득표_1위_후보번호'].map(category_mapping).fillna('기타')
        df1['득표_2위_정당'] = df1['득표_2위_후보번호'].map(category_mapping).fillna('기타')

        # 매핑 결과 확인
        print("매핑 후 1위 정당 분포 (처리 중):")
        print(df1['득표_1위_정당'].value_counts())

        # 매핑되지 않은 후보 확인
        unmapped_1st = df1[df1['득표_1위_정당'] == '기타']['득표_1위_후보번호'].unique()
        unmapped_2nd = df1[df1['득표_2위_정당'] == '기타']['득표_2위_후보번호'].unique()
        if len(unmapped_1st) > 0:
            print(f"경고: 1위에서 매핑되지 않은 후보번호: {unmapped_1st}")
        if len(unmapped_2nd) > 0:
            print(f"경고: 2위에서 매핑되지 않은 후보번호: {unmapped_2nd}")

        # 카테고리별 후보자 수 계산 (모든 카테고리 포함)
        candidate_counts = {}
        all_categories = ['보수정당', '진보정당', '그외정당', '무소속']

        # 먼저 모든 카테고리를 0으로 초기화
        for category in all_categories:
            candidate_counts[category] = 0

        # 실제 매핑에서 카운트
        for candidate_num, category in category_mapping.items():
            if category in candidate_counts:
                candidate_counts[category] += 1
            else:
                candidate_counts[category] = 1

        print(f"카테고리별 후보자 수: {candidate_counts}")

        # 각 카테고리별로 개별 컬럼 생성 (없는 카테고리도 0으로 포함)
        for category in all_categories:
            candidate_count = candidate_counts.get(category, 0)  # 없으면 0
            df1[f'{category}_후보자수'] = candidate_count
            print(f"  {category}_후보자수: {candidate_count}")

        # 필요한 컬럼들만 선택 (병합용)
        merge_columns = ['시도', '구시군', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
                         '득표_1위_정당', '득표_2위_정당'] + [f'{cat}_후보자수' for cat in all_categories]

        df1_for_merge = df1[merge_columns].copy()

        # 두 번째 CSV 파일 읽기 (요약 데이터)
        df2 = pd.read_csv(df2_url)
        print(f"요약 데이터 로드 완료: {df2.shape}")

        # 시도, 구시군을 키로 하여 병합
        merged_df = pd.merge(df2, df1_for_merge, on=['시도', '구시군'], how='left')

        # 병합 결과 검증
        missing_data = merged_df[merged_df['득표_1위_후보번호'].isna()]
        if len(missing_data) > 0:
            print(f"경고: 병합되지 않은 데이터가 {len(missing_data)}개 있습니다")
        else:
            print("모든 데이터가 성공적으로 병합되었습니다!")

        print(f"최종 데이터 형태: {merged_df.shape}")
        print(f"1위 정당 분포:")
        print(merged_df['득표_1위_정당'].value_counts())
        print(f"=== {region_name} 지사 선거 데이터 처리 완료 ===\n")

        return merged_df

    except Exception as e:
        print(f"오류 발생: {e}")
        return None

def get_governor_category_mapping(region_name: str, vote_columns: list) -> Dict[int, str]:
    """
    지역별 지사 선거 정당 카테고리 매핑을 반환
    각 지역마다 후보자와 정당이 다르므로 수동으로 설정 필요

    Args:
        region_name: 지역명
        vote_columns: 득표수 컬럼 리스트 (후보 확인용)

    Returns:
        해당 지역의 후보번호별 카테고리 매핑
    """

    print(f"\n=== {region_name} 지사 선거 후보 정보 ===")
    print("실제 후보 컬럼들:")
    for col in vote_columns:
        print(f"  {col}")

    # 지역별 매핑 설정 (2022년 제8회 지방선거 기준)
    if region_name == 'seoul':  # 서울
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            4: '그외정당',
            5: '무소속'
        }

    elif region_name == 'busan':  # 부산
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당'
        }

    elif region_name == 'daegu':  # 대구
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            4: '그외정당'
        }

    elif region_name == 'incheon':  # 인천
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            4: '그외정당'
        }

    elif region_name == 'gwangju':  # 광주
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            4: '그외정당',
            5: '그외정당'
        }

    elif region_name == 'daejeon':  # 대전
        mapping = {
            1: '진보정당',
            2: '보수정당'
        }

    elif region_name == 'ulsan':  # 울산
        mapping = {
            1: '진보정당',
            2: '보수정당'
        }

    elif region_name == 'sejong':  # 세종
        mapping = {
            1: '진보정당',
            2: '보수정당'
        }

    elif region_name == 'gyeonggi':  # 경기
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            4: '그외정당',
            5: '그외정당',
            6: '무소속'
        }

    elif region_name == 'gangwon':  # 강원
        mapping = {
            1: '진보정당',
            2: '보수정당'
        }

    elif region_name == 'chungbuk':  # 충북
        mapping = {
            1: '진보정당',
            2: '보수정당'
        }

    elif region_name == 'chungnam':  # 충남
        mapping = {
            1: '진보정당',
            2: '보수정당'
        }

    elif region_name == 'jeonbuk':  # 전북
        mapping = {
            1: '진보정당',
            2: '보수정당'
        }

    elif region_name == 'jeonnam':  # 전남
        mapping = {
            1: '진보정당',
            2: '보수정당',
            4: '그외정당'
        }

    elif region_name == 'gyeongbuk':  # 경북
        mapping = {
            1: '진보정당',
            2: '보수정당'
        }

    elif region_name == 'gyeongnam':  # 경남
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            4: '그외정당'
        }

    elif region_name == 'jeju':  # 제주
        mapping = {
            1: '진보정당',
            2: '보수정당',
            4: '그외정당',
            5: '무소속'
        }

    else:
        print(f"경고: {region_name} 지역에 대한 매핑이 정의되지 않았습니다.")
        print("기본 매핑을 사용합니다. 수동으로 매핑을 추가해주세요.")
        # 기본 매핑 (수정 필요)
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            4: '그외정당',
            5: '무소속'
        }

    # 실제 존재하는 후보 번호만 추출
    existing_candidates = set()
    for col in vote_columns:
        match = re.search(r'득표수_(\d+)_', col)
        if match:
            existing_candidates.add(int(match.group(1)))

    print(f"실제 존재하는 후보번호: {sorted(existing_candidates)}")

    # 존재하는 후보에 대해서만 매핑 적용
    filtered_mapping = {k: v for k, v in mapping.items() if k in existing_candidates}

    # 매핑되지 않은 후보 번호 확인
    unmapped_candidates = existing_candidates - set(mapping.keys())
    if unmapped_candidates:
        print(f"경고: 매핑되지 않은 후보번호들: {sorted(unmapped_candidates)}")
        print("이 후보들은 '기타' 카테고리로 분류됩니다.")

    print(f"적용된 매핑: {filtered_mapping}")
    print("=" * 50)

    return filtered_mapping

def process_multiple_governor_elections(region_names: list) -> Dict[str, pd.DataFrame]:
    """
    여러 지역의 지사 선거를 일괄 처리하여 df_지역명 형태로 변수 저장

    Args:
        region_names (list): 처리할 지역명 리스트

    Returns:
        Dict[str, pd.DataFrame]: 지역별 처리된 데이터프레임 딕셔너리
    """
    results = {}

    for region_name in region_names:
        print(f"\n{'='*50}")
        result_df = process_governor_election_data(region_name)

        if result_df is not None:
            # df_busan, df_seoul 형태로 변수명 지정
            var_name = f'df_{region_name}'
            results[var_name] = result_df

            print(f"데이터프레임 저장: {var_name} (shape: {result_df.shape})")
        else:
            print(f"{region_name} 지사 선거 데이터 처리 실패")

    return results

# 지역명들
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'sejong', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

# # 사용 예시
# if __name__ == "__main__":
#     # 방법 1: 특정 지역들만 처리
#     selected_regions = ['busan', 'seoul', 'gyeonggi', 'incheon']
#     governor_results = process_multiple_governor_elections(selected_regions)

#     # 개별 접근 예시:
#     # df_busan = governor_results['df_busan']
#     # df_seoul = governor_results['df_seoul']

#     # 방법 2: 모든 지역 일괄 처리
#     # all_governor_results = process_multiple_governor_elections(AVAILABLE_REGIONS)

#     print(f"\n사용 가능한 지역들: {AVAILABLE_REGIONS}")

# Preprocessing & Merge

In [2]:
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'sejong', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

election_results = process_multiple_governor_elections(AVAILABLE_REGIONS)


=== seoul 지사 선거 데이터 처리 시작 ===
상세 데이터 URL: https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v1_g/8th_2022/temp1_governor_seoul_8.csv
요약 데이터 URL: https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_g/8th_2022/temp2_2_governor_seoul_8.csv
상세 데이터 로드 완료: (26, 12)
득표수 관련 컬럼 수: 5

=== seoul 지사 선거 후보 정보 ===
실제 후보 컬럼들:
  득표수_1_더불어민주당_송영길
  득표수_2_국민의힘_오세훈
  득표수_3_정의당_권수정
  득표수_4_기본소득당_신지혜
  득표수_5_무소속_김광종
실제 존재하는 후보번호: [1, 2, 3, 4, 5]
적용된 매핑: {1: '진보정당', 2: '보수정당', 3: '그외정당', 4: '그외정당', 5: '무소속'}
생성된 카테고리 매핑: {1: '진보정당', 2: '보수정당', 3: '그외정당', 4: '그외정당', 5: '무소속'}
매핑 후 1위 정당 분포 (처리 중):
득표_1위_정당
보수정당    26
Name: count, dtype: int64
카테고리별 후보자 수: {'보수정당': 1, '진보정당': 1, '그외정당': 2, '무소속': 1}
  보수정당_후보자수: 1
  진보정당_후보자수: 1
  그외정당_후보자수: 2
  무소속_후보자수: 1
요약 데이터 로드 완료: (26, 11)
모든 데이터가 성공적으로 병합되었습니다!
최종 데이터 형태: (26, 21)
1위 정당 분포:
득표_1위_정당
보수정당    26
Name: count, dtype: int64
=== seoul 지사 선거 데이터 처리 완료 ===

데이터프레임 저장: df_seoul (shape: (26, 21))



# Governor Election 8th

## Seoul

In [3]:
df_seoul = election_results['df_seoul']

In [4]:
df_seoul

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,서울특별시,합계,8378339,4455161,2608277,1733183,66459,9000,4416919,38242,...,2,0.5905,1,0.3924,보수정당,진보정당,1,1,2,1
1,서울특별시,종로구,129816,70657,40145,28327,1377,167,70016,641,...,2,0.5734,1,0.4046,보수정당,진보정당,1,1,2,1
2,서울특별시,중구,112039,60323,34866,23811,843,125,59645,678,...,2,0.5846,1,0.3992,보수정당,진보정당,1,1,2,1
3,서울특별시,용산구,199061,104787,67579,34614,1683,192,104068,719,...,2,0.6494,1,0.3326,보수정당,진보정당,1,1,2,1
4,서울특별시,성동구,251990,139761,84320,51996,1860,273,138449,1312,...,2,0.609,1,0.3756,보수정당,진보정당,1,1,2,1
5,서울특별시,광진구,305462,157005,90734,62217,2345,310,155606,1399,...,2,0.5831,1,0.3998,보수정당,진보정당,1,1,2,1
6,서울특별시,동대문구,302024,156964,89470,63157,2472,340,155439,1525,...,2,0.5756,1,0.4063,보수정당,진보정당,1,1,2,1
7,서울특별시,중랑구,348762,177701,96447,76660,2307,361,175775,1926,...,2,0.5487,1,0.4361,보수정당,진보정당,1,1,2,1
8,서울특별시,성북구,379123,202235,110962,85825,3297,413,200497,1738,...,2,0.5534,1,0.4281,보수정당,진보정당,1,1,2,1
9,서울특별시,강북구,268130,133390,70747,58962,1911,305,131925,1465,...,2,0.5363,1,0.4469,보수정당,진보정당,1,1,2,1


### preprocessing

In [5]:
df_seoul = (
    df_seoul
    .assign(시도=lambda df: df['시도'].replace('서울특별시', '서울'))
)

In [6]:
df_seoul.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [7]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_seoul.columns if col not in fixed_cols]

# 메소드 체이닝
df_seoul = (
    df_seoul
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [8]:
df_seoul

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,합계,2022,광역단체장,보수정당,진보정당,2,0.5905,1,0.3924,...,1,8378339,4455161,2608277,1733183,66459,9000,4416919,38242,3923178
1,서울,종로구,2022,광역단체장,보수정당,진보정당,2,0.5734,1,0.4046,...,1,129816,70657,40145,28327,1377,167,70016,641,59159
2,서울,중구,2022,광역단체장,보수정당,진보정당,2,0.5846,1,0.3992,...,1,112039,60323,34866,23811,843,125,59645,678,51716
3,서울,용산구,2022,광역단체장,보수정당,진보정당,2,0.6494,1,0.3326,...,1,199061,104787,67579,34614,1683,192,104068,719,94274
4,서울,성동구,2022,광역단체장,보수정당,진보정당,2,0.609,1,0.3756,...,1,251990,139761,84320,51996,1860,273,138449,1312,112229
5,서울,광진구,2022,광역단체장,보수정당,진보정당,2,0.5831,1,0.3998,...,1,305462,157005,90734,62217,2345,310,155606,1399,148457
6,서울,동대문구,2022,광역단체장,보수정당,진보정당,2,0.5756,1,0.4063,...,1,302024,156964,89470,63157,2472,340,155439,1525,145060
7,서울,중랑구,2022,광역단체장,보수정당,진보정당,2,0.5487,1,0.4361,...,1,348762,177701,96447,76660,2307,361,175775,1926,171061
8,서울,성북구,2022,광역단체장,보수정당,진보정당,2,0.5534,1,0.4281,...,1,379123,202235,110962,85825,3297,413,200497,1738,176888
9,서울,강북구,2022,광역단체장,보수정당,진보정당,2,0.5363,1,0.4469,...,1,268130,133390,70747,58962,1911,305,131925,1465,134740


### v4.1 ~ v4.3

In [9]:
# 1. 전체 데이터 저장
df_seoul.to_csv("temp4_1_governor_seoul_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_seoul2 = df_seoul.query("구시군 != '합계'")
df_seoul2.to_csv("temp4_2_governor_seoul_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_seoul3 = df_seoul.query("구시군 == '합계'").drop(columns="구시군")
df_seoul3.to_csv("temp4_3_governor_seoul_8.csv", index=False, encoding="utf-8-sig")

## Busan

In [10]:
df_busan = election_results['df_busan']

In [11]:
df_busan

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,부산광역시,합계,2916832,1432194,938601,455901,19733,0,1414235,17959,...,2,0.6637,1,0.3224,보수정당,진보정당,1,1,1,0
1,부산광역시,중구,38236,19127,12826,5730,268,0,18824,303,...,2,0.6814,1,0.3044,보수정당,진보정당,1,1,1,0
2,부산광역시,서구,93426,47053,31635,13898,701,0,46234,819,...,2,0.6842,1,0.3006,보수정당,진보정당,1,1,1,0
3,부산광역시,동구,80869,40832,27173,12337,562,0,40072,760,...,2,0.6781,1,0.3079,보수정당,진보정당,1,1,1,0
4,부산광역시,영도구,99395,50231,31130,17359,810,0,49299,932,...,2,0.6315,1,0.3521,보수정당,진보정당,1,1,1,0
5,부산광역시,부산진구,313025,149091,97155,48015,2184,0,147354,1737,...,2,0.6593,1,0.3258,보수정당,진보정당,1,1,1,0
6,부산광역시,동래구,234034,116790,77751,36349,1533,0,115633,1157,...,2,0.6724,1,0.3143,보수정당,진보정당,1,1,1,0
7,부산광역시,남구,227019,118507,76951,38576,1483,0,117010,1497,...,2,0.6576,1,0.3297,보수정당,진보정당,1,1,1,0
8,부산광역시,북구,245787,125984,80065,42927,1598,0,124590,1394,...,2,0.6426,1,0.3445,보수정당,진보정당,1,1,1,0
9,부산광역시,해운대구,337958,164774,110375,50629,2023,0,163027,1747,...,2,0.677,1,0.3106,보수정당,진보정당,1,1,1,0


### preprocessing

In [12]:
df_busan = (
    df_busan
    .assign(시도=lambda df: df['시도'].replace('부산광역시', '부산'))
)

In [13]:
df_busan.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [14]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_busan.columns if col not in fixed_cols]

# 메소드 체이닝
df_busan = (
    df_busan
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [15]:
df_busan

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,부산,합계,2022,광역단체장,보수정당,진보정당,2,0.6637,1,0.3224,...,0,2916832,1432194,938601,455901,19733,0,1414235,17959,1484638
1,부산,중구,2022,광역단체장,보수정당,진보정당,2,0.6814,1,0.3044,...,0,38236,19127,12826,5730,268,0,18824,303,19109
2,부산,서구,2022,광역단체장,보수정당,진보정당,2,0.6842,1,0.3006,...,0,93426,47053,31635,13898,701,0,46234,819,46373
3,부산,동구,2022,광역단체장,보수정당,진보정당,2,0.6781,1,0.3079,...,0,80869,40832,27173,12337,562,0,40072,760,40037
4,부산,영도구,2022,광역단체장,보수정당,진보정당,2,0.6315,1,0.3521,...,0,99395,50231,31130,17359,810,0,49299,932,49164
5,부산,부산진구,2022,광역단체장,보수정당,진보정당,2,0.6593,1,0.3258,...,0,313025,149091,97155,48015,2184,0,147354,1737,163934
6,부산,동래구,2022,광역단체장,보수정당,진보정당,2,0.6724,1,0.3143,...,0,234034,116790,77751,36349,1533,0,115633,1157,117244
7,부산,남구,2022,광역단체장,보수정당,진보정당,2,0.6576,1,0.3297,...,0,227019,118507,76951,38576,1483,0,117010,1497,108512
8,부산,북구,2022,광역단체장,보수정당,진보정당,2,0.6426,1,0.3445,...,0,245787,125984,80065,42927,1598,0,124590,1394,119803
9,부산,해운대구,2022,광역단체장,보수정당,진보정당,2,0.677,1,0.3106,...,0,337958,164774,110375,50629,2023,0,163027,1747,173184


### v4.1 ~ v4.3

In [16]:
# 1. 전체 데이터 저장
df_busan.to_csv("temp4_1_governor_busan_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_busan2 = df_busan.query("구시군 != '합계'")
df_busan2.to_csv("temp4_2_governor_busan_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_busan3 = df_busan.query("구시군 == '합계'").drop(columns="구시군")
df_busan3.to_csv("temp4_3_governor_busan_8.csv", index=False, encoding="utf-8-sig")

## Daegu

In [17]:
df_daegu = election_results['df_daegu']

In [18]:
df_daegu

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,대구광역시,합계,2044579,883141,685159,156429,28446,0,870034,13107,...,2,0.7875,1,0.1798,보수정당,진보정당,1,1,2,0
1,대구광역시,중구,68825,29536,22491,5610,1014,0,29115,421,...,2,0.7725,1,0.1927,보수정당,진보정당,1,1,2,0
2,대구광역시,동구,297073,131007,100305,24745,3899,0,128949,2058,...,2,0.7779,1,0.1919,보수정당,진보정당,1,1,2,0
3,대구광역시,서구,148238,66442,54059,8993,2107,0,65159,1283,...,2,0.8296,1,0.138,보수정당,진보정당,1,1,2,0
4,대구광역시,남구,129053,54686,43386,8593,1806,0,53785,901,...,2,0.8067,1,0.1598,보수정당,진보정당,1,1,2,0
5,대구광역시,북구,373423,160433,123952,28966,5220,0,158138,2295,...,2,0.7838,1,0.1832,보수정당,진보정당,1,1,2,0
6,대구광역시,수성구,349048,157513,120189,29601,5201,0,154991,2522,...,2,0.7755,1,0.191,보수정당,진보정당,1,1,2,0
7,대구광역시,달서구,464339,192191,149512,34133,6205,0,189850,2341,...,2,0.7875,1,0.1798,보수정당,진보정당,1,1,2,0
8,대구광역시,달성군,214580,91333,71265,15788,2994,0,90047,1286,...,2,0.7914,1,0.1753,보수정당,진보정당,1,1,2,0


### preprocessing

In [19]:
df_daegu = (
    df_daegu
    .assign(시도=lambda df: df['시도'].replace('대구광역시', '대구'))
)

In [20]:
df_daegu.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [21]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_daegu.columns if col not in fixed_cols]

# 메소드 체이닝
df_daegu = (
    df_daegu
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [22]:
df_daegu

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,대구,합계,2022,광역단체장,보수정당,진보정당,2,0.7875,1,0.1798,...,0,2044579,883141,685159,156429,28446,0,870034,13107,1161438
1,대구,중구,2022,광역단체장,보수정당,진보정당,2,0.7725,1,0.1927,...,0,68825,29536,22491,5610,1014,0,29115,421,39289
2,대구,동구,2022,광역단체장,보수정당,진보정당,2,0.7779,1,0.1919,...,0,297073,131007,100305,24745,3899,0,128949,2058,166066
3,대구,서구,2022,광역단체장,보수정당,진보정당,2,0.8296,1,0.138,...,0,148238,66442,54059,8993,2107,0,65159,1283,81796
4,대구,남구,2022,광역단체장,보수정당,진보정당,2,0.8067,1,0.1598,...,0,129053,54686,43386,8593,1806,0,53785,901,74367
5,대구,북구,2022,광역단체장,보수정당,진보정당,2,0.7838,1,0.1832,...,0,373423,160433,123952,28966,5220,0,158138,2295,212990
6,대구,수성구,2022,광역단체장,보수정당,진보정당,2,0.7755,1,0.191,...,0,349048,157513,120189,29601,5201,0,154991,2522,191535
7,대구,달서구,2022,광역단체장,보수정당,진보정당,2,0.7875,1,0.1798,...,0,464339,192191,149512,34133,6205,0,189850,2341,272148
8,대구,달성군,2022,광역단체장,보수정당,진보정당,2,0.7914,1,0.1753,...,0,214580,91333,71265,15788,2994,0,90047,1286,123247


### v4.1 ~ v4.3

In [23]:
# 1. 전체 데이터 저장
df_daegu.to_csv("temp4_1_governor_daegu_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_daegu2 = df_daegu.query("구시군 != '합계'")
df_daegu2.to_csv("temp4_2_governor_daegu_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_daegu3 = df_daegu.query("구시군 == '합계'").drop(columns="구시군")
df_daegu3.to_csv("temp4_3_governor_daegu_8.csv", index=False, encoding="utf-8-sig")

## Incheon

In [24]:
df_incheon = election_results['df_incheon']

In [25]:
df_incheon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,인천광역시,합계,2534338,1240469,634250,545885,45000,0,1225135,15334,...,2,0.5177,1,0.4456,보수정당,진보정당,1,1,2,0
1,인천광역시,중구,124974,57362,30190,24425,1910,0,56525,837,...,2,0.5341,1,0.4321,보수정당,진보정당,1,1,2,0
2,인천광역시,동구,52728,28942,15595,11592,1218,0,28405,537,...,2,0.549,1,0.4081,보수정당,진보정당,1,1,2,0
3,인천광역시,미추홀구,358612,160328,86217,66673,5377,0,158267,2061,...,2,0.5448,1,0.4213,보수정당,진보정당,1,1,2,0
4,인천광역시,연수구,317883,164269,90120,63175,9647,0,162942,1327,...,2,0.5531,1,0.3877,보수정당,진보정당,1,1,2,0
5,인천광역시,남동구,441226,212064,106526,96632,6520,0,209678,2386,...,2,0.508,1,0.4609,보수정당,진보정당,1,1,2,0
6,인천광역시,부평구,426463,202623,99177,94474,6693,0,200344,2279,...,2,0.495,1,0.4716,보수정당,진보정당,1,1,2,0
7,인천광역시,계양구,258156,144721,66117,72090,4891,0,143098,1623,...,1,0.5038,2,0.462,진보정당,보수정당,1,1,2,0
8,인천광역시,서구,472254,218379,107661,101408,6833,0,215902,2477,...,2,0.4987,1,0.4697,보수정당,진보정당,1,1,2,0
9,인천광역시,강화군,63147,39088,24697,11660,1446,0,37803,1285,...,2,0.6533,1,0.3084,보수정당,진보정당,1,1,2,0


### preprocessing

In [26]:
df_incheon = (
    df_incheon
    .assign(시도=lambda df: df['시도'].replace('인천광역시', '인천'))
)

In [27]:
df_incheon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [28]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_incheon.columns if col not in fixed_cols]

# 메소드 체이닝
df_incheon = (
    df_incheon
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [29]:
df_incheon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,인천,합계,2022,광역단체장,보수정당,진보정당,2,0.5177,1,0.4456,...,0,2534338,1240469,634250,545885,45000,0,1225135,15334,1293869
1,인천,중구,2022,광역단체장,보수정당,진보정당,2,0.5341,1,0.4321,...,0,124974,57362,30190,24425,1910,0,56525,837,67612
2,인천,동구,2022,광역단체장,보수정당,진보정당,2,0.549,1,0.4081,...,0,52728,28942,15595,11592,1218,0,28405,537,23786
3,인천,미추홀구,2022,광역단체장,보수정당,진보정당,2,0.5448,1,0.4213,...,0,358612,160328,86217,66673,5377,0,158267,2061,198284
4,인천,연수구,2022,광역단체장,보수정당,진보정당,2,0.5531,1,0.3877,...,0,317883,164269,90120,63175,9647,0,162942,1327,153614
5,인천,남동구,2022,광역단체장,보수정당,진보정당,2,0.508,1,0.4609,...,0,441226,212064,106526,96632,6520,0,209678,2386,229162
6,인천,부평구,2022,광역단체장,보수정당,진보정당,2,0.495,1,0.4716,...,0,426463,202623,99177,94474,6693,0,200344,2279,223840
7,인천,계양구,2022,광역단체장,진보정당,보수정당,1,0.5038,2,0.462,...,0,258156,144721,66117,72090,4891,0,143098,1623,113435
8,인천,서구,2022,광역단체장,보수정당,진보정당,2,0.4987,1,0.4697,...,0,472254,218379,107661,101408,6833,0,215902,2477,253875
9,인천,강화군,2022,광역단체장,보수정당,진보정당,2,0.6533,1,0.3084,...,0,63147,39088,24697,11660,1446,0,37803,1285,24059


### v4.1 ~ v4.3

In [30]:
# 1. 전체 데이터 저장
df_incheon.to_csv("temp4_1_governor_incheon_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_incheon2 = df_incheon.query("구시군 != '합계'")
df_incheon2.to_csv("temp4_2_governor_incheon_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_incheon3 = df_incheon.query("구시군 == '합계'").drop(columns="구시군")
df_incheon3.to_csv("temp4_3_governor_incheon_8.csv", index=False, encoding="utf-8-sig")

## Gwangju

In [31]:
df_gwangju = election_results['df_gwangju']

In [32]:
df_gwangju

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,광주광역시,합계,1206886,454516,71062,334699,24414,0,446770,7746,...,1,0.7492,2,0.1591,진보정당,보수정당,1,1,3,0
1,광주광역시,동구,90255,38373,6994,27775,1767,0,37733,640,...,1,0.7361,2,0.1854,진보정당,보수정당,1,1,3,0
2,광주광역시,서구,246841,97941,15552,71873,5373,0,96340,1601,...,1,0.746,2,0.1614,진보정당,보수정당,1,1,3,0
3,광주광역시,남구,180158,71496,11406,53512,3267,0,70323,1173,...,1,0.7609,2,0.1622,진보정당,보수정당,1,1,3,0
4,광주광역시,북구,363222,137865,21054,102468,7250,0,135603,2262,...,1,0.7556,2,0.1553,진보정당,보수정당,1,1,3,0
5,광주광역시,광산구,326410,108841,16056,79071,6757,0,106771,2070,...,1,0.7406,2,0.1504,진보정당,보수정당,1,1,3,0


### preprocessing

In [33]:
df_gwangju = (
    df_gwangju
    .assign(시도=lambda df: df['시도'].replace('광주광역시', '광주'))
)

In [34]:
df_gwangju.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [35]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_gwangju.columns if col not in fixed_cols]

# 메소드 체이닝
df_gwangju = (
    df_gwangju
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [36]:
df_gwangju

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,광주,합계,2022,광역단체장,진보정당,보수정당,1,0.7492,2,0.1591,...,0,1206886,454516,71062,334699,24414,0,446770,7746,752370
1,광주,동구,2022,광역단체장,진보정당,보수정당,1,0.7361,2,0.1854,...,0,90255,38373,6994,27775,1767,0,37733,640,51882
2,광주,서구,2022,광역단체장,진보정당,보수정당,1,0.746,2,0.1614,...,0,246841,97941,15552,71873,5373,0,96340,1601,148900
3,광주,남구,2022,광역단체장,진보정당,보수정당,1,0.7609,2,0.1622,...,0,180158,71496,11406,53512,3267,0,70323,1173,108662
4,광주,북구,2022,광역단체장,진보정당,보수정당,1,0.7556,2,0.1553,...,0,363222,137865,21054,102468,7250,0,135603,2262,225357
5,광주,광산구,2022,광역단체장,진보정당,보수정당,1,0.7406,2,0.1504,...,0,326410,108841,16056,79071,6757,0,106771,2070,217569


### v4.1 ~ v4.3

In [37]:
# 1. 전체 데이터 저장
df_gwangju.to_csv("temp4_1_governor_gwangju_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_gwangju2 = df_gwangju.query("구시군 != '합계'")
df_gwangju2.to_csv("temp4_2_governor_gwangju_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_gwangju3 = df_gwangju.query("구시군 == '합계'").drop(columns="구시군")
df_gwangju3.to_csv("temp4_3_governor_gwangju_8.csv", index=False, encoding="utf-8-sig")

## Daejeon

In [38]:
df_daejeon = election_results['df_daejeon']

In [39]:
df_daejeon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,대전광역시,합계,1233557,612639,310035,295555,0,0,605590,7049,...,2,0.512,1,0.488,보수정당,진보정당,1,1,0,0
1,대전광역시,동구,191882,94041,50120,42619,0,0,92739,1302,...,2,0.5404,1,0.4596,보수정당,진보정당,1,1,0,0
2,대전광역시,중구,199894,100343,53666,45434,0,0,99100,1243,...,2,0.5415,1,0.4585,보수정당,진보정당,1,1,0,0
3,대전광역시,서구,399035,192546,97149,93222,0,0,190371,2175,...,2,0.5103,1,0.4897,보수정당,진보정당,1,1,0,0
4,대전광역시,유성구,289980,149576,70109,78142,0,0,148251,1325,...,1,0.5271,2,0.4729,진보정당,보수정당,1,1,0,0
5,대전광역시,대덕구,152766,76133,38991,36138,0,0,75129,1004,...,2,0.519,1,0.481,보수정당,진보정당,1,1,0,0


### preprocessing

In [40]:
df_daejeon = (
    df_daejeon
    .assign(시도=lambda df: df['시도'].replace('대전광역시', '대전'))
)

In [41]:
df_daejeon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [42]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_daejeon.columns if col not in fixed_cols]

# 메소드 체이닝
df_daejeon = (
    df_daejeon
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [43]:
df_daejeon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,대전,합계,2022,광역단체장,보수정당,진보정당,2,0.512,1,0.488,...,0,1233557,612639,310035,295555,0,0,605590,7049,620918
1,대전,동구,2022,광역단체장,보수정당,진보정당,2,0.5404,1,0.4596,...,0,191882,94041,50120,42619,0,0,92739,1302,97841
2,대전,중구,2022,광역단체장,보수정당,진보정당,2,0.5415,1,0.4585,...,0,199894,100343,53666,45434,0,0,99100,1243,99551
3,대전,서구,2022,광역단체장,보수정당,진보정당,2,0.5103,1,0.4897,...,0,399035,192546,97149,93222,0,0,190371,2175,206489
4,대전,유성구,2022,광역단체장,진보정당,보수정당,1,0.5271,2,0.4729,...,0,289980,149576,70109,78142,0,0,148251,1325,140404
5,대전,대덕구,2022,광역단체장,보수정당,진보정당,2,0.519,1,0.481,...,0,152766,76133,38991,36138,0,0,75129,1004,76633


### v4.1 ~ v4.3

In [44]:
# 1. 전체 데이터 저장
df_daejeon.to_csv("temp4_1_governor_daejeon_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_daejeon2 = df_daejeon.query("구시군 != '합계'")
df_daejeon2.to_csv("temp4_2_governor_daejeon_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_daejeon3 = df_daejeon.query("구시군 == '합계'").drop(columns="구시군")
df_daejeon3.to_csv("temp4_3_governor_daejeon_8.csv", index=False, encoding="utf-8-sig")

## Ulsan

In [45]:
df_ulsan = election_results['df_ulsan']

In [46]:
df_ulsan

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,울산광역시,합계,941189,491866,290563,195430,0,0,485993,5873,...,2,0.5979,1,0.4021,보수정당,진보정당,1,1,0,0
1,울산광역시,중구,181055,98391,60730,36596,0,0,97326,1065,...,2,0.624,1,0.376,보수정당,진보정당,1,1,0,0
2,울산광역시,남구,267930,134765,84987,48425,0,0,133412,1353,...,2,0.637,1,0.363,보수정당,진보정당,1,1,0,0
3,울산광역시,동구,127369,70833,37751,32032,0,0,69783,1050,...,2,0.541,1,0.459,보수정당,진보정당,1,1,0,0
4,울산광역시,북구,175784,88502,46234,41248,0,0,87482,1020,...,2,0.5285,1,0.4715,보수정당,진보정당,1,1,0,0
5,울산광역시,울주군,189051,99375,60861,37129,0,0,97990,1385,...,2,0.6211,1,0.3789,보수정당,진보정당,1,1,0,0


### preprocessing

In [47]:
df_ulsan = (
    df_ulsan
    .assign(시도=lambda df: df['시도'].replace('울산광역시', '울산'))
)

In [48]:
df_ulsan.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [49]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_ulsan.columns if col not in fixed_cols]

# 메소드 체이닝
df_ulsan = (
    df_ulsan
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [50]:
df_ulsan

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,울산,합계,2022,광역단체장,보수정당,진보정당,2,0.5979,1,0.4021,...,0,941189,491866,290563,195430,0,0,485993,5873,449323
1,울산,중구,2022,광역단체장,보수정당,진보정당,2,0.624,1,0.376,...,0,181055,98391,60730,36596,0,0,97326,1065,82664
2,울산,남구,2022,광역단체장,보수정당,진보정당,2,0.637,1,0.363,...,0,267930,134765,84987,48425,0,0,133412,1353,133165
3,울산,동구,2022,광역단체장,보수정당,진보정당,2,0.541,1,0.459,...,0,127369,70833,37751,32032,0,0,69783,1050,56536
4,울산,북구,2022,광역단체장,보수정당,진보정당,2,0.5285,1,0.4715,...,0,175784,88502,46234,41248,0,0,87482,1020,87282
5,울산,울주군,2022,광역단체장,보수정당,진보정당,2,0.6211,1,0.3789,...,0,189051,99375,60861,37129,0,0,97990,1385,89676


### v4.1 ~ v4.3

In [51]:
# 1. 전체 데이터 저장
df_ulsan.to_csv("temp4_1_governor_ulsan_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_ulsan2 = df_ulsan.query("구시군 != '합계'")
df_ulsan2.to_csv("temp4_2_governor_ulsan_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_ulsan3 = df_ulsan.query("구시군 == '합계'").drop(columns="구시군")
df_ulsan3.to_csv("temp4_3_governor_ulsan_8.csv", index=False, encoding="utf-8-sig")

## Sejong

In [52]:
df_sejong = election_results['df_sejong']

In [53]:
df_sejong

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,세종특별자치시,합계,292259,149751,78415,69995,0,0,148410,1341,...,2,0.5284,1,0.4716,보수정당,진보정당,1,1,0,0


### preprocessing

In [54]:
df_sejong = (
    df_sejong
    .assign(시도=lambda df: df['시도'].replace('세종특별자치시', '세종'))
)

In [55]:
df_sejong.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [56]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_sejong.columns if col not in fixed_cols]

# 메소드 체이닝
df_sejong = (
    df_sejong
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [57]:
df_sejong

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,세종,합계,2022,광역단체장,보수정당,진보정당,2,0.5284,1,0.4716,...,0,292259,149751,78415,69995,0,0,148410,1341,142508


### v4.1 ~ v4.3

- 세종은 구,시,군이 없고 읍,면만 있어서 df_sejong2의 row가 없음

In [58]:
# 1. 전체 데이터 저장
df_sejong.to_csv("temp4_1_governor_sejong_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_sejong2 = df_sejong.query("구시군 != '합계'")
df_sejong2.to_csv("temp4_2_governor_sejong_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_sejong3 = df_sejong.query("구시군 == '합계'").drop(columns="구시군")
df_sejong3.to_csv("temp4_3_governor_sejong_8.csv", index=False, encoding="utf-8-sig")

## Gyeonggi

In [59]:
df_gyeonggi = election_results['df_gyeonggi']

In [60]:
df_gyeonggi

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경기도,합계,11497206,5820631,2818680,2827593,61778,54758,5762809,57822,...,1,0.4907,2,0.4891,진보정당,보수정당,1,1,3,1
1,경기도,수원시장안구,199204,106092,49817,53333,1039,869,105058,1034,...,1,0.5077,2,0.4742,진보정당,보수정당,1,1,3,1
2,경기도,수원시권선구,352407,173856,80304,88562,1808,1580,172254,1602,...,1,0.5141,2,0.4662,진보정당,보수정당,1,1,3,1
3,경기도,수원시팔달구,164086,79283,38931,37937,833,724,78425,858,...,2,0.4964,1,0.4837,보수정당,진보정당,1,1,3,1
4,경기도,수원시영통구,296856,160556,75184,81109,1577,1647,159517,1039,...,1,0.5085,2,0.4713,진보정당,보수정당,1,1,3,1
5,경기도,성남시수정구,208907,106922,49669,53684,1373,964,105690,1232,...,1,0.5079,2,0.4699,진보정당,보수정당,1,1,3,1
6,경기도,성남시중원구,185468,94477,41860,49268,1411,820,93359,1118,...,1,0.5277,2,0.4484,진보정당,보수정당,1,1,3,1
7,경기도,성남시분당구,404133,250683,140622,104254,1728,2677,249281,1402,...,2,0.5641,1,0.4182,보수정당,진보정당,1,1,3,1
8,경기도,의정부시,400177,187916,89731,92465,2084,1742,186022,1894,...,1,0.4971,2,0.4824,진보정당,보수정당,1,1,3,1
9,경기도,안양시만안구,208936,111050,53271,54630,1068,904,109873,1177,...,1,0.4972,2,0.4848,진보정당,보수정당,1,1,3,1


### preprocessing

In [61]:
df_gyeonggi = (
    df_gyeonggi
    .assign(시도=lambda df: df['시도'].replace('경기도', '경기'))
)

In [62]:
df_gyeonggi.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [63]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_gyeonggi.columns if col not in fixed_cols]

# 메소드 체이닝
df_gyeonggi = (
    df_gyeonggi
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [64]:
df_gyeonggi

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,경기,합계,2022,광역단체장,진보정당,보수정당,1,0.4907,2,0.4891,...,1,11497206,5820631,2818680,2827593,61778,54758,5762809,57822,5676575
1,경기,수원시장안구,2022,광역단체장,진보정당,보수정당,1,0.5077,2,0.4742,...,1,199204,106092,49817,53333,1039,869,105058,1034,93112
2,경기,수원시권선구,2022,광역단체장,진보정당,보수정당,1,0.5141,2,0.4662,...,1,352407,173856,80304,88562,1808,1580,172254,1602,178551
3,경기,수원시팔달구,2022,광역단체장,보수정당,진보정당,2,0.4964,1,0.4837,...,1,164086,79283,38931,37937,833,724,78425,858,84803
4,경기,수원시영통구,2022,광역단체장,진보정당,보수정당,1,0.5085,2,0.4713,...,1,296856,160556,75184,81109,1577,1647,159517,1039,136300
5,경기,성남시수정구,2022,광역단체장,진보정당,보수정당,1,0.5079,2,0.4699,...,1,208907,106922,49669,53684,1373,964,105690,1232,101985
6,경기,성남시중원구,2022,광역단체장,진보정당,보수정당,1,0.5277,2,0.4484,...,1,185468,94477,41860,49268,1411,820,93359,1118,90991
7,경기,성남시분당구,2022,광역단체장,보수정당,진보정당,2,0.5641,1,0.4182,...,1,404133,250683,140622,104254,1728,2677,249281,1402,153450
8,경기,의정부시,2022,광역단체장,진보정당,보수정당,1,0.4971,2,0.4824,...,1,400177,187916,89731,92465,2084,1742,186022,1894,212261
9,경기,안양시만안구,2022,광역단체장,진보정당,보수정당,1,0.4972,2,0.4848,...,1,208936,111050,53271,54630,1068,904,109873,1177,97886


### v4.1 ~ v4.3

In [65]:
# 1. 전체 데이터 저장
df_gyeonggi.to_csv("temp4_1_governor_gyeonggi_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_gyeonggi2 = df_gyeonggi.query("구시군 != '합계'")
df_gyeonggi2.to_csv("temp4_2_governor_gyeonggi_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_gyeonggi3 = df_gyeonggi.query("구시군 == '합계'").drop(columns="구시군")
df_gyeonggi3.to_csv("temp4_3_governor_gyeonggi_8.csv", index=False, encoding="utf-8-sig")

## Gangwon

In [66]:
df_gangwon = election_results['df_gangwon']

In [67]:
df_gangwon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,강원도,합계,1336080,772498,409461,347766,0,0,757227,15271,...,2,0.5407,1,0.4593,보수정당,진보정당,1,1,0,0
1,강원도,춘천시,244406,137765,72126,63714,0,0,135840,1925,...,2,0.531,1,0.469,보수정당,진보정당,1,1,0,0
2,강원도,원주시,304060,157538,77298,77841,0,0,155139,2399,...,1,0.5018,2,0.4982,진보정당,보수정당,1,1,0,0
3,강원도,강릉시,185804,101944,58225,42113,0,0,100338,1606,...,2,0.5803,1,0.4197,보수정당,진보정당,1,1,0,0
4,강원도,동해시,76886,42047,23731,17499,0,0,41230,817,...,2,0.5756,1,0.4244,보수정당,진보정당,1,1,0,0
5,강원도,삼척시,57023,37549,20819,15807,0,0,36626,923,...,2,0.5684,1,0.4316,보수정당,진보정당,1,1,0,0
6,강원도,태백시,35236,22538,11722,10288,0,0,22010,528,...,2,0.5326,1,0.4674,보수정당,진보정당,1,1,0,0
7,강원도,정선군,32001,22082,10767,10570,0,0,21337,745,...,2,0.5046,1,0.4954,보수정당,진보정당,1,1,0,0
8,강원도,속초시,71621,37094,20124,16266,0,0,36390,704,...,2,0.553,1,0.447,보수정당,진보정당,1,1,0,0
9,강원도,고성군,24776,16776,9024,7164,0,0,16188,588,...,2,0.5574,1,0.4426,보수정당,진보정당,1,1,0,0


### preprocessing

In [68]:
df_gangwon = (
    df_gangwon
    .assign(시도=lambda df: df['시도'].replace('강원도', '강원'))
)

In [69]:
df_gangwon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [70]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_gangwon.columns if col not in fixed_cols]

# 메소드 체이닝
df_gangwon = (
    df_gangwon
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [71]:
df_gangwon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,강원,합계,2022,광역단체장,보수정당,진보정당,2,0.5407,1,0.4593,...,0,1336080,772498,409461,347766,0,0,757227,15271,563582
1,강원,춘천시,2022,광역단체장,보수정당,진보정당,2,0.531,1,0.469,...,0,244406,137765,72126,63714,0,0,135840,1925,106641
2,강원,원주시,2022,광역단체장,진보정당,보수정당,1,0.5018,2,0.4982,...,0,304060,157538,77298,77841,0,0,155139,2399,146522
3,강원,강릉시,2022,광역단체장,보수정당,진보정당,2,0.5803,1,0.4197,...,0,185804,101944,58225,42113,0,0,100338,1606,83860
4,강원,동해시,2022,광역단체장,보수정당,진보정당,2,0.5756,1,0.4244,...,0,76886,42047,23731,17499,0,0,41230,817,34839
5,강원,삼척시,2022,광역단체장,보수정당,진보정당,2,0.5684,1,0.4316,...,0,57023,37549,20819,15807,0,0,36626,923,19474
6,강원,태백시,2022,광역단체장,보수정당,진보정당,2,0.5326,1,0.4674,...,0,35236,22538,11722,10288,0,0,22010,528,12698
7,강원,정선군,2022,광역단체장,보수정당,진보정당,2,0.5046,1,0.4954,...,0,32001,22082,10767,10570,0,0,21337,745,9919
8,강원,속초시,2022,광역단체장,보수정당,진보정당,2,0.553,1,0.447,...,0,71621,37094,20124,16266,0,0,36390,704,34527
9,강원,고성군,2022,광역단체장,보수정당,진보정당,2,0.5574,1,0.4426,...,0,24776,16776,9024,7164,0,0,16188,588,8000


### v4.1 ~ v4.3

In [72]:
# 1. 전체 데이터 저장
df_gangwon.to_csv("temp4_1_governor_gangwon_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_gangwon2 = df_gangwon.query("구시군 != '합계'")
df_gangwon2.to_csv("temp4_2_governor_gangwon_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_gangwon3 = df_gangwon.query("구시군 == '합계'").drop(columns="구시군")
df_gangwon3.to_csv("temp4_3_governor_gangwon_8.csv", index=False, encoding="utf-8-sig")

## Chungbuk

In [73]:
df_chungbuk = election_results['df_chungbuk']

In [74]:
df_chungbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,충청북도,합계,1368779,692324,395517,284166,0,0,679683,12641,...,2,0.5819,1,0.4181,보수정당,진보정당,1,1,0,0
1,충청북도,청주시상당구,163058,79612,45312,33296,0,0,78608,1004,...,2,0.5764,1,0.4236,보수정당,진보정당,1,1,0,0
2,충청북도,청주시서원구,165453,80317,45411,33934,0,0,79345,972,...,2,0.5723,1,0.4277,보수정당,진보정당,1,1,0,0
3,충청북도,청주시흥덕구,223618,98308,54369,42745,0,0,97114,1194,...,2,0.5598,1,0.4402,보수정당,진보정당,1,1,0,0
4,충청북도,청주시청원구,160395,72662,39209,32455,0,0,71664,998,...,2,0.5471,1,0.4529,보수정당,진보정당,1,1,0,0
5,충청북도,충주시,181044,89723,53640,34286,0,0,87926,1797,...,2,0.6101,1,0.3899,보수정당,진보정당,1,1,0,0
6,충청북도,제천시,115563,62806,37865,23887,0,0,61752,1054,...,2,0.6132,1,0.3868,보수정당,진보정당,1,1,0,0
7,충청북도,단양군,25692,17427,10779,6235,0,0,17014,413,...,2,0.6335,1,0.3665,보수정당,진보정당,1,1,0,0
8,충청북도,영동군,41123,27150,16251,10070,0,0,26321,829,...,2,0.6174,1,0.3826,보수정당,진보정당,1,1,0,0
9,충청북도,보은군,28963,19543,11692,7199,0,0,18891,652,...,2,0.6189,1,0.3811,보수정당,진보정당,1,1,0,0


### preprocessing

In [75]:
df_chungbuk = (
    df_chungbuk
    .assign(시도=lambda df: df['시도'].replace('충청북도', '충북'))
)

In [76]:
df_chungbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [77]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_chungbuk.columns if col not in fixed_cols]

# 메소드 체이닝
df_chungbuk = (
    df_chungbuk
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [78]:
df_chungbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,충북,합계,2022,광역단체장,보수정당,진보정당,2,0.5819,1,0.4181,...,0,1368779,692324,395517,284166,0,0,679683,12641,676455
1,충북,청주시상당구,2022,광역단체장,보수정당,진보정당,2,0.5764,1,0.4236,...,0,163058,79612,45312,33296,0,0,78608,1004,83446
2,충북,청주시서원구,2022,광역단체장,보수정당,진보정당,2,0.5723,1,0.4277,...,0,165453,80317,45411,33934,0,0,79345,972,85136
3,충북,청주시흥덕구,2022,광역단체장,보수정당,진보정당,2,0.5598,1,0.4402,...,0,223618,98308,54369,42745,0,0,97114,1194,125310
4,충북,청주시청원구,2022,광역단체장,보수정당,진보정당,2,0.5471,1,0.4529,...,0,160395,72662,39209,32455,0,0,71664,998,87733
5,충북,충주시,2022,광역단체장,보수정당,진보정당,2,0.6101,1,0.3899,...,0,181044,89723,53640,34286,0,0,87926,1797,91321
6,충북,제천시,2022,광역단체장,보수정당,진보정당,2,0.6132,1,0.3868,...,0,115563,62806,37865,23887,0,0,61752,1054,52757
7,충북,단양군,2022,광역단체장,보수정당,진보정당,2,0.6335,1,0.3665,...,0,25692,17427,10779,6235,0,0,17014,413,8265
8,충북,영동군,2022,광역단체장,보수정당,진보정당,2,0.6174,1,0.3826,...,0,41123,27150,16251,10070,0,0,26321,829,13973
9,충북,보은군,2022,광역단체장,보수정당,진보정당,2,0.6189,1,0.3811,...,0,28963,19543,11692,7199,0,0,18891,652,9420


### v4.1 ~ v4.3

In [79]:
# 1. 전체 데이터 저장
df_chungbuk.to_csv("temp4_1_governor_chungbuk_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_chungbuk2 = df_chungbuk.query("구시군 != '합계'")
df_chungbuk2.to_csv("temp4_2_governor_chungbuk_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_chungbuk3 = df_chungbuk.query("구시군 == '합계'").drop(columns="구시군")
df_chungbuk3.to_csv("temp4_3_governor_chungbuk_8.csv", index=False, encoding="utf-8-sig")

## Chungnam

In [80]:
df_chungnam = election_results['df_chungnam']

In [81]:
df_chungnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,충청남도,합계,1803096,898369,468658,401308,0,0,869966,28403,...,2,0.5387,1,0.4613,보수정당,진보정당,1,1,0,0
1,충청남도,천안시서북구,290313,122388,57890,62212,0,0,120102,2286,...,1,0.518,2,0.482,진보정당,보수정당,1,1,0,0
2,충청남도,천안시동남구,257709,109108,55733,51262,0,0,106995,2113,...,2,0.5209,1,0.4791,보수정당,진보정당,1,1,0,0
3,충청남도,공주시,91847,51461,28250,21615,0,0,49865,1596,...,2,0.5665,1,0.4335,보수정당,진보정당,1,1,0,0
4,충청남도,보령시,86264,52539,31729,18976,0,0,50705,1834,...,2,0.6258,1,0.3742,보수정당,진보정당,1,1,0,0
5,충청남도,아산시,268765,119352,59468,57114,0,0,116582,2770,...,2,0.5101,1,0.4899,보수정당,진보정당,1,1,0,0
6,충청남도,서산시,148744,72245,37456,32258,0,0,69714,2531,...,2,0.5373,1,0.4627,보수정당,진보정당,1,1,0,0
7,충청남도,태안군,55435,34715,18185,14522,0,0,32707,2008,...,2,0.556,1,0.444,보수정당,진보정당,1,1,0,0
8,충청남도,금산군,44747,28678,14512,12853,0,0,27365,1313,...,2,0.5303,1,0.4697,보수정당,진보정당,1,1,0,0
9,충청남도,논산시,99942,53814,27506,24238,0,0,51744,2070,...,2,0.5316,1,0.4684,보수정당,진보정당,1,1,0,0


### preprocessing

In [82]:
df_chungnam = (
    df_chungnam
    .assign(시도=lambda df: df['시도'].replace('충청남도', '충남'))
)

In [83]:
df_chungnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [84]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_chungnam.columns if col not in fixed_cols]

# 메소드 체이닝
df_chungnam = (
    df_chungnam
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [85]:
df_chungnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,충남,합계,2022,광역단체장,보수정당,진보정당,2,0.5387,1,0.4613,...,0,1803096,898369,468658,401308,0,0,869966,28403,904727
1,충남,천안시서북구,2022,광역단체장,진보정당,보수정당,1,0.518,2,0.482,...,0,290313,122388,57890,62212,0,0,120102,2286,167925
2,충남,천안시동남구,2022,광역단체장,보수정당,진보정당,2,0.5209,1,0.4791,...,0,257709,109108,55733,51262,0,0,106995,2113,148601
3,충남,공주시,2022,광역단체장,보수정당,진보정당,2,0.5665,1,0.4335,...,0,91847,51461,28250,21615,0,0,49865,1596,40386
4,충남,보령시,2022,광역단체장,보수정당,진보정당,2,0.6258,1,0.3742,...,0,86264,52539,31729,18976,0,0,50705,1834,33725
5,충남,아산시,2022,광역단체장,보수정당,진보정당,2,0.5101,1,0.4899,...,0,268765,119352,59468,57114,0,0,116582,2770,149413
6,충남,서산시,2022,광역단체장,보수정당,진보정당,2,0.5373,1,0.4627,...,0,148744,72245,37456,32258,0,0,69714,2531,76499
7,충남,태안군,2022,광역단체장,보수정당,진보정당,2,0.556,1,0.444,...,0,55435,34715,18185,14522,0,0,32707,2008,20720
8,충남,금산군,2022,광역단체장,보수정당,진보정당,2,0.5303,1,0.4697,...,0,44747,28678,14512,12853,0,0,27365,1313,16069
9,충남,논산시,2022,광역단체장,보수정당,진보정당,2,0.5316,1,0.4684,...,0,99942,53814,27506,24238,0,0,51744,2070,46128


### v4.1 ~ v4.3

In [86]:
# 1. 전체 데이터 저장
df_chungnam.to_csv("temp4_1_governor_chungnam_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_chungnam2 = df_chungnam.query("구시군 != '합계'")
df_chungnam2.to_csv("temp4_2_governor_chungnam_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_chungnam3 = df_chungnam.query("구시군 == '합계'").drop(columns="구시군")
df_chungnam3.to_csv("temp4_3_governor_chungnam_8.csv", index=False, encoding="utf-8-sig")

## Jeonbuk

In [87]:
df_jeonbuk = election_results['df_jeonbuk']

In [88]:
df_jeonbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,전라북도,합계,1532133,745354,128828,591510,0,0,720338,25016,...,1,0.8212,2,0.1788,진보정당,보수정당,1,1,0,0
1,전라북도,전주시완산구,311508,125313,23509,99377,0,0,122886,2427,...,1,0.8087,2,0.1913,진보정당,보수정당,1,1,0,0
2,전라북도,전주시덕진구,238934,96938,19377,75741,0,0,95118,1820,...,1,0.7963,2,0.2037,진보정당,보수정당,1,1,0,0
3,전라북도,군산시,224926,87125,12003,73109,0,0,85112,2013,...,1,0.859,2,0.141,진보정당,보수정당,1,1,0,0
4,전라북도,익산시,239077,107245,20682,83134,0,0,103816,3429,...,1,0.8008,2,0.1992,진보정당,보수정당,1,1,0,0
5,전라북도,정읍시,93307,54283,7960,44026,0,0,51986,2297,...,1,0.8469,2,0.1531,진보정당,보수정당,1,1,0,0
6,전라북도,남원시,69007,44516,7046,35477,0,0,42523,1993,...,1,0.8343,2,0.1657,진보정당,보수정당,1,1,0,0
7,전라북도,김제시,72358,42668,7114,33765,0,0,40879,1789,...,1,0.826,2,0.174,진보정당,보수정당,1,1,0,0
8,전라북도,완주군,78284,41408,7144,32573,0,0,39717,1691,...,1,0.8201,2,0.1799,진보정당,보수정당,1,1,0,0
9,전라북도,진안군,22634,16616,2796,13062,0,0,15858,758,...,1,0.8237,2,0.1763,진보정당,보수정당,1,1,0,0


### preprocessing

In [89]:
df_jeonbuk = (
    df_jeonbuk
    .assign(시도=lambda df: df['시도'].replace('전라북도', '전북'))
)

In [90]:
df_jeonbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [91]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_jeonbuk.columns if col not in fixed_cols]

# 메소드 체이닝
df_jeonbuk = (
    df_jeonbuk
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [92]:
df_jeonbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,전북,합계,2022,광역단체장,진보정당,보수정당,1,0.8212,2,0.1788,...,0,1532133,745354,128828,591510,0,0,720338,25016,786779
1,전북,전주시완산구,2022,광역단체장,진보정당,보수정당,1,0.8087,2,0.1913,...,0,311508,125313,23509,99377,0,0,122886,2427,186195
2,전북,전주시덕진구,2022,광역단체장,진보정당,보수정당,1,0.7963,2,0.2037,...,0,238934,96938,19377,75741,0,0,95118,1820,141996
3,전북,군산시,2022,광역단체장,진보정당,보수정당,1,0.859,2,0.141,...,0,224926,87125,12003,73109,0,0,85112,2013,137801
4,전북,익산시,2022,광역단체장,진보정당,보수정당,1,0.8008,2,0.1992,...,0,239077,107245,20682,83134,0,0,103816,3429,131832
5,전북,정읍시,2022,광역단체장,진보정당,보수정당,1,0.8469,2,0.1531,...,0,93307,54283,7960,44026,0,0,51986,2297,39024
6,전북,남원시,2022,광역단체장,진보정당,보수정당,1,0.8343,2,0.1657,...,0,69007,44516,7046,35477,0,0,42523,1993,24491
7,전북,김제시,2022,광역단체장,진보정당,보수정당,1,0.826,2,0.174,...,0,72358,42668,7114,33765,0,0,40879,1789,29690
8,전북,완주군,2022,광역단체장,진보정당,보수정당,1,0.8201,2,0.1799,...,0,78284,41408,7144,32573,0,0,39717,1691,36876
9,전북,진안군,2022,광역단체장,진보정당,보수정당,1,0.8237,2,0.1763,...,0,22634,16616,2796,13062,0,0,15858,758,6018


### v4.1 ~ v4.3

In [93]:
# 1. 전체 데이터 저장
df_jeonbuk.to_csv("temp4_1_governor_jeonbuk_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_jeonbuk2 = df_jeonbuk.query("구시군 != '합계'")
df_jeonbuk2.to_csv("temp4_2_governor_jeonbuk_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_jeonbuk3 = df_jeonbuk.query("구시군 == '합계'").drop(columns="구시군")
df_jeonbuk3.to_csv("temp4_3_governor_jeonbuk_8.csv", index=False, encoding="utf-8-sig")

## Jeonnam

In [94]:
df_jeonnam = election_results['df_jeonnam']

In [95]:
df_jeonnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,전라남도,합계,1580098,923347,167020,672433,48336,0,887789,35558,...,1,0.7574,2,0.1881,진보정당,보수정당,1,1,1,0
1,전라남도,목포시,183412,96968,13045,76408,4489,0,93942,3026,...,1,0.8134,2,0.1389,진보정당,보수정당,1,1,1,0
2,전라남도,여수시,236881,109135,16912,85684,4180,0,106776,2359,...,1,0.8025,2,0.1584,진보정당,보수정당,1,1,1,0
3,전라남도,순천시,235432,128163,40082,79776,5472,0,125330,2833,...,1,0.6365,2,0.3198,진보정당,보수정당,1,1,1,0
4,전라남도,나주시,98951,53036,8638,39317,2984,0,50939,2097,...,1,0.7718,2,0.1696,진보정당,보수정당,1,1,1,0
5,전라남도,광양시,126604,69108,15322,47884,4079,0,67285,1823,...,1,0.7117,2,0.2277,진보정당,보수정당,1,1,1,0
6,전라남도,담양군,41720,26611,4358,19641,1615,0,25614,997,...,1,0.7668,2,0.1701,진보정당,보수정당,1,1,1,0
7,전라남도,장성군,38470,26499,3946,19721,1429,0,25096,1403,...,1,0.7858,2,0.1572,진보정당,보수정당,1,1,1,0
8,전라남도,곡성군,25196,18643,7225,9549,859,0,17633,1010,...,1,0.5415,2,0.4097,진보정당,보수정당,1,1,1,0
9,전라남도,구례군,22848,17417,3662,11783,1095,0,16540,877,...,1,0.7124,2,0.2214,진보정당,보수정당,1,1,1,0


### preprocessing

In [96]:
df_jeonnam = (
    df_jeonnam
    .assign(시도=lambda df: df['시도'].replace('전라남도', '전남'))
)

In [97]:
df_jeonnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [98]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_jeonnam.columns if col not in fixed_cols]

# 메소드 체이닝
df_jeonnam = (
    df_jeonnam
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [99]:
df_jeonnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,전남,합계,2022,광역단체장,진보정당,보수정당,1,0.7574,2,0.1881,...,0,1580098,923347,167020,672433,48336,0,887789,35558,656751
1,전남,목포시,2022,광역단체장,진보정당,보수정당,1,0.8134,2,0.1389,...,0,183412,96968,13045,76408,4489,0,93942,3026,86444
2,전남,여수시,2022,광역단체장,진보정당,보수정당,1,0.8025,2,0.1584,...,0,236881,109135,16912,85684,4180,0,106776,2359,127746
3,전남,순천시,2022,광역단체장,진보정당,보수정당,1,0.6365,2,0.3198,...,0,235432,128163,40082,79776,5472,0,125330,2833,107269
4,전남,나주시,2022,광역단체장,진보정당,보수정당,1,0.7718,2,0.1696,...,0,98951,53036,8638,39317,2984,0,50939,2097,45915
5,전남,광양시,2022,광역단체장,진보정당,보수정당,1,0.7117,2,0.2277,...,0,126604,69108,15322,47884,4079,0,67285,1823,57496
6,전남,담양군,2022,광역단체장,진보정당,보수정당,1,0.7668,2,0.1701,...,0,41720,26611,4358,19641,1615,0,25614,997,15109
7,전남,장성군,2022,광역단체장,진보정당,보수정당,1,0.7858,2,0.1572,...,0,38470,26499,3946,19721,1429,0,25096,1403,11971
8,전남,곡성군,2022,광역단체장,진보정당,보수정당,1,0.5415,2,0.4097,...,0,25196,18643,7225,9549,859,0,17633,1010,6553
9,전남,구례군,2022,광역단체장,진보정당,보수정당,1,0.7124,2,0.2214,...,0,22848,17417,3662,11783,1095,0,16540,877,5431


### v4.1 ~ v4.3

In [100]:
# 1. 전체 데이터 저장
df_jeonnam.to_csv("temp4_1_governor_jeonnam_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_jeonnam2 = df_jeonnam.query("구시군 != '합계'")
df_jeonnam2.to_csv("temp4_2_governor_jeonnam_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_jeonnam3 = df_jeonnam.query("구시군 == '합계'").drop(columns="구시군")
df_jeonnam3.to_csv("temp4_3_governor_jeonnam_8.csv", index=False, encoding="utf-8-sig")

## Gyeongbuk

In [101]:
df_gyeongbuk = election_results['df_gyeongbuk']

In [102]:
df_gyeongbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경상북도,합계,2268707,1194595,904675,255775,0,0,1160450,34145,...,2,0.7796,1,0.2204,보수정당,진보정당,1,1,0,0
1,경상북도,포항시북구,230960,105786,80784,23150,0,0,103934,1852,...,2,0.7773,1,0.2227,보수정당,진보정당,1,1,0,0
2,경상북도,포항시남구,196727,90434,68137,20195,0,0,88332,2102,...,2,0.7714,1,0.2286,보수정당,진보정당,1,1,0,0
3,경상북도,울릉군,8339,6795,5270,1217,0,0,6487,308,...,2,0.8124,1,0.1876,보수정당,진보정당,1,1,0,0
4,경상북도,경주시,220490,109647,83997,22875,0,0,106872,2775,...,2,0.786,1,0.214,보수정당,진보정당,1,1,0,0
5,경상북도,김천시,120471,67277,51776,13915,0,0,65691,1586,...,2,0.7882,1,0.2118,보수정당,진보정당,1,1,0,0
6,경상북도,안동시,135862,74540,51965,20435,0,0,72400,2140,...,2,0.7177,1,0.2823,보수정당,진보정당,1,1,0,0
7,경상북도,구미시,337510,144547,108033,34597,0,0,142630,1917,...,2,0.7574,1,0.2426,보수정당,진보정당,1,1,0,0
8,경상북도,영주시,89061,56297,41642,12783,0,0,54425,1872,...,2,0.7651,1,0.2349,보수정당,진보정당,1,1,0,0
9,경상북도,영천시,90932,51707,40517,9328,0,0,49845,1862,...,2,0.8129,1,0.1871,보수정당,진보정당,1,1,0,0


### preprocessing

In [103]:
df_gyeongbuk = (
    df_gyeongbuk
    .assign(시도=lambda df: df['시도'].replace('경상북도', '경북'))
)

In [104]:
df_gyeongbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [105]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_gyeongbuk.columns if col not in fixed_cols]

# 메소드 체이닝
df_gyeongbuk = (
    df_gyeongbuk
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [106]:
df_gyeongbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,경북,합계,2022,광역단체장,보수정당,진보정당,2,0.7796,1,0.2204,...,0,2268707,1194595,904675,255775,0,0,1160450,34145,1074112
1,경북,포항시북구,2022,광역단체장,보수정당,진보정당,2,0.7773,1,0.2227,...,0,230960,105786,80784,23150,0,0,103934,1852,125174
2,경북,포항시남구,2022,광역단체장,보수정당,진보정당,2,0.7714,1,0.2286,...,0,196727,90434,68137,20195,0,0,88332,2102,106293
3,경북,울릉군,2022,광역단체장,보수정당,진보정당,2,0.8124,1,0.1876,...,0,8339,6795,5270,1217,0,0,6487,308,1544
4,경북,경주시,2022,광역단체장,보수정당,진보정당,2,0.786,1,0.214,...,0,220490,109647,83997,22875,0,0,106872,2775,110843
5,경북,김천시,2022,광역단체장,보수정당,진보정당,2,0.7882,1,0.2118,...,0,120471,67277,51776,13915,0,0,65691,1586,53194
6,경북,안동시,2022,광역단체장,보수정당,진보정당,2,0.7177,1,0.2823,...,0,135862,74540,51965,20435,0,0,72400,2140,61322
7,경북,구미시,2022,광역단체장,보수정당,진보정당,2,0.7574,1,0.2426,...,0,337510,144547,108033,34597,0,0,142630,1917,192963
8,경북,영주시,2022,광역단체장,보수정당,진보정당,2,0.7651,1,0.2349,...,0,89061,56297,41642,12783,0,0,54425,1872,32764
9,경북,영천시,2022,광역단체장,보수정당,진보정당,2,0.8129,1,0.1871,...,0,90932,51707,40517,9328,0,0,49845,1862,39225


### v4.1 ~ v4.3

In [107]:
# 1. 전체 데이터 저장
df_gyeongbuk.to_csv("temp4_1_governor_gyeongbuk_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_gyeongbuk2 = df_gyeongbuk.query("구시군 != '합계'")
df_gyeongbuk2.to_csv("temp4_2_governor_gyeongbuk_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_gyeongbuk3 = df_gyeongbuk.query("구시군 == '합계'").drop(columns="구시군")
df_gyeongbuk3.to_csv("temp4_3_governor_gyeongbuk_8.csv", index=False, encoding="utf-8-sig")

## Gyeongnam

In [108]:
df_gyeongnam = election_results['df_gyeongnam']

In [109]:
df_gyeongnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경상남도,합계,2804287,1497400,963473,431569,71286,0,1466328,31072,...,2,0.6571,1,0.2943,보수정당,진보정당,1,1,2,0
1,경상남도,창원시의창구,185184,94824,64989,23944,4736,0,93669,1155,...,2,0.6938,1,0.2556,보수정당,진보정당,1,1,2,0
2,경상남도,창원시성산구,212811,112825,67977,29902,13935,0,111814,1011,...,2,0.6079,1,0.2674,보수정당,진보정당,1,1,2,0
3,경상남도,창원시마산합포구,156850,83924,59943,19970,2747,0,82660,1264,...,2,0.7252,1,0.2416,보수정당,진보정당,1,1,2,0
4,경상남도,창원시마산회원구,159983,85597,59376,22063,2925,0,84364,1233,...,2,0.7038,1,0.2615,보수정당,진보정당,1,1,2,0
5,경상남도,창원시진해구,159730,75210,48889,22186,2985,0,74060,1150,...,2,0.6601,1,0.2996,보수정당,진보정당,1,1,2,0
6,경상남도,진주시,292168,155185,104561,42182,5811,0,152554,2631,...,2,0.6854,1,0.2765,보수정당,진보정당,1,1,2,0
7,경상남도,통영시,106064,61244,40307,18160,1554,0,60021,1223,...,2,0.6715,1,0.3026,보수정당,진보정당,1,1,2,0
8,경상남도,고성군,44745,30306,20130,7918,1270,0,29318,988,...,2,0.6866,1,0.2701,보수정당,진보정당,1,1,2,0
9,경상남도,사천시,93946,55534,38414,12887,2720,0,54021,1513,...,2,0.7111,1,0.2386,보수정당,진보정당,1,1,2,0


### preprocessing

In [110]:
df_gyeongnam = (
    df_gyeongnam
    .assign(시도=lambda df: df['시도'].replace('경상남도', '경남'))
)

In [111]:
df_gyeongnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [112]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_gyeongnam.columns if col not in fixed_cols]

# 메소드 체이닝
df_gyeongnam = (
    df_gyeongnam
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [113]:
df_gyeongnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,경남,합계,2022,광역단체장,보수정당,진보정당,2,0.6571,1,0.2943,...,0,2804287,1497400,963473,431569,71286,0,1466328,31072,1306887
1,경남,창원시의창구,2022,광역단체장,보수정당,진보정당,2,0.6938,1,0.2556,...,0,185184,94824,64989,23944,4736,0,93669,1155,90360
2,경남,창원시성산구,2022,광역단체장,보수정당,진보정당,2,0.6079,1,0.2674,...,0,212811,112825,67977,29902,13935,0,111814,1011,99986
3,경남,창원시마산합포구,2022,광역단체장,보수정당,진보정당,2,0.7252,1,0.2416,...,0,156850,83924,59943,19970,2747,0,82660,1264,72926
4,경남,창원시마산회원구,2022,광역단체장,보수정당,진보정당,2,0.7038,1,0.2615,...,0,159983,85597,59376,22063,2925,0,84364,1233,74386
5,경남,창원시진해구,2022,광역단체장,보수정당,진보정당,2,0.6601,1,0.2996,...,0,159730,75210,48889,22186,2985,0,74060,1150,84520
6,경남,진주시,2022,광역단체장,보수정당,진보정당,2,0.6854,1,0.2765,...,0,292168,155185,104561,42182,5811,0,152554,2631,136983
7,경남,통영시,2022,광역단체장,보수정당,진보정당,2,0.6715,1,0.3026,...,0,106064,61244,40307,18160,1554,0,60021,1223,44820
8,경남,고성군,2022,광역단체장,보수정당,진보정당,2,0.6866,1,0.2701,...,0,44745,30306,20130,7918,1270,0,29318,988,14439
9,경남,사천시,2022,광역단체장,보수정당,진보정당,2,0.7111,1,0.2386,...,0,93946,55534,38414,12887,2720,0,54021,1513,38412


### v4.1 ~ v4.3

In [114]:
# 1. 전체 데이터 저장
df_gyeongnam.to_csv("temp4_1_governor_gyeongnam_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_gyeongnam2 = df_gyeongnam.query("구시군 != '합계'")
df_gyeongnam2.to_csv("temp4_2_governor_gyeongnam_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_gyeongnam3 = df_gyeongnam.query("구시군 == '합계'").drop(columns="구시군")
df_gyeongnam3.to_csv("temp4_3_governor_gyeongnam_8.csv", index=False, encoding="utf-8-sig")

## Jeju

In [115]:
df_jeju = election_results['df_jeju']

In [116]:
df_jeju

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,제주특별자치도,합계,565084,300139,116786,163116,5750,10138,295790,4349,...,1,0.5515,2,0.3948,진보정당,보수정당,1,1,1,1
1,제주특별자치도,제주시,409110,216419,83539,118762,4389,6878,213568,2851,...,1,0.5561,2,0.3912,진보정당,보수정당,1,1,1,1
2,제주특별자치도,서귀포시,155974,83720,33247,44354,1361,3260,82222,1498,...,1,0.5394,2,0.4044,진보정당,보수정당,1,1,1,1


### preprocessing

In [117]:
df_jeju = (
    df_jeju
    .assign(시도=lambda df: df['시도'].replace('제주특별자치도', '제주'))
)

In [118]:
df_jeju.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [119]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_jeju.columns if col not in fixed_cols]

# 메소드 체이닝
df_jeju = (
    df_jeju
    .assign(선거종류='광역단체장', 선거년도='2022')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [120]:
df_jeju

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,제주,합계,2022,광역단체장,진보정당,보수정당,1,0.5515,2,0.3948,...,1,565084,300139,116786,163116,5750,10138,295790,4349,264945
1,제주,제주시,2022,광역단체장,진보정당,보수정당,1,0.5561,2,0.3912,...,1,409110,216419,83539,118762,4389,6878,213568,2851,192691
2,제주,서귀포시,2022,광역단체장,진보정당,보수정당,1,0.5394,2,0.4044,...,1,155974,83720,33247,44354,1361,3260,82222,1498,72254


### v4.1 ~ v4.3

In [121]:
# 1. 전체 데이터 저장
df_jeju.to_csv("temp4_1_governor_jeju_8.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_jeju2 = df_jeju.query("구시군 != '합계'")
df_jeju2.to_csv("temp4_2_governor_jeju_8.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_jeju3 = df_jeju.query("구시군 == '합계'").drop(columns="구시군")
df_jeju3.to_csv("temp4_3_governor_jeju_8.csv", index=False, encoding="utf-8-sig")

## Merge

### v4.1

In [122]:
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'sejong', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

df_combined = (
    pd.concat([globals()[f'df_{region}'] for region in AVAILABLE_REGIONS], ignore_index=True)
)

In [123]:
df_combined

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,합계,2022,광역단체장,보수정당,진보정당,2,0.5905,1,0.3924,...,1,8378339,4455161,2608277,1733183,66459,9000,4416919,38242,3923178
1,서울,종로구,2022,광역단체장,보수정당,진보정당,2,0.5734,1,0.4046,...,1,129816,70657,40145,28327,1377,167,70016,641,59159
2,서울,중구,2022,광역단체장,보수정당,진보정당,2,0.5846,1,0.3992,...,1,112039,60323,34866,23811,843,125,59645,678,51716
3,서울,용산구,2022,광역단체장,보수정당,진보정당,2,0.6494,1,0.3326,...,1,199061,104787,67579,34614,1683,192,104068,719,94274
4,서울,성동구,2022,광역단체장,보수정당,진보정당,2,0.6090,1,0.3756,...,1,251990,139761,84320,51996,1860,273,138449,1312,112229
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,경남,거창군,2022,광역단체장,보수정당,진보정당,2,0.7140,1,0.2262,...,0,52803,34758,23781,7534,1992,0,33307,1451,18045
262,경남,합천군,2022,광역단체장,보수정당,진보정당,2,0.7685,1,0.1727,...,0,39435,27363,19928,4479,1525,0,25932,1431,12072
263,제주,합계,2022,광역단체장,진보정당,보수정당,1,0.5515,2,0.3948,...,1,565084,300139,116786,163116,5750,10138,295790,4349,264945
264,제주,제주시,2022,광역단체장,진보정당,보수정당,1,0.5561,2,0.3912,...,1,409110,216419,83539,118762,4389,6878,213568,2851,192691


In [124]:
df_combined.to_csv("temp4_1_governor_8.csv", index=False, encoding="utf-8-sig")

### v4.2

In [125]:
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'sejong', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]
df_combined2 = (
    pd.concat([globals()[f'df_{region}2'] for region in AVAILABLE_REGIONS], ignore_index=True)
)

In [126]:
df_combined2

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,종로구,2022,광역단체장,보수정당,진보정당,2,0.5734,1,0.4046,...,1,129816,70657,40145,28327,1377,167,70016,641,59159
1,서울,중구,2022,광역단체장,보수정당,진보정당,2,0.5846,1,0.3992,...,1,112039,60323,34866,23811,843,125,59645,678,51716
2,서울,용산구,2022,광역단체장,보수정당,진보정당,2,0.6494,1,0.3326,...,1,199061,104787,67579,34614,1683,192,104068,719,94274
3,서울,성동구,2022,광역단체장,보수정당,진보정당,2,0.6090,1,0.3756,...,1,251990,139761,84320,51996,1860,273,138449,1312,112229
4,서울,광진구,2022,광역단체장,보수정당,진보정당,2,0.5831,1,0.3998,...,1,305462,157005,90734,62217,2345,310,155606,1399,148457
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244,경남,산청군,2022,광역단체장,보수정당,진보정당,2,0.6973,1,0.2372,...,0,31488,21927,14644,4982,1375,0,21001,926,9561
245,경남,거창군,2022,광역단체장,보수정당,진보정당,2,0.7140,1,0.2262,...,0,52803,34758,23781,7534,1992,0,33307,1451,18045
246,경남,합천군,2022,광역단체장,보수정당,진보정당,2,0.7685,1,0.1727,...,0,39435,27363,19928,4479,1525,0,25932,1431,12072
247,제주,제주시,2022,광역단체장,진보정당,보수정당,1,0.5561,2,0.3912,...,1,409110,216419,83539,118762,4389,6878,213568,2851,192691


In [127]:
df_combined2.to_csv("temp4_2_governor_8.csv", index=False, encoding="utf-8-sig")

### v4.3

In [128]:
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'sejong', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

df_combined3 = (
    pd.concat([globals()[f'df_{region}3'] for region in AVAILABLE_REGIONS], ignore_index=True)
)

In [129]:
df_combined3

Unnamed: 0,지역,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,보수정당_후보자수,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,2022,광역단체장,보수정당,진보정당,2,0.5905,1,0.3924,1,...,1,8378339,4455161,2608277,1733183,66459,9000,4416919,38242,3923178
1,부산,2022,광역단체장,보수정당,진보정당,2,0.6637,1,0.3224,1,...,0,2916832,1432194,938601,455901,19733,0,1414235,17959,1484638
2,대구,2022,광역단체장,보수정당,진보정당,2,0.7875,1,0.1798,1,...,0,2044579,883141,685159,156429,28446,0,870034,13107,1161438
3,인천,2022,광역단체장,보수정당,진보정당,2,0.5177,1,0.4456,1,...,0,2534338,1240469,634250,545885,45000,0,1225135,15334,1293869
4,광주,2022,광역단체장,진보정당,보수정당,1,0.7492,2,0.1591,1,...,0,1206886,454516,71062,334699,24414,0,446770,7746,752370
5,대전,2022,광역단체장,보수정당,진보정당,2,0.512,1,0.488,1,...,0,1233557,612639,310035,295555,0,0,605590,7049,620918
6,울산,2022,광역단체장,보수정당,진보정당,2,0.5979,1,0.4021,1,...,0,941189,491866,290563,195430,0,0,485993,5873,449323
7,세종,2022,광역단체장,보수정당,진보정당,2,0.5284,1,0.4716,1,...,0,292259,149751,78415,69995,0,0,148410,1341,142508
8,경기,2022,광역단체장,진보정당,보수정당,1,0.4907,2,0.4891,1,...,1,11497206,5820631,2818680,2827593,61778,54758,5762809,57822,5676575
9,강원,2022,광역단체장,보수정당,진보정당,2,0.5407,1,0.4593,1,...,0,1336080,772498,409461,347766,0,0,757227,15271,563582


In [130]:
df_combined3.to_csv("temp4_3_governor_8.csv", index=False, encoding="utf-8-sig")

# Batch CSV Files to ZIP

In [131]:
import zipfile
import glob

# Find all CSV files in current directory
csv_files = glob.glob('*.csv')

# Create ZIP file
with zipfile.ZipFile('all_csv_files.zip', 'w') as zipf:
   for file in csv_files:
       zipf.write(file)
       print(f"Added: {file}")  # Show progress

print(f"Total {len(csv_files)} files compressed.")

Added: temp4_1_governor_chungnam_8.csv
Added: temp4_3_governor_jeju_8.csv
Added: temp4_1_governor_8.csv
Added: temp4_2_governor_daejeon_8.csv
Added: temp4_2_governor_chungbuk_8.csv
Added: temp4_1_governor_gyeongnam_8.csv
Added: temp4_3_governor_8.csv
Added: temp4_3_governor_jeonnam_8.csv
Added: temp4_1_governor_incheon_8.csv
Added: temp4_3_governor_daegu_8.csv
Added: temp4_1_governor_sejong_8.csv
Added: temp4_2_governor_gyeongnam_8.csv
Added: temp4_2_governor_sejong_8.csv
Added: temp4_2_governor_jeju_8.csv
Added: temp4_3_governor_gangwon_8.csv
Added: temp4_1_governor_gyeongbuk_8.csv
Added: temp4_3_governor_gyeongnam_8.csv
Added: temp4_2_governor_gwangju_8.csv
Added: temp4_2_governor_incheon_8.csv
Added: temp4_1_governor_seoul_8.csv
Added: temp4_1_governor_daegu_8.csv
Added: temp4_3_governor_chungbuk_8.csv
Added: temp4_1_governor_ulsan_8.csv
Added: temp4_3_governor_ulsan_8.csv
Added: temp4_2_governor_gangwon_8.csv
Added: temp4_3_governor_busan_8.csv
Added: temp4_3_governor_gyeongbuk_8.c