# Functions

In [1]:
import pandas as pd
import numpy as np
import re
from typing import Dict, Tuple

def process_governor_election_data(region_name: str) -> pd.DataFrame:
    """
    특정 지역의 지사 선거 데이터를 처리하여 병합된 결과를 반환

    Args:
        region_name (str): 지역명 (예: 'busan', 'seoul', 'gyeonggi' 등)

    Returns:
        pd.DataFrame: 병합된 지사 선거 데이터
    """

    # URL 생성
    df1_url = f"https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v1_g/7th_2018/temp1_governor_{region_name}_7.csv"
    df2_url = f"https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_g/7th_2018/temp2_2_governor_{region_name}_7.csv"

    print(f"=== {region_name} 지사 선거 데이터 처리 시작 ===")
    print(f"상세 데이터 URL: {df1_url}")
    print(f"요약 데이터 URL: {df2_url}")

    try:
        # 첫 번째 CSV 파일 처리 (상세 득표 데이터)
        df1 = pd.read_csv(df1_url)
        print(f"상세 데이터 로드 완료: {df1.shape}")

        # 득표수_{숫자}_정당_이름 패턴의 컬럼들 찾기
        vote_columns = [col for col in df1.columns if col.startswith('득표수_') and col != '득표수_계']
        print(f"득표수 관련 컬럼 수: {len(vote_columns)}")

        # 각 행별로 1위와 2위 후보의 번호 찾기
        def find_top_two_candidates(row):
            vote_data = []

            # 모든 후보의 득표수와 번호를 수집
            for col in vote_columns:
                votes = row[col] if pd.notna(row[col]) else 0
                # 컬럼명에서 숫자 추출
                match = re.search(r'득표수_(\d+)_', col)
                if match:
                    candidate_number = int(match.group(1))
                    vote_data.append((votes, candidate_number, col))

            # 득표수 기준으로 내림차순 정렬
            vote_data.sort(key=lambda x: x[0], reverse=True)

            # 1위와 2위 후보 반환
            first_place = vote_data[0] if len(vote_data) > 0 else (0, None, None)
            second_place = vote_data[1] if len(vote_data) > 1 else (0, None, None)

            return first_place, second_place

        # 1위 후보 번호 찾기
        def find_winner_number(row):
            first_place, _ = find_top_two_candidates(row)
            return first_place[1]

        # 2위 후보 번호 찾기
        def find_second_number(row):
            _, second_place = find_top_two_candidates(row)
            return second_place[1]

        # 1위 득표율 계산
        def calculate_vote_rate_1st(row):
            first_place, _ = find_top_two_candidates(row)
            max_votes = first_place[0]
            total_votes = row['득표수_계'] if pd.notna(row['득표수_계']) else 0

            if total_votes > 0:
                return round(max_votes / total_votes, 4)
            else:
                return 0

        # 2위 득표율 계산
        def calculate_vote_rate_2nd(row):
            _, second_place = find_top_two_candidates(row)
            second_votes = second_place[0]
            total_votes = row['득표수_계'] if pd.notna(row['득표수_계']) else 0

            if total_votes > 0:
                return round(second_votes / total_votes, 4)
            else:
                return 0

        # 새로운 컬럼 추가 (1위, 2위)
        df1['득표_1위_후보번호'] = df1.apply(find_winner_number, axis=1)
        df1['득표_1위_득표율'] = df1.apply(calculate_vote_rate_1st, axis=1)
        df1['득표_2위_후보번호'] = df1.apply(find_second_number, axis=1)
        df1['득표_2위_득표율'] = df1.apply(calculate_vote_rate_2nd, axis=1)

        # 정당 카테고리 매핑 딕셔너리 (지역별로 다를 수 있음)
        category_mapping = get_governor_category_mapping(region_name, vote_columns)
        print(f"생성된 카테고리 매핑: {category_mapping}")

        # 1위, 2위 후보번호를 카테고리로 매핑
        # fillna()로 매핑되지 않은 후보는 '기타'로 처리
        df1['득표_1위_정당'] = df1['득표_1위_후보번호'].map(category_mapping).fillna('기타')
        df1['득표_2위_정당'] = df1['득표_2위_후보번호'].map(category_mapping).fillna('기타')

        # 매핑 결과 확인
        print("매핑 후 1위 정당 분포 (처리 중):")
        print(df1['득표_1위_정당'].value_counts())

        # 매핑되지 않은 후보 확인
        unmapped_1st = df1[df1['득표_1위_정당'] == '기타']['득표_1위_후보번호'].unique()
        unmapped_2nd = df1[df1['득표_2위_정당'] == '기타']['득표_2위_후보번호'].unique()
        if len(unmapped_1st) > 0:
            print(f"경고: 1위에서 매핑되지 않은 후보번호: {unmapped_1st}")
        if len(unmapped_2nd) > 0:
            print(f"경고: 2위에서 매핑되지 않은 후보번호: {unmapped_2nd}")

        # 카테고리별 후보자 수 계산 (모든 카테고리 포함)
        candidate_counts = {}
        all_categories = ['보수정당', '진보정당', '그외정당', '무소속']

        # 먼저 모든 카테고리를 0으로 초기화
        for category in all_categories:
            candidate_counts[category] = 0

        # 실제 매핑에서 카운트
        for candidate_num, category in category_mapping.items():
            if category in candidate_counts:
                candidate_counts[category] += 1
            else:
                candidate_counts[category] = 1

        print(f"카테고리별 후보자 수: {candidate_counts}")

        # 각 카테고리별로 개별 컬럼 생성 (없는 카테고리도 0으로 포함)
        for category in all_categories:
            candidate_count = candidate_counts.get(category, 0)  # 없으면 0
            df1[f'{category}_후보자수'] = candidate_count
            print(f"  {category}_후보자수: {candidate_count}")

        # 필요한 컬럼들만 선택 (병합용)
        merge_columns = ['시도', '구시군', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
                         '득표_1위_정당', '득표_2위_정당'] + [f'{cat}_후보자수' for cat in all_categories]

        df1_for_merge = df1[merge_columns].copy()

        # 두 번째 CSV 파일 읽기 (요약 데이터)
        df2 = pd.read_csv(df2_url)
        print(f"요약 데이터 로드 완료: {df2.shape}")

        # 시도, 구시군을 키로 하여 병합
        merged_df = pd.merge(df2, df1_for_merge, on=['시도', '구시군'], how='left')

        # 병합 결과 검증
        missing_data = merged_df[merged_df['득표_1위_후보번호'].isna()]
        if len(missing_data) > 0:
            print(f"경고: 병합되지 않은 데이터가 {len(missing_data)}개 있습니다")
        else:
            print("모든 데이터가 성공적으로 병합되었습니다!")

        print(f"최종 데이터 형태: {merged_df.shape}")
        print(f"1위 정당 분포:")
        print(merged_df['득표_1위_정당'].value_counts())
        print(f"=== {region_name} 지사 선거 데이터 처리 완료 ===\n")

        return merged_df

    except Exception as e:
        print(f"오류 발생: {e}")
        return None

def get_governor_category_mapping(region_name: str, vote_columns: list) -> Dict[int, str]:
    """
    지역별 지사 선거 정당 카테고리 매핑을 반환
    각 지역마다 후보자와 정당이 다르므로 수동으로 설정 필요

    Args:
        region_name: 지역명
        vote_columns: 득표수 컬럼 리스트 (후보 확인용)

    Returns:
        해당 지역의 후보번호별 카테고리 매핑
    """

    print(f"\n=== {region_name} 지사 선거 후보 정보 ===")
    print("실제 후보 컬럼들:")
    for col in vote_columns:
        print(f"  {col}")

    # 지역별 매핑 설정 (2018년 제7회 지방선거 기준)
    if region_name == 'seoul':  # 서울
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            5: '그외정당',
            6: '그외정당',
            7: '그외정당',
            8: '그외정당',
            9: '그외정당',
            10: '그외정당'
        }

    elif region_name == 'busan':  # 부산
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            5: '그외정당',
            6: '무소속'
        }

    elif region_name == 'daegu':  # 대구
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당'
        }

    elif region_name == 'incheon':  # 인천
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            5: '그외정당'
        }

    elif region_name == 'gwangju':  # 광주
        mapping = {
            1: '진보정당',
            3: '그외정당',
            5: '그외정당',
            6: '그외정당'
        }

    elif region_name == 'daejeon':  # 대전
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            5: '그외정당'
        }

    elif region_name == 'ulsan':  # 울산
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            6: '그외정당'
        }

    elif region_name == 'sejong':  # 세종
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당'
        }

    elif region_name == 'gyeonggi':  # 경기
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            5: '그외정당',
            6: '그외정당'
        }

    elif region_name == 'gangwon':  # 강원
        mapping = {
            1: '진보정당',
            2: '보수정당'
        }

    elif region_name == 'chungbuk':  # 충북
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당'
        }

    elif region_name == 'chungnam':  # 충남
        mapping = {
            1: '진보정당',
            2: '보수정당',
            6: '그외정당'
        }

    elif region_name == 'jeonbuk':  # 전북
        mapping = {
            1: '진보정당',
            2: '보수정당',
            4: '그외정당',
            5: '그외정당',
            6: '그외정당'
        }

    elif region_name == 'jeonnam':  # 전남
        mapping = {
            1: '진보정당',
            3: '그외정당',
            4: '그외정당',
            5: '그외정당',
            6: '그외정당'
        }

    elif region_name == 'gyeongbuk':  # 경북
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            5: '그외정당'
        }

    elif region_name == 'gyeongnam':  # 경남
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당'
        }

    elif region_name == 'jeju':  # 제주
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            6: '그외정당',
            7: '무소속'
        }

    else:
        print(f"경고: {region_name} 지역에 대한 매핑이 정의되지 않았습니다.")
        print("기본 매핑을 사용합니다. 수동으로 매핑을 추가해주세요.")
        # 기본 매핑 (수정 필요)
        mapping = {
            1: '진보정당',
            2: '보수정당',
            3: '그외정당',
            4: '그외정당',
            5: '무소속'
        }

    # 실제 존재하는 후보 번호만 추출
    existing_candidates = set()
    for col in vote_columns:
        match = re.search(r'득표수_(\d+)_', col)
        if match:
            existing_candidates.add(int(match.group(1)))

    print(f"실제 존재하는 후보번호: {sorted(existing_candidates)}")

    # 존재하는 후보에 대해서만 매핑 적용
    filtered_mapping = {k: v for k, v in mapping.items() if k in existing_candidates}

    # 매핑되지 않은 후보 번호 확인
    unmapped_candidates = existing_candidates - set(mapping.keys())
    if unmapped_candidates:
        print(f"경고: 매핑되지 않은 후보번호들: {sorted(unmapped_candidates)}")
        print("이 후보들은 '기타' 카테고리로 분류됩니다.")

    print(f"적용된 매핑: {filtered_mapping}")
    print("=" * 50)

    return filtered_mapping

def process_multiple_governor_elections(region_names: list) -> Dict[str, pd.DataFrame]:
    """
    여러 지역의 지사 선거를 일괄 처리하여 df_지역명 형태로 변수 저장

    Args:
        region_names (list): 처리할 지역명 리스트

    Returns:
        Dict[str, pd.DataFrame]: 지역별 처리된 데이터프레임 딕셔너리
    """
    results = {}

    for region_name in region_names:
        print(f"\n{'='*50}")
        result_df = process_governor_election_data(region_name)

        if result_df is not None:
            # df_busan, df_seoul 형태로 변수명 지정
            var_name = f'df_{region_name}'
            results[var_name] = result_df

            print(f"데이터프레임 저장: {var_name} (shape: {result_df.shape})")
        else:
            print(f"{region_name} 지사 선거 데이터 처리 실패")

    return results

# 지역명들
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'sejong', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

# # 사용 예시
# if __name__ == "__main__":
#     # 방법 1: 특정 지역들만 처리
#     selected_regions = ['busan', 'seoul', 'gyeonggi', 'incheon']
#     governor_results = process_multiple_governor_elections(selected_regions)

#     # 개별 접근 예시:
#     # df_busan = governor_results['df_busan']
#     # df_seoul = governor_results['df_seoul']

#     # 방법 2: 모든 지역 일괄 처리
#     # all_governor_results = process_multiple_governor_elections(AVAILABLE_REGIONS)

#     print(f"\n사용 가능한 지역들: {AVAILABLE_REGIONS}")

# Preprocessing & Merge

In [2]:
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'sejong', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

election_results = process_multiple_governor_elections(AVAILABLE_REGIONS)


=== seoul 지사 선거 데이터 처리 시작 ===
상세 데이터 URL: https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v1_g/7th_2018/temp1_governor_seoul_7.csv
요약 데이터 URL: https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_g/7th_2018/temp2_2_governor_seoul_7.csv
상세 데이터 로드 완료: (26, 16)
득표수 관련 컬럼 수: 9

=== seoul 지사 선거 후보 정보 ===
실제 후보 컬럼들:
  득표수_1_더불어민주당_박원순
  득표수_2_자유한국당_김문수
  득표수_3_바른미래당_안철수
  득표수_5_정의당_김종민
  득표수_6_민중당_김진숙
  득표수_7_대한애국당_인지연
  득표수_8_녹색당_신지예
  득표수_9_미래당_우인철
  득표수_10_친박연대_최태현
실제 존재하는 후보번호: [1, 2, 3, 5, 6, 7, 8, 9, 10]
적용된 매핑: {1: '진보정당', 2: '보수정당', 3: '그외정당', 5: '그외정당', 6: '그외정당', 7: '그외정당', 8: '그외정당', 9: '그외정당', 10: '그외정당'}
생성된 카테고리 매핑: {1: '진보정당', 2: '보수정당', 3: '그외정당', 5: '그외정당', 6: '그외정당', 7: '그외정당', 8: '그외정당', 9: '그외정당', 10: '그외정당'}
매핑 후 1위 정당 분포 (처리 중):
득표_1위_정당
진보정당    26
Name: count, dtype: int64
카테고리별 후보자 수: {'보수정당': 1, '진보정당': 1, '그외정당': 7, '무소속': 0}
  보수정당_후보자수: 1
  진보정당_후보자수: 1
  그외정당_후보자수: 7
  무소속_후보자수: 0
요약 데이터 로드 완료: (26,

# Governor Election 7th

## Seoul

In [3]:
df_seoul = election_results['df_seoul']

In [4]:
df_seoul

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,서울특별시,합계,8380947,5019098,1158487,2619497,1183888,0,4961872,57226,...,1,0.5279,2,0.2335,진보정당,보수정당,1,1,7,0
1,서울특별시,종로구,134963,81195,18777,41148,20166,0,80091,1104,...,1,0.5138,2,0.2344,진보정당,보수정당,1,1,7,0
2,서울특별시,중구,112336,66474,15618,34243,15630,0,65491,983,...,1,0.5229,2,0.2385,진보정당,보수정당,1,1,7,0
3,서울특별시,용산구,200009,116054,31073,53964,29625,0,114662,1392,...,1,0.4706,2,0.271,진보정당,보수정당,1,1,7,0
4,서울특별시,성동구,264634,157863,34878,84106,36882,0,155866,1997,...,1,0.5396,2,0.2238,진보정당,보수정당,1,1,7,0
5,서울특별시,광진구,308174,180373,38606,99366,40396,0,178368,2005,...,1,0.5571,2,0.2164,진보정당,보수정당,1,1,7,0
6,서울특별시,동대문구,302939,178077,40984,93913,40826,0,175723,2354,...,1,0.5344,2,0.2332,진보정당,보수정당,1,1,7,0
7,서울특별시,중랑구,352041,198943,45905,109318,40987,0,196210,2733,...,1,0.5571,2,0.234,진보정당,보수정당,1,1,7,0
8,서울특별시,성북구,373174,222184,47274,119898,52329,0,219501,2683,...,1,0.5462,2,0.2154,진보정당,보수정당,1,1,7,0
9,서울특별시,강북구,280860,156279,33537,85839,34605,0,153981,2298,...,1,0.5575,2,0.2178,진보정당,보수정당,1,1,7,0


### preprocessing

In [5]:
df_seoul = (
    df_seoul
    .assign(시도=lambda df: df['시도'].replace('서울특별시', '서울'))
)

In [6]:
df_seoul.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [7]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_seoul.columns if col not in fixed_cols]

# 메소드 체이닝
df_seoul = (
    df_seoul
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [8]:
df_seoul

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,합계,2018,광역단체장,진보정당,보수정당,1,0.5279,2,0.2335,...,0,8380947,5019098,1158487,2619497,1183888,0,4961872,57226,3361849
1,서울,종로구,2018,광역단체장,진보정당,보수정당,1,0.5138,2,0.2344,...,0,134963,81195,18777,41148,20166,0,80091,1104,53768
2,서울,중구,2018,광역단체장,진보정당,보수정당,1,0.5229,2,0.2385,...,0,112336,66474,15618,34243,15630,0,65491,983,45862
3,서울,용산구,2018,광역단체장,진보정당,보수정당,1,0.4706,2,0.271,...,0,200009,116054,31073,53964,29625,0,114662,1392,83955
4,서울,성동구,2018,광역단체장,진보정당,보수정당,1,0.5396,2,0.2238,...,0,264634,157863,34878,84106,36882,0,155866,1997,106771
5,서울,광진구,2018,광역단체장,진보정당,보수정당,1,0.5571,2,0.2164,...,0,308174,180373,38606,99366,40396,0,178368,2005,127801
6,서울,동대문구,2018,광역단체장,진보정당,보수정당,1,0.5344,2,0.2332,...,0,302939,178077,40984,93913,40826,0,175723,2354,124862
7,서울,중랑구,2018,광역단체장,진보정당,보수정당,1,0.5571,2,0.234,...,0,352041,198943,45905,109318,40987,0,196210,2733,153098
8,서울,성북구,2018,광역단체장,진보정당,보수정당,1,0.5462,2,0.2154,...,0,373174,222184,47274,119898,52329,0,219501,2683,150990
9,서울,강북구,2018,광역단체장,진보정당,보수정당,1,0.5575,2,0.2178,...,0,280860,156279,33537,85839,34605,0,153981,2298,124581


### v4.1 ~ v4.3

In [9]:
# 1. 전체 데이터 저장
df_seoul.to_csv("temp4_1_governor_seoul_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_seoul2 = df_seoul.query("구시군 != '합계'")
df_seoul2.to_csv("temp4_2_governor_seoul_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_seoul3 = df_seoul.query("구시군 == '합계'").drop(columns="구시군")
df_seoul3.to_csv("temp4_3_governor_seoul_7.csv", index=False, encoding="utf-8-sig")

## Busan

In [10]:
df_busan = election_results['df_busan']

In [11]:
df_busan

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,부산광역시,합계,2939046,1727684,632806,940469,102801,26720,1702796,24888,...,1,0.5523,2,0.3716,진보정당,보수정당,1,1,2,1
1,부산광역시,중구,39832,22499,8993,11730,1080,300,22103,396,...,1,0.5307,2,0.4069,진보정당,보수정당,1,1,2,1
2,부산광역시,서구,97123,55377,22760,27919,2761,913,54353,1024,...,1,0.5137,2,0.4187,진보정당,보수정당,1,1,2,1
3,부산광역시,동구,79091,46288,18841,23476,2313,691,45321,967,...,1,0.518,2,0.4157,진보정당,보수정당,1,1,2,1
4,부산광역시,영도구,107930,61332,22179,34279,2973,879,60310,1022,...,1,0.5684,2,0.3677,진보정당,보수정당,1,1,2,1
5,부산광역시,부산진구,319402,184063,67354,98314,12017,3626,181311,2752,...,1,0.5422,2,0.3715,진보정당,보수정당,1,1,2,1
6,부산광역시,동래구,226713,135595,50115,72895,8893,1963,133866,1729,...,1,0.5445,2,0.3744,진보정당,보수정당,1,1,2,1
7,부산광역시,남구,234211,140451,52232,75978,8263,1902,138375,2076,...,1,0.5491,2,0.3775,진보정당,보수정당,1,1,2,1
8,부산광역시,북구,253907,152125,53265,85791,9038,2034,150128,1997,...,1,0.5715,2,0.3548,진보정당,보수정당,1,1,2,1
9,부산광역시,해운대구,342079,200332,73131,109622,12651,2770,198174,2158,...,1,0.5532,2,0.369,진보정당,보수정당,1,1,2,1


### preprocessing

In [12]:
df_busan = (
    df_busan
    .assign(시도=lambda df: df['시도'].replace('부산광역시', '부산'))
)

In [13]:
df_busan.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [14]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_busan.columns if col not in fixed_cols]

# 메소드 체이닝
df_busan = (
    df_busan
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [15]:
df_busan

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,부산,합계,2018,광역단체장,진보정당,보수정당,1,0.5523,2,0.3716,...,1,2939046,1727684,632806,940469,102801,26720,1702796,24888,1211362
1,부산,중구,2018,광역단체장,진보정당,보수정당,1,0.5307,2,0.4069,...,1,39832,22499,8993,11730,1080,300,22103,396,17333
2,부산,서구,2018,광역단체장,진보정당,보수정당,1,0.5137,2,0.4187,...,1,97123,55377,22760,27919,2761,913,54353,1024,41746
3,부산,동구,2018,광역단체장,진보정당,보수정당,1,0.518,2,0.4157,...,1,79091,46288,18841,23476,2313,691,45321,967,32803
4,부산,영도구,2018,광역단체장,진보정당,보수정당,1,0.5684,2,0.3677,...,1,107930,61332,22179,34279,2973,879,60310,1022,46598
5,부산,부산진구,2018,광역단체장,진보정당,보수정당,1,0.5422,2,0.3715,...,1,319402,184063,67354,98314,12017,3626,181311,2752,135339
6,부산,동래구,2018,광역단체장,진보정당,보수정당,1,0.5445,2,0.3744,...,1,226713,135595,50115,72895,8893,1963,133866,1729,91118
7,부산,남구,2018,광역단체장,진보정당,보수정당,1,0.5491,2,0.3775,...,1,234211,140451,52232,75978,8263,1902,138375,2076,93760
8,부산,북구,2018,광역단체장,진보정당,보수정당,1,0.5715,2,0.3548,...,1,253907,152125,53265,85791,9038,2034,150128,1997,101782
9,부산,해운대구,2018,광역단체장,진보정당,보수정당,1,0.5532,2,0.369,...,1,342079,200332,73131,109622,12651,2770,198174,2158,141747


### v4.1 ~ v4.3

In [16]:
# 1. 전체 데이터 저장
df_busan.to_csv("temp4_1_governor_busan_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_busan2 = df_busan.query("구시군 != '합계'")
df_busan2.to_csv("temp4_2_governor_busan_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_busan3 = df_busan.query("구시군 == '합계'").drop(columns="구시군")
df_busan3.to_csv("temp4_3_governor_busan_7.csv", index=False, encoding="utf-8-sig")

## Daegu

In [17]:
df_daegu = election_results['df_daegu']

In [18]:
df_daegu

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,대구광역시,합계,2047286,1172279,619165,458112,74955,0,1152232,20047,...,2,0.5374,1,0.3976,보수정당,진보정당,1,1,1,0
1,대구광역시,중구,68303,39863,21640,14983,2572,0,39195,668,...,2,0.5521,1,0.3823,보수정당,진보정당,1,1,1,0
2,대구광역시,동구,295860,171627,90378,65465,12894,0,168737,2890,...,2,0.5356,1,0.388,보수정당,진보정당,1,1,1,0
3,대구광역시,서구,167260,89634,53879,28180,5532,0,87591,2043,...,2,0.6151,1,0.3217,보수정당,진보정당,1,1,1,0
4,대구광역시,남구,134493,72239,41302,25478,3886,0,70666,1573,...,2,0.5845,1,0.3605,보수정당,진보정당,1,1,1,0
5,대구광역시,북구,360010,205858,106028,83084,13244,0,202356,3502,...,2,0.524,1,0.4106,보수정당,진보정당,1,1,1,0
6,대구광역시,수성구,351854,211819,111722,84824,12635,0,209181,2638,...,2,0.5341,1,0.4055,보수정당,진보정당,1,1,1,0
7,대구광역시,달서구,472669,266010,136139,108602,16996,0,261737,4273,...,2,0.5201,1,0.4149,보수정당,진보정당,1,1,1,0
8,대구광역시,달성군,196837,115229,58077,47496,7196,0,112769,2460,...,2,0.515,1,0.4212,보수정당,진보정당,1,1,1,0


### preprocessing

In [19]:
df_daegu = (
    df_daegu
    .assign(시도=lambda df: df['시도'].replace('대구광역시', '대구'))
)

In [20]:
df_daegu.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [21]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_daegu.columns if col not in fixed_cols]

# 메소드 체이닝
df_daegu = (
    df_daegu
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [22]:
df_daegu

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,대구,합계,2018,광역단체장,보수정당,진보정당,2,0.5374,1,0.3976,...,0,2047286,1172279,619165,458112,74955,0,1152232,20047,875007
1,대구,중구,2018,광역단체장,보수정당,진보정당,2,0.5521,1,0.3823,...,0,68303,39863,21640,14983,2572,0,39195,668,28440
2,대구,동구,2018,광역단체장,보수정당,진보정당,2,0.5356,1,0.388,...,0,295860,171627,90378,65465,12894,0,168737,2890,124233
3,대구,서구,2018,광역단체장,보수정당,진보정당,2,0.6151,1,0.3217,...,0,167260,89634,53879,28180,5532,0,87591,2043,77626
4,대구,남구,2018,광역단체장,보수정당,진보정당,2,0.5845,1,0.3605,...,0,134493,72239,41302,25478,3886,0,70666,1573,62254
5,대구,북구,2018,광역단체장,보수정당,진보정당,2,0.524,1,0.4106,...,0,360010,205858,106028,83084,13244,0,202356,3502,154152
6,대구,수성구,2018,광역단체장,보수정당,진보정당,2,0.5341,1,0.4055,...,0,351854,211819,111722,84824,12635,0,209181,2638,140035
7,대구,달서구,2018,광역단체장,보수정당,진보정당,2,0.5201,1,0.4149,...,0,472669,266010,136139,108602,16996,0,261737,4273,206659
8,대구,달성군,2018,광역단체장,보수정당,진보정당,2,0.515,1,0.4212,...,0,196837,115229,58077,47496,7196,0,112769,2460,81608


### v4.1 ~ v4.3

In [23]:
# 1. 전체 데이터 저장
df_daegu.to_csv("temp4_1_governor_daegu_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_daegu2 = df_daegu.query("구시군 != '합계'")
df_daegu2.to_csv("temp4_2_governor_daegu_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_daegu3 = df_daegu.query("구시군 == '합계'").drop(columns="구시군")
df_daegu3.to_csv("temp4_3_governor_daegu_7.csv", index=False, encoding="utf-8-sig")

## Incheon

In [24]:
df_incheon = election_results['df_incheon']

In [25]:
df_incheon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,인천광역시,합계,2440779,1349084,470937,766186,91526,0,1328649,20435,...,1,0.5767,2,0.3544,진보정당,보수정당,1,1,2,0
1,인천광역시,중구,98608,53666,19724,29817,3261,0,52802,864,...,1,0.5647,2,0.3735,진보정당,보수정당,1,1,2,0
2,인천광역시,동구,57559,34757,14655,17288,2070,0,34013,744,...,1,0.5083,2,0.4309,진보정당,보수정당,1,1,2,0
3,인천광역시,남구,356508,183835,71278,97347,12072,0,180697,3138,...,1,0.5387,2,0.3945,진보정당,보수정당,1,1,2,0
4,인천광역시,연수구,269626,160548,58191,90969,9430,0,158590,1958,...,1,0.5736,2,0.3669,진보정당,보수정당,1,1,2,0
5,인천광역시,남동구,441132,246564,82245,144979,16040,0,243264,3300,...,1,0.596,2,0.3381,진보정당,보수정당,1,1,2,0
6,인천광역시,부평구,448618,241811,76655,140735,21016,0,238406,3405,...,1,0.5903,2,0.3215,진보정당,보수정당,1,1,2,0
7,인천광역시,계양구,266216,143889,44686,86747,10394,0,141827,2062,...,1,0.6116,2,0.3151,진보정당,보수정당,1,1,2,0
8,인천광역시,서구,422819,230166,74876,137825,14562,0,227263,2903,...,1,0.6065,2,0.3295,진보정당,보수정당,1,1,2,0
9,인천광역시,강화군,60755,39940,21723,14972,1837,0,38532,1408,...,2,0.5638,1,0.3886,보수정당,진보정당,1,1,2,0


### preprocessing

In [26]:
df_incheon = (
    df_incheon
    .assign(시도=lambda df: df['시도'].replace('인천광역시', '인천'))
)

In [27]:
df_incheon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [28]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_incheon.columns if col not in fixed_cols]

# 메소드 체이닝
df_incheon = (
    df_incheon
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [29]:
df_incheon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,인천,합계,2018,광역단체장,진보정당,보수정당,1,0.5767,2,0.3544,...,0,2440779,1349084,470937,766186,91526,0,1328649,20435,1091695
1,인천,중구,2018,광역단체장,진보정당,보수정당,1,0.5647,2,0.3735,...,0,98608,53666,19724,29817,3261,0,52802,864,44942
2,인천,동구,2018,광역단체장,진보정당,보수정당,1,0.5083,2,0.4309,...,0,57559,34757,14655,17288,2070,0,34013,744,22802
3,인천,남구,2018,광역단체장,진보정당,보수정당,1,0.5387,2,0.3945,...,0,356508,183835,71278,97347,12072,0,180697,3138,172673
4,인천,연수구,2018,광역단체장,진보정당,보수정당,1,0.5736,2,0.3669,...,0,269626,160548,58191,90969,9430,0,158590,1958,109078
5,인천,남동구,2018,광역단체장,진보정당,보수정당,1,0.596,2,0.3381,...,0,441132,246564,82245,144979,16040,0,243264,3300,194568
6,인천,부평구,2018,광역단체장,진보정당,보수정당,1,0.5903,2,0.3215,...,0,448618,241811,76655,140735,21016,0,238406,3405,206807
7,인천,계양구,2018,광역단체장,진보정당,보수정당,1,0.6116,2,0.3151,...,0,266216,143889,44686,86747,10394,0,141827,2062,122327
8,인천,서구,2018,광역단체장,진보정당,보수정당,1,0.6065,2,0.3295,...,0,422819,230166,74876,137825,14562,0,227263,2903,192653
9,인천,강화군,2018,광역단체장,보수정당,진보정당,2,0.5638,1,0.3886,...,0,60755,39940,21723,14972,1837,0,38532,1408,20815


### v4.1 ~ v4.3

In [30]:
# 1. 전체 데이터 저장
df_incheon.to_csv("temp4_1_governor_incheon_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_incheon2 = df_incheon.query("구시군 != '합계'")
df_incheon2.to_csv("temp4_2_governor_incheon_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_incheon3 = df_incheon.query("구시군 == '합계'").drop(columns="구시군")
df_incheon3.to_csv("temp4_3_governor_incheon_7.csv", index=False, encoding="utf-8-sig")

## Gwangju

In [31]:
df_gwangju = election_results['df_gwangju']

In [32]:
df_gwangju

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,광주광역시,합계,1172429,694252,34487,573995,74228,0,682710,11542,...,1,0.8408,5,0.0599,진보정당,그외정당,0,1,3,0
1,광주광역시,동구,82265,51550,3489,42139,4789,0,50417,1133,...,1,0.8358,3,0.0692,진보정당,그외정당,0,1,3,0
2,광주광역시,서구,247275,147798,7163,122803,15578,0,145544,2254,...,1,0.8438,5,0.0606,진보정당,그외정당,0,1,3,0
3,광주광역시,남구,176964,106294,5569,88699,10185,0,104453,1841,...,1,0.8492,5,0.0578,진보정당,그외정당,0,1,3,0
4,광주광역시,북구,358914,210587,9854,173766,23387,0,207007,3580,...,1,0.8394,5,0.0565,진보정당,그외정당,0,1,3,0
5,광주광역시,광산구,307011,178023,8412,146588,20289,0,175289,2734,...,1,0.8363,5,0.0666,진보정당,그외정당,0,1,3,0


### preprocessing

In [33]:
df_gwangju = (
    df_gwangju
    .assign(시도=lambda df: df['시도'].replace('광주광역시', '광주'))
)

In [34]:
df_gwangju.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [35]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_gwangju.columns if col not in fixed_cols]

# 메소드 체이닝
df_gwangju = (
    df_gwangju
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [36]:
df_gwangju

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,광주,합계,2018,광역단체장,진보정당,그외정당,1,0.8408,5,0.0599,...,0,1172429,694252,34487,573995,74228,0,682710,11542,478177
1,광주,동구,2018,광역단체장,진보정당,그외정당,1,0.8358,3,0.0692,...,0,82265,51550,3489,42139,4789,0,50417,1133,30715
2,광주,서구,2018,광역단체장,진보정당,그외정당,1,0.8438,5,0.0606,...,0,247275,147798,7163,122803,15578,0,145544,2254,99477
3,광주,남구,2018,광역단체장,진보정당,그외정당,1,0.8492,5,0.0578,...,0,176964,106294,5569,88699,10185,0,104453,1841,70670
4,광주,북구,2018,광역단체장,진보정당,그외정당,1,0.8394,5,0.0565,...,0,358914,210587,9854,173766,23387,0,207007,3580,148327
5,광주,광산구,2018,광역단체장,진보정당,그외정당,1,0.8363,5,0.0666,...,0,307011,178023,8412,146588,20289,0,175289,2734,128988


### v4.1 ~ v4.3

In [37]:
# 1. 전체 데이터 저장
df_gwangju.to_csv("temp4_1_governor_gwangju_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_gwangju2 = df_gwangju.query("구시군 != '합계'")
df_gwangju2.to_csv("temp4_2_governor_gwangju_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_gwangju3 = df_gwangju.query("구시군 == '합계'").drop(columns="구시군")
df_gwangju3.to_csv("temp4_3_governor_gwangju_7.csv", index=False, encoding="utf-8-sig")

## Daejeon

In [38]:
df_daejeon = election_results['df_daejeon']

In [39]:
df_daejeon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,대전광역시,합계,1219513,706983,224306,393354,79622,0,697282,9701,...,1,0.5641,2,0.3217,진보정당,보수정당,1,1,2,0
1,대전광역시,동구,191362,106341,35880,55928,12505,0,104313,2028,...,1,0.5362,2,0.344,진보정당,보수정당,1,1,2,0
2,대전광역시,중구,206042,116950,41632,60878,12593,0,115103,1847,...,1,0.5289,2,0.3617,진보정당,보수정당,1,1,2,0
3,대전광역시,서구,395367,226532,71325,127151,25402,0,223878,2654,...,1,0.5679,2,0.3186,진보정당,보수정당,1,1,2,0
4,대전광역시,유성구,272663,168310,43981,101687,21037,0,166705,1605,...,1,0.61,2,0.2638,진보정당,보수정당,1,1,2,0
5,대전광역시,대덕구,154079,88850,31488,47710,8085,0,87283,1567,...,1,0.5466,2,0.3608,진보정당,보수정당,1,1,2,0


### preprocessing

In [40]:
df_daejeon = (
    df_daejeon
    .assign(시도=lambda df: df['시도'].replace('대전광역시', '대전'))
)

In [41]:
df_daejeon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [42]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_daejeon.columns if col not in fixed_cols]

# 메소드 체이닝
df_daejeon = (
    df_daejeon
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [43]:
df_daejeon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,대전,합계,2018,광역단체장,진보정당,보수정당,1,0.5641,2,0.3217,...,0,1219513,706983,224306,393354,79622,0,697282,9701,512530
1,대전,동구,2018,광역단체장,진보정당,보수정당,1,0.5362,2,0.344,...,0,191362,106341,35880,55928,12505,0,104313,2028,85021
2,대전,중구,2018,광역단체장,진보정당,보수정당,1,0.5289,2,0.3617,...,0,206042,116950,41632,60878,12593,0,115103,1847,89092
3,대전,서구,2018,광역단체장,진보정당,보수정당,1,0.5679,2,0.3186,...,0,395367,226532,71325,127151,25402,0,223878,2654,168835
4,대전,유성구,2018,광역단체장,진보정당,보수정당,1,0.61,2,0.2638,...,0,272663,168310,43981,101687,21037,0,166705,1605,104353
5,대전,대덕구,2018,광역단체장,진보정당,보수정당,1,0.5466,2,0.3608,...,0,154079,88850,31488,47710,8085,0,87283,1567,65229


### v4.1 ~ v4.3

In [44]:
# 1. 전체 데이터 저장
df_daejeon.to_csv("temp4_1_governor_daejeon_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_daejeon2 = df_daejeon.query("구시군 != '합계'")
df_daejeon2.to_csv("temp4_2_governor_daejeon_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_daejeon3 = df_daejeon.query("구시군 == '합계'").drop(columns="구시군")
df_daejeon3.to_csv("temp4_3_governor_daejeon_7.csv", index=False, encoding="utf-8-sig")

## Ulsan

In [45]:
df_ulsan = election_results['df_ulsan']

In [46]:
df_ulsan

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,울산광역시,합계,942550,610698,240475,317341,42210,0,600026,10672,...,1,0.5289,2,0.4008,진보정당,보수정당,1,1,2,0
1,울산광역시,중구,194908,128337,54227,65014,6631,0,125872,2465,...,1,0.5165,2,0.4308,진보정당,보수정당,1,1,2,0
2,울산광역시,남구,274930,176694,75240,89518,9491,0,174249,2445,...,1,0.5137,2,0.4318,진보정당,보수정당,1,1,2,0
3,울산광역시,동구,134316,86763,28418,47190,9305,0,84913,1850,...,1,0.5557,2,0.3347,진보정당,보수정당,1,1,2,0
4,울산광역시,북구,159030,104132,33758,58639,10449,0,102846,1286,...,1,0.5702,2,0.3282,진보정당,보수정당,1,1,2,0
5,울산광역시,울주군,179366,114772,48832,56980,6334,0,112146,2626,...,1,0.5081,2,0.4354,진보정당,보수정당,1,1,2,0


### preprocessing

In [47]:
df_ulsan = (
    df_ulsan
    .assign(시도=lambda df: df['시도'].replace('울산광역시', '울산'))
)

In [48]:
df_ulsan.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [49]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_ulsan.columns if col not in fixed_cols]

# 메소드 체이닝
df_ulsan = (
    df_ulsan
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [50]:
df_ulsan

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,울산,합계,2018,광역단체장,진보정당,보수정당,1,0.5289,2,0.4008,...,0,942550,610698,240475,317341,42210,0,600026,10672,331852
1,울산,중구,2018,광역단체장,진보정당,보수정당,1,0.5165,2,0.4308,...,0,194908,128337,54227,65014,6631,0,125872,2465,66571
2,울산,남구,2018,광역단체장,진보정당,보수정당,1,0.5137,2,0.4318,...,0,274930,176694,75240,89518,9491,0,174249,2445,98236
3,울산,동구,2018,광역단체장,진보정당,보수정당,1,0.5557,2,0.3347,...,0,134316,86763,28418,47190,9305,0,84913,1850,47553
4,울산,북구,2018,광역단체장,진보정당,보수정당,1,0.5702,2,0.3282,...,0,159030,104132,33758,58639,10449,0,102846,1286,54898
5,울산,울주군,2018,광역단체장,진보정당,보수정당,1,0.5081,2,0.4354,...,0,179366,114772,48832,56980,6334,0,112146,2626,64594


### v4.1 ~ v4.3

In [51]:
# 1. 전체 데이터 저장
df_ulsan.to_csv("temp4_1_governor_ulsan_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_ulsan2 = df_ulsan.query("구시군 != '합계'")
df_ulsan2.to_csv("temp4_2_governor_ulsan_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_ulsan3 = df_ulsan.query("구시군 == '합계'").drop(columns="구시군")
df_ulsan3.to_csv("temp4_3_governor_ulsan_7.csv", index=False, encoding="utf-8-sig")

## Sejong

In [52]:
df_sejong = election_results['df_sejong']

In [53]:
df_sejong

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,세종특별자치시,합계,222852,137603,24546,96896,14444,0,135886,1717,...,1,0.7131,2,0.1806,진보정당,보수정당,1,1,1,0


### preprocessing

In [54]:
df_sejong = (
    df_sejong
    .assign(시도=lambda df: df['시도'].replace('세종특별자치시', '세종'))
)

In [55]:
df_sejong.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [56]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_sejong.columns if col not in fixed_cols]

# 메소드 체이닝
df_sejong = (
    df_sejong
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [57]:
df_sejong

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,세종,합계,2018,광역단체장,진보정당,보수정당,1,0.7131,2,0.1806,...,0,222852,137603,24546,96896,14444,0,135886,1717,85249


### v4.1 ~ v4.3

- 세종은 구,시,군이 없고 읍,면만 있어서 df_sejong2의 row가 없음

In [58]:
# 1. 전체 데이터 저장
df_sejong.to_csv("temp4_1_governor_sejong_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_sejong2 = df_sejong.query("구시군 != '합계'")
df_sejong2.to_csv("temp4_2_governor_sejong_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_sejong3 = df_sejong.query("구시군 == '합계'").drop(columns="구시군")
df_sejong3.to_csv("temp4_3_governor_sejong_7.csv", index=False, encoding="utf-8-sig")

## Gyeonggi

In [59]:
df_gyeonggi = election_results['df_gyeonggi']

In [60]:
df_gyeonggi

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경기도,합계,10533027,6084955,2122433,3370621,482473,0,5975527,109428,...,1,0.5641,2,0.3552,진보정당,보수정당,1,1,3,0
1,경기도,수원시장안구,202140,121951,45405,64965,9456,0,119826,2125,...,1,0.5422,2,0.3789,진보정당,보수정당,1,1,3,0
2,경기도,수원시권선구,342897,197363,69133,110066,14916,0,194115,3248,...,1,0.567,2,0.3561,진보정당,보수정당,1,1,3,0
3,경기도,수원시팔달구,161673,88388,36057,44464,6167,0,86688,1700,...,1,0.5129,2,0.4159,진보정당,보수정당,1,1,3,0
4,경기도,수원시영통구,273531,174692,59730,98513,13769,0,172012,2680,...,1,0.5727,2,0.3472,진보정당,보수정당,1,1,3,0
5,경기도,성남시수정구,204053,115704,34928,71400,7316,0,113644,2060,...,1,0.6283,2,0.3073,진보정당,보수정당,1,1,3,0
6,경기도,성남시중원구,195192,112200,32910,70848,6630,0,110388,1812,...,1,0.6418,2,0.2981,진보정당,보수정당,1,1,3,0
7,경기도,성남시분당구,401672,266966,102110,141379,19773,0,263262,3704,...,1,0.537,2,0.3879,진보정당,보수정당,1,1,3,0
8,경기도,의정부시,366459,198518,69513,111187,14277,0,194977,3541,...,1,0.5703,2,0.3565,진보정당,보수정당,1,1,3,0
9,경기도,안양시만안구,216498,129861,47791,70458,9272,0,127521,2340,...,1,0.5525,2,0.3748,진보정당,보수정당,1,1,3,0


### preprocessing

In [61]:
df_gyeonggi = (
    df_gyeonggi
    .assign(시도=lambda df: df['시도'].replace('경기도', '경기'))
)

In [62]:
df_gyeonggi.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [63]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_gyeonggi.columns if col not in fixed_cols]

# 메소드 체이닝
df_gyeonggi = (
    df_gyeonggi
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [64]:
df_gyeonggi

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,경기,합계,2018,광역단체장,진보정당,보수정당,1,0.5641,2,0.3552,...,0,10533027,6084955,2122433,3370621,482473,0,5975527,109428,4448072
1,경기,수원시장안구,2018,광역단체장,진보정당,보수정당,1,0.5422,2,0.3789,...,0,202140,121951,45405,64965,9456,0,119826,2125,80189
2,경기,수원시권선구,2018,광역단체장,진보정당,보수정당,1,0.567,2,0.3561,...,0,342897,197363,69133,110066,14916,0,194115,3248,145534
3,경기,수원시팔달구,2018,광역단체장,진보정당,보수정당,1,0.5129,2,0.4159,...,0,161673,88388,36057,44464,6167,0,86688,1700,73285
4,경기,수원시영통구,2018,광역단체장,진보정당,보수정당,1,0.5727,2,0.3472,...,0,273531,174692,59730,98513,13769,0,172012,2680,98839
5,경기,성남시수정구,2018,광역단체장,진보정당,보수정당,1,0.6283,2,0.3073,...,0,204053,115704,34928,71400,7316,0,113644,2060,88349
6,경기,성남시중원구,2018,광역단체장,진보정당,보수정당,1,0.6418,2,0.2981,...,0,195192,112200,32910,70848,6630,0,110388,1812,82992
7,경기,성남시분당구,2018,광역단체장,진보정당,보수정당,1,0.537,2,0.3879,...,0,401672,266966,102110,141379,19773,0,263262,3704,134706
8,경기,의정부시,2018,광역단체장,진보정당,보수정당,1,0.5703,2,0.3565,...,0,366459,198518,69513,111187,14277,0,194977,3541,167941
9,경기,안양시만안구,2018,광역단체장,진보정당,보수정당,1,0.5525,2,0.3748,...,0,216498,129861,47791,70458,9272,0,127521,2340,86637


### v4.1 ~ v4.3

In [65]:
# 1. 전체 데이터 저장
df_gyeonggi.to_csv("temp4_1_governor_gyeonggi_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_gyeonggi2 = df_gyeonggi.query("구시군 != '합계'")
df_gyeonggi2.to_csv("temp4_2_governor_gyeonggi_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_gyeonggi3 = df_gyeonggi.query("구시군 == '합계'").drop(columns="구시군")
df_gyeonggi3.to_csv("temp4_3_governor_gyeonggi_7.csv", index=False, encoding="utf-8-sig")

## Gangwon

In [66]:
df_gangwon = election_results['df_gangwon']

In [67]:
df_gangwon

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,강원도,합계,1296196,819193,282456,518447,0,0,800903,18290,...,1,0.6473,2,0.3527,진보정당,보수정당,1,1,0,0
1,강원도,춘천시,231430,144282,45024,96699,0,0,141723,2559,...,1,0.6823,2,0.3177,진보정당,보수정당,1,1,0,0
2,강원도,원주시,278991,159235,53060,103811,0,0,156871,2364,...,1,0.6618,2,0.3382,진보정당,보수정당,1,1,0,0
3,강원도,강릉시,179767,108772,44086,62434,0,0,106520,2252,...,1,0.5861,2,0.4139,진보정당,보수정당,1,1,0,0
4,강원도,동해시,75747,46923,16783,29034,0,0,45817,1106,...,1,0.6337,2,0.3663,진보정당,보수정당,1,1,0,0
5,강원도,삼척시,59117,41032,14369,25401,0,0,39770,1262,...,1,0.6387,2,0.3613,진보정당,보수정당,1,1,0,0
6,강원도,태백시,38386,25298,9272,15354,0,0,24626,672,...,1,0.6235,2,0.3765,진보정당,보수정당,1,1,0,0
7,강원도,정선군,33293,23092,7739,14708,0,0,22447,645,...,1,0.6552,2,0.3448,진보정당,보수정당,1,1,0,0
8,강원도,속초시,68234,40493,13443,26237,0,0,39680,813,...,1,0.6612,2,0.3388,진보정당,보수정당,1,1,0,0
9,강원도,고성군,25496,18354,6173,11620,0,0,17793,561,...,1,0.6531,2,0.3469,진보정당,보수정당,1,1,0,0


### preprocessing

In [68]:
df_gangwon = (
    df_gangwon
    .assign(시도=lambda df: df['시도'].replace('강원도', '강원'))
)

In [69]:
df_gangwon.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [70]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_gangwon.columns if col not in fixed_cols]

# 메소드 체이닝
df_gangwon = (
    df_gangwon
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [71]:
df_gangwon

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,강원,합계,2018,광역단체장,진보정당,보수정당,1,0.6473,2,0.3527,...,0,1296196,819193,282456,518447,0,0,800903,18290,477003
1,강원,춘천시,2018,광역단체장,진보정당,보수정당,1,0.6823,2,0.3177,...,0,231430,144282,45024,96699,0,0,141723,2559,87148
2,강원,원주시,2018,광역단체장,진보정당,보수정당,1,0.6618,2,0.3382,...,0,278991,159235,53060,103811,0,0,156871,2364,119756
3,강원,강릉시,2018,광역단체장,진보정당,보수정당,1,0.5861,2,0.4139,...,0,179767,108772,44086,62434,0,0,106520,2252,70995
4,강원,동해시,2018,광역단체장,진보정당,보수정당,1,0.6337,2,0.3663,...,0,75747,46923,16783,29034,0,0,45817,1106,28824
5,강원,삼척시,2018,광역단체장,진보정당,보수정당,1,0.6387,2,0.3613,...,0,59117,41032,14369,25401,0,0,39770,1262,18085
6,강원,태백시,2018,광역단체장,진보정당,보수정당,1,0.6235,2,0.3765,...,0,38386,25298,9272,15354,0,0,24626,672,13088
7,강원,정선군,2018,광역단체장,진보정당,보수정당,1,0.6552,2,0.3448,...,0,33293,23092,7739,14708,0,0,22447,645,10201
8,강원,속초시,2018,광역단체장,진보정당,보수정당,1,0.6612,2,0.3388,...,0,68234,40493,13443,26237,0,0,39680,813,27741
9,강원,고성군,2018,광역단체장,진보정당,보수정당,1,0.6531,2,0.3469,...,0,25496,18354,6173,11620,0,0,17793,561,7142


### v4.1 ~ v4.3

In [72]:
# 1. 전체 데이터 저장
df_gangwon.to_csv("temp4_1_governor_gangwon_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_gangwon2 = df_gangwon.query("구시군 != '합계'")
df_gangwon2.to_csv("temp4_2_governor_gangwon_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_gangwon3 = df_gangwon.query("구시군 == '합계'").drop(columns="구시군")
df_gangwon3.to_csv("temp4_3_governor_gangwon_7.csv", index=False, encoding="utf-8-sig")

## Chungbuk

In [73]:
df_chungbuk = election_results['df_chungbuk']

In [74]:
df_chungbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,충청북도,합계,1318186,782316,227371,468750,70330,0,766451,15865,...,1,0.6116,2,0.2967,진보정당,보수정당,1,1,1,0
1,충청북도,청주시상당구,140405,80162,23920,47260,7676,0,78856,1306,...,1,0.5993,2,0.3033,진보정당,보수정당,1,1,1,0
2,충청북도,청주시서원구,174307,100218,28299,60096,10359,0,98754,1464,...,1,0.6085,2,0.2866,진보정당,보수정당,1,1,1,0
3,충청북도,청주시흥덕구,204639,110108,28114,67695,12684,0,108493,1615,...,1,0.624,2,0.2591,진보정당,보수정당,1,1,1,0
4,충청북도,청주시청원구,154249,85634,21312,54139,8988,0,84439,1195,...,1,0.6412,2,0.2524,진보정당,보수정당,1,1,1,0
5,충청북도,충주시,175414,103606,31253,62569,7887,0,101709,1897,...,1,0.6152,2,0.3073,진보정당,보수정당,1,1,1,0
6,충청북도,제천시,114513,70289,21580,41702,5428,0,68710,1579,...,1,0.6069,2,0.3141,진보정당,보수정당,1,1,1,0
7,충청북도,단양군,26796,19078,6807,10661,1070,0,18538,540,...,1,0.5751,2,0.3672,진보정당,보수정당,1,1,1,0
8,충청북도,영동군,43904,31226,10871,17317,1979,0,30167,1059,...,1,0.574,2,0.3604,진보정당,보수정당,1,1,1,0
9,충청북도,보은군,30016,22735,9856,10627,1391,0,21874,861,...,1,0.4858,2,0.4506,진보정당,보수정당,1,1,1,0


### preprocessing

In [75]:
df_chungbuk = (
    df_chungbuk
    .assign(시도=lambda df: df['시도'].replace('충청북도', '충북'))
)

In [76]:
df_chungbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [77]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_chungbuk.columns if col not in fixed_cols]

# 메소드 체이닝
df_chungbuk = (
    df_chungbuk
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [78]:
df_chungbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,충북,합계,2018,광역단체장,진보정당,보수정당,1,0.6116,2,0.2967,...,0,1318186,782316,227371,468750,70330,0,766451,15865,535870
1,충북,청주시상당구,2018,광역단체장,진보정당,보수정당,1,0.5993,2,0.3033,...,0,140405,80162,23920,47260,7676,0,78856,1306,60243
2,충북,청주시서원구,2018,광역단체장,진보정당,보수정당,1,0.6085,2,0.2866,...,0,174307,100218,28299,60096,10359,0,98754,1464,74089
3,충북,청주시흥덕구,2018,광역단체장,진보정당,보수정당,1,0.624,2,0.2591,...,0,204639,110108,28114,67695,12684,0,108493,1615,94531
4,충북,청주시청원구,2018,광역단체장,진보정당,보수정당,1,0.6412,2,0.2524,...,0,154249,85634,21312,54139,8988,0,84439,1195,68615
5,충북,충주시,2018,광역단체장,진보정당,보수정당,1,0.6152,2,0.3073,...,0,175414,103606,31253,62569,7887,0,101709,1897,71808
6,충북,제천시,2018,광역단체장,진보정당,보수정당,1,0.6069,2,0.3141,...,0,114513,70289,21580,41702,5428,0,68710,1579,44224
7,충북,단양군,2018,광역단체장,진보정당,보수정당,1,0.5751,2,0.3672,...,0,26796,19078,6807,10661,1070,0,18538,540,7718
8,충북,영동군,2018,광역단체장,진보정당,보수정당,1,0.574,2,0.3604,...,0,43904,31226,10871,17317,1979,0,30167,1059,12678
9,충북,보은군,2018,광역단체장,진보정당,보수정당,1,0.4858,2,0.4506,...,0,30016,22735,9856,10627,1391,0,21874,861,7281


### v4.1 ~ v4.3

In [79]:
# 1. 전체 데이터 저장
df_chungbuk.to_csv("temp4_1_governor_chungbuk_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_chungbuk2 = df_chungbuk.query("구시군 != '합계'")
df_chungbuk2.to_csv("temp4_2_governor_chungbuk_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_chungbuk3 = df_chungbuk.query("구시군 == '합계'").drop(columns="구시군")
df_chungbuk3.to_csv("temp4_3_governor_chungbuk_7.csv", index=False, encoding="utf-8-sig")

## Chungnam

In [80]:
df_chungnam = election_results['df_chungnam']

In [81]:
df_chungnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,충청남도,합계,1740413,1011447,345577,615870,23012,0,984459,26988,...,1,0.6256,2,0.351,진보정당,보수정당,1,1,1,0
1,충청남도,천안시서북구,298093,159458,37427,117098,2705,0,157230,2228,...,1,0.7448,2,0.238,진보정당,보수정당,1,1,1,0
2,충청남도,천안시동남구,211724,106417,32706,69999,1773,0,104478,1939,...,1,0.67,2,0.313,진보정당,보수정당,1,1,1,0
3,충청남도,공주시,92935,56756,21453,32526,1128,0,55107,1649,...,1,0.5902,2,0.3893,진보정당,보수정당,1,1,1,0
4,충청남도,보령시,87281,56352,23856,28981,1384,0,54221,2131,...,1,0.5345,2,0.44,진보정당,보수정당,1,1,1,0
5,충청남도,아산시,244293,129980,40171,84978,2498,0,127647,2333,...,1,0.6657,2,0.3147,진보정당,보수정당,1,1,1,0
6,충청남도,서산시,139529,82003,28417,49800,1662,0,79879,2124,...,1,0.6234,2,0.3558,진보정당,보수정당,1,1,1,0
7,충청남도,태안군,55792,37240,14832,19534,1146,0,35512,1728,...,1,0.5501,2,0.4177,진보정당,보수정당,1,1,1,0
8,충청남도,금산군,46310,30788,11711,16748,1199,0,29658,1130,...,1,0.5647,2,0.3949,진보정당,보수정당,1,1,1,0
9,충청남도,논산시,103367,62960,23871,35905,1251,0,61027,1933,...,1,0.5883,2,0.3912,진보정당,보수정당,1,1,1,0


### preprocessing

In [82]:
df_chungnam = (
    df_chungnam
    .assign(시도=lambda df: df['시도'].replace('충청남도', '충남'))
)

In [83]:
df_chungnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [84]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_chungnam.columns if col not in fixed_cols]

# 메소드 체이닝
df_chungnam = (
    df_chungnam
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [85]:
df_chungnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,충남,합계,2018,광역단체장,진보정당,보수정당,1,0.6256,2,0.351,...,0,1740413,1011447,345577,615870,23012,0,984459,26988,728966
1,충남,천안시서북구,2018,광역단체장,진보정당,보수정당,1,0.7448,2,0.238,...,0,298093,159458,37427,117098,2705,0,157230,2228,138635
2,충남,천안시동남구,2018,광역단체장,진보정당,보수정당,1,0.67,2,0.313,...,0,211724,106417,32706,69999,1773,0,104478,1939,105307
3,충남,공주시,2018,광역단체장,진보정당,보수정당,1,0.5902,2,0.3893,...,0,92935,56756,21453,32526,1128,0,55107,1649,36179
4,충남,보령시,2018,광역단체장,진보정당,보수정당,1,0.5345,2,0.44,...,0,87281,56352,23856,28981,1384,0,54221,2131,30929
5,충남,아산시,2018,광역단체장,진보정당,보수정당,1,0.6657,2,0.3147,...,0,244293,129980,40171,84978,2498,0,127647,2333,114313
6,충남,서산시,2018,광역단체장,진보정당,보수정당,1,0.6234,2,0.3558,...,0,139529,82003,28417,49800,1662,0,79879,2124,57526
7,충남,태안군,2018,광역단체장,진보정당,보수정당,1,0.5501,2,0.4177,...,0,55792,37240,14832,19534,1146,0,35512,1728,18552
8,충남,금산군,2018,광역단체장,진보정당,보수정당,1,0.5647,2,0.3949,...,0,46310,30788,11711,16748,1199,0,29658,1130,15522
9,충남,논산시,2018,광역단체장,진보정당,보수정당,1,0.5883,2,0.3912,...,0,103367,62960,23871,35905,1251,0,61027,1933,40407


### v4.1 ~ v4.3

In [86]:
# 1. 전체 데이터 저장
df_chungnam.to_csv("temp4_1_governor_chungnam_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_chungnam2 = df_chungnam.query("구시군 != '합계'")
df_chungnam2.to_csv("temp4_2_governor_chungnam_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_chungnam3 = df_chungnam.query("구시군 == '합계'").drop(columns="구시군")
df_chungnam3.to_csv("temp4_3_governor_chungnam_7.csv", index=False, encoding="utf-8-sig")

## Jeonbuk

In [87]:
df_jeonbuk = election_results['df_jeonbuk']

In [88]:
df_jeonbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,전라북도,합계,1527729,996504,26374,682042,258051,0,966467,30037,...,1,0.7057,4,0.1911,진보정당,그외정당,1,1,3,0
1,전라북도,전주시완산구,313929,193303,4205,121422,64328,0,189955,3348,...,1,0.6392,4,0.2554,진보정당,그외정당,1,1,3,0
2,전라북도,전주시덕진구,210274,130716,2713,80531,45345,0,128589,2127,...,1,0.6263,4,0.2645,진보정당,그외정당,1,1,3,0
3,전라북도,군산시,223434,134447,4475,98640,27308,0,130423,4024,...,1,0.7563,4,0.1338,진보정당,그외정당,1,1,3,0
4,전라북도,익산시,245566,153275,3844,111862,33206,0,148912,4363,...,1,0.7512,4,0.1543,진보정당,그외정당,1,1,3,0
5,전라북도,정읍시,96002,64334,1468,47427,12847,0,61742,2592,...,1,0.7681,4,0.1424,진보정당,그외정당,1,1,3,0
6,전라북도,남원시,69930,51530,1532,37718,10302,0,49552,1978,...,1,0.7612,4,0.1444,진보정당,그외정당,1,1,3,0
7,전라북도,김제시,74992,51664,1242,37736,10716,0,49694,1970,...,1,0.7594,4,0.1662,진보정당,그외정당,1,1,3,0
8,전라북도,완주군,78531,52837,1206,31420,18549,0,51175,1662,...,1,0.614,4,0.2843,진보정당,그외정당,1,1,3,0
9,전라북도,진안군,23029,17585,913,11593,4321,0,16827,758,...,1,0.689,4,0.1733,진보정당,그외정당,1,1,3,0


### preprocessing

In [89]:
df_jeonbuk = (
    df_jeonbuk
    .assign(시도=lambda df: df['시도'].replace('전라북도', '전북'))
)

In [90]:
df_jeonbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [91]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_jeonbuk.columns if col not in fixed_cols]

# 메소드 체이닝
df_jeonbuk = (
    df_jeonbuk
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [92]:
df_jeonbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,전북,합계,2018,광역단체장,진보정당,그외정당,1,0.7057,4,0.1911,...,0,1527729,996504,26374,682042,258051,0,966467,30037,531225
1,전북,전주시완산구,2018,광역단체장,진보정당,그외정당,1,0.6392,4,0.2554,...,0,313929,193303,4205,121422,64328,0,189955,3348,120626
2,전북,전주시덕진구,2018,광역단체장,진보정당,그외정당,1,0.6263,4,0.2645,...,0,210274,130716,2713,80531,45345,0,128589,2127,79558
3,전북,군산시,2018,광역단체장,진보정당,그외정당,1,0.7563,4,0.1338,...,0,223434,134447,4475,98640,27308,0,130423,4024,88987
4,전북,익산시,2018,광역단체장,진보정당,그외정당,1,0.7512,4,0.1543,...,0,245566,153275,3844,111862,33206,0,148912,4363,92291
5,전북,정읍시,2018,광역단체장,진보정당,그외정당,1,0.7681,4,0.1424,...,0,96002,64334,1468,47427,12847,0,61742,2592,31668
6,전북,남원시,2018,광역단체장,진보정당,그외정당,1,0.7612,4,0.1444,...,0,69930,51530,1532,37718,10302,0,49552,1978,18400
7,전북,김제시,2018,광역단체장,진보정당,그외정당,1,0.7594,4,0.1662,...,0,74992,51664,1242,37736,10716,0,49694,1970,23328
8,전북,완주군,2018,광역단체장,진보정당,그외정당,1,0.614,4,0.2843,...,0,78531,52837,1206,31420,18549,0,51175,1662,25694
9,전북,진안군,2018,광역단체장,진보정당,그외정당,1,0.689,4,0.1733,...,0,23029,17585,913,11593,4321,0,16827,758,5444


### v4.1 ~ v4.3

In [93]:
# 1. 전체 데이터 저장
df_jeonbuk.to_csv("temp4_1_governor_jeonbuk_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_jeonbuk2 = df_jeonbuk.query("구시군 != '합계'")
df_jeonbuk2.to_csv("temp4_2_governor_jeonbuk_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_jeonbuk3 = df_jeonbuk.query("구시군 == '합계'").drop(columns="구시군")
df_jeonbuk3.to_csv("temp4_3_governor_jeonbuk_7.csv", index=False, encoding="utf-8-sig")

## Jeonnam

In [94]:
df_jeonnam = election_results['df_jeonnam']

In [95]:
df_jeonnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,전라남도,합계,1577224,1092107,40287,807902,199816,0,1048005,44102,...,1,0.7709,4,0.1059,진보정당,그외정당,0,1,4,0
1,전라남도,목포시,186864,119950,2644,87419,26702,0,116765,3185,...,1,0.7487,4,0.1591,진보정당,그외정당,0,1,4,0
2,전라남도,여수시,235359,153554,6198,120941,22557,0,149696,3858,...,1,0.8079,4,0.0733,진보정당,그외정당,0,1,4,0
3,전라남도,순천시,225673,144341,4942,113180,22614,0,140736,3605,...,1,0.8042,4,0.0668,진보정당,그외정당,0,1,4,0
4,전라남도,나주시,93513,61444,2317,47100,9694,0,59111,2333,...,1,0.7968,4,0.0804,진보정당,그외정당,0,1,4,0
5,전라남도,광양시,122154,83720,4210,63778,13107,0,81095,2625,...,1,0.7865,4,0.0674,진보정당,그외정당,0,1,4,0
6,전라남도,담양군,41703,29739,1361,22127,5051,0,28539,1200,...,1,0.7753,4,0.0735,진보정당,그외정당,0,1,4,0
7,전라남도,장성군,39603,29751,1294,20820,5959,0,28073,1678,...,1,0.7416,4,0.0874,진보정당,그외정당,0,1,4,0
8,전라남도,곡성군,26739,20723,1219,13989,4308,0,19516,1207,...,1,0.7168,4,0.1055,진보정당,그외정당,0,1,4,0
9,전라남도,구례군,23743,19333,723,13080,4471,0,18274,1059,...,1,0.7158,4,0.1224,진보정당,그외정당,0,1,4,0


### preprocessing

In [96]:
df_jeonnam = (
    df_jeonnam
    .assign(시도=lambda df: df['시도'].replace('전라남도', '전남'))
)

In [97]:
df_jeonnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [98]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_jeonnam.columns if col not in fixed_cols]

# 메소드 체이닝
df_jeonnam = (
    df_jeonnam
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [99]:
df_jeonnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,전남,합계,2018,광역단체장,진보정당,그외정당,1,0.7709,4,0.1059,...,0,1577224,1092107,40287,807902,199816,0,1048005,44102,485117
1,전남,목포시,2018,광역단체장,진보정당,그외정당,1,0.7487,4,0.1591,...,0,186864,119950,2644,87419,26702,0,116765,3185,66914
2,전남,여수시,2018,광역단체장,진보정당,그외정당,1,0.8079,4,0.0733,...,0,235359,153554,6198,120941,22557,0,149696,3858,81805
3,전남,순천시,2018,광역단체장,진보정당,그외정당,1,0.8042,4,0.0668,...,0,225673,144341,4942,113180,22614,0,140736,3605,81332
4,전남,나주시,2018,광역단체장,진보정당,그외정당,1,0.7968,4,0.0804,...,0,93513,61444,2317,47100,9694,0,59111,2333,32069
5,전남,광양시,2018,광역단체장,진보정당,그외정당,1,0.7865,4,0.0674,...,0,122154,83720,4210,63778,13107,0,81095,2625,38434
6,전남,담양군,2018,광역단체장,진보정당,그외정당,1,0.7753,4,0.0735,...,0,41703,29739,1361,22127,5051,0,28539,1200,11964
7,전남,장성군,2018,광역단체장,진보정당,그외정당,1,0.7416,4,0.0874,...,0,39603,29751,1294,20820,5959,0,28073,1678,9852
8,전남,곡성군,2018,광역단체장,진보정당,그외정당,1,0.7168,4,0.1055,...,0,26739,20723,1219,13989,4308,0,19516,1207,6016
9,전남,구례군,2018,광역단체장,진보정당,그외정당,1,0.7158,4,0.1224,...,0,23743,19333,723,13080,4471,0,18274,1059,4410


### v4.1 ~ v4.3

In [100]:
# 1. 전체 데이터 저장
df_jeonnam.to_csv("temp4_1_governor_jeonnam_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_jeonnam2 = df_jeonnam.query("구시군 != '합계'")
df_jeonnam2.to_csv("temp4_2_governor_jeonnam_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_jeonnam3 = df_jeonnam.query("구시군 == '합계'").drop(columns="구시군")
df_jeonnam3.to_csv("temp4_3_governor_jeonnam_7.csv", index=False, encoding="utf-8-sig")

## Gyeongbuk

In [101]:
df_gyeongbuk = election_results['df_gyeongbuk']

In [102]:
df_gyeongbuk

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경상북도,합계,2251538,1457539,732785,482564,190659,0,1406008,51531,...,2,0.5212,1,0.3432,보수정당,진보정당,1,1,2,0
1,경상북도,포항시북구,223110,139955,64206,57440,15073,0,136719,3236,...,2,0.4696,1,0.4201,보수정당,진보정당,1,1,2,0
2,경상북도,포항시남구,198696,119400,54509,49535,12559,0,116603,2797,...,2,0.4675,1,0.4248,보수정당,진보정당,1,1,2,0
3,경상북도,울릉군,9057,7479,4222,2087,835,0,7144,335,...,2,0.591,1,0.2921,보수정당,진보정당,1,1,2,0
4,경상북도,경주시,218273,139142,73050,44294,16953,0,134297,4845,...,2,0.5439,1,0.3298,보수정당,진보정당,1,1,2,0
5,경상북도,김천시,119511,83567,45278,26500,8985,0,80763,2804,...,2,0.5606,1,0.3281,보수정당,진보정당,1,1,2,0
6,경상북도,안동시,137423,94434,40002,28292,22952,0,91246,3188,...,2,0.4384,1,0.3101,보수정당,진보정당,1,1,2,0
7,경상북도,구미시,332873,187236,82603,78892,21708,0,183203,4033,...,2,0.4509,1,0.4306,보수정당,진보정당,1,1,2,0
8,경상북도,영주시,91447,63733,31646,19849,9865,0,61360,2373,...,2,0.5157,1,0.3235,보수정당,진보정당,1,1,2,0
9,경상북도,영천시,88000,60058,32770,16867,7783,0,57420,2638,...,2,0.5707,1,0.2937,보수정당,진보정당,1,1,2,0


### preprocessing

In [103]:
df_gyeongbuk = (
    df_gyeongbuk
    .assign(시도=lambda df: df['시도'].replace('경상북도', '경북'))
)

In [104]:
df_gyeongbuk.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [105]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_gyeongbuk.columns if col not in fixed_cols]

# 메소드 체이닝
df_gyeongbuk = (
    df_gyeongbuk
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [106]:
df_gyeongbuk

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,경북,합계,2018,광역단체장,보수정당,진보정당,2,0.5212,1,0.3432,...,0,2251538,1457539,732785,482564,190659,0,1406008,51531,793999
1,경북,포항시북구,2018,광역단체장,보수정당,진보정당,2,0.4696,1,0.4201,...,0,223110,139955,64206,57440,15073,0,136719,3236,83155
2,경북,포항시남구,2018,광역단체장,보수정당,진보정당,2,0.4675,1,0.4248,...,0,198696,119400,54509,49535,12559,0,116603,2797,79296
3,경북,울릉군,2018,광역단체장,보수정당,진보정당,2,0.591,1,0.2921,...,0,9057,7479,4222,2087,835,0,7144,335,1578
4,경북,경주시,2018,광역단체장,보수정당,진보정당,2,0.5439,1,0.3298,...,0,218273,139142,73050,44294,16953,0,134297,4845,79131
5,경북,김천시,2018,광역단체장,보수정당,진보정당,2,0.5606,1,0.3281,...,0,119511,83567,45278,26500,8985,0,80763,2804,35944
6,경북,안동시,2018,광역단체장,보수정당,진보정당,2,0.4384,1,0.3101,...,0,137423,94434,40002,28292,22952,0,91246,3188,42989
7,경북,구미시,2018,광역단체장,보수정당,진보정당,2,0.4509,1,0.4306,...,0,332873,187236,82603,78892,21708,0,183203,4033,145637
8,경북,영주시,2018,광역단체장,보수정당,진보정당,2,0.5157,1,0.3235,...,0,91447,63733,31646,19849,9865,0,61360,2373,27714
9,경북,영천시,2018,광역단체장,보수정당,진보정당,2,0.5707,1,0.2937,...,0,88000,60058,32770,16867,7783,0,57420,2638,27942


### v4.1 ~ v4.3

In [107]:
# 1. 전체 데이터 저장
df_gyeongbuk.to_csv("temp4_1_governor_gyeongbuk_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_gyeongbuk2 = df_gyeongbuk.query("구시군 != '합계'")
df_gyeongbuk2.to_csv("temp4_2_governor_gyeongbuk_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_gyeongbuk3 = df_gyeongbuk.query("구시군 == '합계'").drop(columns="구시군")
df_gyeongbuk3.to_csv("temp4_3_governor_gyeongbuk_7.csv", index=False, encoding="utf-8-sig")

## Gyeongnam

In [108]:
df_gyeongnam = election_results['df_gyeongnam']

In [109]:
df_gyeongnam

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,경상남도,합계,2765485,1819391,765809,941491,75418,0,1782718,36673,...,1,0.5281,2,0.4296,진보정당,보수정당,1,1,1,0
1,경상남도,창원시의창구,210327,135147,54066,72745,6264,0,133075,2072,...,1,0.5466,2,0.4063,진보정당,보수정당,1,1,1,0
2,경상남도,창원시성산구,184600,125797,42016,76104,6012,0,124132,1665,...,1,0.6131,2,0.3385,진보정당,보수정당,1,1,1,0
3,경상남도,창원시마산합포구,154311,99281,48933,44305,4348,0,97586,1695,...,2,0.5014,1,0.454,보수정당,진보정당,1,1,1,0
4,경상남도,창원시마산회원구,166500,109609,50373,52918,4628,0,107919,1690,...,1,0.4903,2,0.4668,진보정당,보수정당,1,1,1,0
5,경상남도,창원시진해구,150396,93901,37660,50350,4381,0,92391,1510,...,1,0.545,2,0.4076,진보정당,보수정당,1,1,1,0
6,경상남도,진주시,282049,190842,83637,96104,8004,0,187745,3097,...,1,0.5119,2,0.4455,진보정당,보수정당,1,1,1,0
7,경상남도,통영시,110317,72815,35501,32916,2890,0,71307,1508,...,2,0.4979,1,0.4616,보수정당,진보정당,1,1,1,0
8,경상남도,고성군,46588,34075,15438,16355,1091,0,32884,1191,...,1,0.4974,2,0.4695,진보정당,보수정당,1,1,1,0
9,경상남도,사천시,94449,65481,31787,29390,2599,0,63776,1705,...,2,0.4984,1,0.4608,보수정당,진보정당,1,1,1,0


### preprocessing

In [110]:
df_gyeongnam = (
    df_gyeongnam
    .assign(시도=lambda df: df['시도'].replace('경상남도', '경남'))
)

In [111]:
df_gyeongnam.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [112]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_gyeongnam.columns if col not in fixed_cols]

# 메소드 체이닝
df_gyeongnam = (
    df_gyeongnam
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [113]:
df_gyeongnam

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,경남,합계,2018,광역단체장,진보정당,보수정당,1,0.5281,2,0.4296,...,0,2765485,1819391,765809,941491,75418,0,1782718,36673,946094
1,경남,창원시의창구,2018,광역단체장,진보정당,보수정당,1,0.5466,2,0.4063,...,0,210327,135147,54066,72745,6264,0,133075,2072,75180
2,경남,창원시성산구,2018,광역단체장,진보정당,보수정당,1,0.6131,2,0.3385,...,0,184600,125797,42016,76104,6012,0,124132,1665,58803
3,경남,창원시마산합포구,2018,광역단체장,보수정당,진보정당,2,0.5014,1,0.454,...,0,154311,99281,48933,44305,4348,0,97586,1695,55030
4,경남,창원시마산회원구,2018,광역단체장,진보정당,보수정당,1,0.4903,2,0.4668,...,0,166500,109609,50373,52918,4628,0,107919,1690,56891
5,경남,창원시진해구,2018,광역단체장,진보정당,보수정당,1,0.545,2,0.4076,...,0,150396,93901,37660,50350,4381,0,92391,1510,56495
6,경남,진주시,2018,광역단체장,진보정당,보수정당,1,0.5119,2,0.4455,...,0,282049,190842,83637,96104,8004,0,187745,3097,91207
7,경남,통영시,2018,광역단체장,보수정당,진보정당,2,0.4979,1,0.4616,...,0,110317,72815,35501,32916,2890,0,71307,1508,37502
8,경남,고성군,2018,광역단체장,진보정당,보수정당,1,0.4974,2,0.4695,...,0,46588,34075,15438,16355,1091,0,32884,1191,12513
9,경남,사천시,2018,광역단체장,보수정당,진보정당,2,0.4984,1,0.4608,...,0,94449,65481,31787,29390,2599,0,63776,1705,28968


### v4.1 ~ v4.3

In [114]:
# 1. 전체 데이터 저장
df_gyeongnam.to_csv("temp4_1_governor_gyeongnam_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_gyeongnam2 = df_gyeongnam.query("구시군 != '합계'")
df_gyeongnam2.to_csv("temp4_2_governor_gyeongnam_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_gyeongnam3 = df_gyeongnam.query("구시군 == '합계'").drop(columns="구시군")
df_gyeongnam3.to_csv("temp4_3_governor_gyeongnam_7.csv", index=False, encoding="utf-8-sig")

## Jeju

In [115]:
df_jeju = election_results['df_jeju']

In [116]:
df_jeju

Unnamed: 0,시도,구시군,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,...,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,득표_1위_정당,득표_2위_정당,보수정당_후보자수,진보정당_후보자수,그외정당_후보자수,무소속_후보자수
0,제주특별자치도,합계,532515,350943,11241,137901,17207,178255,344604,6339,...,7,0.5173,1,0.4002,무소속,진보정당,1,1,2,1
1,제주특별자치도,제주시,383917,250176,8785,97935,12734,126594,246048,4128,...,7,0.5145,1,0.398,무소속,진보정당,1,1,2,1
2,제주특별자치도,서귀포시,148598,100767,2456,39966,4473,51661,98556,2211,...,7,0.5242,1,0.4055,무소속,진보정당,1,1,2,1


### preprocessing

In [117]:
df_jeju = (
    df_jeju
    .assign(시도=lambda df: df['시도'].replace('제주특별자치도', '제주'))
)

In [118]:
df_jeju.columns

Index(['시도', '구시군', '선거인수', '투표수', '보수정당', '진보정당', '그외정당', '무소속', '득표수_계',
       '무효투표수', '기권수', '득표_1위_후보번호', '득표_1위_득표율', '득표_2위_후보번호', '득표_2위_득표율',
       '득표_1위_정당', '득표_2위_정당', '보수정당_후보자수', '진보정당_후보자수', '그외정당_후보자수',
       '무소속_후보자수'],
      dtype='object')

In [119]:
# 고정 열 순서 정의
fixed_cols = [
    '시도', '구시군', '선거년도', '선거종류',
    '득표_1위_정당', '득표_2위_정당',
    '득표_1위_후보번호', '득표_1위_득표율',
    '득표_2위_후보번호', '득표_2위_득표율',
    '보수정당_후보자수', '진보정당_후보자수',
    '그외정당_후보자수', '무소속_후보자수'
]

# 나머지 열: fixed_cols에 포함되지 않은 열
other_cols = [col for col in df_jeju.columns if col not in fixed_cols]

# 메소드 체이닝
df_jeju = (
    df_jeju
    .assign(선거종류='광역단체장', 선거년도='2018')
    .loc[:, fixed_cols + other_cols]
    .rename(columns={'시도': '지역'})
)

In [120]:
df_jeju

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,제주,합계,2018,광역단체장,무소속,진보정당,7,0.5173,1,0.4002,...,1,532515,350943,11241,137901,17207,178255,344604,6339,181572
1,제주,제주시,2018,광역단체장,무소속,진보정당,7,0.5145,1,0.398,...,1,383917,250176,8785,97935,12734,126594,246048,4128,133741
2,제주,서귀포시,2018,광역단체장,무소속,진보정당,7,0.5242,1,0.4055,...,1,148598,100767,2456,39966,4473,51661,98556,2211,47831


### v4.1 ~ v4.3

In [121]:
# 1. 전체 데이터 저장
df_jeju.to_csv("temp4_1_governor_jeju_7.csv", index=False, encoding="utf-8-sig")

# 2. '구시군' != '합계' 행만 추출하여 저장
df_jeju2 = df_jeju.query("구시군 != '합계'")
df_jeju2.to_csv("temp4_2_governor_jeju_7.csv", index=False, encoding="utf-8-sig")

# 3. '구시군' == '합계' 행만 추출 후 '구시군' 열 제거하여 저장
df_jeju3 = df_jeju.query("구시군 == '합계'").drop(columns="구시군")
df_jeju3.to_csv("temp4_3_governor_jeju_7.csv", index=False, encoding="utf-8-sig")

## Merge

### v4.1

In [122]:
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'sejong', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

df_combined = (
    pd.concat([globals()[f'df_{region}'] for region in AVAILABLE_REGIONS], ignore_index=True)
)

In [123]:
df_combined

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,합계,2018,광역단체장,진보정당,보수정당,1,0.5279,2,0.2335,...,0,8380947,5019098,1158487,2619497,1183888,0,4961872,57226,3361849
1,서울,종로구,2018,광역단체장,진보정당,보수정당,1,0.5138,2,0.2344,...,0,134963,81195,18777,41148,20166,0,80091,1104,53768
2,서울,중구,2018,광역단체장,진보정당,보수정당,1,0.5229,2,0.2385,...,0,112336,66474,15618,34243,15630,0,65491,983,45862
3,서울,용산구,2018,광역단체장,진보정당,보수정당,1,0.4706,2,0.2710,...,0,200009,116054,31073,53964,29625,0,114662,1392,83955
4,서울,성동구,2018,광역단체장,진보정당,보수정당,1,0.5396,2,0.2238,...,0,264634,157863,34878,84106,36882,0,155866,1997,106771
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,경남,거창군,2018,광역단체장,보수정당,진보정당,2,0.6003,1,0.3630,...,0,53061,39547,23022,13919,1407,0,38348,1199,13514
262,경남,합천군,2018,광역단체장,보수정당,진보정당,2,0.6264,1,0.3335,...,0,42165,32499,19403,10330,1240,0,30973,1526,9666
263,제주,합계,2018,광역단체장,무소속,진보정당,7,0.5173,1,0.4002,...,1,532515,350943,11241,137901,17207,178255,344604,6339,181572
264,제주,제주시,2018,광역단체장,무소속,진보정당,7,0.5145,1,0.3980,...,1,383917,250176,8785,97935,12734,126594,246048,4128,133741


In [124]:
df_combined.to_csv("temp4_1_governor_7.csv", index=False, encoding="utf-8-sig")

### v4.2

In [125]:
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'sejong', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]
df_combined2 = (
    pd.concat([globals()[f'df_{region}2'] for region in AVAILABLE_REGIONS], ignore_index=True)
)

In [126]:
df_combined2

Unnamed: 0,지역,구시군,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,종로구,2018,광역단체장,진보정당,보수정당,1,0.5138,2,0.2344,...,0,134963,81195,18777,41148,20166,0,80091,1104,53768
1,서울,중구,2018,광역단체장,진보정당,보수정당,1,0.5229,2,0.2385,...,0,112336,66474,15618,34243,15630,0,65491,983,45862
2,서울,용산구,2018,광역단체장,진보정당,보수정당,1,0.4706,2,0.2710,...,0,200009,116054,31073,53964,29625,0,114662,1392,83955
3,서울,성동구,2018,광역단체장,진보정당,보수정당,1,0.5396,2,0.2238,...,0,264634,157863,34878,84106,36882,0,155866,1997,106771
4,서울,광진구,2018,광역단체장,진보정당,보수정당,1,0.5571,2,0.2164,...,0,308174,180373,38606,99366,40396,0,178368,2005,127801
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244,경남,산청군,2018,광역단체장,보수정당,진보정당,2,0.5445,1,0.4117,...,0,32408,24648,12932,9779,1040,0,23751,897,7760
245,경남,거창군,2018,광역단체장,보수정당,진보정당,2,0.6003,1,0.3630,...,0,53061,39547,23022,13919,1407,0,38348,1199,13514
246,경남,합천군,2018,광역단체장,보수정당,진보정당,2,0.6264,1,0.3335,...,0,42165,32499,19403,10330,1240,0,30973,1526,9666
247,제주,제주시,2018,광역단체장,무소속,진보정당,7,0.5145,1,0.3980,...,1,383917,250176,8785,97935,12734,126594,246048,4128,133741


In [127]:
df_combined2.to_csv("temp4_2_governor_7.csv", index=False, encoding="utf-8-sig")

### v4.3

In [128]:
AVAILABLE_REGIONS = [
    'seoul', 'busan', 'daegu', 'incheon', 'gwangju', 'daejeon',
    'ulsan', 'sejong', 'gyeonggi', 'gangwon', 'chungbuk', 'chungnam',
    'jeonbuk', 'jeonnam', 'gyeongbuk', 'gyeongnam', 'jeju'
]

df_combined3 = (
    pd.concat([globals()[f'df_{region}3'] for region in AVAILABLE_REGIONS], ignore_index=True)
)

In [129]:
df_combined3

Unnamed: 0,지역,선거년도,선거종류,득표_1위_정당,득표_2위_정당,득표_1위_후보번호,득표_1위_득표율,득표_2위_후보번호,득표_2위_득표율,보수정당_후보자수,...,무소속_후보자수,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수
0,서울,2018,광역단체장,진보정당,보수정당,1,0.5279,2,0.2335,1,...,0,8380947,5019098,1158487,2619497,1183888,0,4961872,57226,3361849
1,부산,2018,광역단체장,진보정당,보수정당,1,0.5523,2,0.3716,1,...,1,2939046,1727684,632806,940469,102801,26720,1702796,24888,1211362
2,대구,2018,광역단체장,보수정당,진보정당,2,0.5374,1,0.3976,1,...,0,2047286,1172279,619165,458112,74955,0,1152232,20047,875007
3,인천,2018,광역단체장,진보정당,보수정당,1,0.5767,2,0.3544,1,...,0,2440779,1349084,470937,766186,91526,0,1328649,20435,1091695
4,광주,2018,광역단체장,진보정당,그외정당,1,0.8408,5,0.0599,0,...,0,1172429,694252,34487,573995,74228,0,682710,11542,478177
5,대전,2018,광역단체장,진보정당,보수정당,1,0.5641,2,0.3217,1,...,0,1219513,706983,224306,393354,79622,0,697282,9701,512530
6,울산,2018,광역단체장,진보정당,보수정당,1,0.5289,2,0.4008,1,...,0,942550,610698,240475,317341,42210,0,600026,10672,331852
7,세종,2018,광역단체장,진보정당,보수정당,1,0.7131,2,0.1806,1,...,0,222852,137603,24546,96896,14444,0,135886,1717,85249
8,경기,2018,광역단체장,진보정당,보수정당,1,0.5641,2,0.3552,1,...,0,10533027,6084955,2122433,3370621,482473,0,5975527,109428,4448072
9,강원,2018,광역단체장,진보정당,보수정당,1,0.6473,2,0.3527,1,...,0,1296196,819193,282456,518447,0,0,800903,18290,477003


In [130]:
df_combined3.to_csv("temp4_3_governor_7.csv", index=False, encoding="utf-8-sig")

# Batch CSV Files to ZIP

In [131]:
import zipfile
import glob

# Find all CSV files in current directory
csv_files = glob.glob('*.csv')

# Create ZIP file
with zipfile.ZipFile('all_csv_files.zip', 'w') as zipf:
   for file in csv_files:
       zipf.write(file)
       print(f"Added: {file}")  # Show progress

print(f"Total {len(csv_files)} files compressed.")

Added: temp4_3_governor_daegu_7.csv
Added: temp4_3_governor_seoul_7.csv
Added: temp4_3_governor_daejeon_7.csv
Added: temp4_3_governor_jeonnam_7.csv
Added: temp4_1_governor_busan_7.csv
Added: temp4_1_governor_chungnam_7.csv
Added: temp4_2_governor_gwangju_7.csv
Added: temp4_2_governor_jeonnam_7.csv
Added: temp4_1_governor_sejong_7.csv
Added: temp4_1_governor_jeonnam_7.csv
Added: temp4_3_governor_gwangju_7.csv
Added: temp4_2_governor_chungbuk_7.csv
Added: temp4_2_governor_incheon_7.csv
Added: temp4_1_governor_7.csv
Added: temp4_2_governor_jeonbuk_7.csv
Added: temp4_2_governor_gyeonggi_7.csv
Added: temp4_1_governor_ulsan_7.csv
Added: temp4_1_governor_gyeongbuk_7.csv
Added: temp4_1_governor_daegu_7.csv
Added: temp4_2_governor_seoul_7.csv
Added: temp4_3_governor_gyeongnam_7.csv
Added: temp4_3_governor_busan_7.csv
Added: temp4_2_governor_chungnam_7.csv
Added: temp4_1_governor_gwangju_7.csv
Added: temp4_1_governor_gyeongnam_7.csv
Added: temp4_3_governor_incheon_7.csv
Added: temp4_3_governor_u