# Functions

In [1]:
import pandas as pd

def show_column_info(df):
    """
    Print the positional index and name of every column in a DataFrame,
    followed by the total number of columns and rows.

    Parameters:
    - df: pandas DataFrame to describe

    Returns:
    - None (the information is written to stdout only)
    """
    print("=== 열 이름과 인덱스 ===")

    # One "<position>: <name>" line per column, in column order.
    listing = [f"{position}: {label}" for position, label in enumerate(df.columns)]
    for entry in listing:
        print(entry)

    column_count = len(df.columns)
    row_count = len(df)
    print(f"\n총 열 개수: {column_count}")
    print(f"총 행 개수: {row_count}")

def rename_columns_by_index(df, index_or_indices, new_name_or_names):
    """
    Rename columns selected by position and return the result as a new DataFrame.

    The rename is strictly positional: only the column at each given index is
    relabelled, even when several columns share the same name. One progress
    line is printed per requested rename, followed by a short summary.

    Parameters:
    - df: pandas DataFrame (left unmodified)
    - index_or_indices: a single zero-based column position (any integer type,
      including NumPy integers) or an iterable of positions
    - new_name_or_names: a single new name (str) or an iterable of new names,
      one per position

    Returns:
    - A copy of df with the selected columns renamed

    Raises:
    - ValueError: if the number of positions and names differ, or if a position
      lies outside 0 .. len(df.columns) - 1 (negative positions are rejected)
    """
    import numbers  # local import: only needed for the scalar-index check

    # Normalise scalar arguments to lists. numbers.Integral also matches NumPy
    # integer scalars, which a plain isinstance(..., int) check would miss.
    if isinstance(index_or_indices, numbers.Integral):
        indices = [index_or_indices]
    else:
        indices = list(index_or_indices)

    if isinstance(new_name_or_names, str):
        new_names = [new_name_or_names]
    else:
        new_names = list(new_name_or_names)

    # Every position needs exactly one new name.
    if len(indices) != len(new_names):
        raise ValueError(f"인덱스 개수({len(indices)})와 새 이름 개수({len(new_names)})가 일치하지 않습니다.")

    # Validate all positions up front so nothing is printed for a bad request.
    n_columns = len(df.columns)
    for idx in indices:
        if idx < 0 or idx >= n_columns:
            raise ValueError(f"유효하지 않은 인덱스: {idx}. 유효 범위: 0 ~ {n_columns - 1}")

    # Build the new label list by position. Renaming through a name-keyed
    # mapping would relabel every column sharing the old name, not just the
    # requested position.
    new_labels = list(df.columns)
    renamed_positions = set()
    for idx, new_name in zip(indices, new_names):
        old_name = df.columns[idx]
        new_labels[idx] = new_name
        renamed_positions.add(idx)
        print(f"인덱스 {idx}: '{old_name}' -> '{new_name}'")

    # Work on a copy so the caller's frame is untouched; keep the name of the
    # columns axis, as DataFrame.rename would.
    result_df = df.copy()
    result_df.columns = pd.Index(new_labels, name=df.columns.name)

    print("\n=== 열 이름 변경 완료 ===")
    print(f"변경된 열 개수: {len(renamed_positions)}")

    return result_df

def add_column_with_value(df, column_name, value):
    """
    Return a copy of a DataFrame in which `column_name` holds `value`.

    The input frame is never modified. A column name that does not exist yet
    is appended as the last column; an existing column keeps its position and
    has its contents replaced.

    Parameters:
    - df: pandas DataFrame
    - column_name: name of the column to set (str)
    - value: value assigned to the column (int, str, float, ...)

    Returns:
    - A new DataFrame containing the added (or overwritten) column
    """
    augmented = df.copy()  # copy first so the caller's frame stays untouched
    augmented[column_name] = value
    return augmented

# v3.1

## Preprocessing

In [2]:
url14_1 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_1_p/temp2_1_president_14.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p14_1 = (
    pd.read_csv(url14_1)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '1992')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p14_1

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,28676547,24095170,9977332,8041284,5756793,23775409,319761,4581377,대통령,1992
1,서울,7235830,6021311,2167298,2246636,1537843,5951777,69534,1214519,대통령,1992
2,부산,2506539,2135546,1551473,265055,298861,2115389,20157,370993,대통령,1992
3,대구,1458247,1172636,690245,90641,377307,1158193,14443,285611,대통령,1992
4,인천,1321616,1081011,397361,338538,330389,1066288,14723,240605,대통령,1992
5,광주,745270,685797,14504,652337,13759,680600,5197,59473,대통령,1992
6,대전,705227,582613,202137,165067,207199,574403,8210,122614,대통령,1992
7,경기,4270726,3502774,1254025,1103498,1093872,3451395,51379,767952,대통령,1992
8,강원,990821,834891,340528,127265,352462,820255,14636,155930,대통령,1992
9,충북,894837,750483,281678,191743,262750,736171,14312,144354,대통령,1992


In [3]:
url15_1 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_1_p/temp2_1_president_15.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p15_1 = (
    pd.read_csv(url15_1)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '1997')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p15_1

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,32290416,26042633,9935718,10326275,5380445,25642438,400195,6247783,대통령,1997
1,서울,7358547,5926743,2394309,2627308,833156,5854773,71970,1431804,대통령,1997
2,부산,2692311,2124010,1117069,320178,657159,2094406,29604,568301,대통령,1997
3,대구,1707338,1347018,965607,166576,196905,1329088,17930,360320,대통령,1997
4,인천,1639655,1311512,470560,497839,324212,1292611,18901,328143,대통령,1997
5,광주,870554,783025,13294,754159,7746,775199,7826,87529,대통령,1997
6,대전,881474,692821,199266,307493,176134,682893,9928,188653,대통령,1997
7,울산,654125,530459,268998,80751,174014,523763,6696,123666,대통령,1997
8,경기,5707087,4600005,1612108,1781577,1141839,4535524,64481,1107082,대통령,1997
9,강원,1077853,846596,358921,197438,274584,830943,15653,231257,대통령,1997


In [4]:
url16_1 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_1_p/temp2_1_president_16.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p16_1 = (
    pd.read_csv(url16_1)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '2002')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p16_1

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,34991529,24784963,11443297,12014277,1104342,24561916,223047,10206566,대통령,2002
1,서울,7670682,5475715,2447376,2792957,203657,5443990,31725,2194967,대통령,2002
2,부산,2786142,1983492,1314274,587946,66873,1969093,14399,802650,대통령,2002
3,대구,1827162,1299968,1002164,240745,46000,1288909,11059,527194,대통령,2002
4,인천,1824905,1236447,547205,611766,68845,1227816,8631,588458,대통령,2002
5,광주,967222,755398,26869,715182,9365,751416,3982,211824,대통령,2002
6,대전,998541,675029,266760,369046,34040,669846,5183,323512,대통령,2002
7,울산,729645,510496,267737,178584,60001,506322,4174,219149,대통령,2002
8,경기,6944934,4831412,2120191,2430193,247622,4798006,33406,2113522,대통령,2002
9,강원,1131168,773560,400405,316722,45810,762937,10623,357608,대통령,2002


In [5]:
url17_1 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_1_p/temp2_1_president_17.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p17_1 = (
    pd.read_csv(url17_1)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '2007')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p17_1

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,37653518,23732854,11492389,6174681,5945810,23612880,119974,13920664,대통령,2007
1,서울,8051696,5066022,2689162,1237812,1124395,5051369,14653,2985674,대통령,2007
2,부산,2843063,1765231,1018715,236708,503829,1759252,5979,1077832,대통령,2007
3,대구,1896866,1267969,876719,75932,311027,1263678,4291,628897,대통령,2007
4,인천,2005874,1210220,593283,286565,325509,1205357,4863,795654,대통령,2007
5,광주,1031333,663338,56875,527588,77089,661552,1786,367995,대통령,2007
6,대전,1098977,680264,246008,159700,272240,677948,2316,418713,대통령,2007
7,울산,806423,521216,279891,70736,167959,518586,2630,285207,대통령,2007
8,경기,8222124,5035641,2603443,1181936,1232028,5017407,18234,3186483,대통령,2007
9,강원,1164655,728895,376004,136668,210831,723503,5392,435760,대통령,2007


In [6]:
url18_1 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_1_p/temp2_1_president_18.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p18_1 = (
    pd.read_csv(url18_1)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '2012')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p18_1

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,40507842,30721459,15773128,14692632,128861,30594621,126838,9786383,대통령,2012
1,서울특별시,8393847,6307869,3024572,3227639,24488,6276699,31170,2085978,대통령,2012
2,부산광역시,2911700,2219699,1324159,882511,6735,2213405,6294,692001,대통령,2012
3,대구광역시,1990746,1585806,1267789,309034,5017,1581840,3966,404940,대통령,2012
4,인천광역시,2241366,1657821,852600,794213,6153,1652966,4855,583545,대통령,2012
5,광주광역시,1117781,898416,69574,823737,2275,895586,2830,219365,대통령,2012
6,대전광역시,1182321,904367,450576,448310,2992,901878,2489,277954,대통령,2012
7,울산광역시,886061,694938,413977,275451,3005,692433,2505,191123,대통령,2012
8,세종특별자치시,87707,64990,33587,30787,323,64697,293,22717,대통령,2012
9,경기도,9364077,7018577,3528915,3442084,25724,6996723,21854,2345500,대통령,2012


In [7]:
url19_1 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_1_p/temp2_1_president_19.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p19_1 = (
    pd.read_csv(url19_1)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '2017')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p19_1

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,42479710,32807908,7852849,13423800,11395526,32672175,135733,9671802,대통령,2017
1,서울특별시,8382999,6590646,1365285,2781345,2422287,6568917,21729,1792353,대통령,2017
2,부산광역시,2950224,2261633,720484,872127,660098,2252709,8924,688591,대통령,2017
3,대구광역시,2043276,1581347,714205,342620,517551,1574376,6971,461929,대통령,2017
4,인천광역시,2409031,1820091,379191,747090,686682,1812963,7128,588940,대통령,2017
5,광주광역시,1166901,957321,14882,583847,356103,954832,2489,209580,대통령,2017
6,대전광역시,1220602,945897,191376,404545,346380,942301,3596,274705,대통령,2017
7,울산광역시,941093,744960,203602,282794,254966,741362,3598,196133,대통령,2017
8,세종특별자치시,189421,152801,23211,77767,51259,152237,564,36620,대통령,2017
9,경기도,10262309,7916009,1637345,3319812,2930488,7887645,28364,2346300,대통령,2017


In [8]:
url20_1 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_1_p/temp2_1_president_20.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p20_1 = (
    pd.read_csv(url20_1)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '2022')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p20_1

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,44197692,34067853,16394815,16147738,1217758,33760311,307542,10129839,대통령,2022
1,서울특별시,8346647,6501831,3255747,2944981,238411,6439139,62692,1844816,대통령,2022
2,부산광역시,2921510,2200224,1270072,831896,78210,2180178,20046,721286,대통령,2022
3,대구광역시,2046714,1611512,1199888,345045,51873,1596806,14706,435202,대통령,2022
4,인천광역시,2519225,1883504,878560,913320,75174,1867054,16450,635721,대통령,2022
5,광주광역시,1209206,985492,124511,830058,24032,978601,6891,223714,대통령,2022
6,대전광역시,1233177,945308,464060,434950,37391,936401,8907,287869,대통령,2022
7,울산광역시,942210,735461,396321,297134,34935,728390,7071,206749,대통령,2022
8,세종특별자치시,288895,231832,101491,119349,9051,229891,1941,57063,대통령,2022
9,경기도,11433288,8763727,3965341,4428151,297884,8691376,72351,2669561,대통령,2022


In [9]:
url21_1 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_1_p/temp2_1_president_21.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p21_1 = (
    pd.read_csv(url21_1)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '2025')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p21_1

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,44391871,35236497,14395639,17287513,3297464,34980616,255881,9155374,대통령,2025
1,서울특별시,8293885,6641606,2738405,3105459,745244,6589108,52498,1652279,대통령,2025
2,부산광역시,2865552,2245755,1146238,895213,188761,2230212,15543,619797,대통령,2025
3,대구광역시,2049078,1643051,1103913,379130,149269,1632312,10739,406027,대통령,2025
4,인천광역시,2619348,2035355,776952,1044295,199580,2020827,14528,583993,대통령,2025
5,광주광역시,1194471,1002149,79937,844682,71805,996424,5725,192322,대통령,2025
6,대전광역시,1241882,977609,393549,470321,105738,969608,8001,264273,대통령,2025
7,울산광역시,934509,747950,353180,315820,73375,742375,5575,186559,대통령,2025
8,세종특별자치시,307067,254695,83965,140620,28200,252785,1910,52372,대통령,2025
9,경기도,11715343,9297448,3504620,4821148,908865,9234633,62815,2417895,대통령,2025


## Merge

In [10]:
# Stack the eight per-election frames row-wise, in election order, and give the
# combined table a fresh 0..n-1 index.
df_31 = pd.concat(
    objs=(
        p14_1, p15_1, p16_1, p17_1,
        p18_1, p19_1, p20_1, p21_1,
    ),
    ignore_index=True,
)
df_31.head()

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,28676547,24095170,9977332,8041284,5756793,23775409,319761,4581377,대통령,1992
1,서울,7235830,6021311,2167298,2246636,1537843,5951777,69534,1214519,대통령,1992
2,부산,2506539,2135546,1551473,265055,298861,2115389,20157,370993,대통령,1992
3,대구,1458247,1172636,690245,90641,377307,1158193,14443,285611,대통령,1992
4,인천,1321616,1081011,397361,338538,330389,1066288,14723,240605,대통령,1992


In [11]:
# List the distinct region labels in the merged table to see which spelling
# variants need to be normalised.
df_31['지역'].unique()

array(['전국', '서울', '부산', '대구', '인천', '광주', '대전', '경기', '강원', '충북', '충남',
       '전북', '전남', '경북', '경남', '제주', '울산', '서울특별시', '부산광역시', '대구광역시',
       '인천광역시', '광주광역시', '대전광역시', '울산광역시', '세종특별자치시', '경기도', '강원도',
       '충청북도', '충청남도', '전라북도', '전라남도', '경상북도', '경상남도', '제주특별자치도',
       '강원특별자치도', '전북특별자치도'], dtype=object)

In [12]:
# Region-name mapping: long-form names -> the short names already used by the
# other rows of the merged table. Names without an entry are left unchanged.
region_mapping = {
    # 특별시 / 광역시
    '서울특별시': '서울',
    '부산광역시': '부산',
    '대구광역시': '대구',
    '인천광역시': '인천',
    '광주광역시': '광주',
    '대전광역시': '대전',
    '울산광역시': '울산',
    # 도
    '경기도': '경기',
    '강원도': '강원',
    '충청북도': '충북',
    '충청남도': '충남',
    '전라북도': '전북',
    '전라남도': '전남',
    '경상북도': '경북',
    '경상남도': '경남',
    # 특별자치시 / 특별자치도 (two of these collapse onto an existing short name)
    '제주특별자치도': '제주',
    '세종특별자치시': '세종',
    '강원특별자치도': '강원',
    '전북특별자치도': '전북',
}

# Replace values in the 지역 column only; every other column is untouched.
normalized_regions_31 = df_31['지역'].replace(region_mapping)
df_31 = df_31.assign(**{'지역': normalized_regions_31})

# Re-check the distinct labels after normalisation.
df_31['지역'].unique()

array(['전국', '서울', '부산', '대구', '인천', '광주', '대전', '경기', '강원', '충북', '충남',
       '전북', '전남', '경북', '경남', '제주', '울산', '세종'], dtype=object)

In [13]:
# Write the merged table without the index column. The utf-8-sig codec
# prepends a byte-order mark to the file.
df_31.to_csv(
    path_or_buf="temp3_1_president.csv",
    index=False,
    encoding="utf-8-sig",
)

# v3.2

## Preprocessing

In [14]:
url14_2 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_p/temp2_2_president_14.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p14_2 = (
    pd.read_csv(url14_2)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '1992')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p14_2

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,28676547,24095170,9977332,8041284,5431853,324940,23775409,319761,4581377,대통령,1992
1,서울,7235830,6021311,2167298,2246636,1456961,80882,5951777,69534,1214519,대통령,1992
2,부산,2506539,2135546,1551473,265055,273889,24972,2115389,20157,370993,대통령,1992
3,대구,1458247,1172636,690245,90641,361782,15525,1158193,14443,285611,대통령,1992
4,인천,1321616,1081011,397361,338538,314067,16322,1066288,14723,240605,대통령,1992
5,광주,745270,685797,14504,652337,11045,2714,680600,5197,59473,대통령,1992
6,대전,705227,582613,202137,165067,199133,8066,574403,8210,122614,대통령,1992
7,경기,4270726,3502774,1254025,1103498,1043795,50077,3451395,51379,767952,대통령,1992
8,강원,990821,834891,340528,127265,338856,13606,820255,14636,155930,대통령,1992
9,충북,894837,750483,281678,191743,249511,13239,736171,14312,144354,대통령,1992


In [15]:
url15_2 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_p/temp2_2_president_15.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p15_2 = (
    pd.read_csv(url15_2)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '1997')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p15_2

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,32290416,26042633,9935718,10326275,5380445,0,25642438,400195,6247783,대통령,1997
1,서울,7358547,5926743,2394309,2627308,833156,0,5854773,71970,1431804,대통령,1997
2,부산,2692311,2124010,1117069,320178,657159,0,2094406,29604,568301,대통령,1997
3,대구,1707338,1347018,965607,166576,196905,0,1329088,17930,360320,대통령,1997
4,인천,1639655,1311512,470560,497839,324212,0,1292611,18901,328143,대통령,1997
5,광주,870554,783025,13294,754159,7746,0,775199,7826,87529,대통령,1997
6,대전,881474,692821,199266,307493,176134,0,682893,9928,188653,대통령,1997
7,울산,654125,530459,268998,80751,174014,0,523763,6696,123666,대통령,1997
8,경기,5707087,4600005,1612108,1781577,1141839,0,4535524,64481,1107082,대통령,1997
9,강원,1077853,846596,358921,197438,274584,0,830943,15653,231257,대통령,1997


In [16]:
url16_2 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_p/temp2_2_president_16.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p16_2 = (
    pd.read_csv(url16_2)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '2002')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p16_2

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,34991529,24784963,11443297,12014277,1104342,0,24561916,223047,10206566,대통령,2002
1,서울,7670682,5475715,2447376,2792957,203657,0,5443990,31725,2194967,대통령,2002
2,부산,2786142,1983492,1314274,587946,66873,0,1969093,14399,802650,대통령,2002
3,대구,1827162,1299968,1002164,240745,46000,0,1288909,11059,527194,대통령,2002
4,인천,1824905,1236447,547205,611766,68845,0,1227816,8631,588458,대통령,2002
5,광주,967222,755398,26869,715182,9365,0,751416,3982,211824,대통령,2002
6,대전,998541,675029,266760,369046,34040,0,669846,5183,323512,대통령,2002
7,울산,729645,510496,267737,178584,60001,0,506322,4174,219149,대통령,2002
8,경기,6944934,4831412,2120191,2430193,247622,0,4798006,33406,2113522,대통령,2002
9,강원,1131168,773560,400405,316722,45810,0,762937,10623,357608,대통령,2002


In [17]:
url17_2 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_p/temp2_2_president_17.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p17_2 = (
    pd.read_csv(url17_2)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '2007')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p17_2

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,37653518,23732854,11492389,6174681,2385847,3559963,23612880,119974,13920664,대통령,2007
1,서울,8051696,5066022,2689162,1237812,528169,596226,5051369,14653,2985674,대통령,2007
2,부산,2843063,1765231,1018715,236708,157510,346319,1759252,5979,1077832,대통령,2007
3,대구,1896866,1267969,876719,75932,82828,228199,1263678,4291,628897,대통령,2007
4,인천,2005874,1210220,593283,286565,142452,183057,1205357,4863,795654,대통령,2007
5,광주,1031333,663338,56875,527588,54569,22520,661552,1786,367995,대통령,2007
6,대전,1098977,680264,246008,159700,76283,195957,677948,2316,418713,대통령,2007
7,울산,806423,521216,279891,70736,77054,90905,518586,2630,285207,대통령,2007
8,경기,8222124,5035641,2603443,1181936,561286,670742,5017407,18234,3186483,대통령,2007
9,강원,1164655,728895,376004,136668,83729,127102,723503,5392,435760,대통령,2007


In [18]:
url18_2 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_p/temp2_2_president_18.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p18_2 = (
    pd.read_csv(url18_2)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '2012')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p18_2

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,40507842,30721459,15773128,14692632,0,128861,30594621,126838,9786383,대통령,2012
1,서울특별시,8393847,6307869,3024572,3227639,0,24488,6276699,31170,2085978,대통령,2012
2,부산광역시,2911700,2219699,1324159,882511,0,6735,2213405,6294,692001,대통령,2012
3,대구광역시,1990746,1585806,1267789,309034,0,5017,1581840,3966,404940,대통령,2012
4,인천광역시,2241366,1657821,852600,794213,0,6153,1652966,4855,583545,대통령,2012
5,광주광역시,1117781,898416,69574,823737,0,2275,895586,2830,219365,대통령,2012
6,대전광역시,1182321,904367,450576,448310,0,2992,901878,2489,277954,대통령,2012
7,울산광역시,886061,694938,413977,275451,0,3005,692433,2505,191123,대통령,2012
8,세종특별자치시,87707,64990,33587,30787,0,323,64697,293,22717,대통령,2012
9,경기도,9364077,7018577,3528915,3442084,0,25724,6996723,21854,2345500,대통령,2012


In [19]:
url19_2 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_p/temp2_2_president_19.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p19_2 = (
    pd.read_csv(url19_2)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '2017')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p19_2

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,42479710,32807908,7852849,13423800,11361536,33990,32672175,135733,9671802,대통령,2017
1,서울특별시,8382999,6590646,1365285,2781345,2418337,3950,6568917,21729,1792353,대통령,2017
2,부산광역시,2950224,2261633,720484,872127,657942,2156,2252709,8924,688591,대통령,2017
3,대구광역시,2043276,1581347,714205,342620,516050,1501,1574376,6971,461929,대통령,2017
4,인천광역시,2409031,1820091,379191,747090,685001,1681,1812963,7128,588940,대통령,2017
5,광주광역시,1166901,957321,14882,583847,355489,614,954832,2489,209580,대통령,2017
6,대전광역시,1220602,945897,191376,404545,345622,758,942301,3596,274705,대통령,2017
7,울산광역시,941093,744960,203602,282794,254040,926,741362,3598,196133,대통령,2017
8,세종특별자치시,189421,152801,23211,77767,51124,135,152237,564,36620,대통령,2017
9,경기도,10262309,7916009,1637345,3319812,2923935,6553,7887645,28364,2346300,대통령,2017


In [20]:
url20_2 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_p/temp2_2_president_20.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p20_2 = (
    pd.read_csv(url20_2)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '2022')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p20_2

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,44197692,34067853,16394815,16147738,1217758,0,33760311,307542,10129839,대통령,2022
1,서울특별시,8346647,6501831,3255747,2944981,238411,0,6439139,62692,1844816,대통령,2022
2,부산광역시,2921510,2200224,1270072,831896,78210,0,2180178,20046,721286,대통령,2022
3,대구광역시,2046714,1611512,1199888,345045,51873,0,1596806,14706,435202,대통령,2022
4,인천광역시,2519225,1883504,878560,913320,75174,0,1867054,16450,635721,대통령,2022
5,광주광역시,1209206,985492,124511,830058,24032,0,978601,6891,223714,대통령,2022
6,대전광역시,1233177,945308,464060,434950,37391,0,936401,8907,287869,대통령,2022
7,울산광역시,942210,735461,396321,297134,34935,0,728390,7071,206749,대통령,2022
8,세종특별자치시,288895,231832,101491,119349,9051,0,229891,1941,57063,대통령,2022
9,경기도,11433288,8763727,3965341,4428151,297884,0,8691376,72351,2669561,대통령,2022


In [21]:
url21_2 = "https://raw.githubusercontent.com/sw1kwon/korean-elections/refs/heads/main/temp/v2_2_p/temp2_2_president_21.csv"

# Keep only the rows whose 구시군 is '합계', tag them with the election type and
# year, rename 시도 to 지역, drop 구시군 and renumber the index from 0.
p21_2 = (
    pd.read_csv(url21_2)
    .loc[lambda raw: raw['구시군'] == '합계']
    .pipe(add_column_with_value, '선거종류', '대통령')
    .pipe(add_column_with_value, '선거년도', '2025')
    .rename(columns={'시도': '지역'})
    .drop(columns=['구시군'])
    .reset_index(drop=True)
)
p21_2

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,44391871,35236497,14395639,17287513,3261673,35791,34980616,255881,9155374,대통령,2025
1,서울특별시,8293885,6641606,2738405,3105459,739246,5998,6589108,52498,1652279,대통령,2025
2,부산광역시,2865552,2245755,1146238,895213,186662,2099,2230212,15543,619797,대통령,2025
3,대구광역시,2049078,1643051,1103913,379130,147907,1362,1632312,10739,406027,대통령,2025
4,인천광역시,2619348,2035355,776952,1044295,197482,2098,2020827,14528,583993,대통령,2025
5,광주광역시,1194471,1002149,79937,844682,70871,934,996424,5725,192322,대통령,2025
6,대전광역시,1241882,977609,393549,470321,104629,1109,969608,8001,264273,대통령,2025
7,울산광역시,934509,747950,353180,315820,72476,899,742375,5575,186559,대통령,2025
8,세종특별자치시,307067,254695,83965,140620,27965,235,252785,1910,52372,대통령,2025
9,경기도,11715343,9297448,3504620,4821148,900509,8356,9234633,62815,2417895,대통령,2025


## Merge

In [22]:
# Stack the eight per-election frames row-wise, in election order, and give the
# combined table a fresh 0..n-1 index.
df_32 = pd.concat(
    objs=(
        p14_2, p15_2, p16_2, p17_2,
        p18_2, p19_2, p20_2, p21_2,
    ),
    ignore_index=True,
)
df_32.head()

Unnamed: 0,지역,선거인수,투표수,보수정당,진보정당,그외정당,무소속,득표수_계,무효투표수,기권수,선거종류,선거년도
0,전국,28676547,24095170,9977332,8041284,5431853,324940,23775409,319761,4581377,대통령,1992
1,서울,7235830,6021311,2167298,2246636,1456961,80882,5951777,69534,1214519,대통령,1992
2,부산,2506539,2135546,1551473,265055,273889,24972,2115389,20157,370993,대통령,1992
3,대구,1458247,1172636,690245,90641,361782,15525,1158193,14443,285611,대통령,1992
4,인천,1321616,1081011,397361,338538,314067,16322,1066288,14723,240605,대통령,1992


In [23]:
# List the distinct region labels in the merged table to see which spelling
# variants need to be normalised.
df_32['지역'].unique()

array(['전국', '서울', '부산', '대구', '인천', '광주', '대전', '경기', '강원', '충북', '충남',
       '전북', '전남', '경북', '경남', '제주', '울산', '서울특별시', '부산광역시', '대구광역시',
       '인천광역시', '광주광역시', '대전광역시', '울산광역시', '세종특별자치시', '경기도', '강원도',
       '충청북도', '충청남도', '전라북도', '전라남도', '경상북도', '경상남도', '제주특별자치도',
       '강원특별자치도', '전북특별자치도'], dtype=object)

In [24]:
# Region-name mapping: long-form names -> the short names already used by the
# other rows of the merged table. Names without an entry are left unchanged.
region_mapping = {
    # 특별시 / 광역시
    '서울특별시': '서울',
    '부산광역시': '부산',
    '대구광역시': '대구',
    '인천광역시': '인천',
    '광주광역시': '광주',
    '대전광역시': '대전',
    '울산광역시': '울산',
    # 도
    '경기도': '경기',
    '강원도': '강원',
    '충청북도': '충북',
    '충청남도': '충남',
    '전라북도': '전북',
    '전라남도': '전남',
    '경상북도': '경북',
    '경상남도': '경남',
    # 특별자치시 / 특별자치도 (two of these collapse onto an existing short name)
    '제주특별자치도': '제주',
    '세종특별자치시': '세종',
    '강원특별자치도': '강원',
    '전북특별자치도': '전북',
}

# Replace values in the 지역 column only; every other column is untouched.
normalized_regions_32 = df_32['지역'].replace(region_mapping)
df_32 = df_32.assign(**{'지역': normalized_regions_32})

# Re-check the distinct labels after normalisation.
df_32['지역'].unique()

array(['전국', '서울', '부산', '대구', '인천', '광주', '대전', '경기', '강원', '충북', '충남',
       '전북', '전남', '경북', '경남', '제주', '울산', '세종'], dtype=object)

In [25]:
# Write the merged table without the index column. The utf-8-sig codec
# prepends a byte-order mark to the file.
df_32.to_csv(
    path_or_buf="temp3_2_president.csv",
    index=False,
    encoding="utf-8-sig",
)