# 판매전력추이 컬럼정리
---

In [16]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels.tsa.api as tsa

## [1] 함수 정리
---

In [17]:
def datetime_convert(data: pd.DataFrame, column: str, split="-"):
    """Return a copy of ``data`` with ``column`` parsed as datetimes.

    Args:
        data (pd.DataFrame): source dataframe; it is copied, not modified.
        column (str): name of the column holding date-like values.
        split (str): separator used inside the raw values (e.g. ``"."`` for
            ``"1997.01"``). Any separator other than ``"-"`` is replaced by
            ``"-"`` before parsing.

    Returns:
        pd.DataFrame: copy of ``data`` whose ``column`` has been converted
        with ``pd.to_datetime``.

    Note:
        Values are stringified as they are, so a float such as ``1997.10``
        becomes ``"1997.1"`` before parsing. Load such columns as text if the
        trailing zero matters.
    """
    data_c = data.copy()

    if split != "-":
        # regex=False: the separator is literal text. Interpreted as a regex,
        # "." would match every character instead of just the dot.
        data_c[column] = (
            data_c[column].astype(str).str.replace(split, "-", regex=False)
        )

    data_c[column] = pd.to_datetime(data_c[column])
    return data_c

In [18]:
def connect_year_month(data: pd.DataFrame, year: str, month: str, is_num=True):
    """Merge a year column and a month column into one datetime column.

    Args:
        data (pd.DataFrame): source dataframe; it is copied, not modified.
        year (str): name of the year column. In the result this column holds
            the combined datetime.
        month (str): name of the month column. It is dropped from the result.
        is_num (bool): pass ``False`` when the values carry the Korean unit
            suffixes ``"년"`` (year) / ``"월"`` (month), e.g. ``"2017년"`` and
            ``"1월"``; the suffixes are then removed before parsing.

    Returns:
        pd.DataFrame: copy of ``data`` with ``year`` converted to datetime
        and ``month`` removed.
    """
    data_c = data.copy()
    year_text = data_c[year].astype(str)
    month_text = data_c[month].astype(str)

    if not is_num:
        # Strip the suffix row by row instead of deciding from the first row
        # only; this also keeps an empty frame from raising on iloc[0].
        year_text = year_text.str.replace("년$", "", regex=True)
        month_text = month_text.str.replace("월$", "", regex=True)

    # Zero-pad the month so every row has the same "YYYY-MM" layout.
    data_c[year] = pd.to_datetime(year_text + "-" + month_text.str.zfill(2))

    return data_c.drop(columns=month)

In [19]:
def insert_sum_columns(data: pd.DataFrame, sum_columns: dict, last=True):
    """Insert row-wise sum columns next to the columns they add up.

    Args:
        data (pd.DataFrame): source dataframe; it is copied, not modified.
        sum_columns (dict): maps the name of each new sum column to the
            column names to add up, e.g. ``{"total": ["a", "b"]}``.
        last (bool): if truthy, the sum column is placed right after the
            right-most member column; otherwise right before the left-most
            member column.

    Returns:
        pd.DataFrame: copy of ``data`` with the member columns cast to
        ``int`` and one extra column per entry of ``sum_columns``.

    Raises:
        ValueError: if an entry of ``sum_columns`` lists no columns.
    """
    data_c = data.copy()

    for sum_col_name, members in sum_columns.items():
        col_list = list(members)
        if not col_list:
            raise ValueError(f"no columns given for {sum_col_name!r}")

        # Member columns may arrive as numeric strings; cast before summing.
        for col in col_list:
            data_c[col] = data_c[col].astype(int)

        total = data_c[col_list].sum(axis=1)

        # Positions are looked up on every pass because a previously inserted
        # sum column shifts everything to its right.
        columns = list(data_c.columns)
        positions = [columns.index(col) for col in col_list]
        insert_at = max(positions) + 1 if last else min(positions)

        data_c.insert(insert_at, sum_col_name, total)

    return data_c

## [2] 데이터 로딩
---

In [20]:
# Source files
data_path_1 = "./용도별판매전력량추이_1997_2014.csv"
data_path_2 = "./용도별판매전력량추이_2015_2021.csv"

# 1997–2014 data.
# "시점" holds values such as 1997.01; it is read as text because, if pandas
# inferred it as float, 1997.10 would become 1997.1 and lose its month.
df_97_14 = pd.read_csv(data_path_1, encoding="euc-kr", dtype={"시점": str})

# 2015–2021 data.
df_15_21_raw = pd.read_csv(data_path_2, encoding="euc-kr")
# Row 0 is discarded — presumably a second header row; verify against the file.
df_15_21 = df_15_21_raw.drop(0, axis=0)

# Year masks
c1 = df_15_21["시점"] == "2015"
c2 = df_15_21["시점"] == "2016"
c3 = df_15_21["시점"] == "2021"

# Keep the 2015 and 2016 rows in their own frame.
# Masks are combined with "|" (element-wise OR), not arithmetic "+".
df_15_16 = df_15_21[c1 | c2]

# 2017–2020 data: drop the 2015/2016/2021 rows, then the "합계" (total) rows
idx_151621 = list(df_15_21[c1 | c2 | c3].index)
df_17_20 = df_15_21.drop(idx_151621)
idx_sum = list(df_17_20[df_17_20["월별(1)"] == "합계"].index)
df_17_20 = df_17_20.drop(idx_sum)

## [3] 데이터 전처리
---

In [21]:
# Preview the first five rows of the 1997–2014 frame
df_97_14.head(5)

Unnamed: 0,시점,소계,가정용,공공용,서비스업,농림어업,광업,제조업
0,1997.01,16224867,2814138,557379,3616542,301811,80421,8854576
1,1997.02,15363840,2788312,558610,3551611,327617,70471,8067220
2,1997.03,16094268,2555048,528672,3333225,345359,81877,9250086
3,1997.04,16266027,2629204,555354,3430624,292235,84596,9274014
4,1997.05,16028072,2525076,511468,3333483,302913,80963,9274169


In [22]:
# Preview the first rows of the 2017–2020 frame (year and month are still separate columns)
df_17_20.head()

Unnamed: 0,시점,월별(1),합계,가정용,공공용,서비스업 및 기타,농림어업,광업,제조업
4,2017,1월,45196953,5818186,2242732,13615459,1551606,152302,21816668
5,2017,2월,44422400,5908998,2223466,13847567,1664694,143111,20634564
6,2017,3월,42558535,5114346,2055510,11889602,1423129,158768,21917181
7,2017,4월,40811060,5350759,1866996,10995210,1282616,156978,21158500
8,2017,5월,38743894,4948471,1606449,9897405,1051543,153789,21086238


### [3-1] 데이터프레임 합치기

In [23]:
# Column names shared by both frames after preprocessing
elec_col_names = [
    "시점",
    "합계",
    "가정용",
    "공공용",
    "서비스업",
    "농림어업",
    "광업",
    "제조업",
]

# 1997–2014: parse the "YYYY.MM" time column, then apply the shared names
df_97_14_pre = datetime_convert(df_97_14, column="시점", split=".")
df_97_14_pre.columns = elec_col_names

# 2017–2020: fold the year and month columns into one datetime column,
# then apply the shared names
df_17_20_pre = connect_year_month(
    df_17_20,
    year="시점",
    month="월별(1)",
    is_num=False,
)
df_17_20_pre.columns = elec_col_names

# Stack both periods row-wise with a fresh 0..n-1 index
df_97_20_pre = pd.concat([df_97_14_pre, df_17_20_pre], ignore_index=True)

  data_c[column] = data_c[column].astype(str).str.replace(split, "-")


In [24]:
# Preview the first five rows of the combined frame
df_97_20_pre.head(5)

Unnamed: 0,시점,합계,가정용,공공용,서비스업,농림어업,광업,제조업
0,1997-01-01,16224867,2814138,557379,3616542,301811,80421,8854576
1,1997-02-01,15363840,2788312,558610,3551611,327617,70471,8067220
2,1997-03-01,16094268,2555048,528672,3333225,345359,81877,9250086
3,1997-04-01,16266027,2629204,555354,3430624,292235,84596,9274014
4,1997-05-01,16028072,2525076,511468,3333483,302913,80963,9274169


### [3-2] 합계 컬럼 추가하기

In [25]:
# New sum column -> the usage columns it adds up
sum_col = {
    "산업용_합계": ["농림어업", "광업", "제조업"],
    "민수용_합계": ["가정용", "공공용", "서비스업"],
}

# Add one sum column per entry to the combined frame
df_97_20_sum = insert_sum_columns(df_97_20_pre, sum_col)

In [26]:
# Preview the first five rows, now including the two sum columns
df_97_20_sum.head(5)

Unnamed: 0,시점,합계,가정용,공공용,서비스업,민수용_합계,농림어업,광업,제조업,산업용_합계
0,1997-01-01,16224867,2814138,557379,3616542,6988059,301811,80421,8854576,9236808
1,1997-02-01,15363840,2788312,558610,3551611,6898533,327617,70471,8067220,8465308
2,1997-03-01,16094268,2555048,528672,3333225,6416945,345359,81877,9250086,9677322
3,1997-04-01,16266027,2629204,555354,3430624,6615182,292235,84596,9274014,9650845
4,1997-05-01,16028072,2525076,511468,3333483,6370027,302913,80963,9274169,9658045


### [3-3] 데이터 저장

In [28]:
# Write the combined frame to CSV without the index column.
# NOTE(review): the file name spells "전략" (strategy) while the input files
# spell "전력" (electric power) — probably a typo; check whether anything
# reads this file by name before renaming it.
# NOTE(review): the saved frame is concatenated from the 1997–2014 and
# 2017–2020 frames only, so despite the "97_20" name it appears to contain
# no 2015–2016 rows — confirm this is intended.
df_97_20_sum.to_csv("용도별판매전략_97_20.csv", index=False)