# Library


In [1]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
from datetime import datetime
import pyexasol

In [2]:
# 소수점 옵션
pd.options.display.float_format = '{:.2f}'.format
# pd.reset_option('display.float_format')

# Folder

In [3]:
df1_fold = 'D:/data/big2/BD1/df'

# Server

In [None]:
# # insider db
# wd = pyexasol.connect(dsn='', user='', password='', compression=True, schema='')
# we = pyexasol.connect(dsn='', user='', password='', compression=True, schema='')

In [None]:
# exasol db
ws = pyexasol.connect(dsn='', user='', password='', compression=True, schema='')
wd = pyexasol.connect(dsn='', user='', password='', compression=True, schema='')
we = pyexasol.connect(dsn='', user='', password='', compression=True, schema='')

# Load

## 등록정보(STD_CEG_CAR_MIG) 4등급만

In [6]:
# exasol db
# 14.2s
car = wd.export_to_pandas("SELECT VHRNO, VIN, BSPL_STDG_CD, EXHST_GAS_GRD_CD, EXHST_GAS_CERT_NO, VHCL_ERSR_YN, MANP_MNG_NO, YRIDNW, VHCTY_CD, PURPS_CD2, FRST_REG_YMD, VHCL_FBCTN_YMD, VHCL_MNG_NO FROM STD_CEG_CAR_MIG WHERE EXHST_GAS_GRD_CD = 'A0504' OR EXHST_GAS_GRD_CD = 'A05T4';")

In [7]:
car_ch_col = {
    'VHRNO':'자동차등록번호', 
    'VIN':'차대번호', 
    'BSPL_STDG_CD':'법정동코드', 
    'EXHST_GAS_GRD_CD':'배출가스등급', 
    'EXHST_GAS_CERT_NO':'배출가스인증번호',
    'VHCL_ERSR_YN':'차량말소YN',
    'MANP_MNG_NO':'제원관리번호', 
    'YRIDNW':'차량연식', 
    'VHCTY_CD':'차종', 
    'PURPS_CD2':'용도', 
    'FRST_REG_YMD':'최초등록일자',
    'VHCL_FBCTN_YMD':'제작일자',
    'VHCL_MNG_NO':'차량관리번호', 
}

In [8]:
carr = car.rename(columns=car_ch_col)

In [9]:
carr.columns

Index(['자동차등록번호', '차대번호', '법정동코드', '배출가스등급', '배출가스인증번호', '차량말소YN', '제원관리번호',
       '차량연식', '차종', '용도', '최초등록일자', '제작일자', '차량관리번호'],
      dtype='object')

## 제원정보(STD_CEG_CAR_SRC_MIG)

In [10]:
# 3.8s
src = wd.export_to_pandas("SELECT MANP_MNG_NO, FUEL_CD, VHCTY_TY_CD2, MNFCTR_NM, VHCNM, VHCL_FRM, EGIN_TY FROM STD_CEG_CAR_SRC_MIG;")

In [11]:
src_ch_col = {
    'MANP_MNG_NO':'제원관리번호', 
    'FUEL_CD':'연료',
    'VHCTY_TY_CD2':'차종유형', 
    'MNFCTR_NM':'제작사명', 
    'VHCNM':'차명', 
    'VHCL_FRM':'자동차형식', 
    'EGIN_TY':'엔진형식', 
}

In [12]:
srcr = src.rename(columns=src_ch_col)
srcr.columns

Index(['제원관리번호', '연료', '차종유형', '제작사명', '차명', '자동차형식', '엔진형식'], dtype='object')

## 저감장치 부착이력(STD_DLM_TB_ERP_ATT_HIS)

In [13]:
# 3.3s
att = wd.export_to_pandas("SELECT VIN, RDCDVC_SE_CD, RDCDVC_KND_CD FROM STD_DLM_TB_ERP_ATT_HIS;")
att.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1015941 entries, 0 to 1015940
Data columns (total 3 columns):
 #   Column         Non-Null Count    Dtype 
---  ------         --------------    ----- 
 0   VIN            1015941 non-null  object
 1   RDCDVC_SE_CD   1015941 non-null  object
 2   RDCDVC_KND_CD  792038 non-null   object
dtypes: object(3)
memory usage: 23.3+ MB


In [14]:
att_ch_col = {
    'VIN':'차대번호', 
    'RDCDVC_SE_CD':'저감장치구분', 
    'RDCDVC_KND_CD':'저감장치종류', 
}

In [15]:
attr = att.rename(columns=att_ch_col)
attr.columns

Index(['차대번호', '저감장치구분', '저감장치종류'], dtype='object')

## 노후차 조기폐차 관리정보(수도권)(STD_DLM_TB_ERP_EARLY_ERASE_AEA)

In [16]:
# 2.4s
aea = wd.export_to_pandas("SELECT VIN, ELPDSRC_STTS_CD, ELPDSRC_LST_APRV_YN, ERSR_YMD FROM STD_DLM_TB_ERP_EARLY_ERASE_AEA;")
aea.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 811412 entries, 0 to 811411
Data columns (total 4 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   VIN                  811412 non-null  object 
 1   ELPDSRC_STTS_CD      811412 non-null  object 
 2   ELPDSRC_LST_APRV_YN  710812 non-null  object 
 3   ERSR_YMD             718610 non-null  float64
dtypes: float64(1), object(3)
memory usage: 24.8+ MB


In [17]:
aea.columns

Index(['VIN', 'ELPDSRC_STTS_CD', 'ELPDSRC_LST_APRV_YN', 'ERSR_YMD'], dtype='object')

In [18]:
aea_ch_col = {
    'VIN':'차대번호', 
    'ELPDSRC_STTS_CD':'조기폐차상태코드', 
    'ELPDSRC_LST_APRV_YN':'조기폐차최종승인YN', 
    'ERSR_YMD':'말소일자', 
}

In [19]:
aear = aea.rename(columns=aea_ch_col)
aear.columns

Index(['차대번호', '조기폐차상태코드', '조기폐차최종승인YN', '말소일자'], dtype='object')

In [20]:
aear.head()

Unnamed: 0,차대번호,조기폐차상태코드,조기폐차최종승인YN,말소일자
0,KMZKL17BP1U441173,A32Y,Y,20130917.0
1,KNHUP7513WS711743,A32Y,Y,20100802.0
2,KPBEA3D81TP079170,A32Y,Y,20100806.0
3,KPBEA3D81SP063998,A32Y,Y,20100802.0
4,KPBEA3D81RP011984,A32Y,Y,20100802.0


## 노후차 조기폐차 관리정보(수도권외)(STD_DLM_TB_ERP_EARLY_ERASE_LGV)

In [21]:
# 1.8s
lgv = wd.export_to_pandas("SELECT VIN, ELPDSRC_STTS_CD, ELPDSRC_LST_APRV_YN, ERSR_YMD FROM STD_DLM_TB_ERP_EARLY_ERASE_LGV;")
lgv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 388958 entries, 0 to 388957
Data columns (total 4 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   VIN                  388958 non-null  object 
 1   ELPDSRC_STTS_CD      388958 non-null  object 
 2   ELPDSRC_LST_APRV_YN  176711 non-null  object 
 3   ERSR_YMD             190163 non-null  float64
dtypes: float64(1), object(3)
memory usage: 11.9+ MB


In [22]:
lgv.columns

Index(['VIN', 'ELPDSRC_STTS_CD', 'ELPDSRC_LST_APRV_YN', 'ERSR_YMD'], dtype='object')

In [23]:
lgv_ch_col = {
    'VIN':'차대번호', 
    'ELPDSRC_STTS_CD':'조기폐차상태코드', 
    'ELPDSRC_LST_APRV_YN':'조기폐차최종승인YN', 
    'ERSR_YMD':'말소일자', 
}

In [24]:
lgvr = lgv.rename(columns=lgv_ch_col)
lgvr.columns

Index(['차대번호', '조기폐차상태코드', '조기폐차최종승인YN', '말소일자'], dtype='object')

In [25]:
lgvr.head()

Unnamed: 0,차대번호,조기폐차상태코드,조기폐차최종승인YN,말소일자
0,KMHSD81VP4U668720,A32M,,
1,KPBEA2MC12P281515,A32M,,
2,KMHMH81VP2U154512,A32M,,
3,KMFWVH7HP6U722855,A32M,,
4,KN90J4MDD2GZ00019,A32C,,


## 법정동코드(STD_BJCD_INFO)

In [26]:
# 1.3s
code = wd.export_to_pandas("SELECT STDG_CD, STDG_CTPV_NM, STDG_SGG_NM FROM STD_BJCD_INFO;")
code.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47916 entries, 0 to 47915
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   STDG_CD       47916 non-null  int64 
 1   STDG_CTPV_NM  47916 non-null  object
 2   STDG_SGG_NM   47892 non-null  object
dtypes: int64(1), object(2)
memory usage: 1.1+ MB


In [27]:
code.columns

Index(['STDG_CD', 'STDG_CTPV_NM', 'STDG_SGG_NM'], dtype='object')

In [28]:
code_ch_col = {
    'STDG_CD':'법정동코드', 
    'STDG_CTPV_NM':'시도', 
    'STDG_SGG_NM':'시군구',  
}

In [29]:
coder = code.rename(columns=code_ch_col)
coder.columns

Index(['법정동코드', '시도', '시군구'], dtype='object')

In [30]:
coder.head()

Unnamed: 0,법정동코드,시도,시군구
0,4119011200,경기도,부천시
1,4119011300,경기도,부천시
2,4119011400,경기도,부천시
3,4119011500,경기도,부천시
4,4119011600,경기도,부천시


In [31]:
coder['시도'].unique()

array(['경기도', '대전광역시', '울산광역시', '대구광역시', '세종특별자치시', '인천광역시', '충청북도',
       '광주광역시', '강원도', '충청남도', '경상북도', '전라남도', '전라북도', '대구직할시', '부산광역시',
       '서울특별시', '인천직할시', '광주직할시', '부산직할시', '경상남도', '제주도', '제주특별자치도',
       '대전직할시', '강원특별자치도'], dtype=object)

## 등록이력(CEG_CAR_HISTORY_MIG)

In [32]:
# 1m 13.2s
his = ws.export_to_pandas("SELECT VHCL_ERSR_YN, CHNG_DE, VHMNO FROM CEG_CAR_HISTORY_MIG;")
his_ch_col = {
    'VHCL_ERSR_YN':'차량말소YN', 
    'CHNG_DE':'변경일자',
    'VHMNO':'차량관리번호'
}
hisr = his.rename(columns=his_ch_col)
hisr.columns

Index(['차량말소YN', '변경일자', '차량관리번호'], dtype='object')

In [33]:
# # 1.8s
# edb_id = 'vsysd'
# edb_database = 'edb'
# edb_port = 5444
# edb_url = '172.29.135.50'
# edb_pwd = 'vsyswynn'
# conn = psycopg2.connect(dbname=edb_database, user=edb_id, password=edb_pwd, host=edb_url, port=edb_port)
# cur = conn.cursor()
# sql = 'select VHCL_ERSR_YN, CHNG_DE, VHMNO from vsysd.ceg_car_history_mig'
# cur.execute(sql)
# his = pd.DataFrame(cur.fetchall())
# his.columns = [desc[0].upper() for desc in cur.description]
# cur.close()
# conn.close()

## RH에서 제공한 법정동코드

In [34]:
rh = we.export_to_pandas("SELECT DONG_CODE, CTPRVN_NM, SIGNGU_NM FROM STD_BD_TB_MAPDATA;")
rh.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3799 entries, 0 to 3798
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   DONG_CODE  3799 non-null   int64 
 1   CTPRVN_NM  3799 non-null   object
 2   SIGNGU_NM  3757 non-null   object
dtypes: int64(1), object(2)
memory usage: 89.2+ KB


In [35]:
rh = rh.rename(columns={'DONG_CODE':'법정동코드_rh', 'CTPRVN_NM':'시도', 'SIGNGU_NM':'시군구'})
rh.head()

Unnamed: 0,법정동코드_rh,시도,시군구
0,4882000000,경상남도,고성군
1,4882025000,경상남도,고성군
2,4882031000,경상남도,고성군
3,4882032000,경상남도,고성군
4,4882033000,경상남도,고성군


In [36]:
rh.shape

(3799, 3)

## 4등급 result(for DPF유무)

In [37]:
rs = we.export_to_pandas("SELECT 차대번호, DPF유무_수정 FROM STD_BD_GRD4_RESULT;")
rs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1502235 entries, 0 to 1502234
Data columns (total 2 columns):
 #   Column    Non-Null Count    Dtype 
---  ------    --------------    ----- 
 0   차대번호      1502235 non-null  object
 1   DPF유무_수정  1153813 non-null  object
dtypes: object(2)
memory usage: 22.9+ MB


# 전처리

## 등록정보(CEG_CAR_MIG)

### 중복 차대번호 제거

In [38]:
carr.shape, len(carr['차대번호'].unique())

((1906876, 13), 1906823)

In [39]:
carr['최초등록일자'] = pd.to_numeric(carr['최초등록일자'], errors='coerce')
carr = carr.sort_values('최초등록일자', ascending=False).drop_duplicates('차대번호').reset_index(drop=True)
carr.shape

(1906823, 13)

### 배출가스등급 코드 변환

In [40]:
## 배출가스등급 코드 변환
grd_dict = {
    'A0501':'1', 
    'A0502':'2', 
    'A0503':'3', 
    'A0504':'4', 
    'A0505':'5', 
    'A05T2':'2',
    'A05T3':'3', 
    'A05T4':'4', 
    'A05T5':'5', 
    'A05X':'X', 
}
carr['배출가스등급'] = carr['배출가스등급'].replace(grd_dict)
carr['배출가스등급'].unique()

array(['4'], dtype=object)

### 차종 코드 변환

In [41]:
cd_dict = {
    'A31M':'이륜', 
    'A31P':'승용', 
    'A31S':'특수', 
    'A31T':'화물', 
    'A31V':'승합'
}
carr['차종'] = carr['차종'].replace(cd_dict)
carr['차종'].unique()

array(['승용', '화물', '승합', '특수'], dtype=object)

### 용도 코드 변환

In [42]:
purps_dict = {
    'A08P':'개인용', 
    'A08B':'영업용', 
    'A08O':'관용',
}
carr['용도'] = carr['용도'].replace(purps_dict)
carr['용도'].unique()

array(['개인용', '영업용', '관용'], dtype=object)

### 등록정보 말소 제거

In [43]:
carn = carr[carr['차량말소YN'] == 'N'].reset_index(drop=True)
carn.shape

(1390280, 13)

## 제원정보(CEG_CAR_SRC_MIG)

### 연료 코드 변환

In [44]:
fuel_dict = {
    'A90GS':'휘발유', 
    'A91DS':'경유',
    'A92LP':'LPG(액화석유가스)', 
    'A90GH':'휘발유 하이브리드', 
    'A93EV':'전기', 
    'A91DH':'경유 하이브리드', 
    'A92CN':'CNG(압축천연가스)', 
    'A93HD':'수소', 
    'A92LH':'LPG 하이브리드', 
    'A94OT':'기타연료', 
    'A92CH':'CNG 하이브리드',
    'A90AC':'알코올', 
    'A93SH':'태양열', 
    'A91KS':'등유', 
    'A92LN':'LNG(액화천연가스)', 
    'A90PH':'플러그인 하이브리드', 
}
srcr['연료'] = srcr['연료'].replace(fuel_dict)
srcr['연료'].unique()

array(['휘발유', '경유', '기타연료', 'LPG(액화석유가스)', nan, '휘발유 하이브리드',
       'CNG(압축천연가스)', '알코올', '등유', '전기', '태양열', '경유 하이브리드', 'LPG 하이브리드',
       '수소', 'LNG(액화천연가스)', 'CNG 하이브리드'], dtype=object)

### 차종유형 코드 변환

In [45]:
ty_dict = {
    'A30C':'경형', 
    'A30L':'대형', 
    'A30M':'중형', 
    'A30S':'소형',
}
srcr['차종유형'] = srcr['차종유형'].replace(ty_dict)
srcr['차종유형'].unique()

array(['대형', '중형', '소형', '경형', nan], dtype=object)

## 등록&제원 병합

In [46]:
# 19.4s
cs = carn.merge(srcr, on='제원관리번호', how='left')
cs.shape

(1390280, 19)

## 저감장치 부착이력(DLM_TB_ERP_ATT_HIS)

### 저감장치구분 코드 변환

In [47]:
attr['저감장치구분'].unique()

array(['A1001', 'A1003', 'A1005', 'A1002', 'A1006', 'A1007', 'A1004'],
      dtype=object)

In [48]:
# 27.5s
rdcdvc_dict = {
    'A1001':'1종', 
    'A1002':'2종', 
    'A1003':'3종', 
    'A1004':'1종+SCR', 
    'A1005':'엔진개조', 
    'A1006':'엔진교체',
    'A1007':'삼원촉매',
}
attr['저감장치구분'] = attr['저감장치구분'].replace(rdcdvc_dict)
attr['저감장치구분'].unique()

array(['1종', '3종', '엔진개조', '2종', '엔진교체', '삼원촉매', '1종+SCR'], dtype=object)

### 저감장치종류 코드 변환

In [49]:
attr['저감장치종류'].unique()

array(['A1313', 'A1311', nan, 'A1309', 'A1306', 'A1301', 'A1303', 'A1310',
       'A1305', 'A1307', 'A1302', 'A1308', 'A1304', 'A1312'], dtype=object)

In [50]:
rdcdvc_knd_dict = {
    'A1301':'dPDF',
    'A1302':'대형',
    'A1303':'복합대형',
    'A1304':'복합대형+SCR',
    'A1305':'복합소형',
    'A1306':'복합중형',
    'A1307':'소형',
    'A1308':'자연대형+SCR',
    'A1309':'자연중형',
    'A1310':'중형',
    'A1311':'DOC',
    'A1312':'자연소형',
    'A1313':'자연대형',
}
attr['저감장치종류'] = attr['저감장치종류'].replace(rdcdvc_knd_dict)
attr['저감장치종류'].unique()

array(['자연대형', 'DOC', nan, '자연중형', '복합중형', 'dPDF', '복합대형', '중형', '복합소형',
       '소형', '대형', '자연대형+SCR', '복합대형+SCR', '자연소형'], dtype=object)

### 저감장치 부착 유무

In [51]:
attr.loc[(attr['저감장치구분'] == '1종') | (attr['저감장치구분'] == '1종+SCR'), 'DPF_YN'] = '유'

In [52]:
attr['DPF_YN'].value_counts(dropna=False)

NaN    533631
유      482310
Name: DPF_YN, dtype: int64

In [53]:
attr = attr.sort_values('DPF_YN').drop_duplicates('차대번호').reset_index(drop=True)
attr.shape

(1014369, 4)

## 노후차 조기폐차(STD_DLM_TB_ERP_EARLY_ERASE_AEA, LGV)

### 조기폐차상태코드 코드 변환

In [54]:
aear['조기폐차상태코드'].unique()

array(['A32Y', 'A32B', 'A32D', 'A32M', 'A32C', 'A32X', 'A32A', 'A32N',
       'A32G', 'A32P', 'A32I'], dtype=object)

In [55]:
erase_dict = {
    'A32E':'조기폐차상태코드(추가보조금신청대상)',
    'A32G':'조기폐차상태코드(보조금청구)',
    'A32I':'조기폐차상태코드(신청등록)',
    'A32K':'조기폐차상태코드(추가보조금청구승인)',
    'A32M':'조기폐차상태코드(보조금산정)',
    'A32N':'조기폐차상태코드(보조금청구반려(제외))',
    'A32P':'조기폐차상태코드(보조금대상)',
    'A32T':'조기폐차상태코드(추가보조금청구)',
    'A32X':'조기폐차상태코드(신청취소(제외))',
    'A32Y':'조기폐차상태코드(보조금청구승인)',
    'A32C':'조기폐차상태코드(성능확인검사등록)',
    'A32D':'조기폐차상태코드(기간초과)',
    'A32A':'조기폐차상태코드(성능확인검사신청)',
    'A32B':'조기폐차상태코드(보조금미대상)',
}
aear['조기폐차상태코드'] = aear['조기폐차상태코드'].replace(erase_dict)
aear['조기폐차상태코드'].unique()

array(['조기폐차상태코드(보조금청구승인)', '조기폐차상태코드(보조금미대상)', '조기폐차상태코드(기간초과)',
       '조기폐차상태코드(보조금산정)', '조기폐차상태코드(성능확인검사등록)', '조기폐차상태코드(신청취소(제외))',
       '조기폐차상태코드(성능확인검사신청)', '조기폐차상태코드(보조금청구반려(제외))', '조기폐차상태코드(보조금청구)',
       '조기폐차상태코드(보조금대상)', '조기폐차상태코드(신청등록)'], dtype=object)

In [56]:
lgvr['조기폐차상태코드'].unique()

array(['A32M', 'A32C', 'A32X', 'A32B', 'A32Y', 'A32P', 'A32G', 'A32D',
       'A32I', 'A32N'], dtype=object)

In [57]:
lgvr['조기폐차상태코드'] = lgvr['조기폐차상태코드'].replace(erase_dict)
lgvr['조기폐차상태코드'].unique()

array(['조기폐차상태코드(보조금산정)', '조기폐차상태코드(성능확인검사등록)', '조기폐차상태코드(신청취소(제외))',
       '조기폐차상태코드(보조금미대상)', '조기폐차상태코드(보조금청구승인)', '조기폐차상태코드(보조금대상)',
       '조기폐차상태코드(보조금청구)', '조기폐차상태코드(기간초과)', '조기폐차상태코드(신청등록)',
       '조기폐차상태코드(보조금청구반려(제외))'], dtype=object)

### 조기폐차 신청 정보 추가

In [58]:
aear['조기폐차신청여부'] = 'Y'
lgvr['조기폐차신청여부'] = 'Y'

## 조기폐차 병합

In [59]:
elp = pd.concat([aear, lgvr], ignore_index=True)
elp.shape

(1200370, 5)

In [60]:
elp.shape, len(elp['차대번호'].unique())

((1200370, 5), 1125610)

In [61]:
elpm = elp.sort_values('조기폐차최종승인YN', ascending=False).drop_duplicates('차대번호').reset_index(drop=True)
elpm.shape

(1125610, 5)

In [62]:
elpm = elpm[elpm['조기폐차최종승인YN'] == 'Y'].reset_index(drop=True)
elpm.shape

(887431, 5)

## 등록(말소유지)&제원

In [63]:
carr.shape

(1906823, 13)

In [64]:
cse = carr.merge(srcr, on='제원관리번호', how='left')
cse.shape

(1906823, 19)

## 등록(말소유지)&제원&법정동

In [65]:
cse['법정동코드'] = cse['법정동코드'].astype('str')
cse['법정동코드'] = cse['법정동코드'].str[:5] + '00000'
cse['법정동코드'] = pd.to_numeric(cse['법정동코드'], errors='coerce')

In [66]:
csec = cse.merge(coder, on='법정동코드', how='left')
csec.shape

(1906823, 21)

In [67]:
csec['시도'].isnull().sum()

0

## 등록(유지)&제원&법정동&조기폐차

In [68]:
csec.shape

(1906823, 21)

In [69]:
csece = csec.merge(elpm, on='차대번호', how='left')
csece.shape

(1906823, 25)

## 등록(유지)&제원&법정동&조기폐차

In [70]:
dfe = csece.merge(attr, on='차대번호', how='left')
dfe.shape

(1906823, 28)

## 등록&제원&저감

In [71]:
cs.shape

(1390280, 19)

In [72]:
csa = cs.merge(attr, on='차대번호', how='left')
csa.shape

(1390280, 22)

## 등록&제원&저감&법정동

In [73]:
csa['법정동코드'] = csa['법정동코드'].astype('str')
csa['법정동코드'] = csa['법정동코드'].str[:5] + '00000'
csa['법정동코드'] = pd.to_numeric(csa['법정동코드'], errors='coerce')

In [74]:
csac = csa.merge(coder, on='법정동코드', how='left')
csac.shape

(1390280, 24)

In [75]:
csac['시도'].isnull().sum()

0

## 등록&제원&저감&법정동&조기폐차

In [76]:
csac.shape

(1390280, 24)

In [77]:
df = csac.merge(elpm, on='차대번호', how='left')
df.shape

(1390280, 28)

In [78]:
df['연료'].value_counts(dropna=False)

경유             1052802
휘발유             313038
LPG(액화석유가스)      24355
NaN                 49
기타연료                34
CNG(압축천연가스)          1
알코올                  1
Name: 연료, dtype: int64

In [79]:
df['법정동코드_수정'] = df['법정동코드'].copy()

## 4등급 result 파일 참고하여 DPF유무 수정

In [80]:
rdf = df.copy()

In [81]:
len(set(rdf['차대번호'].unique()) - set(rs['차대번호'].unique()))

2092

In [82]:
rdf['DPF_YN'].value_counts(dropna=False)

NaN    1390227
유           53
Name: DPF_YN, dtype: int64

In [83]:
rs['DPF유무_수정'].value_counts(dropna=False)

무       879481
NaN     348422
유       261897
확인불가     12435
Name: DPF유무_수정, dtype: int64

In [84]:
rs = rs.drop_duplicates('차대번호').reset_index(drop=True)
rs.shape

(1502213, 2)

In [85]:
rs['DPF유무_수정'].value_counts(dropna=False)

무       879480
NaN     348401
유       261897
확인불가     12435
Name: DPF유무_수정, dtype: int64

In [86]:
rdf1 = rdf.merge(rs, on='차대번호', how='left')

In [87]:
rdf1.loc[(rdf1['DPF_YN'] == '유') | (rdf1['DPF유무_수정'] == '유'), 'DPF_YN'] = '유'
rdf1.loc[(rdf1['DPF유무_수정'] == '무'), 'DPF_YN'] = '무'
rdf1.loc[(rdf1['DPF유무_수정'] == '확인불가'), 'DPF_YN'] = '확인불가'

In [88]:
rdf1['DPF_YN'].value_counts(dropna=False)

무       806991
NaN     337658
유       234291
확인불가     11340
Name: DPF_YN, dtype: int64

In [89]:
rdf1.columns

Index(['자동차등록번호', '차대번호', '법정동코드', '배출가스등급', '배출가스인증번호', '차량말소YN', '제원관리번호',
       '차량연식', '차종', '용도', '최초등록일자', '제작일자', '차량관리번호', '연료', '차종유형', '제작사명',
       '차명', '자동차형식', '엔진형식', '저감장치구분', '저감장치종류', 'DPF_YN', '시도', '시군구',
       '조기폐차상태코드', '조기폐차최종승인YN', '말소일자', '조기폐차신청여부', '법정동코드_수정', 'DPF유무_수정'],
      dtype='object')

In [90]:
df = rdf1.drop('DPF유무_수정', axis=1)

In [91]:
df.shape

(1390280, 29)

In [92]:
rdf = dfe.copy()

In [93]:
rdf1 = rdf.merge(rs, on='차대번호', how='left')

In [94]:
rdf1.loc[(rdf1['DPF_YN'] == '유') | (rdf1['DPF유무_수정'] == '유'), 'DPF_YN'] = '유'
rdf1.loc[(rdf1['DPF유무_수정'] == '무'), 'DPF_YN'] = '무'
rdf1.loc[(rdf1['DPF유무_수정'] == '확인불가'), 'DPF_YN'] = '확인불가'

In [95]:
dfe = rdf1.drop('DPF유무_수정', axis=1)

In [96]:
dfe.shape

(1906823, 28)

## 말소차량 데이터

### 등록 & 제원 정보 병합(말소 유지)

In [97]:
cse.shape

(1906823, 19)

### 1\. 차량관리번호 기준 병합

In [98]:
cse.shape   

(1906823, 19)

In [99]:
# 58.3s
ersr = cse.merge(hisr, on='차량관리번호', how='left')
ersr.shape

(4057265, 21)

### 2\. 차량말소YN 만 추출

In [100]:
errm = ersr[(ersr['차량말소YN_x'] == 'Y') & (ersr['차량말소YN_y'] == 'Y')].reset_index(drop=True)
errm.shape

(355877, 21)

### 3\. 변경일자 최신으로 차대번호 중복 제거

In [101]:
errm = errm.sort_values('변경일자', ascending=False).drop_duplicates('차대번호').reset_index(drop=True)

### 4\. 변경일자 2019.01.01 이상만 추출

In [102]:
errm = errm[errm['변경일자'] >= 20190101].reset_index(drop=True)

In [103]:
errm.shape

(353233, 21)

In [104]:
errm.columns

Index(['자동차등록번호', '차대번호', '법정동코드', '배출가스등급', '배출가스인증번호', '차량말소YN_x', '제원관리번호',
       '차량연식', '차종', '용도', '최초등록일자', '제작일자', '차량관리번호', '연료', '차종유형', '제작사명',
       '차명', '자동차형식', '엔진형식', '차량말소YN_y', '변경일자'],
      dtype='object')

### 5\. 법정동 코드 정보 병합

In [105]:
errm['법정동코드'] = errm['법정동코드'].astype('str')
errm['법정동코드'] = errm['법정동코드'].str[:5] + '00000'
errm['법정동코드'] = pd.to_numeric(errm['법정동코드'], errors='coerce')

In [106]:
errc = errm.merge(coder, on='법정동코드', how='left')
errc.shape

(353233, 23)

In [107]:
errc['시도'].isnull().sum()

0

In [108]:
errc['시도'].unique()

array(['경기도', '부산광역시', '울산광역시', '제주특별자치도', '전라북도', '경상남도', '대전광역시',
       '서울특별시', '인천광역시', '전라남도', '충청남도', '세종특별자치시', '광주광역시', '충청북도',
       '대구광역시', '경상북도', '강원특별자치도'], dtype=object)

In [109]:
errc[errc['시도'] == '강원특별자치도']

Unnamed: 0,자동차등록번호,차대번호,법정동코드,배출가스등급,배출가스인증번호,차량말소YN_x,제원관리번호,차량연식,차종,용도,...,연료,차종유형,제작사명,차명,자동차형식,엔진형식,차량말소YN_y,변경일자,시도,시군구
256640,85도0186,KMFZSZ7JAAU663689,5111000000,4,7MY-HD-14-68,Y,A0810006208633109,2010,화물,개인용,...,경유,소형,현대자동차(주),포터Ⅱ 내장탑차 (PORTERⅡ),HR-3HDS3JKM-1001,D4CB,Y,20200408.0,강원특별자치도,춘천시
289306,95가0814,KMFZSZ7JABU745861,5113000000,4,7MY-HD-14-68,Y,A0810006209563110,2011,화물,개인용,...,경유,소형,현대자동차(주),포터Ⅱ내장탑차,HR-3HDS3JKM-1101,D4CB,Y,20191223.0,강원특별자치도,원주시


# 분석

## 4등급 경유 조기폐차

In [230]:
df1 = dfe[dfe['연료'] == '경유'].reset_index(drop=True)
df1.shape

(1411497, 28)

In [231]:
df1.columns

Index(['자동차등록번호', '차대번호', '법정동코드', '배출가스등급', '배출가스인증번호', '차량말소YN', '제원관리번호',
       '차량연식', '차종', '용도', '최초등록일자', '제작일자', '차량관리번호', '연료', '차종유형', '제작사명',
       '차명', '자동차형식', '엔진형식', '시도', '시군구', '조기폐차상태코드', '조기폐차최종승인YN', '말소일자',
       '조기폐차신청여부', '저감장치구분', '저감장치종류', 'DPF_YN'],
      dtype='object')

In [232]:
df1['말소일자'].dtype

dtype('float64')

In [233]:
df1['조기폐차최종승인YN'].value_counts(dropna=False)

NaN    1380714
Y        30783
Name: 조기폐차최종승인YN, dtype: int64

In [234]:
df1.loc[df1['조기폐차최종승인YN'] == 'Y', '말소일자'].isnull().sum()

0

In [235]:
df1.loc[df1['조기폐차최종승인YN'] == 'Y', '말소일자'].head()

0     20230418.00
56    20230419.00
63    20230316.00
66    20230324.00
139   20230510.00
Name: 말소일자, dtype: float64

In [236]:
idx = df1.loc[df1['조기폐차최종승인YN'] == 'Y', '말소일자'].index
len(idx)

30783

In [237]:
df1_ey = df1.loc[idx]
df1_en = df1.loc[set(df1.index) - set(idx)]
df1_ey.shape[0] + df1_en.shape[0]

  df1_en = df1.loc[set(df1.index) - set(idx)]


1411497

In [238]:
df1_ey['말소일자'].dtype

dtype('float64')

In [239]:
df1_ey['말소일자'].head()

0     20230418.00
56    20230419.00
63    20230316.00
66    20230324.00
139   20230510.00
Name: 말소일자, dtype: float64

In [240]:
df1_ey['말소일자'] = df1_ey['말소일자'].astype('str')
df1_ey['말소일자'].head()

0      20230418.0
56     20230419.0
63     20230316.0
66     20230324.0
139    20230510.0
Name: 말소일자, dtype: object

In [241]:
df1_ey['기준연월'] = df1_ey['말소일자'].str[:4] + '.' + df1_ey['말소일자'].str[4:6]

In [242]:
df1_ey[['말소일자', '기준연월']].head()

Unnamed: 0,말소일자,기준연월
0,20230418.0,2023.04
56,20230419.0,2023.04
63,20230316.0,2023.03
66,20230324.0,2023.03
139,20230510.0,2023.05


In [243]:
df1 = pd.concat([df1_ey, df1_en], ignore_index=True)
df1.shape

(1411497, 29)

In [244]:
df1.loc[df1['조기폐차최종승인YN'] == 'Y', ['말소일자', '기준연월']].head()

Unnamed: 0,말소일자,기준연월
0,20230418.0,2023.04
1,20230419.0,2023.04
2,20230316.0,2023.03
3,20230324.0,2023.03
4,20230510.0,2023.05


In [247]:
df1[['말소일자', '기준연월']].head()

Unnamed: 0,말소일자,기준연월
0,20230418.0,2023.04
1,20230419.0,2023.04
2,20230316.0,2023.03
3,20230324.0,2023.03
4,20230510.0,2023.05


In [246]:
df1[['말소일자', '기준연월']].tail()

Unnamed: 0,말소일자,기준연월
1411492,,
1411493,,
1411494,,
1411495,,
1411496,,


In [220]:
today_date = datetime.today().strftime("%Y%m%d")
today_date

'20231010'

In [221]:
STD_BD_GRD4_ELPDSRC_CURSTT = df1[[
    '기준연월', 
    '차대번호', 
    '법정동코드', 
    '차종', 
    '용도', 
    '연료', 
    '차종유형', 
    '시도',
    '시군구', 
    '조기폐차상태코드', 
    '조기폐차최종승인YN',
]]

In [222]:
STD_BD_GRD4_ELPDSRC_CURSTT['테이블생성일자'] = today_date

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  STD_BD_GRD4_ELPDSRC_CURSTT['테이블생성일자'] = today_date


In [223]:
STD_BD_GRD4_ELPDSRC_CURSTT = STD_BD_GRD4_ELPDSRC_CURSTT[[
    '기준연월', 
    '차대번호', 
    '법정동코드', 
    '차종', 
    '용도', 
    '연료', 
    '차종유형', 
    '시도', 
    '시군구', 
    '조기폐차상태코드',
    '조기폐차최종승인YN', 
    '테이블생성일자', 
]]

In [224]:
chc_dict = {
    '기준연월':'CRTR_YM', 
    '차대번호':'VIN', 
    '법정동코드':'STDG_CD', 
    '차종':'VHCTY_CD', 
    '용도':'PURPS_CD2', 
    '연료':'FUEL_CD', 
    '차종유형':'VHCTY_TY', 
    '시도':'CTPV', 
    '시군구':'SGG', 
    '조기폐차상태코드':'ELPDSRC_STTS_CD',
    '조기폐차최종승인YN':'ELPDSRC_LAST_APRV_YN', 
    '테이블생성일자':'LOAD_DT', 
}

In [225]:
STD_BD_GRD4_ELPDSRC_CURSTT = STD_BD_GRD4_ELPDSRC_CURSTT.rename(columns=chc_dict)
STD_BD_GRD4_ELPDSRC_CURSTT.columns

Index(['CRTR_YM', 'VIN', 'STDG_CD', 'VHCTY_CD', 'PURPS_CD2', 'FUEL_CD',
       'VHCTY_TY', 'CTPV', 'SGG', 'ELPDSRC_STTS_CD', 'ELPDSRC_LAST_APRV_YN',
       'LOAD_DT'],
      dtype='object')

In [226]:
STD_BD_GRD4_ELPDSRC_CURSTT.head()

Unnamed: 0,CRTR_YM,VIN,STDG_CD,VHCTY_CD,PURPS_CD2,FUEL_CD,VHCTY_TY,CTPV,SGG,ELPDSRC_STTS_CD,ELPDSRC_LAST_APRV_YN,LOAD_DT
0,2023.04,KMHSJ81XBAU555611,2826000000,승용,개인용,경유,대형,인천광역시,서구,조기폐차상태코드(보조금청구승인),Y,20231010
1,2023.04,KMJWAH7JP8U033092,4148000000,승합,개인용,경유,중형,경기도,파주시,조기폐차상태코드(보조금청구승인),Y,20231010
2,2023.03,KMHJN81VP7U573542,2814000000,승용,개인용,경유,중형,인천광역시,동구,조기폐차상태코드(보조금청구승인),Y,20231010
3,2023.03,KNAJE55537K304711,2811000000,승용,개인용,경유,중형,인천광역시,중구,조기폐차상태코드(보조금청구승인),Y,20231010
4,2023.05,KN3HNP4D26K140539,4413100000,화물,개인용,경유,소형,충청남도,천안시,조기폐차상태코드(보조금청구승인),Y,20231010


### [출력] STD_BD_GRD4_ELPDSRC_CURSTT

In [122]:
# expdf = STD_BD_GRD4_ELPDSRC_CURSTT
# table_nm = 'STD_BD_GRD4_ELPDSRC_CURSTT'.upper()

# # 테이블 생성
# sql = 'create or replace table ' + table_nm + '( \n'

# for idx,column in enumerate(expdf.columns):
#     # if 'float' in expdf[column].dtype.name:
#     #     sql += column + ' float'
#     # elif 'int' in expdf[column].dtype.name:
#     #     sql += column + ' number'
#     # else:
#     sql += column + ' varchar(255)'

#     if len(expdf.columns) - 1 != idx:
#         sql += ','
#     sql += '\n'
# sql += ')'    
# we.execute(sql)

# # 데이터 추가
# # 5s
# we.import_from_pandas(expdf, table_nm)

In [123]:
# # 
# STD_BD_GRD4_ELPDSRC_CURSTT.to_csv(os.path.join(df1_fold, 'STD_BD_GRD4_ELPDSRC_CURSTT.csv'), index=False)

## 4등급 세분류

In [124]:
df['연료'].value_counts(dropna=False)

경유             1052802
휘발유             313038
LPG(액화석유가스)      24355
NaN                 49
기타연료                34
CNG(압축천연가스)          1
알코올                  1
Name: 연료, dtype: int64

### fuel 컬럼 추가

In [305]:
df1 = df.copy()

In [306]:
df1.loc[df1['연료'] == '경유', 'fuel'] = '경유'
df1.loc[(df1['연료'] == '휘발유') | (df1['연료'] == 'LPG(액화석유가스)'), 'fuel'] = '휘발유_가스'

In [307]:
df1['fuel'].value_counts(dropna=False)

fuel
경유        1052802
휘발유_가스     337393
NaN            85
Name: count, dtype: int64

### EG 분류

In [308]:
grade_list = []
for f, y, cy, e in tqdm(df1[['fuel', '제작일자', '차량연식', 'DPF_YN']].values):
    if (f == '휘발유_가스') and ( (19980101 <= y <= 20001231) or (1998 <= cy <= 2000) ):
        grade_list.append('A')
    elif (f == '휘발유_가스') and ( (y <= 19971231) or (cy <= 1997) ):
        grade_list.append('B')
    elif (f == '경유') and ( (y >= 20080101) or (cy >= 2008) ) and (e == '유'):
        grade_list.append('A')
    elif (f == '경유') and ( (y <= 20071231) or (cy <= 2007) )and (e == '유'):
        grade_list.append('B')
    elif (f == '경유') and ( (y >= 20080101) or (cy >= 2008) ) and (e == '무'):
        grade_list.append('C')
    elif (f == '경유') and ( (y <= 20071231) or (cy <= 2007) ) and (e == '무'):
        grade_list.append('D')
    else:
        grade_list.append('X')
len(grade_list)

 18%|█▊        | 247467/1390280 [00:00<00:02, 498752.03it/s]

100%|██████████| 1390280/1390280 [00:02<00:00, 523348.23it/s]


1390280

In [309]:
df1['EG'] = grade_list

In [310]:
df1.columns

Index(['자동차등록번호', '차대번호', '법정동코드', '배출가스등급', '배출가스인증번호', '차량말소YN', '제원관리번호',
       '차량연식', '차종', '용도', '최초등록일자', '제작일자', '차량관리번호', '연료', '차종유형', '제작사명',
       '차명', '자동차형식', '엔진형식', '저감장치구분', '저감장치종류', 'DPF_YN', '시도', '시군구',
       '조기폐차상태코드', '조기폐차최종승인YN', '말소일자', '조기폐차신청여부', '법정동코드_수정', 'fuel', 'EG'],
      dtype='object')

In [311]:
STD_BD_GRD4_MLSFC_RSLT = df1[[
    '차대번호', 
    '제원관리번호',
    '차종', 
    '용도', 
    '차량연식', 
    '차종유형', 
    '연료', 
    '법정동코드', 
    '시도', 
    '시군구', 
    'DPF_YN',
    'EG',
    '법정동코드_수정',
    ]]
STD_BD_GRD4_MLSFC_RSLT.columns

Index(['차대번호', '제원관리번호', '차종', '용도', '차량연식', '차종유형', '연료', '법정동코드', '시도',
       '시군구', 'DPF_YN', 'EG', '법정동코드_수정'],
      dtype='object')

In [312]:
today_date = datetime.today().strftime("%Y%m%d")
today_date

'20230823'

In [313]:
STD_BD_GRD4_MLSFC_RSLT['테이블생성일자'] = today_date

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  STD_BD_GRD4_MLSFC_RSLT['테이블생성일자'] = today_date


In [314]:
STD_BD_GRD4_MLSFC_RSLT = STD_BD_GRD4_MLSFC_RSLT[[
    '테이블생성일자', 
    '차대번호', 
    '제원관리번호', 
    '차종', 
    '용도', 
    '차량연식', 
    '차종유형', 
    '연료', 
    '법정동코드', 
    '시도',
    '시군구', 
    'DPF_YN', 
    'EG', 
    '법정동코드_수정',
    ]]
STD_BD_GRD4_MLSFC_RSLT.columns

Index(['테이블생성일자', '차대번호', '제원관리번호', '차종', '용도', '차량연식', '차종유형', '연료', '법정동코드',
       '시도', '시군구', 'DPF_YN', 'EG', '법정동코드_수정'],
      dtype='object')

In [315]:
STD_BD_GRD4_MLSFC_RSLT['EG'].value_counts(dropna=False)

EG
C    774627
B    364737
A    193587
D     32365
X     24964
Name: count, dtype: int64

In [316]:
STD_BD_GRD4_MLSFC_RSLT['법정동코드_수정'] = STD_BD_GRD4_MLSFC_RSLT['법정동코드_수정'].astype('str')

In [317]:
ch_col_dict = {
                '테이블생성일자':'LOAD_DT',
                '차대번호':'VIN', 
                '제원관리번호':'MANG_MNG_NO',
                '차종':'VHCTY_CD', 
                '용도':'PURPS_CD2',
                '차량연식':'YRIDNW', 
                '차종유형':'VHCTY_TY', 
                '연료':'FUEL_CD', 
                '법정동코드':'STDG_CD', 
                '시도':'CTPV_NM',
                '시군구':'SGG_NM',
                'DPF_YN':'DPF_MNTNG_YN',
                'EG':'GRD4_MLSFC', 
                '법정동코드_수정':'STDG_CD_MOD'
                }

In [318]:
STD_BD_GRD4_MLSFC_RSLT = STD_BD_GRD4_MLSFC_RSLT.rename(columns=ch_col_dict)
STD_BD_GRD4_MLSFC_RSLT.columns

Index(['LOAD_DT', 'VIN', 'MANG_MNG_NO', 'VHCTY_CD', 'PURPS_CD2', 'YRIDNW',
       'VHCTY_TY', 'FUEL_CD', 'STDG_CD', 'CTPV_NM', 'SGG_NM', 'DPF_MNTNG_YN',
       'GRD4_MLSFC', 'STDG_CD_MOD'],
      dtype='object')

In [319]:
STD_BD_GRD4_MLSFC_RSLT.tail()

Unnamed: 0,LOAD_DT,VIN,MANG_MNG_NO,VHCTY_CD,PURPS_CD2,YRIDNW,VHCTY_TY,FUEL_CD,STDG_CD,CTPV_NM,SGG_NM,DPF_MNTNG_YN,GRD4_MLSFC,STDG_CD_MOD
1390275,20230823,KLAER1931TB044115,10002200860001,승용,개인용,1996,중형,휘발유,5182000000,강원특별자치도,고성군,,B,5182000000
1390276,20230823,KLY7T11YDNC004413,99932601001092403,승합,개인용,1992,경형,휘발유,2623000000,부산광역시,부산진구,,B,2623000000
1390277,20230823,KMHJF31JPPU555457,10022700140000,승용,개인용,1993,소형,휘발유,1132000000,서울특별시,도봉구,,B,1132000000
1390278,20230823,KMHVF21JPMU415752,99934103019601103,승용,개인용,1991,소형,휘발유,4122000000,경기도,평택시,,B,4122000000
1390279,20230823,KNADA23K2NS120848,99934125002081103,승용,개인용,1992,소형,휘발유,4122000000,경기도,평택시,,B,4122000000


### [출력] STD_BD_GRD4_MLSFC_RSLT

In [320]:
# expdf = STD_BD_GRD4_MLSFC_RSLT
# table_nm = 'STD_BD_GRD4_MLSFC_RSLT'.upper()

# # 테이블 생성
# sql = 'create or replace table ' + table_nm + '( \n'

# for idx,column in enumerate(expdf.columns):
#     # if 'float' in expdf[column].dtype.name:
#     #     sql += column + ' float'
#     # elif 'int' in expdf[column].dtype.name:
#     #     sql += column + ' number'
#     # else:
#     sql += column + ' varchar(255)'

#     if len(expdf.columns) - 1 != idx:
#         sql += ','
#     sql += '\n'
# sql += ')'    
# we.execute(sql)

# # 데이터 추가
# # 7s
# we.import_from_pandas(expdf, table_nm)

In [321]:
# # 
# STD_BD_GRD4_MLSFC_RSLT.to_csv(os.path.join(df1_fold, 'STD_BD_GRD4_MLSFC_RSLT.csv'), index=False)

## 4등급 등급세분류

In [322]:
df1.columns

Index(['자동차등록번호', '차대번호', '법정동코드', '배출가스등급', '배출가스인증번호', '차량말소YN', '제원관리번호',
       '차량연식', '차종', '용도', '최초등록일자', '제작일자', '차량관리번호', '연료', '차종유형', '제작사명',
       '차명', '자동차형식', '엔진형식', '저감장치구분', '저감장치종류', 'DPF_YN', '시도', '시군구',
       '조기폐차상태코드', '조기폐차최종승인YN', '말소일자', '조기폐차신청여부', '법정동코드_수정', 'fuel', 'EG'],
      dtype='object')

In [323]:
dat_mlsfc = df1.copy()

In [324]:
dat_mlsfc['시군구_수정'] = dat_mlsfc['시군구'].str.split(' ').str[0]

In [325]:
dat_mlsfc['EG'].isnull().sum()

0

In [326]:
grp1 = dat_mlsfc.groupby(['연료', '시도', '시군구_수정', '차종', '차종유형', '용도', 'EG'])['차대번호'].count().unstack('EG').reset_index()
grp1

EG,연료,시도,시군구_수정,차종,차종유형,용도,A,B,C,D,X
0,CNG(압축천연가스),전라남도,해남군,화물,대형,개인용,,,,,1.00
1,LPG(액화석유가스),전라남도,강진군,승용,중형,개인용,2.00,2.00,,,
2,LPG(액화석유가스),전라남도,강진군,승합,경형,개인용,,4.00,,,
3,LPG(액화석유가스),전라남도,강진군,승합,소형,개인용,,1.00,,,
4,LPG(액화석유가스),전라남도,강진군,승합,중형,개인용,4.00,1.00,,,
...,...,...,...,...,...,...,...,...,...,...,...
8293,휘발유,충청북도,충주시,승합,경형,개인용,,13.00,,,
8294,휘발유,충청북도,충주시,승합,소형,개인용,,3.00,,,
8295,휘발유,충청북도,충주시,승합,중형,개인용,1.00,,,,
8296,휘발유,충청북도,충주시,화물,경형,개인용,,7.00,,,


In [327]:
# 연도 설정
# grp1['연도'] = '2022'
grp1['연도'] = today_date[:4]
grp1['테이블생성일자'] = today_date

In [328]:
grp1.columns

Index(['연료', '시도', '시군구_수정', '차종', '차종유형', '용도', 'A', 'B', 'C', 'D', 'X', '연도',
       '테이블생성일자'],
      dtype='object', name='EG')

In [329]:
STD_BD_DAT_GRD4_MLSFC = grp1[[
    '연도', 
    '연료', 
    '시도', 
    '시군구_수정', 
    '차종', 
    '차종유형', 
    '용도', 
    'A', 
    'B', 
    'C', 
    'D', 
    'X',
    '테이블생성일자',
]]

In [330]:
cdict = {
    '연도':'YR', 
    '연료':'FUEL_CD', 
    '시도':'CTPV', 
    '시군구_수정':'SGG', 
    '차종':'VHCTY_CD', 
    '차종유형':'VHCTY_TY', 
    '용도':'PURPS_CD2', 
    'A':'A_MKCNT', 
    'B':'B_MKCNT', 
    'C':'C_MKCNT', 
    'D':'D_MKCNT', 
    'X':'X_MKCNT', 
    '테이블생성일자':'LOAD_DT', 
}
STD_BD_DAT_GRD4_MLSFC = grp1.rename(columns=cdict)
STD_BD_DAT_GRD4_MLSFC.columns

Index(['FUEL_CD', 'CTPV', 'SGG', 'VHCTY_CD', 'VHCTY_TY', 'PURPS_CD2',
       'A_MKCNT', 'B_MKCNT', 'C_MKCNT', 'D_MKCNT', 'X_MKCNT', 'YR', 'LOAD_DT'],
      dtype='object', name='EG')

### [출력] STD_BD_DAT_GRD4_MLSFC

In [331]:
# expdf = STD_BD_DAT_GRD4_MLSFC
# table_nm = 'STD_BD_DAT_GRD4_MLSFC'.upper()

# # 테이블 생성
# sql = 'create or replace table ' + table_nm + '( \n'

# for idx,column in enumerate(expdf.columns):
#     # if 'float' in expdf[column].dtype.name:
#     #     sql += column + ' float'
#     # elif 'int' in expdf[column].dtype.name:
#     #     sql += column + ' number'
#     # else:
#     sql += column + ' varchar(255)'

#     if len(expdf.columns) - 1 != idx:
#         sql += ','
#     sql += '\n'
# sql += ')'    
# we.execute(sql)

# # 데이터 추가
# # 7s
# we.import_from_pandas(expdf, table_nm)

In [332]:
# # 
# STD_BD_DAT_GRD4_MLSFC.to_csv(os.path.join(df1_fold, 'STD_BD_DAT_GRD4_MLSFC.csv'), index=False)

## 4등급차량 상세정보

In [333]:
dfe.shape

(1906823, 28)

In [334]:
dfe.columns

Index(['자동차등록번호', '차대번호', '법정동코드', '배출가스등급', '배출가스인증번호', '차량말소YN', '제원관리번호',
       '차량연식', '차종', '용도', '최초등록일자', '제작일자', '차량관리번호', '연료', '차종유형', '제작사명',
       '차명', '자동차형식', '엔진형식', '시도', '시군구', '조기폐차상태코드', '조기폐차최종승인YN', '말소일자',
       '조기폐차신청여부', '저감장치구분', '저감장치종류', 'DPF_YN'],
      dtype='object')

### 4등급 result 파일 참고하여 DPF유무 수정

In [335]:
rdf = dfe.copy()

In [336]:
len(set(rdf['차대번호'].unique()) - set(rs['차대번호'].unique()))

410633

In [337]:
rdf['DPF_YN'].value_counts(dropna=False)

DPF_YN
무       876662
NaN     758753
유       259030
확인불가     12378
Name: count, dtype: int64

In [338]:
rs['DPF유무_수정'].value_counts(dropna=False)

DPF유무_수정
무       879481
NaN     348400
유       261897
확인불가     12435
Name: count, dtype: int64

In [339]:
rs = rs.drop_duplicates('차대번호').reset_index(drop=True)
rs.shape

(1502213, 2)

In [340]:
rs['DPF유무_수정'].value_counts(dropna=False)

DPF유무_수정
무       879481
NaN     348400
유       261897
확인불가     12435
Name: count, dtype: int64

In [341]:
rdf1 = rdf.merge(rs, on='차대번호', how='left')

In [342]:
rdf1.loc[(rdf1['DPF_YN'] == '유') | (rdf1['DPF유무_수정'] == '유'), 'DPF_YN'] = '유'
rdf1.loc[(rdf1['DPF유무_수정'] == '무'), 'DPF_YN'] = '무'
rdf1.loc[(rdf1['DPF유무_수정'] == '확인불가'), 'DPF_YN'] = '확인불가'

In [343]:
rdf1['DPF_YN'].value_counts(dropna=False)

DPF_YN
무       876662
NaN     758753
유       259030
확인불가     12378
Name: count, dtype: int64

In [344]:
rdf1.columns

Index(['자동차등록번호', '차대번호', '법정동코드', '배출가스등급', '배출가스인증번호', '차량말소YN', '제원관리번호',
       '차량연식', '차종', '용도', '최초등록일자', '제작일자', '차량관리번호', '연료', '차종유형', '제작사명',
       '차명', '자동차형식', '엔진형식', '시도', '시군구', '조기폐차상태코드', '조기폐차최종승인YN', '말소일자',
       '조기폐차신청여부', '저감장치구분', '저감장치종류', 'DPF_YN', 'DPF유무_수정'],
      dtype='object')

In [345]:
dfe = rdf1.drop('DPF유무_수정', axis=1)

In [346]:
dfe.shape

(1906823, 28)

In [347]:
dfe.shape, len(dfe['차대번호'].unique())

((1906823, 28), 1906823)

In [348]:
dfee = dfe.merge(errc[['차대번호', '변경일자']], on='차대번호', how='left')
dfee.shape

(1906823, 29)

In [349]:
dfeem = dfee.merge(df1[['차대번호', 'EG']], on='차대번호', how='left')
dfeem.shape

(1906823, 30)

In [350]:
today_date = datetime.today().strftime("%Y%m%d")
today_date

'20230823'

In [351]:
dfeem['테이블생성일자'] = today_date

In [352]:
list(dfeem.columns)

['자동차등록번호',
 '차대번호',
 '법정동코드',
 '배출가스등급',
 '배출가스인증번호',
 '차량말소YN',
 '제원관리번호',
 '차량연식',
 '차종',
 '용도',
 '최초등록일자',
 '제작일자',
 '차량관리번호',
 '연료',
 '차종유형',
 '제작사명',
 '차명',
 '자동차형식',
 '엔진형식',
 '시도',
 '시군구',
 '조기폐차상태코드',
 '조기폐차최종승인YN',
 '말소일자',
 '조기폐차신청여부',
 '저감장치구분',
 '저감장치종류',
 'DPF_YN',
 '변경일자',
 'EG',
 '테이블생성일자']

In [353]:
STD_BD_DAT_GRD4_DTL_INFO = dfeem[[
    '자동차등록번호',
    '차대번호',
    'EG',
    '차종',
    '차종유형',
    '용도',
    '연료',
    '시도',
    '시군구',
    '차량연식',
    'DPF_YN',
    '저감장치종류',
    '최초등록일자',
    '조기폐차신청여부',
    '조기폐차상태코드',
    '변경일자',
    '차량말소YN',
    '테이블생성일자', 
    # '법정동코드',
    # '배출가스등급',
    # '배출가스인증번호',
    # '제원관리번호',
    # '제작일자',
    # '차량관리번호',
    # '제작사명',
    # '차명',
    # '자동차형식',
    # '엔진형식',
    # '저감장치구분',
    # '조기폐차최종승인YN',
]]
STD_BD_DAT_GRD4_DTL_INFO.shape

(1906823, 18)

In [354]:
cdict = {
    '자동차등록번호':'VHRNO',
    '차대번호':'VIN',
    'EG':'GRD4_MLSFC',
    '차종':'VHCTY_CD',
    '차종유형':'VHCTY_TY',
    '용도':'PURPS_CD2',
    '연료':'FUEL_CD',
    '시도':'CTPV',
    '시군구':'SGG',
    '차량연식':'YRIDNW',
    'DPF_YN':'DPF_MNTNG_YN',
    '저감장치종류':'RDCDVC_KND',
    '최초등록일자':'FRST_REG_YMD',
    '조기폐차신청여부':'ELPDSRC_APLY_YN',
    '조기폐차상태코드':'ELPDSRC_STTS_CD',
    '변경일자':'CHNG_DE',
    '차량말소YN':'VHCL_ERSR_YN',
    '테이블생성일자':'LOAD_DT', 
}
STD_BD_DAT_GRD4_DTL_INFO = STD_BD_DAT_GRD4_DTL_INFO.rename(columns=cdict)
STD_BD_DAT_GRD4_DTL_INFO.columns

Index(['VHRNO', 'VIN', 'GRD4_MLSFC', 'VHCTY_CD', 'VHCTY_TY', 'PURPS_CD2',
       'FUEL_CD', 'CTPV', 'SGG', 'YRIDNW', 'DPF_MNTNG_YN', 'RDCDVC_KND',
       'FRST_REG_YMD', 'ELPDSRC_APLY_YN', 'ELPDSRC_STTS_CD', 'CHNG_DE',
       'VHCL_ERSR_YN', 'LOAD_DT'],
      dtype='object')

In [355]:
list(STD_BD_DAT_GRD4_DTL_INFO.columns)

['VHRNO',
 'VIN',
 'GRD4_MLSFC',
 'VHCTY_CD',
 'VHCTY_TY',
 'PURPS_CD2',
 'FUEL_CD',
 'CTPV',
 'SGG',
 'YRIDNW',
 'DPF_MNTNG_YN',
 'RDCDVC_KND',
 'FRST_REG_YMD',
 'ELPDSRC_APLY_YN',
 'ELPDSRC_STTS_CD',
 'CHNG_DE',
 'VHCL_ERSR_YN',
 'LOAD_DT']

In [356]:
STD_BD_DAT_GRD4_DTL_INFO.shape

(1906823, 18)

### [출력] STD_BD_DAT_GRD4_DTL_INFO

In [357]:
# expdf = STD_BD_DAT_GRD4_DTL_INFO
# table_nm = 'STD_BD_DAT_GRD4_DTL_INFO'.upper()

# # 테이블 생성
# sql = 'create or replace table ' + table_nm + '( \n'

# for idx,column in enumerate(expdf.columns):
#     # if 'float' in expdf[column].dtype.name:
#     #     sql += column + ' float'
#     # elif 'int' in expdf[column].dtype.name:
#     #     sql += column + ' number'
#     # else:
#     sql += column + ' varchar(255)'

#     if len(expdf.columns) - 1 != idx:
#         sql += ','
#     sql += '\n'
# sql += ')'    
# we.execute(sql)

# # 데이터 추가
# # 7s
# we.import_from_pandas(expdf, table_nm)

In [358]:
# # 9.1s
# STD_BD_DAT_GRD4_DTL_INFO.to_csv(os.path.join(df1_fold, 'STD_BD_DAT_GRD4_DTL_INFO.csv'), index=False)

## 4등급 연월, 시도, 시군구별 차량 대수

In [359]:
dfm = df.copy()

In [360]:
dfm['최초등록일자'] = dfm['최초등록일자'].astype('str')

In [361]:
dfm['최초등록일자_년'] = dfm['최초등록일자'].str[:4]
dfm['최초등록일자_월'] = dfm['최초등록일자'].str[4:6]
dfm['최초등록일자_일'] = dfm['최초등록일자'].str[6:8]
dfm['최초등록일자'] = dfm['최초등록일자_년'] + dfm['최초등록일자_월'] + dfm['최초등록일자_일']
dfm['최초등록일자'] = pd.to_numeric(dfm['최초등록일자'], errors='coerce')

### 시군구명 앞쪽 지역명만 남기기(dfm)

In [362]:
# 시군구명 앞쪽 지역명만 남기기(dfm)
dfm['시군구_수정'] = dfm['시군구'].str.split(' ').str[0]

### 현재 연료 지역별 차량대수

In [363]:
num_car_by_local1 = dfm.groupby(['연료', '시도', '시군구_수정'], dropna=False)['차대번호'].count().reset_index()
num_car_by_local1 = num_car_by_local1.rename(columns={'차대번호':'차량대수'})
num_car_by_local1

Unnamed: 0,연료,시도,시군구_수정,차량대수
0,CNG(압축천연가스),전라남도,해남군,1
1,LPG(액화석유가스),강원특별자치도,강릉시,62
2,LPG(액화석유가스),강원특별자치도,고성군,13
3,LPG(액화석유가스),강원특별자치도,동해시,183
4,LPG(액화석유가스),강원특별자치도,삼척시,18
...,...,...,...,...
726,,전라남도,고흥군,1
727,,전라북도,군산시,3
728,,전라북도,익산시,1
729,,충청북도,진천군,1


In [364]:
# max_date = str(dfm['최초등록일자'].max())
# max_year = max_date[:4]
# max_month = max_date[4:6]
# max_year, max_month

In [365]:
# date = '20220601'
# max_year = '2022'
# max_month = '06'
date = today_date
max_year = today_date[:4]
max_month = today_date[4:6]
max_year, max_month

('2023', '08')

In [366]:
num_car_by_local1[['연도', '월']] = [max_year, max_month]
num_car_by_local1

Unnamed: 0,연료,시도,시군구_수정,차량대수,연도,월
0,CNG(압축천연가스),전라남도,해남군,1,2023,08
1,LPG(액화석유가스),강원특별자치도,강릉시,62,2023,08
2,LPG(액화석유가스),강원특별자치도,고성군,13,2023,08
3,LPG(액화석유가스),강원특별자치도,동해시,183,2023,08
4,LPG(액화석유가스),강원특별자치도,삼척시,18,2023,08
...,...,...,...,...,...,...
726,,전라남도,고흥군,1,2023,08
727,,전라북도,군산시,3,2023,08
728,,전라북도,익산시,1,2023,08
729,,충청북도,진천군,1,2023,08


### 연료 지역별 등록차량대수

In [367]:
num_car_by_local2 = dfm.groupby(['연료', '시도', '시군구_수정', '최초등록일자_년', '최초등록일자_월'], as_index=False)['차대번호'].count()
num_car_by_local2 = num_car_by_local2.rename(columns={'차대번호':'등록차량대수', '최초등록일자_년':'연도', '최초등록일자_월':'월'})
num_car_by_local2

Unnamed: 0,연료,시도,시군구_수정,연도,월,등록차량대수
0,CNG(압축천연가스),전라남도,해남군,2005,04,1
1,LPG(액화석유가스),강원특별자치도,강릉시,1990,03,1
2,LPG(액화석유가스),강원특별자치도,강릉시,1992,04,1
3,LPG(액화석유가스),강원특별자치도,강릉시,1993,05,1
4,LPG(액화석유가스),강원특별자치도,강릉시,1993,07,1
...,...,...,...,...,...,...
68912,휘발유,충청북도,충주시,2008,06,2
68913,휘발유,충청북도,충주시,2008,07,1
68914,휘발유,충청북도,충주시,2008,08,1
68915,휘발유,충청북도,충주시,2009,02,1


In [368]:
num_car_by_local2[num_car_by_local2['시도'] == '강원특별자치도']

Unnamed: 0,연료,시도,시군구_수정,연도,월,등록차량대수
1,LPG(액화석유가스),강원특별자치도,강릉시,1990,03,1
2,LPG(액화석유가스),강원특별자치도,강릉시,1992,04,1
3,LPG(액화석유가스),강원특별자치도,강릉시,1993,05,1
4,LPG(액화석유가스),강원특별자치도,강릉시,1993,07,1
5,LPG(액화석유가스),강원특별자치도,강릉시,1993,08,1
...,...,...,...,...,...,...
32096,휘발유,강원특별자치도,횡성군,2001,06,2
32097,휘발유,강원특별자치도,횡성군,2003,07,1
32098,휘발유,강원특별자치도,횡성군,2006,03,1
32099,휘발유,강원특별자치도,횡성군,2007,11,1


In [369]:
num_car_by_local2[(num_car_by_local2['시도'] == '강원특별자치도') & (num_car_by_local2['연도'] > '2022')]

Unnamed: 0,연료,시도,시군구_수정,연도,월,등록차량대수


In [370]:
num_car_by_local2.loc[num_car_by_local2['시도'] == '강원특별자치도', '연도'].max()

'2021'

### 연월, 연료 지역별 말소 대수

In [371]:
errc['변경일자'] = errc['변경일자'].astype('str')

In [372]:
errc['변경일자_년'] = errc['변경일자'].str[:4]
errc['변경일자_월'] = errc['변경일자'].str[4:6]
errc['변경일자_일'] = errc['변경일자'].str[6:8]

### 시군구명 앞쪽 지역명만 남기기(errc)

In [373]:
# 시군구명 앞쪽 지역명만 남기기(errc)
errc['시군구_수정'] = errc['시군구'].str.split(' ').str[0]

In [374]:
grp_erase = errc.groupby(['변경일자_년', '변경일자_월', '연료', '시도', '시군구_수정'], as_index=False)['차대번호'].count() # !!! 수정(2023.08.23)
grp_erase = grp_erase.rename(columns={'차대번호':'말소차량대수', '변경일자_년':'연도', '변경일자_월':'월'})
grp_erase = grp_erase.sort_values(['시도', '시군구_수정'])
grp_erase

Unnamed: 0,연도,월,연료,시도,시군구_수정,말소차량대수
3488,2019,12,경유,강원특별자치도,원주시,1
5715,2020,04,경유,강원특별자치도,춘천시,1
1,2019,05,휘발유,경기도,가평군,1
106,2019,06,경유,경기도,가평군,4
303,2019,06,휘발유,경기도,가평군,3
...,...,...,...,...,...,...
18864,2022,04,휘발유,충청북도,충주시,6
19172,2022,05,경유,충청북도,충주시,47
19368,2022,05,휘발유,충청북도,충주시,7
19664,2022,06,경유,충청북도,충주시,44


In [375]:
grp_erase[grp_erase['시도'] == '강원특별자치도']

Unnamed: 0,연도,월,연료,시도,시군구_수정,말소차량대수
3488,2019,12,경유,강원특별자치도,원주시,1
5715,2020,4,경유,강원특별자치도,춘천시,1


In [376]:
list(pd.date_range(end=date, periods=12, freq="MS").month)

[9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8]

In [377]:
periods = 12 # !!! 수정(2023.08.23)
y_plist = list(pd.date_range(end=date, periods=periods, freq="MS").year) # !!! 수정(2023.08.23)
mth_plist = list(pd.date_range(end=date, periods=periods, freq="MS").month) # !!! 수정(2023.08.23)
y_plist, mth_plist

([2022, 2022, 2022, 2022, 2023, 2023, 2023, 2023, 2023, 2023, 2023, 2023],
 [9, 10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8])

In [378]:
yr_list, mth_list, fuel_list, ctpv_list, sgg_list = [], [], [], [], [] 
sl = num_car_by_local1.drop_duplicates(['시도', '시군구_수정']).reset_index(drop=True)
for ctpv, sgg in sl[['시도', '시군구_수정']].values:
    for fuel in sl['연료'].unique():
        for yr, mth in zip(y_plist, mth_plist):
            mthm = f'{mth:0>2}'
            yr_list.append(str(yr))
            mth_list.append(mthm)
            fuel_list.append(fuel)
            ctpv_list.append(ctpv)
            sgg_list.append(sgg)
base = pd.DataFrame({'연도':yr_list, '월':mth_list, '연료':fuel_list, '시도':ctpv_list, '시군구_수정':sgg_list})

In [379]:
base

Unnamed: 0,연도,월,연료,시도,시군구_수정
0,2022,09,CNG(압축천연가스),전라남도,해남군
1,2022,10,CNG(압축천연가스),전라남도,해남군
2,2022,11,CNG(압축천연가스),전라남도,해남군
3,2022,12,CNG(압축천연가스),전라남도,해남군
4,2023,01,CNG(압축천연가스),전라남도,해남군
...,...,...,...,...,...
11131,2023,04,휘발유,인천광역시,남구
11132,2023,05,휘발유,인천광역시,남구
11133,2023,06,휘발유,인천광역시,남구
11134,2023,07,휘발유,인천광역시,남구


In [380]:
num_car_by_local1.head()

Unnamed: 0,연료,시도,시군구_수정,차량대수,연도,월
0,CNG(압축천연가스),전라남도,해남군,1,2023,8
1,LPG(액화석유가스),강원특별자치도,강릉시,62,2023,8
2,LPG(액화석유가스),강원특별자치도,고성군,13,2023,8
3,LPG(액화석유가스),강원특별자치도,동해시,183,2023,8
4,LPG(액화석유가스),강원특별자치도,삼척시,18,2023,8


In [381]:
base1 = base.merge(num_car_by_local1, on=['연도', '월', '연료', '시도', '시군구_수정'], how='left')

In [382]:
base2 = base1.merge(num_car_by_local2, on=['연도', '월', '연료', '시도', '시군구_수정'], how='left')

In [383]:
base3 = base2.merge(grp_erase, on=['연도', '월', '연료', '시도', '시군구_수정'], how='left')

In [384]:
base3[['차량대수', '등록차량대수', '말소차량대수']] = base3[['차량대수', '등록차량대수', '말소차량대수']].fillna(0)
base3[['차량대수', '등록차량대수', '말소차량대수']].isnull().sum()

차량대수      0
등록차량대수    0
말소차량대수    0
dtype: int64

In [385]:
periods

12

In [386]:
# n = len(base3['월'].unique())
n = periods # !!! 수정(2023.08.23)
for i in range(base3.shape[0] // n):
    for j in range(2, n+1):
        base3.loc[(i+1)*n - j, '차량대수'] = base3.loc[(i+1)*n - (j-1), '차량대수'] + base3.loc[(i+1)*n - (j-1), '말소차량대수'] - base3.loc[(i+1)*n - (j-1), '등록차량대수']

In [387]:
base3

Unnamed: 0,연도,월,연료,시도,시군구_수정,차량대수,등록차량대수,말소차량대수
0,2022,09,CNG(압축천연가스),전라남도,해남군,1.00,0.00,0.00
1,2022,10,CNG(압축천연가스),전라남도,해남군,1.00,0.00,0.00
2,2022,11,CNG(압축천연가스),전라남도,해남군,1.00,0.00,0.00
3,2022,12,CNG(압축천연가스),전라남도,해남군,1.00,0.00,0.00
4,2023,01,CNG(압축천연가스),전라남도,해남군,1.00,0.00,0.00
...,...,...,...,...,...,...,...,...
11131,2023,04,휘발유,인천광역시,남구,9.00,0.00,0.00
11132,2023,05,휘발유,인천광역시,남구,9.00,0.00,0.00
11133,2023,06,휘발유,인천광역시,남구,9.00,0.00,0.00
11134,2023,07,휘발유,인천광역시,남구,9.00,0.00,0.00


In [388]:
base3[(base3['시도'] == '강원특별자치도') & (base3['시군구_수정'] == '강릉시') & (base3['연료'] == 'CNG(압축천연가스)')]

Unnamed: 0,연도,월,연료,시도,시군구_수정,차량대수,등록차량대수,말소차량대수
48,2022,9,CNG(압축천연가스),강원특별자치도,강릉시,0.0,0.0,0.0
49,2022,10,CNG(압축천연가스),강원특별자치도,강릉시,0.0,0.0,0.0
50,2022,11,CNG(압축천연가스),강원특별자치도,강릉시,0.0,0.0,0.0
51,2022,12,CNG(압축천연가스),강원특별자치도,강릉시,0.0,0.0,0.0
52,2023,1,CNG(압축천연가스),강원특별자치도,강릉시,0.0,0.0,0.0
53,2023,2,CNG(압축천연가스),강원특별자치도,강릉시,0.0,0.0,0.0
54,2023,3,CNG(압축천연가스),강원특별자치도,강릉시,0.0,0.0,0.0
55,2023,4,CNG(압축천연가스),강원특별자치도,강릉시,0.0,0.0,0.0
56,2023,5,CNG(압축천연가스),강원특별자치도,강릉시,0.0,0.0,0.0
57,2023,6,CNG(압축천연가스),강원특별자치도,강릉시,0.0,0.0,0.0


In [389]:
base3[(base3['시도'] == '강원특별자치도') & (base3['시군구_수정'] == '강릉시') & (base3['연료'] == 'LPG(액화석유가스)')]

Unnamed: 0,연도,월,연료,시도,시군구_수정,차량대수,등록차량대수,말소차량대수
60,2022,9,LPG(액화석유가스),강원특별자치도,강릉시,62.0,0.0,0.0
61,2022,10,LPG(액화석유가스),강원특별자치도,강릉시,62.0,0.0,0.0
62,2022,11,LPG(액화석유가스),강원특별자치도,강릉시,62.0,0.0,0.0
63,2022,12,LPG(액화석유가스),강원특별자치도,강릉시,62.0,0.0,0.0
64,2023,1,LPG(액화석유가스),강원특별자치도,강릉시,62.0,0.0,0.0
65,2023,2,LPG(액화석유가스),강원특별자치도,강릉시,62.0,0.0,0.0
66,2023,3,LPG(액화석유가스),강원특별자치도,강릉시,62.0,0.0,0.0
67,2023,4,LPG(액화석유가스),강원특별자치도,강릉시,62.0,0.0,0.0
68,2023,5,LPG(액화석유가스),강원특별자치도,강릉시,62.0,0.0,0.0
69,2023,6,LPG(액화석유가스),강원특별자치도,강릉시,62.0,0.0,0.0


In [390]:
base3[(base3['시도'] == '강원특별자치도') & (base3['시군구_수정'] == '강릉시') & (base3['연료'] == '경유')]

Unnamed: 0,연도,월,연료,시도,시군구_수정,차량대수,등록차량대수,말소차량대수
72,2022,9,경유,강원특별자치도,강릉시,5348.0,0.0,0.0
73,2022,10,경유,강원특별자치도,강릉시,5348.0,0.0,0.0
74,2022,11,경유,강원특별자치도,강릉시,5348.0,0.0,0.0
75,2022,12,경유,강원특별자치도,강릉시,5348.0,0.0,0.0
76,2023,1,경유,강원특별자치도,강릉시,5348.0,0.0,0.0
77,2023,2,경유,강원특별자치도,강릉시,5348.0,0.0,0.0
78,2023,3,경유,강원특별자치도,강릉시,5348.0,0.0,0.0
79,2023,4,경유,강원특별자치도,강릉시,5348.0,0.0,0.0
80,2023,5,경유,강원특별자치도,강릉시,5348.0,0.0,0.0
81,2023,6,경유,강원특별자치도,강릉시,5348.0,0.0,0.0


In [391]:
base3[(base3['시도'] == '강원특별자치도') & (base3['시군구_수정'] == '강릉시') & (base3['연료'] == '휘발유')]

Unnamed: 0,연도,월,연료,시도,시군구_수정,차량대수,등록차량대수,말소차량대수
84,2022,9,휘발유,강원특별자치도,강릉시,1062.0,0.0,0.0
85,2022,10,휘발유,강원특별자치도,강릉시,1062.0,0.0,0.0
86,2022,11,휘발유,강원특별자치도,강릉시,1062.0,0.0,0.0
87,2022,12,휘발유,강원특별자치도,강릉시,1062.0,0.0,0.0
88,2023,1,휘발유,강원특별자치도,강릉시,1062.0,0.0,0.0
89,2023,2,휘발유,강원특별자치도,강릉시,1062.0,0.0,0.0
90,2023,3,휘발유,강원특별자치도,강릉시,1062.0,0.0,0.0
91,2023,4,휘발유,강원특별자치도,강릉시,1062.0,0.0,0.0
92,2023,5,휘발유,강원특별자치도,강릉시,1062.0,0.0,0.0
93,2023,6,휘발유,강원특별자치도,강릉시,1062.0,0.0,0.0


In [392]:
today_date = datetime.today().strftime("%Y%m%d")
today_date

'20230823'

In [393]:
base3['테이블생성일자'] = today_date

In [394]:
base3['기준연월'] = base3['연도'] + '.' + base3['월']
base3

Unnamed: 0,연도,월,연료,시도,시군구_수정,차량대수,등록차량대수,말소차량대수,테이블생성일자,기준연월
0,2022,09,CNG(압축천연가스),전라남도,해남군,1.00,0.00,0.00,20230823,2022.09
1,2022,10,CNG(압축천연가스),전라남도,해남군,1.00,0.00,0.00,20230823,2022.10
2,2022,11,CNG(압축천연가스),전라남도,해남군,1.00,0.00,0.00,20230823,2022.11
3,2022,12,CNG(압축천연가스),전라남도,해남군,1.00,0.00,0.00,20230823,2022.12
4,2023,01,CNG(압축천연가스),전라남도,해남군,1.00,0.00,0.00,20230823,2023.01
...,...,...,...,...,...,...,...,...,...,...
11131,2023,04,휘발유,인천광역시,남구,9.00,0.00,0.00,20230823,2023.04
11132,2023,05,휘발유,인천광역시,남구,9.00,0.00,0.00,20230823,2023.05
11133,2023,06,휘발유,인천광역시,남구,9.00,0.00,0.00,20230823,2023.06
11134,2023,07,휘발유,인천광역시,남구,9.00,0.00,0.00,20230823,2023.07


In [395]:
base4 = base3[[
    '테이블생성일자', 
    '기준연월',
    '연도',
    '월', 
    '연료', 
    '시도', 
    '시군구_수정', 
    '차량대수',
]]

In [396]:
chc_col = {
    '테이블생성일자':'LOAD_DT', 
    '기준연월':'CRTR_YM',
    '연도':'YR', 
    '월':'MM', 
    '연료':'FUEL_CD', 
    '시도':'CTPV', 
    '시군구_수정':'SGG', 
    '차량대수':'VHCL_MKCNT', 
}

In [397]:
STD_BD_GRD4_RGN_CURSTT = base4.rename(columns=chc_col)
STD_BD_GRD4_RGN_CURSTT.columns

Index(['LOAD_DT', 'CRTR_YM', 'YR', 'MM', 'FUEL_CD', 'CTPV', 'SGG',
       'VHCL_MKCNT'],
      dtype='object')

### [출력] STD_BD_GRD4_RGN_CURSTT

In [214]:
# expdf = STD_BD_GRD4_RGN_CURSTT
# table_nm = 'STD_BD_GRD4_RGN_CURSTT'.upper()

# # 테이블 생성
# sql = 'create or replace table ' + table_nm + '( \n'

# for idx,column in enumerate(expdf.columns):
#     # if 'float' in expdf[column].dtype.name:
#     #     sql += column + ' float'
#     # elif 'int' in expdf[column].dtype.name:
#     #     sql += column + ' number'
#     # else:
#     sql += column + ' varchar(255)'

#     if len(expdf.columns) - 1 != idx:
#         sql += ','
#     sql += '\n'
# sql += ')'    
# we.execute(sql)

# # 데이터 추가
# # 5s
# we.import_from_pandas(expdf, table_nm)

In [215]:
# # 0s
# STD_BD_GRD4_RGN_CURSTT.to_csv(os.path.join(df1_fold, 'STD_BD_GRD4_RGN_CURSTT.csv'), index=False)

## 4등급 연도, 시도, 차종별 차량 대수

### 현재 차량 대수

In [242]:
num_car_by_local1 = dfm.groupby(['시도', '차종'], dropna=False)['차대번호'].count().reset_index()
num_car_by_local1 = num_car_by_local1.rename(columns={'차대번호':'차량대수'})
num_car_by_local1

Unnamed: 0,시도,차종,차량대수
0,강원특별자치도,승용,25921
1,강원특별자치도,승합,4630
2,강원특별자치도,특수,161
3,강원특별자치도,화물,20474
4,경기도,승용,201304
...,...,...,...
63,충청남도,화물,31439
64,충청북도,승용,29747
65,충청북도,승합,5183
66,충청북도,특수,202


In [243]:
num_car_by_local1['연도'] = max_year
num_car_by_local1

Unnamed: 0,시도,차종,차량대수,연도
0,강원특별자치도,승용,25921,2022
1,강원특별자치도,승합,4630,2022
2,강원특별자치도,특수,161,2022
3,강원특별자치도,화물,20474,2022
4,경기도,승용,201304,2022
...,...,...,...,...
63,충청남도,화물,31439,2022
64,충청북도,승용,29747,2022
65,충청북도,승합,5183,2022
66,충청북도,특수,202,2022


### 등록 차량 대수

In [244]:
num_car_by_local2 = dfm.groupby(['시도', '차종', '최초등록일자_년'], as_index=False)['차대번호'].count()
num_car_by_local2 = num_car_by_local2.rename(columns={'차대번호':'등록차량대수', '최초등록일자_년':'연도'})
num_car_by_local2

Unnamed: 0,시도,차종,연도,등록차량대수
0,강원특별자치도,승용,1920,1
1,강원특별자치도,승용,1987,21
2,강원특별자치도,승용,1988,246
3,강원특별자치도,승용,1989,479
4,강원특별자치도,승용,1990,630
...,...,...,...,...
1507,충청북도,화물,2017,2
1508,충청북도,화물,2018,2
1509,충청북도,화물,2019,3
1510,충청북도,화물,2020,1


### 말소 차량 대수

In [245]:
grp_erase = errc.groupby(['변경일자_년', '시도', '차종'], as_index=False)['차대번호'].count()
grp_erase = grp_erase.rename(columns={'차대번호':'말소차량대수', '변경일자_년':'연도'})
grp_erase = grp_erase.sort_values(['시도'])
grp_erase

Unnamed: 0,연도,시도,차종,말소차량대수
0,2019,강원특별자치도,화물,1
64,2020,강원특별자치도,화물,1
195,2022,경기도,특수,35
196,2022,경기도,화물,3478
132,2021,경기도,화물,7523
...,...,...,...,...
62,2019,충청북도,특수,13
61,2019,충청북도,승합,263
60,2019,충청북도,승용,1791
192,2021,충청북도,화물,1149


In [246]:
y_plist = list(pd.date_range(end=date, periods=4, freq="YS").year)
y_plist

[2019, 2020, 2021, 2022]

In [247]:
yr_list, fuel_list, ctpv_list, cd_list = [], [], [], []
for ctpv in num_car_by_local1['시도'].unique():
    for cd in ['승용', '승합', '화물', '특수']:
        for yrm in y_plist:
            yr_list.append(str(yrm))
            fuel_list.append(fuel)
            ctpv_list.append(ctpv)
            cd_list.append(cd)
base = pd.DataFrame({'연도':yr_list, '시도':ctpv_list, '차종':cd_list})

In [248]:
base

Unnamed: 0,연도,시도,차종
0,2019,강원특별자치도,승용
1,2020,강원특별자치도,승용
2,2021,강원특별자치도,승용
3,2022,강원특별자치도,승용
4,2019,강원특별자치도,승합
...,...,...,...
267,2022,충청북도,화물
268,2019,충청북도,특수
269,2020,충청북도,특수
270,2021,충청북도,특수


In [249]:
base1 = base.merge(num_car_by_local1, on=['연도', '시도', '차종'], how='left')
base1

Unnamed: 0,연도,시도,차종,차량대수
0,2019,강원특별자치도,승용,
1,2020,강원특별자치도,승용,
2,2021,강원특별자치도,승용,
3,2022,강원특별자치도,승용,25921.00
4,2019,강원특별자치도,승합,
...,...,...,...,...
267,2022,충청북도,화물,20986.00
268,2019,충청북도,특수,
269,2020,충청북도,특수,
270,2021,충청북도,특수,


In [250]:
base2 = base1.merge(num_car_by_local2, on=['연도', '시도', '차종'], how='left')
base2

Unnamed: 0,연도,시도,차종,차량대수,등록차량대수
0,2019,강원특별자치도,승용,,2.00
1,2020,강원특별자치도,승용,,
2,2021,강원특별자치도,승용,,
3,2022,강원특별자치도,승용,25921.00,
4,2019,강원특별자치도,승합,,1.00
...,...,...,...,...,...
267,2022,충청북도,화물,20986.00,2.00
268,2019,충청북도,특수,,
269,2020,충청북도,특수,,
270,2021,충청북도,특수,,


In [251]:
base3 = base2.merge(grp_erase, on=['연도', '시도', '차종'], how='left')
base3

Unnamed: 0,연도,시도,차종,차량대수,등록차량대수,말소차량대수
0,2019,강원특별자치도,승용,,2.00,
1,2020,강원특별자치도,승용,,,
2,2021,강원특별자치도,승용,,,
3,2022,강원특별자치도,승용,25921.00,,
4,2019,강원특별자치도,승합,,1.00,
...,...,...,...,...,...,...
267,2022,충청북도,화물,20986.00,2.00,571.00
268,2019,충청북도,특수,,,13.00
269,2020,충청북도,특수,,,10.00
270,2021,충청북도,특수,,,17.00


In [252]:
base3[['차량대수', '등록차량대수', '말소차량대수']] = base3[['차량대수', '등록차량대수', '말소차량대수']].fillna(0)
base3[['차량대수', '등록차량대수', '말소차량대수']].isnull().sum()

차량대수      0
등록차량대수    0
말소차량대수    0
dtype: int64

In [253]:
n = len(base3['연도'].unique())
for i in range(base3.shape[0] // n):
    for j in range(2, n+1):
        base3.loc[(i+1)*n - j, '차량대수'] = base3.loc[(i+1)*n - (j-1), '차량대수'] + base3.loc[(i+1)*n - (j-1), '말소차량대수'] - base3.loc[(i+1)*n - (j-1), '등록차량대수']

In [254]:
base3.tail(20)

Unnamed: 0,연도,시도,차종,차량대수,등록차량대수,말소차량대수
252,2019,충청남도,특수,342.0,0.0,5.0
253,2020,충청남도,특수,335.0,0.0,7.0
254,2021,충청남도,특수,320.0,0.0,15.0
255,2022,충청남도,특수,318.0,0.0,2.0
256,2019,충청북도,승용,37558.0,1.0,1791.0
257,2020,충청북도,승용,34378.0,0.0,3180.0
258,2021,충청북도,승용,31140.0,0.0,3238.0
259,2022,충청북도,승용,29747.0,0.0,1393.0
260,2019,충청북도,승합,6241.0,0.0,263.0
261,2020,충청북도,승합,5848.0,2.0,395.0


In [255]:
today_date = datetime.today().strftime("%Y%m%d")
today_date

'20230811'

In [256]:
base3['테이블생성일자'] = today_date

In [257]:
base4 = base3[[
    '테이블생성일자', 
    '연도', 
    '시도', 
    '차종', 
    '차량대수', 
]]

In [258]:
chc_col = {
    '테이블생성일자':'LOAD_DT', 
    '연도':'CRTR_Y', 
    '시도':'CTPV', 
    '차종':'VHCTY_CD', 
    '차량대수':'VHCL_MKCNT', 
}

In [259]:
STD_BD_GRD4_RGN_CURSTT_MOD = base4.rename(columns=chc_col)
STD_BD_GRD4_RGN_CURSTT_MOD.columns

Index(['LOAD_DT', 'CRTR_Y', 'CTPV', 'VHCTY_CD', 'VHCL_MKCNT'], dtype='object')

In [260]:
STD_BD_GRD4_RGN_CURSTT_MOD.head()

Unnamed: 0,LOAD_DT,CRTR_Y,CTPV,VHCTY_CD,VHCL_MKCNT
0,20230811,2019,강원특별자치도,승용,25921.0
1,20230811,2020,강원특별자치도,승용,25921.0
2,20230811,2021,강원특별자치도,승용,25921.0
3,20230811,2022,강원특별자치도,승용,25921.0
4,20230811,2019,강원특별자치도,승합,4629.0


### [출력] STD_BD_GRD4_RGN_CURSTT_MOD

In [261]:
# expdf = STD_BD_GRD4_RGN_CURSTT_MOD
# table_nm = 'STD_BD_GRD4_RGN_CURSTT_MOD'.upper()

# # 테이블 생성
# sql = 'create or replace table ' + table_nm + '( \n'

# for idx,column in enumerate(expdf.columns):
#     # if 'float' in expdf[column].dtype.name:
#     #     sql += column + ' float'
#     # elif 'int' in expdf[column].dtype.name:
#     #     sql += column + ' number'
#     # else:
#     sql += column + ' varchar(255)'

#     if len(expdf.columns) - 1 != idx:
#         sql += ','
#     sql += '\n'
# sql += ')'    
# we.execute(sql)

# # 데이터 추가
# # 5s
# we.import_from_pandas(expdf, table_nm)

In [262]:
# # 0s
# STD_BD_GRD4_RGN_CURSTT_MOD.to_csv(os.path.join(df1_fold, 'STD_BD_GRD4_RGN_CURSTT_MOD.csv'), index=False)

## [❗] 4등급 차량현황(그룹)
- 연도, 월, 시도, 시군구, 연료, 차종, 차종유형, 용도

### 현재 차량 대수

In [None]:
dfe['시도'].isnull().sum()

In [270]:
dfe['시군구_수정'] = dfe['시군구'].str.split(' ').str[0]

In [271]:
dfe.columns

Index(['자동차등록번호', '차대번호', '법정동코드', '배출가스등급', '배출가스인증번호', '차량말소YN', '제원관리번호',
       '차량연식', '차종', '용도', '최초등록일자', '제작일자', '차량관리번호', '연료', '차종유형', '제작사명',
       '차명', '자동차형식', '엔진형식', '조기폐차상태코드', '조기폐차최종승인YN', '말소일자', '조기폐차신청여부',
       '저감장치구분', '저감장치종류', 'DPF_YN', '시도', '시군구', '시군구_수정'],
      dtype='object')

In [272]:
errc.columns

Index(['자동차등록번호', '차대번호', '법정동코드', '배출가스등급', '배출가스인증번호', '차량말소YN_x', '제원관리번호',
       '차량연식', '차종', '용도', '최초등록일자', '제작일자', '차량관리번호', '연료', '차종유형', '제작사명',
       '차명', '자동차형식', '엔진형식', '차량말소YN_y', '변경일자', '시도', '시군구', '변경일자_년',
       '변경일자_월', '변경일자_일', '시군구_수정'],
      dtype='object')

In [273]:
dfe.shape, errc.shape

((1906823, 29), (353233, 27))

In [274]:
ere = errc.merge(elpm, on='차대번호', how='left')
ere.shape

(353233, 31)

In [275]:
erea = ere.merge(attr, on='차대번호', how='left')
erea.shape

(353233, 34)

In [276]:
erea.columns

Index(['자동차등록번호', '차대번호', '법정동코드', '배출가스등급', '배출가스인증번호', '차량말소YN_x', '제원관리번호',
       '차량연식', '차종', '용도', '최초등록일자', '제작일자', '차량관리번호', '연료', '차종유형', '제작사명',
       '차명', '자동차형식', '엔진형식', '차량말소YN_y', '변경일자', '시도', '시군구', '변경일자_년',
       '변경일자_월', '변경일자_일', '시군구_수정', '조기폐차상태코드', '조기폐차최종승인YN', '말소일자',
       '조기폐차신청여부', '저감장치구분', '저감장치종류', 'DPF_YN'],
      dtype='object')

In [288]:
dfe.shape

(1906823, 29)

In [289]:
dfe['연도'] = max_year
dfe['월'] = max_month

In [290]:
dfe['DPF_YN'].value_counts(dropna=False)

DPF_YN
무       876661
NaN     758754
유       259030
확인불가     12378
Name: count, dtype: int64

In [291]:
dfe['최초등록일자'] = dfe['최초등록일자'].astype('str')
dfe['최초등록일자_년'] = dfe['최초등록일자'].str[:4]
dfe['최초등록일자_월'] = dfe['최초등록일자'].str[4:6]
dfe['최초등록일자_일'] = dfe['최초등록일자'].str[6:8]

In [292]:
dfe.loc[dfe['DPF_YN'] == '유', '저감장치부착유무'] = 'Y'
erea.loc[erea['DPF_YN'] == '유', '저감장치부착유무'] = 'Y'

In [293]:
dfe['말소일자'] = dfe['말소일자'].astype('str')
dfe['말소일자_년'] = dfe['말소일자'].str[:4]
dfe['말소일자_월'] = dfe['말소일자'].str[4:6]
dfe['말소일자_일'] = dfe['말소일자'].str[6:8]

In [294]:
erea['말소일자'] = erea['말소일자'].astype('str')
erea['말소일자_년'] = erea['말소일자'].str[:4]
erea['말소일자_월'] = erea['말소일자'].str[4:6]
erea['말소일자_일'] = erea['말소일자'].str[6:8]

In [295]:
dfe[['시도', '시군구_수정', '연료', '차종', '차종유형', '용도']].isnull().sum()

시도            0
시군구_수정    10692
연료           60
차종            0
차종유형          0
용도            0
dtype: int64

In [296]:
# 2022년 차량 대수
grp1 = dfe[dfe['차량말소YN'] == 'N'].groupby(['연도', '월', '시도', '시군구_수정', '연료', '차종', '차종유형', '용도']).agg({'차대번호':'count', '저감장치부착유무':'count'}).reset_index()
grp1 = grp1.rename(columns={'차대번호':'차량대수', '저감장치부착유무':'저감대수'})
grp1

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,저감대수
0,2022,06,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2,0
1,2022,06,강원특별자치도,강릉시,LPG(액화석유가스),승용,중형,개인용,15,0
2,2022,06,강원특별자치도,강릉시,LPG(액화석유가스),승용,중형,영업용,3,0
3,2022,06,강원특별자치도,강릉시,LPG(액화석유가스),승합,경형,개인용,3,0
4,2022,06,강원특별자치도,강릉시,LPG(액화석유가스),승합,소형,개인용,1,0
...,...,...,...,...,...,...,...,...,...,...
8293,2022,06,충청북도,충주시,휘발유,승합,경형,개인용,13,0
8294,2022,06,충청북도,충주시,휘발유,승합,소형,개인용,3,0
8295,2022,06,충청북도,충주시,휘발유,승합,중형,개인용,1,0
8296,2022,06,충청북도,충주시,휘발유,화물,경형,개인용,7,0


In [297]:
# 연도별 등록대수
grp2 = dfe[dfe['차량말소YN'] == 'N'].groupby(['최초등록일자_년', '최초등록일자_월', '시도', '시군구_수정', '연료', '차종', '차종유형', '용도']).agg({'차대번호':'count', '저감장치부착유무':'count'}).reset_index()
grp2 = grp2.rename(columns={'차대번호':'등록대수', '저감장치부착유무':'등록저감대수', '최초등록일자_년':'연도', '최초등록일자_월':'월'})
grp2

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,등록대수,등록저감대수
0,1920,01,강원특별자치도,고성군,휘발유,승용,중형,개인용,1,0
1,1920,01,경기도,여주시,경유,화물,소형,개인용,1,0
2,1920,01,부산광역시,부산진구,휘발유,승합,경형,개인용,1,0
3,1920,01,서울특별시,용산구,휘발유,승용,소형,개인용,2,0
4,1920,01,울산광역시,북구,경유,승용,대형,개인용,1,0
...,...,...,...,...,...,...,...,...,...,...
204207,2021,09,전라남도,여수시,경유,화물,소형,개인용,1,0
204208,2021,09,전라북도,정읍시,경유,승용,대형,개인용,1,0
204209,2022,04,전라북도,정읍시,경유,화물,중형,개인용,1,1
204210,2022,09,충청북도,영동군,경유,화물,소형,개인용,2,0


In [298]:
# 연도별 말소대수
grp3 = erea.groupby(['변경일자_년', '변경일자_월', '시도', '시군구_수정', '연료', '차종', '차종유형', '용도']).agg({'차대번호':'count', '저감장치부착유무':'count'}).reset_index()
grp3 = grp3.rename(columns={'차대번호':'말소대수', '저감장치부착유무':'말소저감대수', '변경일자_년':'연도', '변경일자_월':'월'})
grp3

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,말소대수,말소저감대수
0,2019,05,경기도,가평군,휘발유,승용,중형,개인용,1,0
1,2019,05,경상북도,김천시,경유,화물,소형,개인용,1,0
2,2019,06,경기도,가평군,경유,화물,소형,개인용,3,0
3,2019,06,경기도,가평군,경유,화물,소형,영업용,1,0
4,2019,06,경기도,가평군,휘발유,승용,중형,개인용,3,0
...,...,...,...,...,...,...,...,...,...,...
71848,2022,06,충청북도,충주시,경유,화물,소형,영업용,1,0
71849,2022,06,충청북도,충주시,휘발유,승용,경형,개인용,2,0
71850,2022,06,충청북도,충주시,휘발유,승용,대형,개인용,1,0
71851,2022,06,충청북도,충주시,휘발유,승용,소형,개인용,2,0


In [299]:
# 연도별 조기폐차 대수
grp4 = dfe.groupby(['말소일자_년', '말소일자_월', '시도', '시군구_수정', '연료', '차종', '차종유형', '용도']).agg({'조기폐차최종승인YN':'count'}).reset_index()
grp4 = grp4.rename(columns={'말소일자_년':'연도', '말소일자_월':'월', '조기폐차최종승인YN':'조기폐차'})
grp4

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,조기폐차
0,2020,12,경기도,김포시,경유,승용,중형,개인용,1
1,2023,02,경기도,고양시,경유,승용,대형,개인용,17
2,2023,02,경기도,고양시,경유,승용,소형,개인용,4
3,2023,02,경기도,고양시,경유,승용,중형,개인용,22
4,2023,02,경기도,고양시,경유,승합,중형,개인용,10
...,...,...,...,...,...,...,...,...,...
12864,,,충청북도,충주시,휘발유,승합,경형,개인용,0
12865,,,충청북도,충주시,휘발유,승합,소형,개인용,0
12866,,,충청북도,충주시,휘발유,승합,중형,개인용,0
12867,,,충청북도,충주시,휘발유,화물,경형,개인용,0


In [300]:
date

'20220601'

In [301]:
y_plist = list(pd.date_range(end=date, periods=4, freq="MS").year)
mth_plist = list(pd.date_range(end=date, periods=4, freq="MS").month)
y_plist, mth_plist

([2022, 2022, 2022, 2022], [3, 4, 5, 6])

In [302]:
ctpv_sgg = grp1.drop_duplicates(['시도', '시군구_수정']).reset_index(drop=True)
# for ctpv, sgg in ctpv_sgg[['시도', '시군구_수정']].values:
#     print(ctpv, sgg)

In [303]:
# 18s
# 4개월 차량 통계 기본 데이터셋
ctpv_list, sgg_list, fuel_list, vhcty_list, ty_list, purps_list, yr_list, month_list = [], [], [], [], [], [], [], []
ctpv_sgg = grp1.drop_duplicates(['시도', '시군구_수정']).reset_index(drop=True)
for ctpv, sgg in ctpv_sgg[['시도', '시군구_수정']].values:
    for fuel in grp1['연료'].unique():
        for vhcty in grp1['차종'].unique():
            for ty in grp1['차종유형'].unique():
                for purps in grp1['용도'].unique():
                    for yr, month in zip(y_plist, mth_plist):
                        ctpv_list.append(ctpv)
                        sgg_list.append(sgg)
                        fuel_list.append(fuel)
                        vhcty_list.append(vhcty)
                        ty_list.append(ty)
                        purps_list.append(purps)
                        yr_list.append(str(yr))
                        month_list.append(f'{month:0>2}')
base = pd.DataFrame({'연도':yr_list, '월':month_list, '시도':ctpv_list, '시군구_수정':sgg_list, '연료':fuel_list, '차종':vhcty_list, '차종유형':ty_list, '용도':purps_list})
base

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도
0,2022,03,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용
1,2022,04,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용
2,2022,05,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용
3,2022,06,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용
4,2022,03,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,영업용
...,...,...,...,...,...,...,...,...
266107,2022,06,충청북도,충주시,CNG(압축천연가스),특수,소형,영업용
266108,2022,03,충청북도,충주시,CNG(압축천연가스),특수,소형,관용
266109,2022,04,충청북도,충주시,CNG(압축천연가스),특수,소형,관용
266110,2022,05,충청북도,충주시,CNG(압축천연가스),특수,소형,관용


In [304]:
base['연료'].unique()

array(['LPG(액화석유가스)', '경유', '휘발유', '기타연료', '알코올', 'CNG(압축천연가스)'],
      dtype=object)

In [305]:
base1 = base.merge(grp1, on=['연도', '월', '시도', '시군구_수정', '연료', '차종', '차종유형', '용도'], how='left')
base1

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,저감대수
0,2022,03,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,,
1,2022,04,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,,
2,2022,05,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,,
3,2022,06,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2.00,0.00
4,2022,03,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,영업용,,
...,...,...,...,...,...,...,...,...,...,...
266107,2022,06,충청북도,충주시,CNG(압축천연가스),특수,소형,영업용,,
266108,2022,03,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,,
266109,2022,04,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,,
266110,2022,05,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,,


In [306]:
base2 = base1.merge(grp2, on=['연도', '월', '시도', '시군구_수정', '연료', '차종', '차종유형', '용도'], how='left')
base2

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,저감대수,등록대수,등록저감대수
0,2022,03,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,,,,
1,2022,04,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,,,,
2,2022,05,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,,,,
3,2022,06,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2.00,0.00,,
4,2022,03,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,영업용,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
266107,2022,06,충청북도,충주시,CNG(압축천연가스),특수,소형,영업용,,,,
266108,2022,03,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,,,,
266109,2022,04,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,,,,
266110,2022,05,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,,,,


In [307]:
base3 = base2.merge(grp3, on=['연도', '월', '시도', '시군구_수정', '연료', '차종', '차종유형', '용도'], how='left')
base3

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,저감대수,등록대수,등록저감대수,말소대수,말소저감대수
0,2022,03,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,,,,,,
1,2022,04,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,,,,,,
2,2022,05,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,,,,,,
3,2022,06,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2.00,0.00,,,,
4,2022,03,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,영업용,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266107,2022,06,충청북도,충주시,CNG(압축천연가스),특수,소형,영업용,,,,,,
266108,2022,03,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,,,,,,
266109,2022,04,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,,,,,,
266110,2022,05,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,,,,,,


In [308]:
base4 = base3.merge(grp4, on=['연도', '월', '시도', '시군구_수정', '연료', '차종', '차종유형', '용도'], how='left')
base4

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,저감대수,등록대수,등록저감대수,말소대수,말소저감대수,조기폐차
0,2022,03,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,,,,,,,
1,2022,04,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,,,,,,,
2,2022,05,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,,,,,,,
3,2022,06,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2.00,0.00,,,,,
4,2022,03,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,영업용,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
266107,2022,06,충청북도,충주시,CNG(압축천연가스),특수,소형,영업용,,,,,,,
266108,2022,03,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,,,,,,,
266109,2022,04,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,,,,,,,
266110,2022,05,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,,,,,,,


In [309]:
base4.columns

Index(['연도', '월', '시도', '시군구_수정', '연료', '차종', '차종유형', '용도', '차량대수', '저감대수',
       '등록대수', '등록저감대수', '말소대수', '말소저감대수', '조기폐차'],
      dtype='object')

In [310]:
base4[['차량대수', '조기폐차', '저감대수', '등록대수', '등록저감대수', '말소대수', '말소저감대수']].isnull().sum()

차량대수      257814
조기폐차      266112
저감대수      257814
등록대수      266111
등록저감대수    266111
말소대수      258784
말소저감대수    258784
dtype: int64

In [311]:
base4[['차량대수', '조기폐차', '저감대수', '등록대수', '등록저감대수', '말소대수', '말소저감대수']] = base4[['차량대수', '조기폐차', '저감대수', '등록대수', '등록저감대수', '말소대수', '말소저감대수']].fillna(0)

In [312]:
base4[base4['차량대수'] != 0].shape

(8298, 15)

In [313]:
base4[base4['차량대수'] != 0].head()

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,저감대수,등록대수,등록저감대수,말소대수,말소저감대수,조기폐차
3,2022,6,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2.0,0.0,0.0,0.0,0.0,0.0,0.0
15,2022,6,강원특별자치도,강릉시,LPG(액화석유가스),승용,중형,개인용,15.0,0.0,0.0,0.0,0.0,0.0,0.0
19,2022,6,강원특별자치도,강릉시,LPG(액화석유가스),승용,중형,영업용,3.0,0.0,0.0,0.0,0.0,0.0,0.0
63,2022,6,강원특별자치도,강릉시,LPG(액화석유가스),승합,중형,개인용,7.0,0.0,0.0,0.0,0.0,0.0,0.0
75,2022,6,강원특별자치도,강릉시,LPG(액화석유가스),승합,경형,개인용,3.0,0.0,0.0,0.0,0.0,0.0,0.0


In [314]:
base4[(base4['차량대수'] != 0) & (base4['등록대수'] != 0)]

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,저감대수,등록대수,등록저감대수,말소대수,말소저감대수,조기폐차


In [315]:
n = 64803
base4.iloc[n-3:n+1]

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,저감대수,등록대수,등록저감대수,말소대수,말소저감대수,조기폐차
64800,2022,3,경상남도,사천시,경유,화물,대형,개인용,0.0,0.0,0.0,0.0,0.0,0.0,0.0
64801,2022,4,경상남도,사천시,경유,화물,대형,개인용,0.0,0.0,0.0,0.0,0.0,0.0,0.0
64802,2022,5,경상남도,사천시,경유,화물,대형,개인용,0.0,0.0,0.0,0.0,0.0,0.0,0.0
64803,2022,6,경상남도,사천시,경유,화물,대형,개인용,41.0,21.0,0.0,0.0,0.0,0.0,0.0


In [316]:
base4[['차량대수_1', '등록대수_1', '말소대수_1', '저감대수_1', '등록저감대수_1', '말소저감대수_1']] = base4[['차량대수', '등록대수', '말소대수', '저감대수', '등록저감대수', '말소저감대수']].shift(-1)
base4.iloc[n-3:n+1]

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,저감대수,...,등록저감대수,말소대수,말소저감대수,조기폐차,차량대수_1,등록대수_1,말소대수_1,저감대수_1,등록저감대수_1,말소저감대수_1
64800,2022,3,경상남도,사천시,경유,화물,대형,개인용,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
64801,2022,4,경상남도,사천시,경유,화물,대형,개인용,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
64802,2022,5,경상남도,사천시,경유,화물,대형,개인용,0.0,0.0,...,0.0,0.0,0.0,0.0,41.0,0.0,0.0,21.0,0.0,0.0
64803,2022,6,경상남도,사천시,경유,화물,대형,개인용,41.0,21.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [317]:
base4.loc[n, ['차량대수', '저감대수']].values

array([41.0, 21.0], dtype=object)

In [318]:
base4.loc[[x for x in range(3, base4.shape[0], 4)], ['차량대수_1', '저감대수_1']]  = base4.loc[[x for x in range(3, base4.shape[0], 4)], ['차량대수', '저감대수']].values
base4.loc[[x for x in range(3, base4.shape[0], 4)], ['등록대수_1', '말소대수_1', '등록저감대수_1', '말소저감대수_1']] = 0

In [319]:
base4.iloc[n-3:n+1]

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,저감대수,...,등록저감대수,말소대수,말소저감대수,조기폐차,차량대수_1,등록대수_1,말소대수_1,저감대수_1,등록저감대수_1,말소저감대수_1
64800,2022,3,경상남도,사천시,경유,화물,대형,개인용,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
64801,2022,4,경상남도,사천시,경유,화물,대형,개인용,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
64802,2022,5,경상남도,사천시,경유,화물,대형,개인용,0.0,0.0,...,0.0,0.0,0.0,0.0,41.0,0.0,0.0,21.0,0.0,0.0
64803,2022,6,경상남도,사천시,경유,화물,대형,개인용,41.0,21.0,...,0.0,0.0,0.0,0.0,41.0,0.0,0.0,21.0,0.0,0.0


In [320]:
base4['차량대수'] = base4['차량대수_1'] - base4['등록대수_1'] + base4['말소대수_1']

In [321]:
base4.iloc[n-3:n+1]

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,저감대수,...,등록저감대수,말소대수,말소저감대수,조기폐차,차량대수_1,등록대수_1,말소대수_1,저감대수_1,등록저감대수_1,말소저감대수_1
64800,2022,3,경상남도,사천시,경유,화물,대형,개인용,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
64801,2022,4,경상남도,사천시,경유,화물,대형,개인용,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
64802,2022,5,경상남도,사천시,경유,화물,대형,개인용,41.0,0.0,...,0.0,0.0,0.0,0.0,41.0,0.0,0.0,21.0,0.0,0.0
64803,2022,6,경상남도,사천시,경유,화물,대형,개인용,41.0,21.0,...,0.0,0.0,0.0,0.0,41.0,0.0,0.0,21.0,0.0,0.0


In [322]:
# # 1h 16m 23.3s
# n = len(base4['월'].unique())
# for i in range(base4.shape[0] // n):
#     for j in range(2, n+1):
#         base4.loc[(i+1)*n - j, '차량대수'] = base4.loc[(i+1)*n - (j-1), '차량대수'] + base4.loc[(i+1)*n - (j-1), '말소대수'] - base4.loc[(i+1)*n - (j-1), '등록대수']
#         base4.loc[(i+1)*n - j, '저감대수'] = base4.loc[(i+1)*n - (j-1), '저감대수'] + base4.loc[(i+1)*n - (j-1), '말소저감대수'] - base4.loc[(i+1)*n - (j-1), '등록저감대수']

In [323]:
base5 = base4[['연도', '월', '시도', '시군구_수정', '연료', '차종', '차종유형', '용도', '차량대수', '조기폐차', '저감대수']]
base5

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,조기폐차,저감대수
0,2022,03,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,0.00,0.00,0.00
1,2022,04,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,0.00,0.00,0.00
2,2022,05,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2.00,0.00,0.00
3,2022,06,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2.00,0.00,0.00
4,2022,03,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,영업용,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...
266107,2022,06,충청북도,충주시,CNG(압축천연가스),특수,소형,영업용,0.00,0.00,0.00
266108,2022,03,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,0.00,0.00,0.00
266109,2022,04,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,0.00,0.00,0.00
266110,2022,05,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,0.00,0.00,0.00


In [324]:
base5['감소대수'] = base5['차량대수'].shift() - base5['차량대수']
base5['감소율'] = base5['감소대수'] / base5['차량대수'].shift()
base5

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  base5['감소대수'] = base5['차량대수'].shift() - base5['차량대수']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  base5['감소율'] = base5['감소대수'] / base5['차량대수'].shift()


Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,조기폐차,저감대수,감소대수,감소율
0,2022,03,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,0.00,0.00,0.00,,
1,2022,04,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,0.00,0.00,0.00,0.00,
2,2022,05,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2.00,0.00,0.00,-2.00,-inf
3,2022,06,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2.00,0.00,0.00,0.00,0.00
4,2022,03,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,영업용,0.00,0.00,0.00,2.00,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
266107,2022,06,충청북도,충주시,CNG(압축천연가스),특수,소형,영업용,0.00,0.00,0.00,0.00,
266108,2022,03,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,0.00,0.00,0.00,0.00,
266109,2022,04,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,0.00,0.00,0.00,0.00,
266110,2022,05,충청북도,충주시,CNG(압축천연가스),특수,소형,관용,0.00,0.00,0.00,0.00,


In [325]:
base5[base5['차량대수'] > 0]

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,조기폐차,저감대수,감소대수,감소율
2,2022,05,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2.00,0.00,0.00,-2.00,-inf
3,2022,06,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2.00,0.00,0.00,0.00,0.00
14,2022,05,강원특별자치도,강릉시,LPG(액화석유가스),승용,중형,개인용,15.00,0.00,0.00,-15.00,-inf
15,2022,06,강원특별자치도,강릉시,LPG(액화석유가스),승용,중형,개인용,15.00,0.00,0.00,0.00,0.00
18,2022,05,강원특별자치도,강릉시,LPG(액화석유가스),승용,중형,영업용,3.00,0.00,0.00,-3.00,-inf
...,...,...,...,...,...,...,...,...,...,...,...,...,...
265431,2022,06,충청북도,충주시,휘발유,승합,소형,개인용,3.00,0.00,0.00,0.00,0.00
265466,2022,05,충청북도,충주시,휘발유,화물,경형,개인용,7.00,0.00,0.00,-7.00,-inf
265467,2022,06,충청북도,충주시,휘발유,화물,경형,개인용,7.00,0.00,0.00,0.00,0.00
265478,2022,05,충청북도,충주시,휘발유,화물,소형,개인용,15.00,0.00,0.00,-15.00,-inf


In [326]:
base5.iloc[12:16]

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,조기폐차,저감대수,감소대수,감소율
12,2022,3,강원특별자치도,강릉시,LPG(액화석유가스),승용,중형,개인용,0.0,0.0,0.0,0.0,
13,2022,4,강원특별자치도,강릉시,LPG(액화석유가스),승용,중형,개인용,0.0,0.0,0.0,0.0,
14,2022,5,강원특별자치도,강릉시,LPG(액화석유가스),승용,중형,개인용,15.0,0.0,0.0,-15.0,-inf
15,2022,6,강원특별자치도,강릉시,LPG(액화석유가스),승용,중형,개인용,15.0,0.0,0.0,0.0,0.0


In [327]:
base5[(base5['감소율'] == -np.inf) | (base5['감소율'] == np.inf)].shape

(8509, 13)

In [328]:
base5.loc[(base5['감소율'] == -np.inf) | (base5['감소율'] == np.inf), '감소율'] = 0
base5['감소율'] = base5['감소율'].fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  base5['감소율'] = base5['감소율'].fillna(0)


In [329]:
base5['저감장치미부착대수'] = base5['차량대수'] - base5['저감대수']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  base5['저감장치미부착대수'] = base5['차량대수'] - base5['저감대수']


In [330]:
base5[base5['차량대수'] < 0].shape

(0, 14)

In [331]:
base5[base5['저감장치미부착대수'] < 0].shape

(0, 14)

In [332]:
base5.loc[base5['차량대수'] < 0, '차량대수'] = 0
base5.loc[base5['저감장치미부착대수'] < 0, '저감장치미부착대수'] = 0

In [333]:
base5.head()

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,조기폐차,저감대수,감소대수,감소율,저감장치미부착대수
0,2022,3,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,0.0,0.0,0.0,,0.0,0.0
1,2022,4,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,0.0,0.0,0.0,0.0,0.0,0.0
2,2022,5,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2.0,0.0,0.0,-2.0,0.0,2.0
3,2022,6,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2.0,0.0,0.0,0.0,0.0,2.0
4,2022,3,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,영업용,0.0,0.0,0.0,2.0,1.0,0.0


In [334]:
base5.loc[[x for x in range(0, base5.shape[0], 4)], '감소율'] = 0

In [335]:
base5.head()

Unnamed: 0,연도,월,시도,시군구_수정,연료,차종,차종유형,용도,차량대수,조기폐차,저감대수,감소대수,감소율,저감장치미부착대수
0,2022,3,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,0.0,0.0,0.0,,0.0,0.0
1,2022,4,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,0.0,0.0,0.0,0.0,0.0,0.0
2,2022,5,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2.0,0.0,0.0,-2.0,0.0,2.0
3,2022,6,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,개인용,2.0,0.0,0.0,0.0,0.0,2.0
4,2022,3,강원특별자치도,강릉시,LPG(액화석유가스),승용,대형,영업용,0.0,0.0,0.0,2.0,0.0,0.0


In [336]:
base5['테이블생성일자'] = today_date

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  base5['테이블생성일자'] = today_date


In [337]:
base5 = base5.rename(columns={'조기폐차':'조기폐차대수', '저감대수':'저감장치부착대수'})
base5.columns

Index(['연도', '월', '시도', '시군구_수정', '연료', '차종', '차종유형', '용도', '차량대수', '조기폐차대수',
       '저감장치부착대수', '감소대수', '감소율', '저감장치미부착대수', '테이블생성일자'],
      dtype='object')

In [338]:
STD_BD_DAT_GRD4_CAR_CURSTT = base5[[
    '연도',
    '월',
    '시도',
    '시군구_수정',
    '연료',
    '차종',
    '차종유형', 
    '용도',
    '차량대수',
    '감소율',
    '저감장치부착대수',
    '저감장치미부착대수',
    '조기폐차대수',
    '테이블생성일자',
]]

In [339]:
chc_col = {
    '연도':'YR',
    '월':'MM',
    '시도':'CTPV',
    '시군구_수정':'SGG',
    '연료':'FUEL_CD',
    '차종':'VHCTY_CD',
    '차종유형':'VHCTY_TY', 
    '용도':'PURPS_CD2',
    '차량대수':'VHCL_MKCNT',
    '감소율':'DEC_RT',
    '저감장치부착대수':'RDCDVC_EXTRNS_MKCNT',
    '저감장치미부착대수':'RDCDVC_UNAT_MKCNT',
    '조기폐차대수':'ELPDSRC_MKCNT',
    '테이블생성일자':'LOAD_DT', 
}

In [340]:
STD_BD_DAT_GRD4_CAR_CURSTT = STD_BD_DAT_GRD4_CAR_CURSTT.rename(columns=chc_col)
STD_BD_DAT_GRD4_CAR_CURSTT.columns

Index(['YR', 'MM', 'CTPV', 'SGG', 'FUEL_CD', 'VHCTY_CD', 'VHCTY_TY',
       'PURPS_CD2', 'VHCL_MKCNT', 'DEC_RT', 'RDCDVC_EXTRNS_MKCNT',
       'RDCDVC_UNAT_MKCNT', 'ELPDSRC_MKCNT', 'LOAD_DT'],
      dtype='object')

In [341]:
# expdf = STD_BD_DAT_GRD4_CAR_CURSTT
# table_nm = 'STD_BD_DAT_GRD4_CAR_CURSTT'.upper()

# # 테이블 생성
# sql = 'create or replace table ' + table_nm + '( \n'

# for idx,column in enumerate(expdf.columns):
#     # if 'float' in expdf[column].dtype.name:
#     #     sql += column + ' float'
#     # elif 'int' in expdf[column].dtype.name:
#     #     sql += column + ' number'
#     # else:
#     sql += column + ' varchar(255)'

#     if len(expdf.columns) - 1 != idx:
#         sql += ','
#     sql += '\n'
# sql += ')'    
# we.execute(sql)

# # 데이터 추가
# # 5s
# we.import_from_pandas(expdf, table_nm)

In [428]:
# # 2s
# STD_BD_DAT_GRD4_CAR_CURSTT.to_csv(os.path.join(df1_fold, 'STD_BD_DAT_GRD4_CAR_CURSTT(edited).csv'), index=False)

# code end