In [1]:
import pandas as pd

# Load the raw train/test data.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk; chunked inference is what produces mixed-type
# columns and the accompanying DtypeWarning on large CSVs.
# NOTE(review): the saved output of this cell shows a warning pointing at the
# train.csv read -- presumably that DtypeWarning; confirm it is gone after re-running.
train_df = pd.read_csv('../data/raw/train.csv', low_memory=False)
test_df = pd.read_csv('../data/raw/test.csv', low_memory=False)

# Core columns required for modeling, as fixed by the PRD.
# 'target' exists only in the train data, so it is excluded when handling test data.
necessary_cols = [
    # Price information
    'target',
    # Basic physical characteristics
    '전용면적(㎡)', '층', '건축년도',
    # Time information
    '계약년월', '계약일',
    # Location information (missing values are to be filled in the next step)
    '시군구', '도로명', '좌표X', '좌표Y',
    # Complex (housing estate) information
    'k-전체세대수', '주차대수', 'k-건설사(시공사)',
    # Detailed complex classification
    'k-관리방식', 'k-난방방식', 'k-복도유형', '거래유형'
]

# Keep only the required columns from the train data ('target' included).
# .copy() makes train_final an independent frame, so the later fill-in steps can
# assign into it without raising SettingWithCopyWarning.
train_final = train_df[necessary_cols].copy()

# Keep only the required columns from the test data: every necessary column
# except 'target', which the test set does not have.
test_cols = [col for col in necessary_cols if col != 'target']
test_final = test_df[test_cols].copy()


print("['필요한 것만 선택' 방식 완료] 최종 데이터 shape:")
print(f"Train 최종 데이터: {train_final.shape}")
print(f"Test 최종 데이터: {test_final.shape}")

print("\n최종 선택된 컬럼 목록:")
print(train_final.columns.tolist())

  train_df = pd.read_csv('../data/raw/train.csv')


['필요한 것만 선택' 방식 완료] 최종 데이터 shape:
Train 최종 데이터: (1118822, 17)
Test 최종 데이터: (9272, 16)

최종 선택된 컬럼 목록:
['target', '전용면적(㎡)', '층', '건축년도', '계약년월', '계약일', '시군구', '도로명', '좌표X', '좌표Y', 'k-전체세대수', '주차대수', 'k-건설사(시공사)', 'k-관리방식', 'k-난방방식', 'k-복도유형', '거래유형']


In [2]:
# Preview both ends of the raw training frame: the first 30 rows, then the last 30.
display(train_df.head(30), train_df.tail(30))


Unnamed: 0,시군구,번지,본번,부번,아파트명,전용면적(㎡),계약년월,계약일,층,건축년도,...,건축면적,주차대수,기타/의무/임대/임의=1/2/3/4,단지승인일,사용허가여부,관리비 업로드,좌표X,좌표Y,단지신청일,target
0,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성,79.97,201712,8,3,1987,...,4858.0,262.0,임의,2022-11-17 13:00:29.0,Y,N,127.05721,37.476763,2022-11-17 10:19:06.0,124000
1,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성,79.97,201712,22,4,1987,...,4858.0,262.0,임의,2022-11-17 13:00:29.0,Y,N,127.05721,37.476763,2022-11-17 10:19:06.0,123500
2,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성,54.98,201712,28,5,1987,...,4858.0,262.0,임의,2022-11-17 13:00:29.0,Y,N,127.05721,37.476763,2022-11-17 10:19:06.0,91500
3,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성,79.97,201801,3,4,1987,...,4858.0,262.0,임의,2022-11-17 13:00:29.0,Y,N,127.05721,37.476763,2022-11-17 10:19:06.0,130000
4,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성,79.97,201801,8,2,1987,...,4858.0,262.0,임의,2022-11-17 13:00:29.0,Y,N,127.05721,37.476763,2022-11-17 10:19:06.0,117000
5,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성,79.97,201801,11,1,1987,...,4858.0,262.0,임의,2022-11-17 13:00:29.0,Y,N,127.05721,37.476763,2022-11-17 10:19:06.0,130000
6,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성,79.97,201803,19,2,1987,...,4858.0,262.0,임의,2022-11-17 13:00:29.0,Y,N,127.05721,37.476763,2022-11-17 10:19:06.0,139500
7,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성,54.98,201804,5,5,1987,...,4858.0,262.0,임의,2022-11-17 13:00:29.0,Y,N,127.05721,37.476763,2022-11-17 10:19:06.0,107500
8,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성,79.97,201806,28,3,1987,...,4858.0,262.0,임의,2022-11-17 13:00:29.0,Y,N,127.05721,37.476763,2022-11-17 10:19:06.0,145000
9,서울특별시 강남구 개포동,658-1,658.0,1.0,개포6차우성,54.98,201807,9,3,1987,...,4858.0,262.0,임의,2022-11-17 13:00:29.0,Y,N,127.05721,37.476763,2022-11-17 10:19:06.0,112000


Unnamed: 0,시군구,번지,본번,부번,아파트명,전용면적(㎡),계약년월,계약일,층,건축년도,...,건축면적,주차대수,기타/의무/임대/임의=1/2/3/4,단지승인일,사용허가여부,관리비 업로드,좌표X,좌표Y,단지신청일,target
1118792,서울특별시 은평구 갈현동,545,545.0,0.0,코오롱하늘채,84.9729,200709,12,12,2003,...,2079.43,235.0,의무,2019-03-13 15:21:14.0,Y,N,126.910985,37.623216,2013-03-07 09:46:22.0,34000
1118793,서울특별시 은평구 갈현동,545,545.0,0.0,코오롱하늘채,123.2858,200709,17,13,2003,...,2079.43,235.0,의무,2019-03-13 15:21:14.0,Y,N,126.910985,37.623216,2013-03-07 09:46:22.0,45000
1118794,서울특별시 은평구 갈현동,545,545.0,0.0,코오롱하늘채,84.9462,200709,18,11,2003,...,2079.43,235.0,의무,2019-03-13 15:21:14.0,Y,N,126.910985,37.623216,2013-03-07 09:46:22.0,33200
1118795,서울특별시 은평구 구산동,382,382.0,0.0,갈현현대,59.94,200701,7,2,1998,...,0.0,366.0,의무,2013-06-04 16:18:51.0,Y,N,126.905638,37.612962,2013-03-07 09:46:27.0,14700
1118796,서울특별시 은평구 구산동,382,382.0,0.0,갈현현대,84.83,200702,4,12,1998,...,0.0,366.0,의무,2013-06-04 16:18:51.0,Y,N,126.905638,37.612962,2013-03-07 09:46:27.0,25400
1118797,서울특별시 은평구 구산동,382,382.0,0.0,갈현현대,59.94,200702,13,14,1998,...,0.0,366.0,의무,2013-06-04 16:18:51.0,Y,N,126.905638,37.612962,2013-03-07 09:46:27.0,15500
1118798,서울특별시 은평구 구산동,382,382.0,0.0,갈현현대,53.54,200702,28,7,1998,...,0.0,366.0,의무,2013-06-04 16:18:51.0,Y,N,126.905638,37.612962,2013-03-07 09:46:27.0,12700
1118799,서울특별시 은평구 구산동,382,382.0,0.0,갈현현대,59.94,200703,13,13,1998,...,0.0,366.0,의무,2013-06-04 16:18:51.0,Y,N,126.905638,37.612962,2013-03-07 09:46:27.0,19000
1118800,서울특별시 은평구 구산동,382,382.0,0.0,갈현현대,59.94,200703,16,6,1998,...,0.0,366.0,의무,2013-06-04 16:18:51.0,Y,N,126.905638,37.612962,2013-03-07 09:46:27.0,17850
1118801,서울특별시 은평구 구산동,382,382.0,0.0,갈현현대,53.54,200703,27,2,1998,...,0.0,366.0,의무,2013-06-04 16:18:51.0,Y,N,126.905638,37.612962,2013-03-07 09:46:27.0,12500


In [3]:
# Print every column name of the raw training frame as a plain Python list.
print(list(train_df.columns))

['시군구', '번지', '본번', '부번', '아파트명', '전용면적(㎡)', '계약년월', '계약일', '층', '건축년도', '도로명', '해제사유발생일', '등기신청일자', '거래유형', '중개사소재지', 'k-단지분류(아파트,주상복합등등)', 'k-전화번호', 'k-팩스번호', '단지소개기존clob', 'k-세대타입(분양형태)', 'k-관리방식', 'k-복도유형', 'k-난방방식', 'k-전체동수', 'k-전체세대수', 'k-건설사(시공사)', 'k-시행사', 'k-사용검사일-사용승인일', 'k-연면적', 'k-주거전용면적', 'k-관리비부과면적', 'k-전용면적별세대현황(60㎡이하)', 'k-전용면적별세대현황(60㎡~85㎡이하)', 'k-85㎡~135㎡이하', 'k-135㎡초과', 'k-홈페이지', 'k-등록일자', 'k-수정일자', '고용보험관리번호', '경비비관리형태', '세대전기계약방법', '청소비관리형태', '건축면적', '주차대수', '기타/의무/임대/임의=1/2/3/4', '단지승인일', '사용허가여부', '관리비 업로드', '좌표X', '좌표Y', '단지신청일', 'target']
