In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# (Korean region name, short region code) pairs, one per region.
# The short code is what the helper cells below use to build file names
# such as '<code>_region'.
region_table = [
    ('강원도', 'gw'),
    ('경기도', 'gg'),
    ('경상남도', 'gsn'),
    ('경상북도', 'gsb'),
    ('광주광역시', 'gj'),
    ('대구광역시', 'dg'),
    ('대전광역시', 'dj'),
    ('부산광역시', 'bs'),
    ('서울특별시', 'so'),
    ('세종특별자치시', 'sj'),
    ('울산광역시', 'us'),
    ('인천광역시', 'ic'),
    ('전라남도', 'jrn'),
    ('전라북도', 'jrb'),
    ('제주특별자치도', 'jj'),
    ('충청남도', 'ccn'),
    ('충청북도', 'ccb'),
]


In [1]:
def make_default_data(region, apt_floor=5, period_completed=0, pyung=25.71):
    """Build the default prediction input row for one region.

    Reads ``region-ml-data-files/{region}.csv``, takes its last row, removes
    the two target columns and overwrites the apartment-specific columns with
    representative defaults. The result is printed (as before) and also
    returned so it no longer has to be copied by hand from the cell output.

    Parameters
    ----------
    region : str
        Base name of the CSV file, e.g. ``'gw_region'``.
    apt_floor, period_completed, pyung :
        Values written into the columns of the same name. The defaults are
        the constants this function used to hard-code.

    Returns
    -------
    list of dict
        A one-element list of records (``DataFrame.to_dict('records')``).

    Raises
    ------
    ValueError
        If the CSV has no data rows.
    KeyError
        If an expected column is missing from the CSV.
    """
    region_df = pd.read_csv(f'region-ml-data-files/{region}.csv')
    if region_df.empty:
        # Without this guard the lookup of the most common region_sub below
        # fails with an IndexError that does not say which file was empty.
        raise ValueError(f'region-ml-data-files/{region}.csv has no data rows')

    # drop() returns a new frame, so the assignments below never touch region_df.
    default_row = region_df.tail(1).drop(
        ['price_per_pyung_present', 'price_cat'], axis=1
    )
    # value_counts() sorts by descending frequency: index[0] is the most common label.
    default_row['region_sub'] = region_df['region_sub'].value_counts().index[0]
    default_row['apt_floor'] = apt_floor
    default_row['period_completed'] = period_completed
    default_row['pyung'] = pyung

    records = default_row.to_dict('records')
    print(records)
    return records

# for region in region_table:
#     make_default_data(region[1]+'_region')


In [3]:
def make_region_sub_data(region):
    """Print and return the sorted distinct ``region_sub`` labels of a region.

    Parameters
    ----------
    region : str
        Base name of the CSV under ``region-ml-data-files/``, e.g. ``'gw_region'``.

    Returns
    -------
    numpy.ndarray
        Sorted unique ``region_sub`` values, with missing entries excluded.
    """
    region_df = pd.read_csv(f'region-ml-data-files/{region}.csv')
    # np.unique sorts its input; a NaN (float) mixed with str labels would
    # raise TypeError during that sort, so missing values are dropped first.
    sub_regions = np.unique(region_df['region_sub'].dropna().values)
    print(sub_regions)
    return sub_regions

# for region in region_table:
#     make_region_sub_data(region[1]+'_region')

In [4]:
def get_apt_floor_data(region):
    """Print ``region`` followed by the most frequent ``apt_floor`` in its CSV.

    ``region`` is the base name of a file under ``region-ml-data-files/``.
    Nothing is returned; the result is only written to stdout.
    """
    csv_path = f'region-ml-data-files/{region}.csv'
    floor_counts = pd.read_csv(csv_path)['apt_floor'].value_counts()
    # value_counts() orders by descending frequency, so the first label wins.
    most_common_floor = floor_counts.index[0]
    print(region, most_common_floor)

# for region in region_table:
#     get_apt_floor_data(region[1]+'_region')

In [4]:
# Default model inputs for every region.
#
# Values that are identical in every regional default row are kept in one
# place so a refresh only has to touch a single dict.
_COMMON_DEFAULTS = {
    'apt_floor': 5, 'period_completed': 0, 'pyung': 25.71,
    'kospi200': 280.09, 'kospi200construct': 189.47, 'kosdaq': 737.97, 'dowjones': 25812.88,
    'shanghai': 2984.67, 'exchange_rate': 1199.0, 'gold': 1793.6, 'oil': 39.84,
    'export': 39229801, 'import': 35597740, 'cpi_living': 104.88, 'loan_interest_rate': 2.67,
}


def _region_default(region_sub, **regional_features):
    """Return a one-record list: region_sub, the shared defaults, then regional features.

    Key order matches the hand-written dicts this replaces. A fresh dict is
    built on every call, so the regional defaults never share state.
    """
    return [{'region_sub': region_sub, **_COMMON_DEFAULTS, **regional_features}]


gw_default = _region_default(
    '원주시',
    gw_pop=1540094, gg_pop=13351891, gsb_pop=2644001, ccb_pop=1597936,
    gw_re_ccsi=110.0, gw_loan=21721.7,
)

gg_default = _region_default(
    '화성시',
    gg_pop=13351891, so_pop=9715429, ccn_pop=2120692,
    gg_re_ccsi=123.8, gg_loan=275491.3,
)

gsn_default = _region_default(
    '김해시',
    gsn_pop=3347637, gsb_pop=2644001, jrn_pop=1853339,
    gsn_re_ccsi=110.6, gsn_loan=57313.4,
)

gsb_default = _region_default(
    '구미시',
    gw_pop=1540094, gsn_pop=3347637, gsb_pop=2644001, ccb_pop=1597936,
    gsb_re_ccsi=109.5, gsb_loan=39039.1,
)

gj_default = _region_default(
    '북구',
    gj_pop=1454709, jrn_pop=1853339, jrb_pop=1808044,
    gj_re_ccsi=108.3, gj_loan=26480.8,
)

dg_default = _region_default(
    '달서구',
    gsn_pop=3347637, gsb_pop=2644001, dg_pop=2428022,
    dg_re_ccsi=121.2, dg_loan=44604.4,
)

# NOTE(review): this row carries 'gj_re_ccsi' (same value as gj_default) and no
# 'dj_re_ccsi'. It is kept as-is because the saved dj model accepted it —
# confirm whether the dj model was really trained on gj_re_ccsi.
dj_default = _region_default(
    '서구',
    dj_pop=1470225, sj_pop=346217, ccn_pop=2120692, ccb_pop=1597936,
    gj_re_ccsi=108.3, dj_loan=26862.0,
)

bs_default = _region_default(
    '해운대구',
    gsn_pop=3347637, bs_pop=3402776, so_pop=9715429, us_pop=1141362,
    bs_re_ccsi=112.1, bs_loan=66776.8,
)

so_default = _region_default(
    '노원구',
    gg_pop=13351891, so_pop=9715429, sj_pop=346217,
    so_re_ccsi=131.9, so_loan=336897.1,
)

# NOTE(review): region_sub here is '세종특별자치도' while region_table spells the
# region '세종특별자치시' — confirm this matches the label in the training data.
sj_default = _region_default(
    '세종특별자치도',
    so_pop=9715429, sj_pop=346217, ccn_pop=2120692, ccb_pop=1597936,
    so_re_ccsi=131.9, ccb_re_ccsi=119.6, ccn_re_ccsi=120.3, sj_loan=8117.9,
)

us_default = _region_default(
    '남구',
    gsn_pop=3347637, dg_pop=2428022, bs_pop=3402776, us_pop=1141362,
    us_re_ccsi=116.1, us_loan=21518.6,
)

ic_default = _region_default(
    '부평구',
    gg_pop=13351891, so_pop=9715429, ic_pop=2945565,
    ic_re_ccsi=116.1, ic_loan=59575.8,
)

jrn_default = _region_default(
    '순천시',
    gsn_pop=3347637, gj_pop=1454709, jrn_pop=1853339,
    jrn_re_ccsi=113.0, jrn_loan=24747.1,
)

jrb_default = _region_default(
    '전주완산구',
    gsn_pop=3347637, jrb_pop=1808044, ccn_pop=2120692,
    jrb_re_ccsi=107.3, jrb_loan=26482.5,
)

jj_default = _region_default(
    '제주시',
    gg_pop=13351891, so_pop=9715429, jj_pop=671913,
    so_re_ccsi=131.9, gg_re_ccsi=123.8, jj_loan=16286.1,
)

ccn_default = _region_default(
    '천안서북구',
    sj_pop=346217, jrb_pop=1808044, ccn_pop=2120692, ccb_pop=1597936,
    ccn_re_ccsi=120.3, ccn_loan=34158.3,
)

ccb_default = _region_default(
    '청주흥덕구',
    gw_pop=1540094, gg_pop=13351891, gsb_pop=2644001, ccb_pop=1597936,
    ccb_re_ccsi=119.6, ccb_loan=23045.3,
)


# Same order as region_table, so the two lists can be zipped together.
all_default = [
    gw_default, gg_default, gsn_default, gsb_default, gj_default,
    dg_default, dj_default, bs_default, so_default, sj_default,
    us_default, ic_default, jrn_default, jrb_default, jj_default,
    ccn_default, ccb_default
]

In [5]:
# 기본값 예측 테스트

import joblib
from sklearn.base import BaseEstimator, TransformerMixin

class DataFrameSelector(BaseEstimator, TransformerMixin):
    """Transformer that selects DataFrame columns and returns their values.

    NOTE(review): presumably defined here so that models loaded with joblib
    can resolve this class when unpickling — confirm against the training code.
    """

    def __init__(self, attribute_names):
        # Kept under the same name as the constructor argument.
        self.attribute_names = attribute_names

    def fit(self, x, y=None):
        """Nothing is learned; return the transformer itself."""
        return self

    def transform(self, x):
        """Return the ``.values`` of ``x`` restricted to ``attribute_names``."""
        selected_columns = x[self.attribute_names]
        return selected_columns.values


def test_load_predict(region, default_data):
    """Load the saved model of ``region`` and print its prediction.

    Parameters
    ----------
    region : str
        Short region code; the model is read from ``models/<region>_region.pkl``.
    default_data : list of dict
        Records turned into the DataFrame that is passed to ``model.predict``.
    """
    features = pd.DataFrame.from_records(default_data)

    # NOTE(review): joblib.load unpickles arbitrary objects — only point this
    # at model files from a trusted source.
    regressor = joblib.load(f'models/{region+"_region"}.pkl')

    prediction = regressor.predict(features)
    print(region, prediction)

# Smoke-test every saved regional model with its default input row.
# region_table and all_default are paired by position; zip() stops silently at
# the shorter list, so both must keep the same length and order.
for region, default in zip(region_table, all_default):
    region_code = region[1]
    test_load_predict(region_code, default)


gw [10777797.93185249]
gg [20229856.46281156]
gsn [12520148.60899209]
gsb [11647022.91840059]
gj [13684543.56813291]
dg [12952426.21721984]
dj [29543811.28466105]
bs [21326895.44078093]
so [23874274.13615971]
sj [23064061.4021448]
us [22851853.60236808]
ic [13957461.84119417]
jrn [12797270.9510964]
jrb [11680642.5775578]
jj [14801202.79997045]
ccn [22871428.96545945]
ccb [13676230.7016771]


In [None]:
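# NOTE(review): appears to be output kept from an earlier run; the dj value
# below differs from the current output of the cell above — confirm which is current.
# gw [10777797.93185249]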
# gg [20229856.46281156]
# gsn [12520148.60899209]
# gsb [11647022.91840059]
# gj [13684543.56813291]
# dg [12952426.21721984]
# dj [28052961.46364948]
# bs [21326895.44078093]
# so [23874274.13615971]
# sj [23064061.4021448]
# us [22851853.60236808]
# ic [13957461.84119417]
# jrn [12797270.9510964]
# jrb [11680642.5775578]
# jj [14801202.79997045]
# ccn [22871428.96545945]
# ccb [13676230.7016771]

## 2020년 분양 아파트 가격 예측 비교

In [6]:
import numpy as np
import pandas as pd
from prediction_man import predictionMan
from sklearn.base import BaseEstimator, TransformerMixin

# NOTE(review): this class is also defined in an earlier cell of this notebook;
# when both cells run in the same kernel this definition replaces that one.
class DataFrameSelector(BaseEstimator, TransformerMixin):
    """Transformer that selects DataFrame columns and returns their values."""

    def __init__(self, attribute_names):
        # Kept under the same name as the constructor argument.
        self.attribute_names = attribute_names

    def fit(self, x, y=None):
        """Nothing is learned; return the transformer itself."""
        return self

    def transform(self, x):
        """Return the ``.values`` of ``x`` restricted to ``attribute_names``."""
        selected_columns = x[self.attribute_names]
        return selected_columns.values


In [7]:
# Load the 2020 apartment-offering list and give its five columns fixed names:
# supply location, apartment name, builder, exclusive area, offering price.
apply_home_columns = ['공급위치', '아파트명', '건설사', '전용면적', '분양가격']

readD = pd.read_csv('sub-data-files/apply_home_2020.csv')
readD.columns = apply_home_columns

readD.info()
readD.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 169 entries, 0 to 168
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   공급위치    169 non-null    object 
 1   아파트명    169 non-null    object 
 2   건설사     169 non-null    object 
 3   전용면적    169 non-null    float64
 4   분양가격    169 non-null    int64  
dtypes: float64(1), int64(1), object(3)
memory usage: 6.7+ KB


Unnamed: 0,공급위치,아파트명,건설사,전용면적,분양가격
0,강원도 강릉시 포남동 1117외 5필지,KTX강릉역 동도센트리움,동도건설(주),59.99,287000000
1,강원도 원주시 c4블럭,원주혁신도시 제일풍경채 센텀포레,제일건설(주),91.4,323700000
2,강원도 원주시 c4블럭,원주혁신도시 제일풍경채 센텀포레,제일건설(주),110.58,391500000
3,강원도 속초시 우렁3길 20 (동명동),속초 롯데캐슬 인더스카이,롯데건설(주),79.94,403150000
4,강원도 속초시 우렁3길 20 (동명동),속초 롯데캐슬 인더스카이,롯데건설(주),84.98,466370000


In [8]:
# Derive region, size and per-pyung price columns from the raw offering list.
dataD = readD.copy()

# Split the address on whitespace once and reuse the result: the first token
# is stored as the region and the second as the sub-region.
location_tokens = dataD['공급위치'].str.split(expand=True)
dataD['지역'] = location_tokens[0]
dataD['세부지역'] = location_tokens[1]

# Exclusive area -> pyung, using the notebook's 3.3 divisor
# (presumably square metres per pyung — TODO confirm the unit of 전용면적).
dataD['평'] = (dataD['전용면적'] / 3.3).round(2)
dataD['평당 분양가격'] = (dataD['분양가격'] / dataD['평']).round().astype('int64')

# Placeholder columns, created here as 0 and intended to be overwritten by
# the prediction step later in the notebook.
for placeholder_col in [
    '평당 예측가격', '평당 분양-예측', '평당 분양/예측%',
    '예측가격', '분양-예측', '분양/예측%',
]:
    dataD[placeholder_col] = 0

dataD.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 169 entries, 0 to 168
Data columns (total 15 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   공급위치       169 non-null    object 
 1   아파트명       169 non-null    object 
 2   건설사        169 non-null    object 
 3   전용면적       169 non-null    float64
 4   분양가격       169 non-null    int64  
 5   지역         169 non-null    object 
 6   세부지역       169 non-null    object 
 7   평          169 non-null    float64
 8   평당 분양가격    169 non-null    int64  
 9   평당 예측가격    169 non-null    int64  
 10  평당 분양-예측   169 non-null    int64  
 11  평당 분양/예측%  169 non-null    int64  
 12  예측가격       169 non-null    int64  
 13  분양-예측      169 non-null    int64  
 14  분양/예측%     169 non-null    int64  
dtypes: float64(2), int64(8), object(5)
memory usage: 19.9+ KB


In [9]:
# Shared prediction helper used by add_data below. Per the cell output, the
# constructor prints 'predictionMan init' when it runs.
pm = predictionMan()

def add_data(D):
    """Fill the prediction columns of one row and return the row.

    ``D`` is a single row (as passed by ``DataFrame.apply(..., axis=1)``).
    The per-pyung prediction comes from the module-level ``pm``; the total
    predicted price is that value times the row's pyung. For both the
    per-pyung and the total price, the difference (offering - predicted) and
    the ratio (offering / predicted * 100) are stored, each rounded.
    """
    D['평당 예측가격'] = pm.predict_price(
        D['지역'],
        region_sub=D['세부지역'],
        pyung=D['평'],
    )
    D['예측가격'] = round(D['평당 예측가격'] * D['평'])

    # (offering column, predicted column, difference column, ratio column)
    comparisons = (
        ('평당 분양가격', '평당 예측가격', '평당 분양-예측', '평당 분양/예측%'),
        ('분양가격', '예측가격', '분양-예측', '분양/예측%'),
    )
    for offered_col, predicted_col, diff_col, ratio_col in comparisons:
        D[diff_col] = round(D[offered_col] - D[predicted_col])
        D[ratio_col] = round(D[offered_col] / D[predicted_col] * 100)
    return D

# Run add_data on every row (axis=1) and rebind dataD to the result.
dataD = dataD.apply(add_data, axis=1)

predictionMan init


In [10]:
# Save the table without the index column. 'utf-8-sig' writes a BOM
# (presumably so spreadsheet tools detect the Korean text as UTF-8 — confirm).
dataD.to_csv('sub-data-files/apply_home_2020_ex.csv', index=False, encoding='utf-8-sig')

In [11]:
# Read the exported file back to check what was written.
# NOTE(review): this rebinds readD, which an earlier cell used for the raw
# apply_home_2020.csv data — re-running earlier cells out of order will see
# this frame instead.
readD = pd.read_csv('sub-data-files/apply_home_2020_ex.csv')
readD.info()
readD.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 169 entries, 0 to 168
Data columns (total 15 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   공급위치       169 non-null    object 
 1   아파트명       169 non-null    object 
 2   건설사        169 non-null    object 
 3   전용면적       169 non-null    float64
 4   분양가격       169 non-null    int64  
 5   지역         169 non-null    object 
 6   세부지역       169 non-null    object 
 7   평          169 non-null    float64
 8   평당 분양가격    169 non-null    int64  
 9   평당 예측가격    169 non-null    int64  
 10  평당 분양-예측   169 non-null    int64  
 11  평당 분양/예측%  169 non-null    int64  
 12  예측가격       169 non-null    int64  
 13  분양-예측      169 non-null    int64  
 14  분양/예측%     169 non-null    int64  
dtypes: float64(2), int64(8), object(5)
memory usage: 19.9+ KB


Unnamed: 0,공급위치,아파트명,건설사,전용면적,분양가격,지역,세부지역,평,평당 분양가격,평당 예측가격,평당 분양-예측,평당 분양/예측%,예측가격,분양-예측,분양/예측%
0,강원도 강릉시 포남동 1117외 5필지,KTX강릉역 동도센트리움,동도건설(주),59.99,287000000,강원도,강릉시,18.18,15786579,12683849,3102730,124,230592375,56407625,124
1,강원도 원주시 c4블럭,원주혁신도시 제일풍경채 센텀포레,제일건설(주),91.4,323700000,강원도,원주시,27.7,11685921,11086345,599576,105,307091756,16608244,105
2,강원도 원주시 c4블럭,원주혁신도시 제일풍경채 센텀포레,제일건설(주),110.58,391500000,강원도,원주시,33.51,11683080,11698000,-14920,100,391999980,-499980,100
3,강원도 속초시 우렁3길 20 (동명동),속초 롯데캐슬 인더스카이,롯데건설(주),79.94,403150000,강원도,속초시,24.22,16645334,11420491,5224843,146,276604292,126545708,146
4,강원도 속초시 우렁3길 20 (동명동),속초 롯데캐슬 인더스카이,롯데건설(주),84.98,466370000,강원도,속초시,25.75,18111456,11375982,6735474,159,292931536,173438464,159


In [1]:
import os

# os.listdir returns entries in arbitrary order; sort them so the listing
# (and anything that later iterates over it) is the same on every run.
stat_files = sorted(os.listdir('statistics'))
stat_files


['healstate_region_avg_price_per_pyung.csv',
 'healstate_region_deal_count.csv',
 'lottecastle_region_avg_price_per_pyung.csv',
 'lottecastle_region_deal_count.csv',
 'prugio_region_avg_price_per_pyung.csv',
 'prugio_region_deal_count.csv',
 'raemian_region_avg_price_per_pyung.csv',
 'raemian_region_deal_count.csv',
 'region_avg_price_per_pyung.csv',
 'region_avg_price_per_pyung_month.csv',
 'region_deal_count.csv',
 'region_deal_count_month.csv',
 'region_size_avg_price_per_pyung.csv',
 'region_size_avg_price_per_pyung_month.csv',
 'region_size_deal_count.csv',
 'region_size_deal_count_month.csv',
 'xi_region_avg_price_per_pyung.csv',
 'xi_region_deal_count.csv']