In [1]:
import numpy as np
import pandas as pd
import cx_Oracle

In [2]:
# Time-series information.
# time_table1 holds one [start, end] date pair per calendar year; the first
# period starts in August 2012 and the last one ends in July 2020.
time_table1 = [
    [f'{year}-01-01', f'{year}-12-31'] for year in range(2012, 2021)
]
time_table1[0][0] = '2012-08-01'
time_table1[-1][1] = '2020-07-31'

# time_table2 holds the same years as four-digit strings.
time_table2 = [str(year) for year in range(2012, 2021)]

In [3]:
# Region information: the 17 region names, in the order used for the rows of
# the generated tables.
region_table1 = [
    '강원도',
    '경기도',
    '경상남도',
    '경상북도',
    '광주광역시',
    '대구광역시',
    '대전광역시',
    '부산광역시',
    '서울특별시',
    '세종특별자치시',
    '울산광역시',
    '인천광역시',
    '전라남도',
    '전라북도',
    '제주특별자치도',
    '충청남도',
    '충청북도',
]

In [4]:
def get_data_from_db(query):
    """Run ``query`` against the Oracle database and return the rows as a DataFrame.

    A new connection is opened for every call and is always closed again,
    including when the query itself fails.

    Args:
        query: SQL text handed to ``pd.read_sql``.

    Returns:
        The DataFrame produced by ``pd.read_sql``.
    """
    # NOTE(review): user, password and DSN are hard-coded here; consider
    # loading them from the environment or a config file.
    conn = cx_Oracle.connect('hoseo', 'hoseo', 'localhost:1521/xe')
    try:
        return pd.read_sql(query, conn)
    finally:
        # Release the connection even if read_sql raises.
        conn.close()

In [5]:
# Total deal count per region for each year
def make_region_deal_count():
    """Build the region x year deal-count table and save it as CSV.

    One ``COUNT(*)`` query is run for each year in ``time_table2``. The first
    result seeds the table and every later year is left-joined onto it via the
    shared region column, so a region that is missing from a later year keeps
    its row (with NaN) instead of being dropped. Column names are lower-cased
    and the table is written to ``sub-data-files/region_deal_count.csv``.

    Raises:
        ValueError: if ``time_table2`` is empty.
    """
    dataD = None

    for year in time_table2:

        query = f"""
            SELECT adp.region, COUNT(*) AS y{year} 
            FROM apt_deal_price adp 
            WHERE TO_CHAR(adp.contract_date, 'YYYY') = '{year}' 
            GROUP BY adp.region 
            ORDER BY adp.region ASC
            """

        queD = get_data_from_db(query)

        # Seed with whichever year comes first instead of a hard-coded '2012'.
        if dataD is None:
            dataD = queD.copy()
        else:
            dataD = dataD.merge(queD, how='left')

    if dataD is None:
        raise ValueError('time_table2 is empty; nothing to aggregate')

    dataD.info()
    dataD.columns = [ col.lower() for col in dataD.columns ]
    dataD.to_csv('sub-data-files/region_deal_count.csv', index=False, encoding='utf-8-sig')
    

In [6]:
# make_region_deal_count()

In [7]:
# Read sub-data-files/region_deal_count.csv back in and display it.
testD = pd.read_csv('sub-data-files/region_deal_count.csv')
testD


Unnamed: 0,region,y2012,y2013,y2014,y2015,y2016,y2017,y2018,y2019,y2020
0,강원도,6630,17830,19607,22038,20351,16608,12825,13914,12550
1,경기도,41582,147064,172370,200666,175042,154053,151747,141529,150660
2,경상남도,12818,39806,47725,48027,37483,30780,25463,33552,24848
3,경상북도,11594,29417,27867,27196,20114,20705,18861,22647,18548
4,광주광역시,8387,22310,25424,23859,21420,22764,25384,20135,13465
5,대구광역시,16495,39240,34041,32299,19047,32067,32582,30142,21410
6,대전광역시,6432,20935,20923,21186,22785,20283,19602,27390,17102
7,부산광역시,13255,42777,52758,62263,56409,37938,25857,36080,30418
8,서울특별시,18633,68136,85537,120023,110183,105063,81389,74917,50428
9,세종특별자치시,546,1130,1204,2033,3520,4669,3430,5802,5722


In [8]:
# 국민임대주택 규모
# 전용면적 50m²(15평) 미만, 무주택 세대주 월평균 소득 50% 이하
# 전용면적 50m²(15평)이상∼60m²(18평) 이하, 무주택 세대주 월평균 소득 70% 이하

# 국민주택 규모
# 전용면적 85m²(25.7평) 이하
# 전용면적 100m²(30.2평) 이하, 수도권을 제외한 도시지역이 아닌 읍·면 지역

# 전용면적 100m²(30.2평)초과~135m²(40.8평)이하
# 전용면적 135m²(40.8평)초과

# ~ 50m²
# 50m² ~ 60m²
# 60m² ~ 85m²
# 85m² ~ 100m²
# 100m² ~ 135m²
# 135m² ~

In [9]:
def _size_band_conditions(alias):
    """Return (label, SQL predicate) pairs for the six apartment-size bands.

    ``alias`` is the table alias written in front of ``apt_size`` in every
    predicate.
    """
    size = f'{alias}.apt_size'
    return [
        ('under_50', f'{size} < 50'),
        ('50_60', f'{size} >= 50 AND {size} <= 60'),
        ('60_85', f'{size} > 60 AND {size} <= 85'),
        ('85_100', f'{size} > 85 AND {size} <= 100'),
        ('100_135', f'{size} > 100 AND {size} <= 135'),
        ('135_over', f'{size} > 135'),
    ]


# Size bands for queries that alias the deal table as ``adp``.
size_conditions = _size_band_conditions('adp')

# The same bands for queries that alias the deal table as ``A``.
size_conditions2 = _size_band_conditions('A')

# Column layout of the yearly region x size tables.
col_apt_size_data = ['region', 'apt_size'] + [f'y{year}' for year in range(2012, 2021)]
    

In [10]:
# Collect deal counts per region and apartment-size band
def make_region_size_count(year = 2012):
    """Return the deal count of every (region, size band) pair for one year.

    One query is issued per entry of ``size_conditions``. Each result is
    left-joined onto the full ``region_table1`` list, so a region without
    deals in some band contributes 0 and the output always has the same
    length and order.

    Args:
        year: four-digit year (int or str) compared with
            ``to_char(contract_date, 'YYYY')``.

    Returns:
        numpy.ndarray: 1-D int64 array of length
        ``len(region_table1) * len(size_conditions)``; regions follow
        ``region_table1`` order and, inside each region, bands follow
        ``size_conditions`` order.
    """
    # Start from the complete region list so row count and row order never
    # depend on which regions a particular query returns.
    dataD = pd.DataFrame({'REGION': region_table1})

    for label, predicate in size_conditions:

        query = f"""
            SELECT adp.region AS region, COUNT(adp.apt_size) AS "{label}"
            FROM apt_deal_price adp
            WHERE to_char(adp.contract_date, 'YYYY') = '{year}' AND {predicate}
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        dataD = dataD.merge(get_data_from_db(query), how='left', on='REGION')

    # Band columns are taken from size_conditions instead of a fixed list.
    labels = [label for label, _ in size_conditions]
    counts = dataD[labels].fillna(0).astype('int64')

    return counts.values.reshape(len(region_table1) * len(size_conditions), )


In [11]:
# Build the yearly deal-count table per region and apartment-size band
def complete_region_size_count():
    """Assemble the yearly region x size deal-count table and save it as CSV.

    The table has one row per (region, size band) pair and the columns listed
    in ``col_apt_size_data``. Each year column is filled with the array
    returned by ``make_region_size_count`` for that year. The result is
    written to ``sub-data-files/region_size_deal_count.csv``.
    """
    band_labels = [label for label, _ in size_conditions]
    n_regions = len(region_table1)
    n_bands = len(size_conditions)

    # Zero-filled frame in the final shape; every column is overwritten below.
    zeros = np.zeros((n_regions * n_bands, len(col_apt_size_data)), dtype='int64')
    allD = pd.DataFrame(zeros, columns=col_apt_size_data)

    # Each region is repeated once per band; the band labels cycle.
    allD['region'] = [name for name in region_table1 for _ in range(n_bands)]
    allD['apt_size'] = band_labels * n_regions

    # Collect the data year by year.
    for year in time_table2:
        allD['y' + year] = make_region_size_count(year)

    allD.info()
    allD.to_csv('sub-data-files/region_size_deal_count.csv', index=False, encoding='utf-8-sig')

In [12]:
# complete_region_size_count()

In [13]:
# Read sub-data-files/region_size_deal_count.csv back in and show the first 12 rows.
testD = pd.read_csv('sub-data-files/region_size_deal_count.csv')
testD.head(12)

Unnamed: 0,region,apt_size,y2012,y2013,y2014,y2015,y2016,y2017,y2018,y2019,y2020
0,강원도,under_50,1745,5230,5375,5129,4973,4335,2953,2599,2222
1,강원도,50_60,2164,5709,6467,6971,6165,5092,4104,4500,4139
2,강원도,60_85,2063,5477,6316,7868,7326,5814,4712,5820,5144
3,강원도,85_100,75,194,218,278,262,198,135,154,160
4,강원도,100_135,432,894,992,1443,1282,937,742,660,704
5,강원도,135_over,151,326,239,349,343,232,179,181,181
6,경기도,under_50,5283,17442,23048,26504,23472,20073,17546,15751,16352
7,경기도,50_60,11330,40630,46031,52233,46651,40635,37919,35299,38755
8,경기도,60_85,17987,63772,73604,86951,75077,65850,67028,64718,69997
9,경기도,85_100,638,2393,3079,3755,3081,2675,2951,3153,3176


In [14]:
def get_yyyymm():
    """Return the distinct contract months found in ``apt_deal_price``.

    Returns:
        list: the values of the query's ``YM`` column (contract dates
        formatted as ``'YYYYMM'``), in the ascending order the query returns.
    """
    sql = """
        SELECT DISTINCT to_char(adp.contract_date, 'YYYYMM') AS ym
        FROM apt_deal_price adp
        ORDER BY ym ASC
        """

    months = get_data_from_db(sql)
    return [ym for ym in months['YM']]
    

In [15]:
# Total deal count per region for each month
def make_region_deal_count_month():
    """Build the region x month deal-count table and save it as CSV.

    One ``COUNT(*)`` query is run for each month returned by ``get_yyyymm``.
    The first result seeds the table and every later month is left-joined
    onto it via the shared region column. Column names are lower-cased and
    the table is written to ``sub-data-files/region_deal_count_month.csv``.

    Raises:
        ValueError: if ``get_yyyymm`` returns no months.
    """
    yyyymm = get_yyyymm()
    dataD = None

    for ym in yyyymm:

        query = f"""
            SELECT adp.region, COUNT(*) AS ym{ym} 
            FROM apt_deal_price adp 
            WHERE TO_CHAR(adp.contract_date, 'YYYYMM') = '{ym}' 
            GROUP BY adp.region 
            ORDER BY adp.region ASC
            """

        queD = get_data_from_db(query)

        # Seed with whichever month comes first; the month list comes from
        # the database, so it need not start at a fixed value.
        if dataD is None:
            dataD = queD.copy()
        else:
            dataD = dataD.merge(queD, how='left')

    if dataD is None:
        raise ValueError('no contract months found; nothing to aggregate')

    dataD.info()
    dataD.columns = [ col.lower() for col in dataD.columns ]
    dataD.to_csv('sub-data-files/region_deal_count_month.csv', index=False, encoding='utf-8-sig')

In [16]:
# make_region_deal_count_month()

In [17]:
# Read sub-data-files/region_deal_count_month.csv back in and show the first 5 rows.
testD = pd.read_csv('sub-data-files/region_deal_count_month.csv')
testD.head(5)


Unnamed: 0,region,ym201208,ym201209,ym201210,ym201211,ym201212,ym201301,ym201302,ym201303,ym201304,...,ym201910,ym201911,ym201912,ym202001,ym202002,ym202003,ym202004,ym202005,ym202006,ym202007
0,강원도,933,1191,1495,1513,1498,1023,1227,2383,1660,...,1343,1377,1751,1439,1868,1579,1625,1982,2312,1745
1,경기도,5631,8083,10645,9178,8045,7115,8588,13003,14632,...,18101,20779,20988,20747,31851,16368,12759,16979,34952,17004
2,경상남도,1623,2113,3168,3101,2813,2286,2801,3747,4001,...,4026,4869,3822,3183,3704,2625,2633,3376,5764,3563
3,경상북도,1781,2087,2701,2519,2506,1947,2324,2837,3134,...,2549,2514,2400,2193,2689,1915,2083,3392,3843,2433
4,광주광역시,1204,1483,2012,1956,1732,1413,1799,2206,2051,...,2046,2133,2128,1954,2251,1738,1490,1778,2423,1831


In [18]:
# Collect monthly deal counts per region and apartment-size band
def make_region_size_count_month(ym = 201208):
    """Return the deal count of every (region, size band) pair for one month.

    One query is issued per entry of ``size_conditions``. Each result is
    left-joined onto the full ``region_table1`` list, so a region without
    deals in some band (or in the whole month) contributes 0 and the output
    always has the same length and order.

    Args:
        ym: month as ``YYYYMM`` (int or str) compared with
            ``to_char(contract_date, 'YYYYMM')``.

    Returns:
        numpy.ndarray: 1-D int64 array of length
        ``len(region_table1) * len(size_conditions)``; regions follow
        ``region_table1`` order and, inside each region, bands follow
        ``size_conditions`` order.
    """
    # Start from the complete region list so an empty or partial query result
    # cannot change the row count or the row order.
    dataD = pd.DataFrame({'REGION': region_table1})

    for label, predicate in size_conditions:

        query = f"""
            SELECT adp.region AS region, COUNT(adp.apt_size) AS "{label}"
            FROM apt_deal_price adp
            WHERE to_char(adp.contract_date, 'YYYYMM') = '{ym}' AND {predicate}
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        dataD = dataD.merge(get_data_from_db(query), how='left', on='REGION')

    # Band columns are taken from size_conditions instead of a fixed list.
    labels = [label for label, _ in size_conditions]
    counts = dataD[labels].fillna(0).astype('int64')

    return counts.values.reshape(len(region_table1) * len(size_conditions), )

In [19]:
# Build the monthly deal-count table per region and apartment-size band
def complete_region_size_count():
    """Assemble the monthly region x size deal-count table and save it as CSV.

    The table has one row per (region, size band) pair, the columns
    ``region`` and ``apt_size``, and one ``ym<YYYYMM>`` column per month
    returned by ``get_yyyymm``. Each month column is filled with the array
    returned by ``make_region_size_count_month``. The result is written to
    ``sub-data-files/region_size_deal_count_month.csv``.

    NOTE(review): this definition reuses the name of the yearly
    ``complete_region_size_count`` defined earlier in the notebook and
    replaces it once this cell has run.
    """
    yyyymm = get_yyyymm()
    col_apt_size_month = ['region', 'apt_size'] + ['ym' + month for month in yyyymm]

    band_labels = [label for label, _ in size_conditions]
    n_regions = len(region_table1)
    n_bands = len(size_conditions)

    # Zero-filled frame in the final shape; every column is overwritten below.
    zeros = np.zeros((n_regions * n_bands, len(col_apt_size_month)), dtype='int64')
    allD = pd.DataFrame(zeros, columns=col_apt_size_month)

    # Each region is repeated once per band; the band labels cycle.
    allD['region'] = [name for name in region_table1 for _ in range(n_bands)]
    allD['apt_size'] = band_labels * n_regions

    # Collect the data month by month.
    for ym in yyyymm:
        allD['ym' + ym] = make_region_size_count_month(ym)

    allD.info()
    allD.to_csv('sub-data-files/region_size_deal_count_month.csv', index=False, encoding='utf-8-sig')

In [20]:
# 1시간 정도 걸림
# complete_region_size_count()

In [21]:
# Read sub-data-files/region_size_deal_count_month.csv back in and show the first 6 rows.
testD = pd.read_csv('sub-data-files/region_size_deal_count_month.csv')
testD.head(6)

Unnamed: 0,region,apt_size,ym201208,ym201209,ym201210,ym201211,ym201212,ym201301,ym201302,ym201303,...,ym201910,ym201911,ym201912,ym202001,ym202002,ym202003,ym202004,ym202005,ym202006,ym202007
0,강원도,under_50,229,239,357,439,481,231,268,1120,...,237,217,240,224,351,292,278,358,383,336
1,강원도,50_60,296,470,540,466,392,371,467,569,...,415,483,469,441,603,526,599,672,735,563
2,강원도,60_85,318,362,452,455,476,325,395,561,...,587,555,924,649,756,630,630,790,1007,682
3,강원도,85_100,7,14,23,11,20,9,15,13,...,10,19,19,18,24,22,16,18,30,32
4,강원도,100_135,63,79,100,112,78,66,63,91,...,81,84,83,90,109,86,79,115,122,103
5,강원도,135_over,20,27,23,30,51,21,19,29,...,13,19,16,17,25,23,23,29,35,29


In [22]:
# Average price per pyeong per region for each year
def make_region_avg_price_per_pyung():
    """Build the region x year average price-per-pyeong table and save it as CSV.

    For each year in ``time_table2`` a query averages
    ``contract_price / (apt_size / 3.3)`` per region and rounds the result.
    The first result seeds the table and every later year is left-joined
    onto it via the shared region column. Column names are lower-cased and
    the table is written to ``sub-data-files/region_avg_price_per_pyung.csv``.

    Raises:
        ValueError: if ``time_table2`` is empty.
    """
    dataD = None

    for year in time_table2:

        query = f"""
            SELECT adp.region, round(AVG(adp.price_per_pyung)) AS y{year}
            FROM (SELECT A.region, (A.contract_price / (A.apt_size / 3.3)) price_per_pyung
                  FROM apt_deal_price A
                  WHERE to_char(A.contract_date, 'YYYY') = '{year}') adp
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        queD = get_data_from_db(query)

        # Seed with whichever year comes first instead of a hard-coded '2012'.
        if dataD is None:
            dataD = queD.copy()
        else:
            dataD = dataD.merge(queD, how='left')

    if dataD is None:
        raise ValueError('time_table2 is empty; nothing to aggregate')

    dataD.info()
    dataD.columns = [ col.lower() for col in dataD.columns ]
    dataD.to_csv('sub-data-files/region_avg_price_per_pyung.csv', index=False, encoding='utf-8-sig')

In [23]:
# make_region_avg_price_per_pyung()

In [24]:
# Read sub-data-files/region_avg_price_per_pyung.csv back in and display it.
testD = pd.read_csv('sub-data-files/region_avg_price_per_pyung.csv')
testD

Unnamed: 0,region,y2012,y2013,y2014,y2015,y2016,y2017,y2018,y2019,y2020
0,강원도,5017704,5073569,5244047,5697026,6288439,6610171,6572811,6470442,6895954
1,경기도,10268867,10721242,11194598,11861218,12768256,13887902,14639393,15421171,15659928
2,경상남도,7049242,7580157,8063321,8177517,8115336,8054941,7964061,8358840,9254155
3,경상북도,5154609,5510731,6015412,6365920,6152243,6295791,6194193,6360006,6903557
4,광주광역시,6159958,6387516,6988350,7665966,8172572,8712565,9219905,9511984,10086411
5,대구광역시,7630697,8177568,9304979,10538937,10625995,11316737,11834449,11919879,12458865
6,대전광역시,7994548,8229245,8379888,8445784,8762061,9130145,9834420,10633223,11737035
7,부산광역시,8955963,8681508,9182984,9920745,11047033,11730806,11791791,13086323,13951932
8,서울특별시,18912444,18659223,19201809,20206966,22287993,25421426,27119611,34400559,33660626
9,세종특별자치시,6241319,6168699,7381741,9044967,10432436,11415572,12988359,15411576,17264926


In [25]:
# Average price per pyeong per region for each month
def make_region_avg_price_per_pyung_month():
    """Build the region x month average price-per-pyeong table and save it as CSV.

    For each month returned by ``get_yyyymm`` a query averages
    ``contract_price / (apt_size / 3.3)`` per region and rounds the result.
    The first result seeds the table and every later month is left-joined
    onto it via the shared region column. Column names are lower-cased and
    the table is written to
    ``sub-data-files/region_avg_price_per_pyung_month.csv``.

    Raises:
        ValueError: if ``get_yyyymm`` returns no months.
    """
    yyyymm = get_yyyymm()
    dataD = None

    for ym in yyyymm:

        query = f"""
            SELECT adp.region, round(AVG(adp.price_per_pyung)) AS ym{ym}
            FROM (SELECT A.region, (A.contract_price / (A.apt_size / 3.3)) price_per_pyung
                  FROM apt_deal_price A
                  WHERE to_char(A.contract_date, 'YYYYMM') = '{ym}') adp
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        queD = get_data_from_db(query)

        # Seed with whichever month comes first; the month list comes from
        # the database, so it need not start at a fixed value.
        if dataD is None:
            dataD = queD.copy()
        else:
            dataD = dataD.merge(queD, how='left')

    if dataD is None:
        raise ValueError('no contract months found; nothing to aggregate')

    dataD.info()
    dataD.columns = [ col.lower() for col in dataD.columns ]
    dataD.to_csv('sub-data-files/region_avg_price_per_pyung_month.csv', index=False, encoding='utf-8-sig')

In [26]:
# make_region_avg_price_per_pyung_month()

In [27]:
# Read sub-data-files/region_avg_price_per_pyung_month.csv back in and show the first 5 rows.
testD = pd.read_csv('sub-data-files/region_avg_price_per_pyung_month.csv')
testD.head(5)


Unnamed: 0,region,ym201208,ym201209,ym201210,ym201211,ym201212,ym201301,ym201302,ym201303,ym201304,...,ym201910,ym201911,ym201912,ym202001,ym202002,ym202003,ym202004,ym202005,ym202006,ym202007
0,강원도,5119250,5095286,4998207,4991621,4938578,5095420,5105624,4545184,5075322,...,6506124,6513893,6946799,6742633,6690813,6620664,6526321,6865083,7441503,7147556
1,경기도,9775160,10125170,10421214,10398610,10409208,10238318,10351601,10466027,10549645,...,16601336,17185471,16412237,15685046,15325904,14077135,13613289,14859319,17084424,17185610
2,경상남도,6881622,7086819,7188409,7023321,6989572,7291461,7225936,7359711,7608994,...,9037953,9391425,8759668,9078625,8667616,8114775,8141990,8940838,10604776,9793935
3,경상북도,5103845,5202156,5234563,5226411,4992737,5261965,5331156,5312663,5281205,...,6669935,6807977,6744104,6796241,6780109,6594742,6362860,7034261,7428216,6831766
4,광주광역시,5988665,6194381,6151116,6209057,6204378,6202909,6242817,6222630,6185710,...,9712906,9993954,10268687,10447595,10017985,9695587,9420916,9720282,10538647,10454689


In [28]:
# Average price per pyeong per region and apartment-size band
def make_region_size_per_pyung(year = 2012):
    """Return the average price per pyeong of every (region, size band) pair for one year.

    One query is issued per entry of ``size_conditions2``; each averages
    ``contract_price / (apt_size / 3.3)`` per region and rounds the result.
    Every result is left-joined onto the full ``region_table1`` list, so a
    region without deals in some band contributes 0 and the output always
    has the same length and order.

    Args:
        year: four-digit year (int or str) compared with
            ``to_char(contract_date, 'YYYY')``.

    Returns:
        numpy.ndarray: 1-D int64 array of length
        ``len(region_table1) * len(size_conditions2)``; regions follow
        ``region_table1`` order and, inside each region, bands follow
        ``size_conditions2`` order.
    """
    # Start from the complete region list so row count and row order never
    # depend on which regions a particular query returns.
    dataD = pd.DataFrame({'REGION': region_table1})

    for label, predicate in size_conditions2:

        query = f"""
            SELECT adp.region, round(AVG(adp.price_per_pyung)) AS "{label}"
            FROM (SELECT A.region, (A.contract_price / (A.apt_size / 3.3)) price_per_pyung
                  FROM apt_deal_price A
                  WHERE to_char(A.contract_date, 'YYYY') = '{year}' AND {predicate}) adp
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        dataD = dataD.merge(get_data_from_db(query), how='left', on='REGION')

    # Band columns are taken from size_conditions2 instead of a fixed list.
    labels = [label for label, _ in size_conditions2]
    prices = dataD[labels].fillna(0).astype('int64')

    return prices.values.reshape(len(region_table1) * len(size_conditions2), )

In [29]:
# Build the yearly average price-per-pyeong table per region and apartment-size band
def complete_region_size_avg_price_per_pyung():
    """Assemble the yearly region x size average-price table and save it as CSV.

    The table has one row per (region, size band) pair and the columns listed
    in ``col_apt_size_data``. Each year column is filled with the array
    returned by ``make_region_size_per_pyung`` for that year. The result is
    written to ``sub-data-files/region_size_avg_price_per_pyung.csv``.
    """
    # Final table shape: one row per (region, size band) pair.
    row_count = len(region_table1) * len(size_conditions2)
    col_count = len(col_apt_size_data)
    allD = pd.DataFrame(np.zeros((row_count, col_count), dtype='int64'), columns=col_apt_size_data)

    # Labels come from size_conditions2, the same list that sizes the table
    # and drives make_region_size_per_pyung.
    size_list = [ x[0] for x in size_conditions2 ]
    allD['apt_size'] = size_list * len(region_table1)

    region_list = []
    for r in region_table1:
        region_list.extend([r] * len(size_conditions2))
    allD['region'] = region_list

    # Collect the data year by year.
    for year in time_table2:
        data_list = make_region_size_per_pyung(year)
        allD['y' + year] = data_list

    allD.info()
    allD.to_csv('sub-data-files/region_size_avg_price_per_pyung.csv', index=False, encoding='utf-8-sig')

In [30]:
# complete_region_size_avg_price_per_pyung()

In [31]:
# Read sub-data-files/region_size_avg_price_per_pyung.csv back in and show the first 12 rows.
testD = pd.read_csv('sub-data-files/region_size_avg_price_per_pyung.csv')
testD.head(12)

Unnamed: 0,region,apt_size,y2012,y2013,y2014,y2015,y2016,y2017,y2018,y2019,y2020
0,강원도,under_50,3834824,3930871,4194376,4435724,5019125,5274994,5190360,4847307,4899397
1,강원도,50_60,5035603,5157185,5259505,5625213,6235641,6711159,6508426,6126538,6428878
2,강원도,60_85,5671159,5888292,5937993,6421464,7077037,7371753,7331259,7408028,8041103
3,강원도,85_100,6053554,6116621,6405393,6627079,7321626,7739430,8348195,7355994,8352011
4,강원도,100_135,6059070,5874995,6047482,6335626,6733753,7191388,7167794,6703160,7244900
5,강원도,135_over,6009470,5435167,5699527,5954606,6343648,6945226,7084828,6577520,6897619
6,경기도,under_50,10062621,10766882,11287985,12290707,13432154,14414160,15147142,14816804,15911273
7,경기도,50_60,9987100,10606181,11227112,11981337,12906303,14069483,14968033,15826207,16390992
8,경기도,60_85,10210120,10584366,11038438,11670396,12565168,13715817,14502582,15359781,15441251
9,경기도,85_100,10522374,11831791,11778145,12440266,13726847,14661255,15657482,17870889,17327268


In [32]:
# Monthly average price per pyeong per region and apartment-size band
def make_region_size_per_pyung_month(ym = 201208):
    """Return the average price per pyeong of every (region, size band) pair for one month.

    One query is issued per entry of ``size_conditions2``; each averages
    ``contract_price / (apt_size / 3.3)`` per region and rounds the result.
    Every result is left-joined onto the full ``region_table1`` list, so a
    region without deals in some band (or in the whole month) contributes 0
    and the output always has the same length and order.

    Args:
        ym: month as ``YYYYMM`` (int or str) compared with
            ``to_char(contract_date, 'YYYYMM')``.

    Returns:
        numpy.ndarray: 1-D int64 array of length
        ``len(region_table1) * len(size_conditions2)``; regions follow
        ``region_table1`` order and, inside each region, bands follow
        ``size_conditions2`` order.
    """
    # Start from the complete region list so an empty or partial query result
    # cannot change the row count or the row order.
    dataD = pd.DataFrame({'REGION': region_table1})

    for label, predicate in size_conditions2:

        query = f"""
            SELECT adp.region, round(AVG(adp.price_per_pyung)) AS "{label}"
            FROM (SELECT A.region, (A.contract_price / (A.apt_size / 3.3)) price_per_pyung
                  FROM apt_deal_price A
                  WHERE to_char(A.contract_date, 'YYYYMM') = '{ym}' AND {predicate}) adp
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        dataD = dataD.merge(get_data_from_db(query), how='left', on='REGION')

    # Band columns are taken from size_conditions2 instead of a fixed list.
    labels = [label for label, _ in size_conditions2]
    prices = dataD[labels].fillna(0).astype('int64')

    return prices.values.reshape(len(region_table1) * len(size_conditions2), )

In [33]:
# Build the monthly average price-per-pyeong table per region and apartment-size band
def complete_region_size_avg_price_per_pyung_month():
    """Assemble the monthly region x size average-price table and save it as CSV.

    The table has one row per (region, size band) pair, the columns
    ``region`` and ``apt_size``, and one ``ym<YYYYMM>`` column per month
    returned by ``get_yyyymm``. Each month column is filled with the array
    returned by ``make_region_size_per_pyung_month``. The result is written
    to ``sub-data-files/region_size_avg_price_per_pyung_month.csv``.
    """
    yyyymm = get_yyyymm()
    col_apt_size_month = ['region', 'apt_size'] + ['ym' + month for month in yyyymm]

    band_labels = [label for label, _ in size_conditions2]
    n_regions = len(region_table1)
    n_bands = len(size_conditions2)

    # Zero-filled frame in the final shape; every column is overwritten below.
    zeros = np.zeros((n_regions * n_bands, len(col_apt_size_month)), dtype='int64')
    allD = pd.DataFrame(zeros, columns=col_apt_size_month)

    # Each region is repeated once per band; the band labels cycle.
    allD['region'] = [name for name in region_table1 for _ in range(n_bands)]
    allD['apt_size'] = band_labels * n_regions

    # Collect the data month by month.
    for ym in yyyymm:
        allD['ym' + ym] = make_region_size_per_pyung_month(ym)

    allD.info()
    allD.to_csv('sub-data-files/region_size_avg_price_per_pyung_month.csv', index=False, encoding='utf-8-sig')

In [34]:
# complete_region_size_avg_price_per_pyung_month()

In [35]:
# Read sub-data-files/region_size_avg_price_per_pyung_month.csv back in and show the first 12 rows.
testD = pd.read_csv('sub-data-files/region_size_avg_price_per_pyung_month.csv')
testD.head(12)

Unnamed: 0,region,apt_size,ym201208,ym201209,ym201210,ym201211,ym201212,ym201301,ym201302,ym201303,...,ym201910,ym201911,ym201912,ym202001,ym202002,ym202003,ym202004,ym202005,ym202006,ym202007
0,강원도,under_50,4100543,3831493,3749504,3873928,3737606,4051133,4125851,3501822,...,4609876,4507726,4878141,4839482,4811668,4842320,4779160,4757371,5144811,5051655
1,강원도,50_60,4728273,5033243,4992276,5113644,5237412,4998976,4990708,5196714,...,6147934,6077098,6311076,6179900,6206069,6134343,6256045,6416735,6957298,6646247
2,강원도,60_85,5917922,5619339,5654734,5634440,5596408,5741708,5702146,5694701,...,7440279,7533972,7818561,7743580,7781692,7772065,7471970,8151192,8636073,8380033
3,강원도,85_100,6161315,6335932,6578804,5530117,5502024,5109969,6811836,5803805,...,9244756,7601284,7844000,7154628,8538528,7811454,7690386,7502829,9096376,9367920
4,강원도,100_135,6243071,6183028,6017450,5996006,5928819,5933961,5736283,5676785,...,6646343,7331529,6740437,6876742,7307654,6896854,6951239,7070545,7399136,8027998
5,강원도,135_over,5966271,6510079,5605203,5755194,6093273,5642685,5910573,5704797,...,7349712,6030985,6272126,7058540,7314592,6609761,6511298,7027750,7101921,6601820
6,경기도,under_50,9680685,9917403,10258948,10290641,10012240,10397915,10574241,10478348,...,15611293,16201234,15855971,15735733,15706315,14860943,13938903,15445554,17624586,16810373
7,경기도,50_60,9599046,9936279,10281836,10052954,9861034,10045181,10046882,10286979,...,16917156,17383356,17268302,16763640,16273083,14742257,14369330,15403201,17636631,17731350
8,경기도,60_85,9555021,10057183,10340940,10364811,10439033,10111759,10325187,10437031,...,16594629,17355689,16389466,15575620,15103650,13815465,13353314,14577056,16785031,17091120
9,경기도,85_100,10756021,10659672,10505660,10642336,10084796,9846498,10480973,10367531,...,18608551,20655231,17639727,16112025,16189588,14371161,15306959,16819249,19746030,20571916


## 2019년 아파트 브랜드 순위
고객의 선호도, 투자가치 지표, 인지도, 주거시설 만족도, 시공 건설사

1. GS건설 : 자이
2. 현대건설 : 힐스테이트
3. 삼성물산 : 래미안
4. 대우건설 : 푸르지오
5. 포스코건설 : 더샵
6. 롯데건설 : 롯데캐슬
7. 대림산업 : e편한세상
8. 현대산업개발 : 아이파크
9. 우미건설 : 린
10. 두산건설 : 위브


## 작업 아파트 브랜드 
1. GS건설 : 자이
2. 현대건설 : 힐스테이트
3. 삼성물산 : 래미안
4. 대우건설 : 푸르지오
6. 롯데건설 : 롯데캐슬

In [70]:
# Xi apartments: deal count per region for each year
def make_xi_region_deal_count():
    """Build the region x year deal-count table for Xi apartments and save it as CSV.

    Rows are the regions of ``region_table1``. For each year in
    ``time_table2`` a query counts deals whose apartment name contains
    '자이', excluding a fixed list of other names that also contain it, and
    the result is left-joined onto the region list. Missing values become 0,
    the year columns are cast to int64, and the table is written to
    ``sub-data-files/xi_region_deal_count.csv``.
    """
    table = pd.DataFrame({'REGION': region_table1})

    for year in time_table2:

        sql = f"""
            SELECT adp.region, COUNT(*) AS Y{year}
            FROM (SELECT *
                  FROM apt_deal_price A
                  WHERE A.apt_name LIKE '%자이%') adp
            WHERE to_char(adp.contract_date, 'YYYY') = '{year}' AND 
                  adp.apt_name NOT IN ('롯데캐슬자이언트','호등 자이젠 아파트(1665-9)',
                                       '효자이지움','영진자이온','세왕자이연','사곡영진자이온1단지',
                                       '자이온더퍼스트','사곡영진자이온2단지')
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        table = table.merge(get_data_from_db(sql), how='left')

    # Lower-case the column names, zero-fill the gaps and cast every year
    # column to int64 while 'region' sits in the index.
    table = (
        table.rename(columns=str.lower)
        .fillna(0)
        .set_index('region')
        .astype('int64')
        .reset_index()
    )

    table.info()
    table.to_csv('sub-data-files/xi_region_deal_count.csv', index=False, encoding='utf-8-sig')

In [37]:
# make_xi_region_deal_count()

In [38]:
# Read sub-data-files/xi_region_deal_count.csv back in and display it.
testD = pd.read_csv('sub-data-files/xi_region_deal_count.csv')
testD

Unnamed: 0,region,y2012,y2013,y2014,y2015,y2016,y2017,y2018,y2019,y2020
0,강원도,0,0,0,0,0,0,0,0,0
1,경기도,755,2541,2476,3174,2784,2475,3041,3448,3373
2,경상남도,113,466,538,493,324,224,245,367,343
3,경상북도,0,0,0,1,12,64,63,276,270
4,광주광역시,88,222,300,176,150,243,234,151,130
5,대구광역시,49,139,93,151,109,337,296,265,257
6,대전광역시,43,98,149,157,148,173,198,272,134
7,부산광역시,92,528,488,567,537,298,257,563,468
8,서울특별시,142,594,839,1182,1356,1737,1268,1527,729
9,세종특별자치시,71,107,84,133,159,138,125,96,229


In [39]:
# Xi apartments: average price per pyeong per region for each year
def make_xi_region_avg_price_per_pyung():
    """Build the region x year average price-per-pyeong table for Xi apartments.

    Rows are the regions of ``region_table1``. For each year in
    ``time_table2`` a query averages ``contract_price / (apt_size / 3.3)``
    over deals whose apartment name contains '자이', excluding a fixed list
    of other names that also contain it, and the rounded result is
    left-joined onto the region list. Missing values become 0, the year
    columns are cast to int64, and the table is written to
    ``sub-data-files/xi_region_avg_price_per_pyung.csv``.
    """
    table = pd.DataFrame({'REGION': region_table1})

    for year in time_table2:

        sql = f"""
            SELECT adp.region, round(AVG(adp.price_per_pyung)) AS Y{year}
            FROM (SELECT A.region, A.apt_name, A.contract_date,
                         (A.contract_price / (A.apt_size / 3.3)) price_per_pyung
                  FROM apt_deal_price A
                  WHERE A.apt_name LIKE '%자이%') adp
            WHERE to_char(adp.contract_date, 'YYYY') = '{year}' AND 
                  adp.apt_name NOT IN ('롯데캐슬자이언트','호등 자이젠 아파트(1665-9)',
                                       '효자이지움','영진자이온','세왕자이연','사곡영진자이온1단지',
                                       '자이온더퍼스트','사곡영진자이온2단지')
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        table = table.merge(get_data_from_db(sql), how='left')

    # Lower-case the column names, zero-fill the gaps and cast every year
    # column to int64 while 'region' sits in the index.
    table = (
        table.rename(columns=str.lower)
        .fillna(0)
        .set_index('region')
        .astype('int64')
        .reset_index()
    )

    table.info()
    table.to_csv('sub-data-files/xi_region_avg_price_per_pyung.csv', index=False, encoding='utf-8-sig')

In [40]:
# make_xi_region_avg_price_per_pyung()

In [41]:
# Read sub-data-files/xi_region_avg_price_per_pyung.csv back in and display it.
testD = pd.read_csv('sub-data-files/xi_region_avg_price_per_pyung.csv')
testD

Unnamed: 0,region,y2012,y2013,y2014,y2015,y2016,y2017,y2018,y2019,y2020
0,강원도,0,0,0,0,0,0,0,0,0
1,경기도,12796216,13453613,13578821,13684281,14812568,16424642,16506220,21771261,18716357
2,경상남도,10759983,10775361,11150230,11125783,10878730,10224536,9397641,9844829,10932060
3,경상북도,0,0,0,9717314,10744201,11967012,11288322,12547971,13544201
4,광주광역시,8067631,8399219,9086216,10594040,10974663,11184430,11844527,12935584,13556124
5,대구광역시,9799903,9974551,11524118,13956364,14676303,15826261,18076107,18807187,19946162
6,대전광역시,10217179,9693435,9982234,11117174,11729472,12096717,12531677,13558236,16224588
7,부산광역시,13142033,12402625,13539375,15153935,16489789,18705934,18339586,19796612,21668286
8,서울특별시,32284052,28810419,29358871,30197595,29727933,32862535,37266745,49019007,51200766
9,세종특별자치시,7943517,7709436,7356598,6732384,6739104,7026453,6990573,6838827,7694445


In [42]:
# Hillstate apartments: deal count per region for each year
def make_healstate_region_deal_count():
    """Build the region x year deal-count table for Hillstate apartments.

    Rows are the regions of ``region_table1``. For each year in
    ``time_table2`` a query counts deals whose apartment name contains
    '힐스테이트' and the result is left-joined onto the region list. Missing
    values become 0, the year columns are cast to int64, and the table is
    written to ``sub-data-files/healstate_region_deal_count.csv``.
    """
    table = pd.DataFrame({'REGION': region_table1})

    for year in time_table2:

        sql = f"""
            SELECT adp.region, COUNT(*) AS Y{year}
            FROM (SELECT *
                  FROM apt_deal_price A
                  WHERE A.apt_name LIKE '%힐스테이트%') adp
            WHERE to_char(adp.contract_date, 'YYYY') = '{year}'
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        table = table.merge(get_data_from_db(sql), how='left')

    # Lower-case the column names, zero-fill the gaps and cast every year
    # column to int64 while 'region' sits in the index.
    table = (
        table.rename(columns=str.lower)
        .fillna(0)
        .set_index('region')
        .astype('int64')
        .reset_index()
    )

    table.info()
    table.to_csv('sub-data-files/healstate_region_deal_count.csv', index=False, encoding='utf-8-sig')

In [43]:
# make_healstate_region_deal_count()

In [44]:
# Read sub-data-files/healstate_region_deal_count.csv back in and display it.
testD = pd.read_csv('sub-data-files/healstate_region_deal_count.csv')
testD

Unnamed: 0,region,y2012,y2013,y2014,y2015,y2016,y2017,y2018,y2019,y2020
0,강원도,34,44,44,61,51,43,65,70,52
1,경기도,459,1696,2270,2647,2078,2118,2685,2541,2589
2,경상남도,4,23,80,80,93,72,157,335,367
3,경상북도,0,0,0,0,0,0,0,0,0
4,광주광역시,43,26,32,19,222,384,375,285,232
5,대구광역시,88,213,98,95,56,89,102,90,92
6,대전광역시,0,0,0,0,0,0,0,0,0
7,부산광역시,45,182,85,1052,489,215,252,392,287
8,서울특별시,330,1456,1523,2280,2564,2557,2105,1930,1184
9,세종특별자치시,6,7,21,94,126,184,171,392,203


In [45]:
# Hillstate apartments: average price per pyeong per region for each year
def make_healstate_region_avg_price_per_pyung():
    """Build the region x year average price-per-pyeong table for Hillstate apartments.

    Rows are the regions of ``region_table1``. For each year in
    ``time_table2`` a query averages ``contract_price / (apt_size / 3.3)``
    over deals whose apartment name contains '힐스테이트' and the rounded
    result is left-joined onto the region list. Missing values become 0, the
    year columns are cast to int64, and the table is written to
    ``sub-data-files/healstate_region_avg_price_per_pyung.csv``.
    """
    table = pd.DataFrame({'REGION': region_table1})

    for year in time_table2:

        sql = f"""
            SELECT adp.region, round(AVG(adp.price_per_pyung)) AS Y{year}
            FROM (SELECT A.region, A.apt_name, A.contract_date,
                         (A.contract_price / (A.apt_size / 3.3)) price_per_pyung
                  FROM apt_deal_price A
                  WHERE A.apt_name LIKE '%힐스테이트%') adp
            WHERE to_char(adp.contract_date, 'YYYY') = '{year}'
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        table = table.merge(get_data_from_db(sql), how='left')

    # Lower-case the column names, zero-fill the gaps and cast every year
    # column to int64 while 'region' sits in the index.
    table = (
        table.rename(columns=str.lower)
        .fillna(0)
        .set_index('region')
        .astype('int64')
        .reset_index()
    )

    table.info()
    table.to_csv('sub-data-files/healstate_region_avg_price_per_pyung.csv', index=False, encoding='utf-8-sig')

In [46]:
# make_healstate_region_avg_price_per_pyung()

In [47]:
# Read sub-data-files/healstate_region_avg_price_per_pyung.csv back in and display it.
testD = pd.read_csv('sub-data-files/healstate_region_avg_price_per_pyung.csv')
testD

Unnamed: 0,region,y2012,y2013,y2014,y2015,y2016,y2017,y2018,y2019,y2020
0,강원도,6089964,6656109,6774502,7306375,7147283,7970091,8252250,8458579,8964353
1,경기도,13342401,13286121,13705601,13970981,14987980,15959704,16582067,19229504,19727782
2,경상남도,11660903,13529165,13890272,13644062,12495329,11569224,10877289,11321630,12639328
3,경상북도,0,0,0,0,0,0,0,0,0
4,광주광역시,10058948,10315549,10695869,12004197,12409762,14184691,16090945,17356589,18929916
5,대구광역시,8301067,8556268,10916830,11993023,12031932,12660126,16867372,15188047,19795528
6,대전광역시,0,0,0,0,0,0,0,0,0
7,부산광역시,9163691,9452429,12600230,16102570,19119596,21581027,21145368,22229441,24535092
8,서울특별시,21969200,21710221,22559040,23211706,24630223,28194157,30760228,39933305,41473816
9,세종특별자치시,9761685,10154115,11430832,11165061,12426776,13579189,16442972,19884256,25881093


In [48]:
# Raemian apartments: deal count per region for each year
def make_raemian_region_deal_count():
    """Build the region x year deal-count table for Raemian apartments.

    Rows are the regions of ``region_table1``. For each year in
    ``time_table2`` a query counts deals whose apartment name contains
    '래미안' or '레미안', excluding names that contain '대동레미안' and the
    name '레미안블루', and the result is left-joined onto the region list.
    Missing values become 0, the year columns are cast to int64, and the
    table is written to ``sub-data-files/raemian_region_deal_count.csv``.
    """
    table = pd.DataFrame({'REGION': region_table1})

    for year in time_table2:

        sql = f"""
            SELECT adp.region, COUNT(*) AS Y{year}
            FROM (SELECT *
                  FROM apt_deal_price A
                  WHERE A.apt_name LIKE '%래미안%' or
                        A.apt_name LIKE '%레미안%') adp
            WHERE to_char(adp.contract_date, 'YYYY') = '{year}' and
                  adp.apt_name NOT LIKE '%대동레미안%' and
                  adp.apt_name NOT IN ('레미안블루')
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        table = table.merge(get_data_from_db(sql), how='left')

    # Lower-case the column names, zero-fill the gaps and cast every year
    # column to int64 while 'region' sits in the index.
    table = (
        table.rename(columns=str.lower)
        .fillna(0)
        .set_index('region')
        .astype('int64')
        .reset_index()
    )

    table.info()
    table.to_csv('sub-data-files/raemian_region_deal_count.csv', index=False, encoding='utf-8-sig')

In [52]:
# make_raemian_region_deal_count()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17 entries, 0 to 16
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   region  17 non-null     object
 1   y2012   17 non-null     int64 
 2   y2013   17 non-null     int64 
 3   y2014   17 non-null     int64 
 4   y2015   17 non-null     int64 
 5   y2016   17 non-null     int64 
 6   y2017   17 non-null     int64 
 7   y2018   17 non-null     int64 
 8   y2019   17 non-null     int64 
 9   y2020   17 non-null     int64 
dtypes: int64(9), object(1)
memory usage: 1.5+ KB


In [53]:
# Read sub-data-files/raemian_region_deal_count.csv back in and display it.
testD = pd.read_csv('sub-data-files/raemian_region_deal_count.csv')
testD

Unnamed: 0,region,y2012,y2013,y2014,y2015,y2016,y2017,y2018,y2019,y2020
0,강원도,0,0,0,0,0,0,0,0,0
1,경기도,550,2003,2127,2614,2645,2742,2893,2574,1961
2,경상남도,0,0,0,0,0,0,0,0,0
3,경상북도,0,0,0,0,0,0,0,0,0
4,광주광역시,0,0,0,0,0,0,0,0,0
5,대구광역시,481,745,592,570,332,526,531,511,300
6,대전광역시,43,159,142,200,203,136,126,225,89
7,부산광역시,0,0,19,40,45,43,45,204,156
8,서울특별시,802,3338,4420,6324,6668,6873,4598,4979,2666
9,세종특별자치시,2,10,47,81,122,121,93,91,58


In [51]:
# Raemian apartments: average price per pyeong per region for each year
def make_raemian_region_avg_price_per_pyung():
    """Build the region x year average price-per-pyeong table for Raemian apartments.

    Rows are the regions of ``region_table1``. For each year in
    ``time_table2`` a query averages ``contract_price / (apt_size / 3.3)``
    over deals whose apartment name contains '래미안' or '레미안', excluding
    names that contain '대동레미안' and the name '레미안블루', and the
    rounded result is left-joined onto the region list. Missing values become
    0, the year columns are cast to int64, and the table is written to
    ``sub-data-files/raemian_region_avg_price_per_pyung.csv``.
    """
    table = pd.DataFrame({'REGION': region_table1})

    for year in time_table2:

        sql = f"""
            SELECT adp.region, round(AVG(adp.price_per_pyung)) AS Y{year}
            FROM (SELECT A.region, A.apt_name, A.contract_date,
                         (A.contract_price / (A.apt_size / 3.3)) price_per_pyung
                  FROM apt_deal_price A
                  WHERE A.apt_name LIKE '%래미안%' or
                        A.apt_name LIKE '%레미안%') adp
            WHERE to_char(adp.contract_date, 'YYYY') = '{year}' and
                  adp.apt_name NOT LIKE '%대동레미안%' and
                  adp.apt_name NOT IN ('레미안블루')
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        table = table.merge(get_data_from_db(sql), how='left')

    # Lower-case the column names, zero-fill the gaps and cast every year
    # column to int64 while 'region' sits in the index.
    table = (
        table.rename(columns=str.lower)
        .fillna(0)
        .set_index('region')
        .astype('int64')
        .reset_index()
    )

    table.info()
    table.to_csv('sub-data-files/raemian_region_avg_price_per_pyung.csv', index=False, encoding='utf-8-sig')

In [54]:
# make_raemian_region_avg_price_per_pyung()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17 entries, 0 to 16
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   region  17 non-null     object
 1   y2012   17 non-null     int64 
 2   y2013   17 non-null     int64 
 3   y2014   17 non-null     int64 
 4   y2015   17 non-null     int64 
 5   y2016   17 non-null     int64 
 6   y2017   17 non-null     int64 
 7   y2018   17 non-null     int64 
 8   y2019   17 non-null     int64 
 9   y2020   17 non-null     int64 
dtypes: int64(9), object(1)
memory usage: 1.5+ KB


In [55]:
# Read the exported raemian_region_avg_price_per_pyung.csv back in and show it
# as the cell output, as a visual check of the saved file.
testD = pd.read_csv('sub-data-files/raemian_region_avg_price_per_pyung.csv')
testD

Unnamed: 0,region,y2012,y2013,y2014,y2015,y2016,y2017,y2018,y2019,y2020
0,강원도,0,0,0,0,0,0,0,0,0
1,경기도,16366634,15880318,16322448,17657037,19608343,20458745,22017982,25684198,23678894
2,경상남도,0,0,0,0,0,0,0,0,0
3,경상북도,0,0,0,0,0,0,0,0,0
4,광주광역시,0,0,0,0,0,0,0,0,0
5,대구광역시,8830004,9453744,11001934,12364259,12257492,12826133,13582779,13471369,14286239
6,대전광역시,8408909,8520085,8561376,8506557,8743648,9173932,9089481,9535104,11866046
7,부산광역시,0,0,15179772,17695371,20137807,23075709,22773428,25042625,29677349
8,서울특별시,21564691,21916229,22790607,23997526,26552414,30148579,34501831,45069240,46929123
9,세종특별자치시,10298128,10819159,12001072,12119308,12779788,13517637,14427241,14970206,17341962


In [56]:
# Prugio apartments: number of sale transactions, by year and region
def make_prugio_region_deal_count():
    """Export the yearly number of Prugio apartment deals for each region.

    For every entry of ``time_table2`` a query counts the rows of
    ``apt_deal_price`` whose ``apt_name`` contains '푸르지오' and whose
    ``contract_date`` falls in that year, grouped by region. Each result is
    left-merged as a ``Y<year>`` column onto a frame listing every name in
    ``region_table1``; regions with no rows get 0 and the year columns are cast
    to ``int64``.

    Returns:
        None. A ``DataFrame.info()`` summary is printed and the table is written
        to ``sub-data-files/prugio_region_deal_count.csv``.
    """
    # Start from the complete region list so regions without any deal keep a row.
    yearly_wide = pd.DataFrame({'REGION': region_table1})

    for year in time_table2:
        query = f"""
            SELECT adp.region, COUNT(*) AS Y{year}
            FROM (SELECT *
                  FROM apt_deal_price A
                  WHERE A.apt_name LIKE '%푸르지오%') adp
            WHERE to_char(adp.contract_date, 'YYYY') = '{year}'
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        # No `on=` is passed, so the join key is whatever columns both frames
        # share -- presumably only REGION; verify against get_data_from_db.
        yearly_wide = yearly_wide.merge(get_data_from_db(query), how='left')

    # Lower-case the headers, zero-fill regions with no deals, and cast every
    # year column to int64 while 'region' is parked in the index.
    count_table = (
        yearly_wide
        .rename(columns=str.lower)
        .fillna(0)
        .set_index('region')
        .astype('int64')
        .reset_index()
    )

    count_table.info()
    count_table.to_csv('sub-data-files/prugio_region_deal_count.csv', index=False, encoding='utf-8-sig')

In [57]:
# make_prugio_region_deal_count()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17 entries, 0 to 16
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   region  17 non-null     object
 1   y2012   17 non-null     int64 
 2   y2013   17 non-null     int64 
 3   y2014   17 non-null     int64 
 4   y2015   17 non-null     int64 
 5   y2016   17 non-null     int64 
 6   y2017   17 non-null     int64 
 7   y2018   17 non-null     int64 
 8   y2019   17 non-null     int64 
 9   y2020   17 non-null     int64 
dtypes: int64(9), object(1)
memory usage: 1.5+ KB


In [58]:
# Read the exported prugio_region_deal_count.csv back in and show it as the
# cell output, as a visual check of the saved file.
testD = pd.read_csv('sub-data-files/prugio_region_deal_count.csv')
testD

Unnamed: 0,region,y2012,y2013,y2014,y2015,y2016,y2017,y2018,y2019,y2020
0,강원도,6,22,27,27,30,17,44,71,74
1,경기도,794,2480,3350,3500,2996,3004,3833,4932,5564
2,경상남도,351,1096,1272,1307,945,749,795,1321,953
3,경상북도,187,468,431,353,224,290,274,587,540
4,광주광역시,9,26,46,27,23,28,20,20,8
5,대구광역시,377,1095,1028,671,403,870,800,573,411
6,대전광역시,36,89,95,91,148,122,156,210,111
7,부산광역시,65,314,392,677,586,398,590,600,570
8,서울특별시,433,1315,1945,2583,2700,2777,2196,2060,1181
9,세종특별자치시,45,96,105,192,313,389,232,348,383


In [59]:
# Prugio apartments: average price per pyeong, by year and region
def make_prugio_region_avg_price_per_pyung():
    """Export the yearly average price per pyeong of Prugio apartments per region.

    For every entry of ``time_table2`` a query takes the rows of
    ``apt_deal_price`` whose ``apt_name`` contains '푸르지오' and returns, per
    region, the rounded average of ``contract_price / (apt_size / 3.3)`` for
    that year as a ``Y<year>`` column. The yearly results are left-merged onto
    a frame listing every name in ``region_table1``; missing values become 0
    and the year columns are cast to ``int64``.

    Returns:
        None. A ``DataFrame.info()`` summary is printed and the table is written
        to ``sub-data-files/prugio_region_avg_price_per_pyung.csv``.
    """
    # Start from the complete region list so regions without any match keep a row.
    yearly_wide = pd.DataFrame({'REGION': region_table1})

    for year in time_table2:
        # NOTE(review): dividing apt_size by 3.3 presumably converts square
        # metres to pyeong -- confirm the unit of apt_size.
        query = f"""
            SELECT adp.region, round(AVG(adp.price_per_pyung)) AS Y{year}
            FROM (SELECT A.region, A.apt_name, A.contract_date,
                         (A.contract_price / (A.apt_size / 3.3)) price_per_pyung
                  FROM apt_deal_price A
                  WHERE A.apt_name LIKE '%푸르지오%') adp
            WHERE to_char(adp.contract_date, 'YYYY') = '{year}'
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        # No `on=` is passed, so the join key is whatever columns both frames
        # share -- presumably only REGION; verify against get_data_from_db.
        yearly_wide = yearly_wide.merge(get_data_from_db(query), how='left')

    # Lower-case the headers, zero-fill regions with no data, and cast every
    # year column to int64 while 'region' is parked in the index.
    price_table = (
        yearly_wide
        .rename(columns=str.lower)
        .fillna(0)
        .set_index('region')
        .astype('int64')
        .reset_index()
    )

    price_table.info()
    price_table.to_csv('sub-data-files/prugio_region_avg_price_per_pyung.csv', index=False, encoding='utf-8-sig')

In [62]:
# make_prugio_region_avg_price_per_pyung()

In [61]:
# Read the exported prugio_region_avg_price_per_pyung.csv back in and show it
# as the cell output, as a visual check of the saved file.
testD = pd.read_csv('sub-data-files/prugio_region_avg_price_per_pyung.csv')
testD

Unnamed: 0,region,y2012,y2013,y2014,y2015,y2016,y2017,y2018,y2019,y2020
0,강원도,6953327,6719189,6996597,7301921,7879927,8390000,9960774,10297607,10419113
1,경기도,12317257,12835762,13262258,14146030,15030926,16609604,17187648,17890422,18524093
2,경상남도,8972833,9178008,9510955,9991416,10377447,10574611,11080551,10972761,11625285
3,경상북도,7985657,8699260,9430485,9653580,9350804,9281422,9116952,9495687,9634937
4,광주광역시,9447889,9955016,10612557,11512458,12358420,13292842,15490842,18295093,18878699
5,대구광역시,8763730,9383028,10632620,12610376,12628118,13103938,14151080,14724294,14919303
6,대전광역시,9720102,10025952,10324740,11017950,11514856,12276621,12810128,13371615,15192554
7,부산광역시,11580280,11546335,12026356,14121472,16213124,17397809,14963795,18782982,20697370
8,서울특별시,18770545,18443626,19550493,21068774,23920456,27322004,30128505,39261913,38867329
9,세종특별자치시,7228420,7167473,7936757,8868726,9870014,11085167,11240308,11453555,13874856


In [63]:
# Lotte Castle apartments: number of sale transactions, by year and region
def make_lottecastle_region_deal_count():
    """Export the yearly number of Lotte Castle apartment deals for each region.

    For every entry of ``time_table2`` a query counts the rows of
    ``apt_deal_price`` whose ``apt_name`` contains '롯데캐슬' and whose
    ``contract_date`` falls in that year, grouped by region. Each result is
    left-merged as a ``Y<year>`` column onto a frame listing every name in
    ``region_table1``; regions with no rows get 0 and the year columns are cast
    to ``int64``.

    Returns:
        None. A ``DataFrame.info()`` summary is printed and the table is written
        to ``sub-data-files/lottecastle_region_deal_count.csv``.
    """
    # Start from the complete region list so regions without any deal keep a row.
    yearly_wide = pd.DataFrame({'REGION': region_table1})

    for year in time_table2:
        query = f"""
            SELECT adp.region, COUNT(*) AS Y{year}
            FROM (SELECT *
                  FROM apt_deal_price A
                  WHERE A.apt_name LIKE '%롯데캐슬%') adp
            WHERE to_char(adp.contract_date, 'YYYY') = '{year}'
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        # No `on=` is passed, so the join key is whatever columns both frames
        # share -- presumably only REGION; verify against get_data_from_db.
        yearly_wide = yearly_wide.merge(get_data_from_db(query), how='left')

    # Lower-case the headers, zero-fill regions with no deals, and cast every
    # year column to int64 while 'region' is parked in the index.
    count_table = (
        yearly_wide
        .rename(columns=str.lower)
        .fillna(0)
        .set_index('region')
        .astype('int64')
        .reset_index()
    )

    count_table.info()
    count_table.to_csv('sub-data-files/lottecastle_region_deal_count.csv', index=False, encoding='utf-8-sig')

In [66]:
# make_lottecastle_region_deal_count()

In [65]:
# Read the exported lottecastle_region_deal_count.csv back in and show it as
# the cell output, as a visual check of the saved file.
testD = pd.read_csv('sub-data-files/lottecastle_region_deal_count.csv')
testD

Unnamed: 0,region,y2012,y2013,y2014,y2015,y2016,y2017,y2018,y2019,y2020
0,강원도,61,89,94,125,158,108,133,167,308
1,경기도,170,409,694,1617,1161,1002,1398,1362,1649
2,경상남도,16,59,94,104,54,59,287,139,140
3,경상북도,5,7,12,21,11,10,12,93,117
4,광주광역시,27,69,112,73,74,81,90,82,59
5,대구광역시,640,1313,797,778,369,761,750,624,414
6,대전광역시,0,0,0,0,0,0,0,0,0
7,부산광역시,456,1500,1905,2319,2147,1158,1003,1723,1416
8,서울특별시,288,929,1049,1639,1669,1637,1347,1279,767
9,세종특별자치시,0,0,0,0,0,0,0,0,0


In [67]:
# Lotte Castle apartments: average price per pyeong, by year and region
def make_lottecastle_region_avg_price_per_pyung():
    """Export the yearly average price per pyeong of Lotte Castle apartments per region.

    For every entry of ``time_table2`` a query takes the rows of
    ``apt_deal_price`` whose ``apt_name`` contains '롯데캐슬' and returns, per
    region, the rounded average of ``contract_price / (apt_size / 3.3)`` for
    that year as a ``Y<year>`` column. The yearly results are left-merged onto
    a frame listing every name in ``region_table1``; missing values become 0
    and the year columns are cast to ``int64``.

    Returns:
        None. A ``DataFrame.info()`` summary is printed and the table is written
        to ``sub-data-files/lottecastle_region_avg_price_per_pyung.csv``.
    """
    # Start from the complete region list so regions without any match keep a row.
    yearly_wide = pd.DataFrame({'REGION': region_table1})

    for year in time_table2:
        # NOTE(review): dividing apt_size by 3.3 presumably converts square
        # metres to pyeong -- confirm the unit of apt_size.
        query = f"""
            SELECT adp.region, round(AVG(adp.price_per_pyung)) AS Y{year}
            FROM (SELECT A.region, A.apt_name, A.contract_date,
                         (A.contract_price / (A.apt_size / 3.3)) price_per_pyung
                  FROM apt_deal_price A
                  WHERE A.apt_name LIKE '%롯데캐슬%') adp
            WHERE to_char(adp.contract_date, 'YYYY') = '{year}'
            GROUP BY adp.region
            ORDER BY adp.region ASC
            """

        # No `on=` is passed, so the join key is whatever columns both frames
        # share -- presumably only REGION; verify against get_data_from_db.
        yearly_wide = yearly_wide.merge(get_data_from_db(query), how='left')

    # Lower-case the headers, zero-fill regions with no data, and cast every
    # year column to int64 while 'region' is parked in the index.
    price_table = (
        yearly_wide
        .rename(columns=str.lower)
        .fillna(0)
        .set_index('region')
        .astype('int64')
        .reset_index()
    )

    price_table.info()
    price_table.to_csv('sub-data-files/lottecastle_region_avg_price_per_pyung.csv', index=False, encoding='utf-8-sig')

In [68]:
# make_lottecastle_region_avg_price_per_pyung()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17 entries, 0 to 16
Data columns (total 10 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   region  17 non-null     object
 1   y2012   17 non-null     int64 
 2   y2013   17 non-null     int64 
 3   y2014   17 non-null     int64 
 4   y2015   17 non-null     int64 
 5   y2016   17 non-null     int64 
 6   y2017   17 non-null     int64 
 7   y2018   17 non-null     int64 
 8   y2019   17 non-null     int64 
 9   y2020   17 non-null     int64 
dtypes: int64(9), object(1)
memory usage: 1.5+ KB


In [69]:
# Read the exported lottecastle_region_avg_price_per_pyung.csv back in and show
# it as the cell output, as a visual check of the saved file.
testD = pd.read_csv('sub-data-files/lottecastle_region_avg_price_per_pyung.csv')
testD

Unnamed: 0,region,y2012,y2013,y2014,y2015,y2016,y2017,y2018,y2019,y2020
0,강원도,6465095,7074002,7250960,7426996,8619124,10381692,11314162,10238662,11358803
1,경기도,13036535,11417251,13039280,14787668,15698581,15909431,16287429,18280776,18319323
2,경상남도,8122420,8529985,8089422,8415689,8878402,8928563,9304985,10749417,11787613
3,경상북도,8830888,8814264,8525089,8903110,9279923,9315884,9151955,10986920,12544072
4,광주광역시,7345022,7679655,8430480,10014987,9753359,9810347,10525479,11292471,10959039
5,대구광역시,8583699,8863110,10977041,12154845,13224278,14155718,15300286,16648230,16378163
6,대전광역시,0,0,0,0,0,0,0,0,0
7,부산광역시,10930125,10844429,11330428,11872258,13209165,14363002,14286778,15785436,17022813
8,서울특별시,22205397,22417828,23407774,23887212,25220645,28424578,31236775,37478582,40127734
9,세종특별자치시,0,0,0,0,0,0,0,0,0
