In [1]:
import pandas as pd
import pickle
from tqdm import tqdm
from glob import glob
import re

import cufflinks as cf
cf.go_offline()

## Import

In [2]:
# Load the two preprocessed room-info snapshots (file-name suffixes 220925 and 221201).
room_info_old, room_info_new = map(
    pd.read_csv,
    (
        'data_preprocessed/room_info_220925.csv',
        'data_preprocessed/room_info_221201.csv',
    ),
)

In [3]:
# Split each snapshot on 월세금액: rows that have a value go to monthly_*,
# rows where it is missing go to charter_*.
monthly_old = room_info_old.dropna(subset=['월세금액'])
charter_old = room_info_old.loc[room_info_old['월세금액'].isna()]

monthly_new = room_info_new.dropna(subset=['월세금액'])
charter_new = room_info_new.loc[room_info_new['월세금액'].isna()]

## 전세

In [4]:
def q_trim(df, col, q, all_columns=False):
    """Keep only the rows whose ``col`` value is at or below its ``q``-quantile.

    Args:
        df: DataFrame containing ``col``.
        col: name of the column the quantile cutoff is computed on.
        q: quantile passed to ``Series.quantile``.
        all_columns: when True return the filtered DataFrame; otherwise
            return only the filtered ``col`` Series.

    Returns:
        The filtered DataFrame, or the filtered ``col`` Series.
    """
    cutoff = df[col].quantile(q)
    kept = df[df[col] <= cutoff]
    return kept if all_columns else kept[col]

In [5]:
def make_histogram(df_old, df_new, col, col_show, bins=None, q=1, kind='histogram',
                   labels=('220925', '221201')):
    """Plot the distribution of ``col`` for two snapshots on one interactive chart.

    Args:
        df_old: DataFrame of the older snapshot; must contain ``col``.
        df_new: DataFrame of the newer snapshot; must contain ``col``.
        col: column whose values are plotted.
        col_show: display name used as the prefix of each trace label.
        bins: forwarded to ``iplot`` when truthy; in that case the traces
            are also drawn with ``opacity=0.6``.
        q: quantile cutoff handed to ``q_trim``; q=1 means the full range
            (nothing above the maximum to cut).
        kind: chart kind forwarded to ``iplot``.
        labels: ``(old, new)`` suffixes appended to ``col_show`` in the trace
            names. Defaults to the snapshot dates that used to be hard-coded.

    Returns:
        None. The chart is rendered as a side effect of ``iplot``.
    """
    old_label, new_label = labels
    trimmed = {
        f'{col_show}-{old_label}': q_trim(df_old, col, q).tolist(),
        f'{col_show}-{new_label}': q_trim(df_new, col, q).tolist(),
    }
    # The two snapshots generally have different lengths; wrapping each list
    # in a Series lets the DataFrame constructor pad the shorter one with NaN.
    traces = pd.DataFrame({name: pd.Series(values) for name, values in trimmed.items()})

    if bins:
        traces.iplot(kind=kind, bins=bins, opacity=0.6)
    else:
        traces.iplot(kind=kind)

In [6]:
# Mean 보증금액 of each charter snapshot (old first, then new) after dropping
# values above the 99th percentile.
for charter_snapshot in (charter_old, charter_new):
    print(q_trim(charter_snapshot, '보증금액', 0.99).mean())

22820.343460626005
20949.63170103093


In [7]:
# Untrimmed 보증금액 distribution of the charter listings, old vs. new snapshot.
make_histogram(df_old=charter_old, df_new=charter_new, col='보증금액', col_show='전세')

In [134]:
# Same comparison with 50 bins and values above the 99th percentile removed.
make_histogram(charter_old, charter_new, col='보증금액', col_show='전세', bins=50, q=0.99)

In [165]:
# Box-plot view of the trimmed charter 보증금액 values.
make_histogram(charter_old, charter_new, col='보증금액', col_show='전세',
               bins=50, q=0.99, kind='box')

## 월세

In [8]:
# Mean 월세금액 of each monthly snapshot after dropping values above the
# 99th percentile. Note the order here is new first, then old.
for monthly_snapshot in (monthly_new, monthly_old):
    print(q_trim(monthly_snapshot, '월세금액', 0.99).mean())

64.89144364869959


In [10]:
# The plotted column is 월세금액 (monthly rent), so the legend label is '월세'
# rather than '월세 보증금' (deposit), which named a different quantity.
make_histogram(monthly_old, monthly_new, '월세금액', '월세', bins=30, q=0.99)

In [11]:
# The plotted column is 월세금액 (monthly rent), so the legend label is '월세'
# rather than '월세 보증금' (deposit), which named a different quantity.
make_histogram(monthly_old, monthly_new, '월세금액', '월세', bins=30, q=0.99, kind='box')

## 구단위

In [None]:
# Preview of the per-local2 aggregation used for the bar plots: mean 보증금액
# of the old charter snapshot after dropping values above the 99th percentile.
# (The previous scratch line had an unbalanced ')' and referenced df_old/col,
# which only exist as function parameters.)
q_trim(charter_old, '보증금액', 0.99, True).groupby('local2')['보증금액'].mean()

In [16]:
def make_barplot(df_old, df_new, col, col_name, q=0.99, labels=('220925', '221201')):
    """Bar-plot the per-``local2`` mean of ``col`` for two snapshots side by side.

    Args:
        df_old: DataFrame of the older snapshot; needs ``col`` and ``local2``.
        df_new: DataFrame of the newer snapshot; needs ``col`` and ``local2``.
        col: numeric column to average.
        col_name: display name used as the prefix of each value column.
        q: quantile cutoff handed to ``q_trim`` before averaging
            (default 0.99, the value that used to be hard-coded).
        labels: ``(old, new)`` suffixes appended to ``col_name``. Defaults to
            the snapshot dates that used to be hard-coded.

    Returns:
        DataFrame with columns ``지역``, ``{col_name}-{old}``, ``{col_name}-{new}``.
        The merge is pandas' default inner join, so a ``local2`` value that is
        present in only one snapshot is dropped.
    """
    def _mean_by_region(df, label):
        # Pick `col` *before* aggregating: averaging the whole frame first
        # raises TypeError on pandas >= 2.0 when other columns are non-numeric,
        # and computes means that are thrown away anyway.
        means = q_trim(df, col, q, True).groupby('local2')[col].mean().reset_index()
        means.columns = ['지역', f'{col_name}-{label}']
        return means

    old_label, new_label = labels
    df_merged = pd.merge(
        _mean_by_region(df_old, old_label),
        _mean_by_region(df_new, new_label),
        on='지역',
    )
    df_merged.iplot(kind='bar', x='지역')
    return df_merged

In [17]:
# Per-region mean 보증금액 of the charter listings, old vs. new snapshot.
df_merged = make_barplot(df_old=charter_old, df_new=charter_new,
                         col='보증금액', col_name='전세')

In [19]:
# gap = third column minus second column (new-snapshot mean minus old-snapshot
# mean); the sorted frame is displayed with the most negative gap first.
df_merged['gap'] = df_merged.iloc[:, 2].sub(df_merged.iloc[:, 1])
df_merged.sort_values(by='gap')

In [21]:
# Per-region mean 월세금액 of the monthly listings, old vs. new snapshot.
# Note: this rebinds df_merged, replacing the charter result.
df_merged = make_barplot(df_old=monthly_old, df_new=monthly_new,
                         col='월세금액', col_name='월세')

In [22]:
# gap = third column minus second column (new-snapshot mean minus old-snapshot
# mean); the sorted frame is displayed with the most negative gap first.
df_merged['gap'] = df_merged.iloc[:, 2].sub(df_merged.iloc[:, 1])
df_merged.sort_values(by='gap')

Unnamed: 0,지역,월세-220925,월세-221201,gap
12,마포구,78.201299,66.985586,-11.215713
13,서대문구,72.382184,65.470486,-6.911698
0,강남구,116.10507,110.7414,-5.36367
15,성동구,55.995575,54.17971,-1.815865
20,용산구,89.242291,88.620818,-0.621473
8,노원구,41.979021,42.180064,0.201043
14,서초구,95.399151,96.578584,1.179433
6,구로구,46.207581,48.674699,2.467118
5,광진구,58.32358,61.46134,3.13776
17,송파구,76.576826,79.794023,3.217197
