In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import folium 
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/seoul-apt-info/seoul_apt_info.csv


### Intro
* In this note, I create interactive maps using a data set that includes the prices of seoul apartments.
> * These maps show the distribution of APT complexes whose transaction volumes are greater than certain levels and their average prices.
* The package used: `panda` and `folium`

### Import data

* We use a data set that includes transaction prices of seoul apartments from Jan 2021 to May 2022 and their location information (latitude and longitude). 


In [2]:
# import data
dat = pd.read_csv("../input/seoul-apt-info/seoul_apt_info.csv")
dat = dat.drop(["Unnamed: 0"], axis = 1)
dat.head()

Unnamed: 0,address,size,yearmon,price,floor,const_year,lat,lng
0,서울특별시 강남구 개포동 언주로 103 개포2차현대아파트(220),77.75,202104,174000,9,1988,37.48263,127.051104
1,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,202105,195000,5,1987,37.476658,127.056909
2,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,202108,200000,3,1987,37.476658,127.056909
3,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,202109,220000,2,1987,37.476658,127.056909
4,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,202112,215000,3,1987,37.476658,127.056909


### Process data

1. Calculate average price for each APT complex, year and month.
2. Calculate transaction volume for each APT complex and reference periods (for last 3months/6months/1year).

In [3]:
dat.dtypes

address        object
size          float64
yearmon         int64
price           int64
floor           int64
const_year      int64
lat           float64
lng           float64
dtype: object

In [4]:
def split_yr_mon(row):
    x = str(row["yearmon"])[0:4]
    x = int(x)
    return x
dat['year'] = dat.apply(lambda row: split_yr_mon(row), axis = 1)

In [5]:
dat.head()

Unnamed: 0,address,size,yearmon,price,floor,const_year,lat,lng,year
0,서울특별시 강남구 개포동 언주로 103 개포2차현대아파트(220),77.75,202104,174000,9,1988,37.48263,127.051104,2021
1,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,202105,195000,5,1987,37.476658,127.056909,2021
2,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,202108,200000,3,1987,37.476658,127.056909,2021
3,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,202109,220000,2,1987,37.476658,127.056909,2021
4,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,202112,215000,3,1987,37.476658,127.056909,2021


In [6]:
def split_yr_mon2(row):
    x = str(row["yearmon"])[4:6]
    x = int(x)
    return x
dat['month'] = dat.apply(lambda row: split_yr_mon2(row), axis = 1)

In [7]:
dat.head()

Unnamed: 0,address,size,yearmon,price,floor,const_year,lat,lng,year,month
0,서울특별시 강남구 개포동 언주로 103 개포2차현대아파트(220),77.75,202104,174000,9,1988,37.48263,127.051104,2021,4
1,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,202105,195000,5,1987,37.476658,127.056909,2021,5
2,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,202108,200000,3,1987,37.476658,127.056909,2021,8
3,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,202109,220000,2,1987,37.476658,127.056909,2021,9
4,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,202112,215000,3,1987,37.476658,127.056909,2021,12


In [8]:
dat.drop(["yearmon"], axis = 1, inplace = True)

In [9]:
# create yearmon column: year + month
def create_yearmon(row):
    x= str(row["year"])+"-"+ str(row["month"])
    return x
dat['yearmon'] = dat.apply(lambda row: create_yearmon(row), axis = 1)
dat.head()

Unnamed: 0,address,size,price,floor,const_year,lat,lng,year,month,yearmon
0,서울특별시 강남구 개포동 언주로 103 개포2차현대아파트(220),77.75,174000,9,1988,37.48263,127.051104,2021,4,2021-4
1,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,195000,5,1987,37.476658,127.056909,2021,5,2021-5
2,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,200000,3,1987,37.476658,127.056909,2021,8,2021-8
3,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,220000,2,1987,37.476658,127.056909,2021,9,2021-9
4,서울특별시 강남구 개포동 언주로 3 개포6차우성아파트1동~8동,79.97,215000,3,1987,37.476658,127.056909,2021,12,2021-12


In [10]:
# calculate average transaction price for each apt complex
def calcul_avgprice(year0, month0, year1, month1):
    dat1= dat[(dat["year"]>=year0) & (dat["month"]>= month0)] 
    dat1 = dat1[(dat1["year"]<=year1) & (dat1["month"]<= month1)]
    dat1["avgprice"]= dat1.groupby("address")["price"].transform("mean")
    return dat1


## calculate average transaction price for each apt complex and yearmon
# dat["avgprice"] = dat.groupby(["address", "yearmon"])["price"].transform("mean")
## calculate average transaction price for each apt complex and year
# dat["yr_avgprice"] = dat.groupby(["address", "year"])["price"].transform("mean")



In [11]:
# calculate transaction volume for each apt complex and periods (3months/6months/1year)
def count_past_sales(series, intval = "90D"):
    series2 = pd.to_datetime(series)
    sale_time = pd.Series(series2.index, index=series2, name='count_sales_this_month').sort_index() # exchange the positions of index and values
    count_sales = sale_time.rolling(intval, min_periods=1).count()
    count_sales = count_sales.groupby(count_sales.index.month).transform('last')
    return count_sales

# transaction volume for last 3 months/6months/1year
aa=dat.groupby('address')["yearmon"].apply(count_past_sales)
bb=dat.groupby('address')["yearmon"].apply(lambda series: count_past_sales(series, intval = "180D"))
cc=dat.groupby('address')["yearmon"].apply(lambda series: count_past_sales(series, intval = "365D"))

# put index
aa.index=dat.sort_values(by=['address','year', "month"]).index
bb.index=dat.sort_values(by=['address','year', "month"]).index
cc.index=dat.sort_values(by=['address','year', "month"]).index

dat['num_sales_last_3months']=aa
dat['num_sales_last_6months']=bb
dat['num_sales_last_12months']=cc

In [12]:
# drop observations with inappropriate location info
dat.drop(dat[dat["lng"].isnull() == True].index,axis = 0, inplace = True)
dat.drop(dat[dat["lat"].isnull() == True].index,axis = 0, inplace = True)

In [13]:

def assign_values(pr_col="avgprice"):
    if pr_col == "avgprice":
        return  0, 50000,100000,150000,200000
    elif pr_col in ["num_sales_last_3months", "num_sales_last_6months", "num_sales_last_12months"]:
        return 0,10,20,30,40
# create a column: "color" using price information
def colouring(row, pr_col="avgprice"):
    a0, a1, a2, a3, a4 = assign_values(pr_col)
    if (row[pr_col] > a0) and (row[pr_col] <= a1): # less than $500k 
        return "beige"
    elif (row[pr_col] > a1) and (row[pr_col] <= a2): # more than $500k and less than $1m
        return "orange"
    elif (row[pr_col] > a2) and (row[pr_col] <= a3): # more than $1m and less than $1.5m
        return "red"
    elif (row[pr_col] > a3) and (row[pr_col] <= a4): # more than $1.5m and less than $2m
        return "darkred"
    return "darkpurple"


### Default setting for an interactive map

In [14]:
# define a map
def initialize_map():
    my_map = folium.Map(
        location = [37.544294, 127.0016985], 
        width="%40",
        height="%35",
        zoom_start= 12)
    return my_map


### Create interactive maps

In [15]:
# Function for making a subdat set
def mapping(nums = "num_sales_last_3months", min_transvol = 10, color_col= "avgprice", yr0 = 2021, yr1 = 2022, mon0=1, mon1 = 5):
    # calculate avg prices per APT complex 
    dat = calcul_avgprice(yr0, mon0, yr1, mon1)
    # define color   
    dat["color"] = dat.apply(lambda row: colouring(row, pr_col = color_col), axis = 1)
    subdat = dat[(dat[nums]>min_transvol)]
    subdat = subdat.sort_values(by = ["address", "year", "month"])
    subdat["v_last_vol"] = subdat.groupby("address")[nums].transform("last")
    subdat = subdat.drop_duplicates(subset=['address'])
    subdat = subdat[(subdat["v_last_vol"]>min_transvol)]
    for _, apt in subdat.iterrows():
        folium.Marker(
        location = [apt["lat"], apt["lng"]],
        popup = round(apt[color_col],2),
        tooltip = apt["address"],
        icon = folium.Icon(color = apt["color"])).add_to(my_map)
    return(my_map)

* The interactive map below shows all apartment complexes whose transaction voloume were more than 50 for `last 3 months` as of last month. 
> * Only 7 APT complexes (mostly with low prices) were sold more than 50 times for last 3 months as of last month.
* The colour of icons of this map shows the level of average prices of the apartment complexes.
* The colour of icons generally shows (i) the level of  average prices of the apartment complexes or (ii) the level of transaction volume.
 
 - Average price
 > 1. beige: less than $\$500k$
 > 2. orange:  from $\$500k$ to $\$1m$
 > 3. red:  from $\$1m$ to $\$1.5m$
 > 4. darkred: from $\$1.5m$ to $\$2m$
 > 5. darkpurple: more than $\$2m$ 
 
 - Transaction volume
 > 1. beige: less than 10
 > 2. orange:  from 10 to 20
 > 3. red:  from 20 to 30
 > 4. darkred: from 30 to 40
 > 5. darkpurple: more than 40

In [16]:
my_map = initialize_map()
mapping(min_transvol =  50)

* The interactive map below shows all apartment complexes whose transaction voloume were more than 50 for `last 6 months` as of last month.
* The colour of icons of this map shows the level of average prices of the apartment complexes.



In [17]:
my_map = initialize_map()
mapping(nums="num_sales_last_6months", min_transvol =  50)

* The interactive map below shows all apartment complexes whose transaction voloume were more than 50 for `last 12 months` as of last month.
* The colour of icons of this map shows the level of average prices of the apartment complexes.



In [18]:
my_map = initialize_map()
mapping(nums="num_sales_last_12months", min_transvol =  50)