In [1]:
import pandas as pd
import requests
import os
from datetime import date
from dotenv import load_dotenv

# Ministry of Land, Infrastructure, Transport and Tourism (MLIT)
### Real estate price (transaction price/contract price) information acquisition

API manual: https://www.reinfolib.mlit.go.jp/help/apiManual/#titleApi4

In [2]:
# Configuration: credentials, endpoint and query range for the MLIT API.

# load_dotenv() is called first so that a key kept in a .env file is visible
# through the process environment.
load_dotenv()
api_key = os.environ.get("MLIT_API_KEY")

# An unset or empty key counts as missing; stop here before any request is made.
if api_key:
    print("API Key loaded successfully.")
else:
    raise ValueError("API Key not found. Please check your .env file.")

# Endpoint and subscription header sent with every request.
base_url = "https://www.reinfolib.mlit.go.jp/ex-api/external/XIT001"
headers = {"Ocp-Apim-Subscription-Key": api_key}

# Query settings. Despite the names, from_date / to_date hold year numbers.
pref_code = "13"  # Tokyo Prefecture
from_date = 2010
to_date = date.today().year + 1  # one past the current year

API Key loaded successfully.


In [3]:
# data fetch and aggregation
#
# Sends one request per year in range(from_date, to_date) for the configured
# prefecture and accumulates the records found under the "data" key of each
# JSON response into a single DataFrame.

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get can block indefinitely on a stalled connection.
REQUEST_TIMEOUT_S = 60

all_transactions = []

for year in range(from_date, to_date):

    params = {
        "area": pref_code,
        "year": year,
        "language": "en",
    }

    response = requests.get(
        base_url,
        headers=headers,
        params=params,
        timeout=REQUEST_TIMEOUT_S,
    )
    # Surface HTTP errors (e.g. a rejected key or a server error) with their
    # real status code instead of an opaque KeyError / JSON decode error below.
    response.raise_for_status()
    all_transactions.extend(response.json()["data"])

# Build the frame once from the accumulated records.
df = pd.DataFrame(all_transactions)
df.head()

Unnamed: 0,PriceCategory,Type,Region,MunicipalityCode,Prefecture,Municipality,DistrictName,TradePrice,PricePerUnit,FloorPlan,...,Direction,Classification,Breadth,CityPlanning,CoverageRatio,FloorAreaRatio,Period,Renovation,Remarks,DistrictCode
0,Real Estate Transaction Price Information,"Pre-owned Condominiums, etc.",,13101,Tokyo,Chiyoda Ward,Iidabashi,47000000,,3DK,...,,,,Commercial Zone,80,600,2nd quarter 2010,Done,,131010010
1,Real Estate Transaction Price Information,"Pre-owned Condominiums, etc.",,13101,Tokyo,Chiyoda Ward,Iidabashi,21000000,,1DK,...,,,,Commercial Zone,80,700,2nd quarter 2010,Done,,131010010
2,Real Estate Transaction Price Information,"Pre-owned Condominiums, etc.",,13101,Tokyo,Chiyoda Ward,Ichibancho,63000000,,1LDK,...,,,,Commercial Zone,80,500,2nd quarter 2010,Not yet,,131010020
3,Real Estate Transaction Price Information,"Pre-owned Condominiums, etc.",,13101,Tokyo,Chiyoda Ward,Ichibancho,35000000,,2LDK,...,,,,Category II Residential Zone,60,400,2nd quarter 2010,Not yet,,131010020
4,Real Estate Transaction Price Information,"Pre-owned Condominiums, etc.",,13101,Tokyo,Chiyoda Ward,Ichibancho,170000000,,3LDK,...,,,,Category II Residential Zone,80,500,2nd quarter 2010,Not yet,,131010020


In [4]:
# data write to parquet
#
# Persists the aggregated transactions without the DataFrame index. A failed
# write is logged and then re-raised so the cell visibly fails instead of
# reporting only a printed message while the notebook carries on.

output_path = '../data/tokyo.parquet'

try:
    df.to_parquet(output_path, index=False)
except Exception as e:
    print(f"error writing to parquet: {e}")
    raise  # do not swallow the failure: the file was not written
else:
    # Only reached when the write succeeded.
    print(f"successfuly wrote {df.shape[0]} rows and {df.shape[1]} columns to parquet file: {output_path}")


successfuly wrote 607756 rows and 29 columns to parquet file: ../data/tokyo.parquet
