In [None]:
!pip install numpy
!pip install pandas
!pip install matplotlib
!pip install scikit-learn

!pip install lightgbm
!pip install koreanize-matplotlib
!pip install python-dotenv

!pip install fire
!pip install wandb
!pip install icecream 
!pip install tqdm

!pip install numpy pandas matplotlib koreanize-matplotlib python-dotenv fire wandb icecream tqdm lightgbm scikit-learn

In [None]:
import os
import sys
from dotenv import load_dotenv, dotenv_values

# Read the .env values and, when PYTHONPATH is defined there and not already
# on sys.path, append it (presumably so the `src.*` imports below resolve).
if (python_path := dotenv_values().get('PYTHONPATH')) and python_path not in sys.path: sys.path.append(python_path)


# Required libraries
import fire
from icecream import ic
import wandb

import numpy as np
import pandas as pd

from src.utils.constant import Models
from src.dataset.house_pricing import get_datasets
from src.inference.inference import (load_checkpoint, init_model, inference)


def run_train(model_name, batch_size=1, num_epochs=1):
    """Train, evaluate, test and save the model registered under *model_name*.

    Args:
        model_name: Name of a member of ``Models`` (looked up upper-cased).
            It is first checked with ``Models.validation``.
        batch_size: Accepted for interface compatibility; not used in this
            function body.
        num_epochs: Forwarded unchanged to ``model.save_model``.

    The train/validation/test losses are printed, then the model is saved
    together with the training dataset's scaler and label encoders.
    """
    # Check the model name before doing any work.
    Models.validation(model_name)

    # Load the three dataset splits.
    splits = get_datasets()
    train_dataset, val_dataset, test_dataset = splits

    # Constructor parameters (deep-learning style signature).
    model_params = dict(
        input_dim=train_dataset.features_dim,
        num_classes=None,
        hidden_dim=64,
    )
    dataset_kwargs = {
        "train_dataset": train_dataset,
        "val_dataset": val_dataset,
        "test_dataset": test_dataset,
    }

    # Resolve the enum member to its model class and instantiate it.
    model_class = Models[model_name.upper()].value
    model = model_class(**model_params, **dataset_kwargs)

    # Run the three phases in order; only the loss of each phase is reported.
    train_loss = model.train_lgbm()
    val_loss, _ = model.evaluate()
    test_loss, _predictions = model.test()

    report = (
        ("train_loss=", train_loss),
        ("val_loss=", val_loss),
        ("test_loss=", test_loss),
    )
    for label, value in report:
        print(label, value)

    model.save_model(
        model_params,
        num_epochs,
        train_loss,
        train_dataset.scaler,
        train_dataset.label_encoders,
    )

def run_inference(data=None, batch_size=64):
    """Load the saved checkpoint, run inference on *data* and print the result.

    Args:
        data: Rows of feature values, converted with ``np.array`` before
            being handed to ``inference``. When ``None``, three built-in
            sample rows of 25 numeric values each are used.
        batch_size: Forwarded unchanged to ``inference``.
    """
    # Restore the model and its preprocessing objects from the checkpoint.
    model, scaler, label_encoders = init_model(load_checkpoint())

    if data is None:
        # Built-in sample rows used when the caller supplies no data.
        sample_a = [
            30.0, 1.0, 938.0, 114.84, 9.0,
            15.0, 2000.0, 1804.0, 135.0, 1.0,
            976.0, 327.0, 240.0, 0.0, 1140.0,
            126.900834, 37.5296467, 137000.0, 19.0, 53.0,
            2023.0, 4.0, 1.0, 1.0, 1.0,
        ]
        sample_b = [
            1317.0, 0.0, 4288.0, 84.86, 27.0,
            11.0, 2011.0, 5117.0, 1184.0, 0.0,
            345.221705, 78.0, 74.9002584, 0.0, 337.051163,
            126.829869, 37.5107697, 35700.0, 18.0, 185.0,
            2012.0, 4.0, 0.0, 1.0, 0.0,
        ]
        sample_c = [
            54.0, 7.0, 3475.0, 57.33, 13.0,
            3.0, 1997.0, 6714.0, 1184.0, 0.0,
            678.993095, 78.0, 56.8888889, 2775.57996, 774.42059,
            127.013688, 37.5157083, 50000.0, 14.0, 256.0,
            2011.0, 6.0, 1.0, 1.0, 1.0,
        ]
        data = [sample_a, sample_b, sample_c]

    features = np.array(data)

    price = inference(model, scaler, label_encoders, features, batch_size)
    print(price)

if __name__ == "__main__":  # e.g. `python main.py`
    # Training entry point, kept for reference:
    # fire.Fire(run_train, command=['--model_name', 'house_price_predictor'])

    # An explicit empty command list is passed, so run_inference is invoked
    # with its default arguments.
    fire.Fire(component=run_inference, command=[])



  return pd.read_csv(config.HOUSE_PRICING_DATA)


숫자형 변수: ['본번', '부번', '전용면적(㎡)', '계약일', '층', '건축년도', 'k-전체세대수', 'k-85㎡~135㎡이하', '건축면적', '주차대수', '좌표X', '좌표Y', '계약년', '계약월', '강남여부', '버스정류장유무', '지하철역유무']
문자형 변수: ['아파트명', '도로명', 'k-전화번호', 'k-난방방식', 'k-건설사(시공사)', '구', '동']




[100]	train's l2: 4.79442e+07
[200]	train's l2: 3.2307e+07
[300]	train's l2: 2.47648e+07
[400]	train's l2: 2.05639e+07
[500]	train's l2: 1.76684e+07
[600]	train's l2: 1.5566e+07
[700]	train's l2: 1.40123e+07
[800]	train's l2: 1.2715e+07
[900]	train's l2: 1.16791e+07
[1000]	train's l2: 1.08041e+07
[1100]	train's l2: 1.00229e+07
[1200]	train's l2: 9.34058e+06
[1300]	train's l2: 8.76229e+06
[1400]	train's l2: 8.2529e+06
[1500]	train's l2: 7.77154e+06
[1600]	train's l2: 7.36683e+06
[1700]	train's l2: 6.98019e+06
[1800]	train's l2: 6.62326e+06
[1900]	train's l2: 6.28824e+06
[2000]	train's l2: 5.97958e+06
[2100]	train's l2: 5.69779e+06
[2200]	train's l2: 5.45144e+06
[2300]	train's l2: 5.2297e+06
[2400]	train's l2: 5.011e+06
[2500]	train's l2: 4.80037e+06
[2600]	train's l2: 4.598e+06
[2700]	train's l2: 4.41805e+06
[2800]	train's l2: 4.23011e+06
[2900]	train's l2: 4.06015e+06
[3000]	train's l2: 3.90119e+06
[3100]	train's l2: 3.7621e+06
[3200]	train's l2: 3.63135e+06
[3300]	train's l2: 3.50247e