<a href="https://colab.research.google.com/github/kentaterasaki/PIE_EXP_1/blob/main/catboost_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

CatBoost is particularly well known for handling categorical data natively, which makes it efficient on datasets with many categorical features; such datasets are common in business, finance, and other real-world applications.


In [1]:
!pip install catboost
!pip install joblib


Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


In [2]:
from catboost import CatBoostRegressor
import joblib
import logging

# Notebook-wide logger: the root logger, so every message below shares one config
logger = logging.getLogger()
# Ask for INFO-level output (basicConfig does nothing if the root logger
# already has handlers attached)
logging.basicConfig(level=logging.INFO)

class CatBoostRegressorModel:
    """
    Thin logging wrapper around ``catboost.CatBoostRegressor``.

    The wrapper stores the constructor keyword arguments in ``self.params`` and
    the underlying estimator in ``self.model``; every public method logs what it
    is doing through the module-level ``logger`` and then delegates to the
    estimator.
    """

    # Option names through which CatBoost's console verbosity can be configured.
    _VERBOSITY_KEYS = ("verbose", "verbose_eval", "silent", "logging_level")

    def __init__(self, **params):
        """
        Build the underlying regressor.

        Args:
            **params: Keyword arguments forwarded unchanged to
                ``CatBoostRegressor`` (e.g. ``iterations``, ``learning_rate``).
        """
        self.params = params
        self.model = CatBoostRegressor(**self.params)

    def fit(self, X, y, **fit_params):
        """
        Train the CatBoost regressor.

        Training is silent by default, but a verbosity option supplied either to
        the constructor or to this call is respected instead of being overridden.

        Args:
            X: Training features, passed straight to the estimator's ``fit``.
            y: Training targets, passed straight to the estimator's ``fit``.
            **fit_params: Extra keyword arguments forwarded to the estimator's
                ``fit`` (for example ``eval_set`` or ``verbose``).
        """
        logger.info("Training CatBoost Regressor.")
        verbosity_configured = any(
            key in self.params or key in fit_params
            for key in self._VERBOSITY_KEYS
        )
        if not verbosity_configured:
            # Keep the historical quiet default only when the caller expressed
            # no preference; a fit-level ``verbose`` would otherwise mask the
            # value given to the constructor.
            fit_params["verbose"] = False
        self.model.fit(X, y, **fit_params)
        logger.info("Model training completed.")

    def predict(self, X):
        """
        Make predictions using the trained model.

        Args:
            X: Features to predict on.

        Returns:
            Whatever the underlying estimator's ``predict`` returns for ``X``.
        """
        logger.info("Making predictions with CatBoost Regressor.")
        return self.model.predict(X)

    @staticmethod
    def _metric_name(metric):
        """Return a display name for a metric callable, even one lacking ``__name__``."""
        name = getattr(metric, "__name__", None)
        if name is None:
            # functools.partial objects expose the wrapped callable as ``.func``.
            name = getattr(getattr(metric, "func", None), "__name__", None)
        return name if name is not None else repr(metric)

    def evaluate(self, X, y, metrics):
        """
        Evaluate the model's performance.

        Args:
            X: Features to predict on.
            y: Ground-truth targets compared against the predictions.
            metrics: Iterable of callables, each invoked as
                ``metric(y, predictions)``.

        Returns:
            dict mapping each metric's name to its result. Metrics that resolve
            to the same name overwrite one another (the last one wins).
        """
        logger.info("Evaluating CatBoost Regressor.")
        predictions = self.predict(X)
        results = {}
        for metric in metrics:
            metric_name = self._metric_name(metric)
            result = metric(y, predictions)
            results[metric_name] = result
            logger.info("%s: %s", metric_name, result)
        return results

    def save_model(self, filepath):
        """
        Save the trained model to disk.

        Args:
            filepath: Destination path handed to the estimator's ``save_model``.
        """
        logger.info("Saving CatBoost Regressor model to %s.", filepath)
        self.model.save_model(filepath)

    def load_model(self, filepath):
        """
        Load a trained model from disk into the wrapped estimator.

        Args:
            filepath: Source path handed to the estimator's ``load_model``.
        """
        logger.info("Loading CatBoost Regressor model from %s.", filepath)
        self.model.load_model(filepath)


In [3]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

# Fetch the California housing data and wrap it in pandas containers
data = fetch_california_housing()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Preview the first training rows
X_train.head()


Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
14196,3.2596,33.0,5.017657,1.006421,2300.0,3.691814,32.71,-117.03
8267,3.8125,49.0,4.473545,1.041005,1314.0,1.738095,33.77,-118.16
17445,4.1563,4.0,5.645833,0.985119,915.0,2.723214,34.66,-120.48
14265,1.9425,36.0,4.002817,1.033803,1418.0,3.994366,32.69,-117.11
2271,3.5542,43.0,6.268421,1.134211,874.0,2.3,36.78,-119.8


In [5]:
# Hyperparameters handed to the wrapper, which forwards them to CatBoostRegressor
params = dict(
    iterations=5000,
    learning_rate=0.1,
    depth=6,
    loss_function='RMSE',
    verbose=100,
)
model = CatBoostRegressorModel(**params)

# Fit on the training split
model.fit(X_train, y_train)

# Score the held-out split with mean squared error and R^2
metrics = [mean_squared_error, r2_score]
results = model.evaluate(X_test, y_test, metrics)

# Report every metric by name
print("Evaluation Results:")
for metric_name, result in results.items():
    print(f"{metric_name}: {result}")


Evaluation Results:
mean_squared_error: 0.1834203390450016
r2_score: 0.8600281728142415


In [6]:
# Single source of truth for where the trained model is persisted
model_path = 'catboost_regressor_model.cbm'

# Write the trained model to disk ...
model.save_model(model_path)

# ... and read it back into the same wrapper
model.load_model(model_path)
