# Clothes Size Prediction

## - Training Model

In [1]:
# Import necessary libraries
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import logging
# Configure logging for the notebook: INFO level, "[LEVEL] message" format.
logging.basicConfig(level=logging.INFO, format='[%(levelname)s] %(message)s')
logger = logging.getLogger(__name__)

# Resolve the project root as the parent of the current working directory.
# NOTE(review): assumes the kernel is started one level below the project
# root (e.g. in a notebooks/ folder) — confirm for your setup.
current_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(current_dir, '..'))

# Make the project-local `src` package importable from the project root.
# Guarded so that re-running this cell does not keep appending duplicates.
if project_root not in sys.path:
    sys.path.append(project_root)
logger.info("✅ Libraries Uploaded")

[INFO] ✅ Libraries Uploaded


In [2]:
# --- Import the training pipeline from src/pipeline/training_pipeline.py
# Project-local import: it resolves because the project root was appended to
# sys.path in the first cell, so that cell must run before this one.
from src.pipeline.training_pipeline import ModelTrainer
logger.info("✅ Libraries Uploaded")

[INFO] ✅ Libraries Uploaded


## ☛ Import Features Dataset

In [3]:
# Build the absolute path to the features CSV: <project_root>/data/features/clothes_features.csv
file_path = os.path.abspath(os.path.join(project_root, 'data', 'features', 'clothes_features.csv'))

# Load the CSV into a DataFrame.
# On failure the error is logged and then re-raised, so the notebook stops here
# instead of continuing with `clothes_ft` undefined.
try:
    clothes_ft = pd.read_csv(file_path)
except Exception as e:
    logger.error(f"❌ Error loading data: {e}")
    raise
else:
    logger.info(f"✅ Data successfully loaded: {clothes_ft.shape[0]} rows, {clothes_ft.shape[1]} columns.")

[INFO] ✅ Data successfully loaded: 26351 rows, 7 columns.


## Initialize and Run Training

In [4]:
# Initialize pipeline
# Construct the trainer with the features CSV path built above and "size" as
# the target column.
# NOTE(review): ModelTrainer lives in src.pipeline.training_pipeline (not shown
# here); it presumably reads the file itself rather than reusing `clothes_ft`
# — confirm against the class.
trainer = ModelTrainer(data_path=file_path, target_col="size")

In [5]:
# Run full training pipeline
# Per the output captured below, this loads the data, makes a train/test split,
# fits five classifiers and reports their accuracy, saves the best one as
# best_model.pkl, and prints a classification report.
# NOTE(review): no random seed is passed from the notebook — confirm the
# pipeline fixes one internally so the reported accuracies are reproducible.
trainer.full_training_pipeline()

📥 Loading data...
✅ Data loaded: 26351 rows, 7 columns.
🔧 Preparing data for training...
✅ Split complete: 21080 train / 5271 test
🚀 Training models...

Logistic Regression: Accuracy = 0.4252
Random Forest: Accuracy = 0.1935
Gradient Boosting: Accuracy = 0.3980
KNN: Accuracy = 0.2525
SVM: Accuracy = 0.4202

🏆 Best model: LogisticRegression
✅ Accuracy: 0.4252
💾 Best model saved as best_model.pkl

📊 Classification Report:
              precision    recall  f1-score   support

           0       0.33      0.07      0.11       828
           1       0.29      0.42      0.34       972
           2       0.33      0.39      0.36       786
           3       0.32      0.34      0.33       915
           4       0.00      0.00      0.00        10
           5       0.49      0.36      0.41       506
           6       0.67      0.78      0.72      1254

    accuracy                           0.43      5271
   macro avg       0.35      0.34      0.33      5271
weighted avg       0.42      0.43 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
