# Industry-Level Logistic Regression Implementation

Here's a production-ready logistic regression implementation following industry best practices:

## Complete Implementation

```python
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, precision_score, recall_score, 
                            f1_score, roc_auc_score, confusion_matrix, 
                            classification_report)
from sklearn.model_selection import GridSearchCV
import joblib
import logging
from typing import Tuple, Union

# Configure logging
# NOTE: basicConfig() runs at import time and configures the root logger at
# INFO level; applications that set up their own logging handlers before
# importing this module are unaffected, because basicConfig() is a no-op when
# the root logger already has handlers.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by LogisticRegressionModel for progress and
# error messages.
logger = logging.getLogger(__name__)

class LogisticRegressionModel:
    """
    Logistic regression classifier wrapped in a scikit-learn preprocessing pipeline.

    The fitted ``Pipeline`` (preprocessor step + ``LogisticRegression`` step) is
    stored in ``self.model``; it stays ``None`` until ``train()`` or
    ``load_model()`` has populated it.
    """

    # Step names used inside the Pipeline. Keys of the ``param_grid`` passed to
    # hyperparameter_tuning() must carry these prefixes (e.g. "classifier__C").
    _PREPROCESSOR_STEP = 'preprocessor'
    _CLASSIFIER_STEP = 'classifier'

    def __init__(self, params: Union[dict, None] = None):
        """
        Initialize with default or custom parameters.

        Args:
            params: Keyword arguments for ``LogisticRegression``. When omitted
                (or empty) ``default_params`` is used instead; a custom dict
                replaces the defaults entirely, it is not merged with them.
        """
        self.default_params = {
            'penalty': 'l2',
            'C': 1.0,
            'solver': 'lbfgs',
            'max_iter': 1000,
            'random_state': 42,
            'class_weight': 'balanced'
        }
        # Copy so that later changes to self.params never leak into the
        # caller's dict or into default_params.
        self.params = dict(params) if params else dict(self.default_params)
        self.model = None
        self.preprocessor = None
        self.feature_names = None

    def _require_trained(self) -> None:
        """Raise RuntimeError unless a fitted model is available."""
        # Explicit None check: truthiness of a Pipeline goes through __len__.
        if self.model is None:
            raise RuntimeError("Model not trained yet. Call train() first.")

    def _sync_preprocessor(self) -> None:
        """Point ``self.preprocessor`` at the preprocessing step of ``self.model``."""
        steps = getattr(self.model, "named_steps", None)
        if steps is not None:
            self.preprocessor = steps.get(self._PREPROCESSOR_STEP, self.preprocessor)

    def build_preprocessor(self, numeric_features: list, categorical_features: list) -> "ColumnTransformer":
        """
        Create the preprocessing step for the given feature columns.

        Numeric columns are standardized; categorical columns are one-hot
        encoded, with categories unseen during fit encoded as all zeros
        (``handle_unknown='ignore'``). Columns not listed in either argument
        are dropped (ColumnTransformer's default ``remainder``).
        """
        numeric_transformer = Pipeline(steps=[
            ('scaler', StandardScaler())
        ])

        categorical_transformer = Pipeline(steps=[
            ('onehot', OneHotEncoder(handle_unknown='ignore'))
        ])

        preprocessor = ColumnTransformer(
            transformers=[
                ('num', numeric_transformer, numeric_features),
                ('cat', categorical_transformer, categorical_features)
            ])

        return preprocessor

    def train(self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray],
              numeric_features: list, categorical_features: list) -> None:
        """
        Train the logistic regression model with the full pipeline.

        Args:
            X: Feature frame containing every listed feature column.
            y: Target labels, same length as ``X``.
            numeric_features: Names of columns to standardize.
            categorical_features: Names of columns to one-hot encode.

        Raises:
            ValueError: If ``X`` is not a DataFrame, lengths differ, or a
                listed feature column is missing from ``X``.
            Exception: Any error raised while fitting is logged and re-raised.
        """
        try:
            # Validate input
            if not isinstance(X, pd.DataFrame):
                raise ValueError("X must be a pandas DataFrame")

            if len(X) != len(y):
                raise ValueError("X and y must have same length")

            numeric_features = list(numeric_features)
            categorical_features = list(categorical_features)
            missing = [col for col in numeric_features + categorical_features
                       if col not in X.columns]
            if missing:
                raise ValueError(f"Feature columns not found in X: {missing}")

            logger.info("Building preprocessing pipeline...")
            preprocessor = self.build_preprocessor(numeric_features, categorical_features)

            # Create full pipeline
            pipeline = Pipeline(steps=[
                (self._PREPROCESSOR_STEP, preprocessor),
                (self._CLASSIFIER_STEP, LogisticRegression(**self.params))
            ])

            logger.info("Training model...")
            pipeline.fit(X, y)

            # Publish state only after a successful fit so a failed train()
            # never leaves a half-initialized instance behind.
            self.model = pipeline
            self.preprocessor = preprocessor
            self.feature_names = numeric_features + categorical_features

            logger.info("Model training completed successfully")

        except Exception as e:
            # logger.exception keeps the original message and adds the traceback.
            logger.exception("Error during model training: %s", e)
            raise

    def predict(self, X: pd.DataFrame) -> np.ndarray:
        """Return class predictions; raises RuntimeError if no model is trained."""
        self._require_trained()
        return self.model.predict(X)

    def predict_proba(self, X: pd.DataFrame) -> np.ndarray:
        """Return class probabilities; raises RuntimeError if no model is trained."""
        self._require_trained()
        return self.model.predict_proba(X)

    def evaluate(self, X_test: pd.DataFrame, y_test: Union[pd.Series, np.ndarray]) -> dict:
        """
        Evaluate model performance on a held-out set.

        Returns:
            dict with keys ``accuracy``, ``precision``, ``recall``, ``f1``,
            ``confusion_matrix``, ``classification_report`` and, when it is
            defined for this test set, ``roc_auc``.

        For two-class models the positive class is the second entry of the
        model's ``classes_`` (the same class whose probability column feeds
        ROC AUC). For more than two classes, precision/recall/F1 are
        weighted averages and ROC AUC is one-vs-rest.

        Raises:
            RuntimeError: If no model is trained.
        """
        metrics = {}

        y_pred = self.predict(X_test)
        y_true = np.asarray(y_test)

        classes = getattr(self.model, "classes_", None)
        if classes is None:
            # Fallback for loaded estimators that do not expose classes_.
            classes = np.unique(np.concatenate([y_true, np.asarray(y_pred)]))
        else:
            classes = np.asarray(classes)
        is_binary = len(classes) == 2

        if is_binary:
            avg_kwargs = {'average': 'binary', 'pos_label': classes[1]}
        else:
            avg_kwargs = {'average': 'weighted'}

        # Calculate metrics. zero_division=0 yields 0.0 (the value the
        # default setting also returns) without emitting a warning when a
        # class is never predicted or never present.
        metrics['accuracy'] = accuracy_score(y_true, y_pred)
        metrics['precision'] = precision_score(y_true, y_pred, zero_division=0, **avg_kwargs)
        metrics['recall'] = recall_score(y_true, y_pred, zero_division=0, **avg_kwargs)
        metrics['f1'] = f1_score(y_true, y_pred, zero_division=0, **avg_kwargs)

        if hasattr(self.model, "predict_proba"):
            # ROC AUC is undefined unless every class occurs in y_test.
            if len(classes) >= 2 and np.array_equal(np.unique(y_true), classes):
                y_proba = self.predict_proba(X_test)
                if is_binary:
                    metrics['roc_auc'] = roc_auc_score(y_true, y_proba[:, 1])
                else:
                    metrics['roc_auc'] = roc_auc_score(
                        y_true, y_proba, multi_class='ovr', labels=classes)
            else:
                logger.warning(
                    "Skipping ROC AUC: y_test does not contain exactly the "
                    "classes the model was trained on")

        # Passing labels keeps the matrix shape fixed at n_classes x n_classes
        # even when the test split lacks some class.
        metrics['confusion_matrix'] = confusion_matrix(y_true, y_pred, labels=classes)
        metrics['classification_report'] = classification_report(
            y_true, y_pred, output_dict=True, zero_division=0)

        return metrics

    def hyperparameter_tuning(self, X: pd.DataFrame, y: Union[pd.Series, np.ndarray],
                            param_grid: dict, cv: int = 5) -> None:
        """
        Perform grid search over the trained pipeline and keep the best estimator.

        Args:
            X: Feature frame.
            y: Target labels.
            param_grid: Grid keyed by pipeline parameter names, e.g.
                ``{"classifier__C": [0.1, 1, 10]}``.
            cv: Number of cross-validation folds.

        Raises:
            RuntimeError: If no model is trained (the grid search needs the
                existing pipeline as its base estimator).
        """
        self._require_trained()

        # Plain 'roc_auc' only supports two classes; use one-vs-rest otherwise.
        classes = getattr(self.model, "classes_", ())
        scoring = 'roc_auc' if len(classes) <= 2 else 'roc_auc_ovr'

        grid_search = GridSearchCV(
            estimator=self.model,
            param_grid=param_grid,
            cv=cv,
            scoring=scoring,
            n_jobs=-1,
            verbose=1
        )

        grid_search.fit(X, y)
        self.model = grid_search.best_estimator_

        # best_estimator_ is a refitted clone, so refresh the references that
        # still point at the previous pipeline's configuration.
        self._sync_preprocessor()
        prefix = self._CLASSIFIER_STEP + '__'
        tuned = {name[len(prefix):]: value
                 for name, value in grid_search.best_params_.items()
                 if name.startswith(prefix)}
        self.params = {**self.params, **tuned}

        logger.info(f"Best parameters found: {grid_search.best_params_}")

    def save_model(self, filepath: str) -> None:
        """Save the trained pipeline to disk; raises RuntimeError if untrained."""
        self._require_trained()
        joblib.dump(self.model, filepath)
        logger.info(f"Model saved to {filepath}")

    @classmethod
    def load_model(cls, filepath: str) -> 'LogisticRegressionModel':
        """
        Load a trained pipeline from disk into a new instance.

        ``params`` and ``feature_names`` are not persisted by save_model(), so
        the returned instance carries the defaults for those attributes.
        """
        # SECURITY: joblib.load unpickles arbitrary objects and can execute
        # code; only load files from trusted sources.
        model = joblib.load(filepath)
        new_instance = cls()
        new_instance.model = model
        new_instance._sync_preprocessor()
        return new_instance

# Example Usage
def main() -> None:
    """Run a small end-to-end demo: train, evaluate, save, reload and predict."""
    # Sample data - replace with your actual data.
    # Balanced and large enough that a stratified split leaves both classes
    # in the test set; otherwise ROC AUC is undefined.
    data = pd.DataFrame({
        'age': [22, 25, 28, 30, 33, 35, 38, 40, 43, 45, 50, 55],
        'income': [32000, 40000, 45000, 50000, 52000, 60000,
                   65000, 70000, 72000, 80000, 90000, 95000],
        'gender': ['M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M', 'F', 'M', 'F'],
        'purchased': [0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1]
    })

    # Prepare data
    X = data.drop('purchased', axis=1)
    y = data['purchased']
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=42, stratify=y)

    # Define feature types
    numeric_features = ['age', 'income']
    categorical_features = ['gender']

    # Initialize and train model
    lr_model = LogisticRegressionModel()
    lr_model.train(X_train, y_train, numeric_features, categorical_features)

    # Evaluate model
    metrics = lr_model.evaluate(X_test, y_test)
    print("Model Metrics:", metrics)

    # Save model
    lr_model.save_model('logistic_regression_model.joblib')

    # Load model
    loaded_model = LogisticRegressionModel.load_model('logistic_regression_model.joblib')
    print("Model loaded successfully")

    # Confirm the round trip: the reloaded pipeline must predict identically.
    same = np.array_equal(loaded_model.predict(X_test), lr_model.predict(X_test))
    print("Loaded model reproduces predictions:", same)


if __name__ == "__main__":
    main()
```

## Key Industry-Standard Features

1. **Modular Design**: 
   - Separate preprocessing, training, and evaluation
   - Clean class-based implementation

2. **Robust Data Handling**:
   - Separate preprocessing for the numeric (standard scaling) and categorical (one-hot encoding) columns that the caller specifies
   - Proper type checking and error handling

3. **Comprehensive Evaluation**:
   - Multiple metrics (accuracy, precision, recall, F1, ROC AUC)
   - Confusion matrix and classification report

4. **Production-Ready Features**:
   - Hyperparameter tuning with GridSearchCV
   - Model serialization with joblib
   - Logging for monitoring

5. **Type Hints**:
   - Clear function signatures with type annotations
   - Better IDE support and code maintainability

6. **Documentation**:
   - A docstring on the class and on every method
   - Example usage included

## Best Practices Followed

- Proper separation of concerns
- Exception handling and logging
- Configurable through parameters
- Support for both training and inference
- Model persistence capabilities
- Comprehensive evaluation metrics
- Feature preprocessing pipeline
- Hyperparameter optimization

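This implementation is a solid starting point for production use and follows common ML engineering practices. Before deploying it, add what your environment requires, such as missing-value handling, input schema validation, model versioning, and automated tests.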

## Suggested Project Layout

```text
📁 telco_churn_project/
├── main.py                        # প্রধান রান করার ফাইল
├── config.yaml                   # কনফিগারেশন
├── requirements.txt             # প্রয়োজনীয় লাইব্রেরি
├── 📁 src/
│   ├── __init__.py
│   ├── data_loader.py           # CSV লোড করা
│   ├── preprocessing.py         # প্রিপ্রসেসিং ও ফিচার ইঞ্জিনিয়ারিং
│   ├── model_train.py           # মডেল ট্রেনিং ও ইভালুয়েশন
│   └── utils.py                 # Utility functions
├── 📁 models/                    # Trained মডেল সেভ হবে এখানে
└── 📁 logs/                      # লগ ফাইল
```

Note: this tree is a layout diagram, not Python code. Running it in a notebook code cell (`In [1]:`) fails with:

```text
SyntaxError: invalid character '📁' (U+1F4C1) (672289613.py, line 1)
```

Put it in a Markdown cell or a fenced text block instead.