# Pipeline example

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np


# Simplified classes for the pipeline
class DataLoader:
    def __init__(self, filepath):
        self.filepath = filepath

    def load_data(self):
        return pd.read_csv(self.filepath)

class DataPreprocessor:
    def __init__(self, data):
        self.data = data

    def preprocess(self):
        # Basic preprocessing steps
        self.data = self.data.drop(['Name', 'Ticket', 'Cabin'], axis=1)
        self.data['Age'].fillna(self.data['Age'].median(), inplace=True)
        self.data['Embarked'].fillna(self.data['Embarked'].mode()[0], inplace=True)
        return self.data

class FeatureEngineer:
    def __init__(self, data):
        self.data = data

    def engineer_features(self):
        # Convert categorical variables to numerical
        label_encoder = LabelEncoder()
        self.data['Sex'] = label_encoder.fit_transform(self.data['Sex'])
        self.data['Embarked'] = label_encoder.fit_transform(self.data['Embarked'])
        return self.data

class ModelTrainer:
    def __init__(self, features, target):
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(features, target, test_size=0.2, random_state=42)
        self.model = LogisticRegression()

    def train(self):
        self.model.fit(self.X_train, self.y_train)
        return self.model

class ModelEvaluator:
    def __init__(self, model, X_test, y_test):
        self.model = model
        self.X_test = X_test
        self.y_test = y_test

    def evaluate(self):
        predictions = self.model.predict(self.X_test)
        return accuracy_score(self.y_test, predictions)

# Running the pipeline
def main():
    # Load data
    loader = DataLoader('../titanic.csv')
    data = loader.load_data()

    # Preprocess data
    preprocessor = DataPreprocessor(data)
    preprocessed_data = preprocessor.preprocess()

    # Feature engineering
    engineer = FeatureEngineer(preprocessed_data)
    engineered_data = engineer.engineer_features()

    # Splitting features and target
    features = engineered_data.drop('Survived', axis=1)
    target = engineered_data['Survived']

    # Model training
    trainer = ModelTrainer(features, target)
    trained_model = trainer.train()

    # Model evaluation
    evaluator = ModelEvaluator(trained_model, trainer.X_test, trainer.y_test)
    evaluation_result = evaluator.evaluate()
    return evaluation_result

# Execute the main function
accuracy = main()
accuracy


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


0.7821229050279329