# In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder

class ModelPipeline:
    """Tabular classification pipeline: load, preprocess, train, test, predict.

    The last column of the loaded table is used as the target and every other
    column as a feature. Datetime columns are converted to POSIX timestamps
    and object/category columns are label-encoded; the fitted encoders are
    kept in ``label_encoders`` so the same encoding can be applied to new
    rows at prediction time.
    """

    # Dtype selectors shared by training-time and prediction-time preprocessing.
    _DATETIME_DTYPES = ['datetime64', 'datetime']
    _CATEGORICAL_DTYPES = ['object', 'category']

    def __init__(self):
        """Creates an empty pipeline with no data, split, or models."""
        self.models = {}
        self.selected_model = None
        self.data = None
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        self.label_encoders = {}

    def load(self, file_path):
        """Loads data from an Excel file into ``self.data``."""
        self.data = pd.read_excel(file_path)
        print("Data loaded successfully.")

    @staticmethod
    def _to_timestamp(value):
        """Returns the POSIX timestamp of ``value``, or 0 when it is missing."""
        return value.timestamp() if pd.notnull(value) else 0

    def _prepare_input(self, input_data):
        """Applies the training-time column transformations to new rows.

        Only DataFrame input is transformed, and always on a copy so the
        caller's frame is left untouched. Datetime columns become timestamps;
        object/category columns that have a fitted encoder are encoded with
        it. Columns without a fitted encoder, and non-DataFrame input, are
        passed through unchanged.
        """
        if not isinstance(input_data, pd.DataFrame):
            return input_data

        frame = input_data.copy()
        for col in frame.select_dtypes(include=self._DATETIME_DTYPES):
            frame[col] = frame[col].apply(self._to_timestamp)

        for col in frame.select_dtypes(include=self._CATEGORICAL_DTYPES):
            encoder = self.label_encoders.get(col)
            if encoder is not None:
                # Same string cast as in preprocess(), so labels match the
                # values the encoder was fitted on.
                frame[col] = encoder.transform(frame[col].astype(str))
        return frame

    def preprocess(self):
        """Preprocesses the loaded data and creates the train/test split.

        ``self.data`` is modified in place: datetime columns are replaced by
        timestamps (missing values become 0) and object/category columns,
        including the target when it is one, are label-encoded. The data is
        then split 80/20 with a fixed random seed.

        Raises:
            ValueError: If no data has been loaded, or the table has fewer
                than two columns (one feature plus the target).
        """
        if self.data is None:
            raise ValueError("Data not loaded. Please load data first.")
        if self.data.shape[1] < 2:
            raise ValueError(
                "Data must contain at least one feature column and a target column."
            )

        # Handle datetime columns by converting them to numerical format
        for col in self.data.select_dtypes(include=self._DATETIME_DTYPES):
            self.data[col] = self.data[col].apply(self._to_timestamp)

        # Handle categorical columns by encoding them; keep each encoder so
        # predict() can reuse it on raw input.
        for col in self.data.select_dtypes(include=self._CATEGORICAL_DTYPES):
            le = LabelEncoder()
            self.data[col] = le.fit_transform(self.data[col].astype(str))
            self.label_encoders[col] = le

        # Split features and target (target is the last column)
        X = self.data.iloc[:, :-1]
        y = self.data.iloc[:, -1]

        # Train-test split
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        print("Data preprocessing complete.")

    def train(self):
        """Trains AdaBoost, RandomForest, and LogisticRegression models.

        Each fitted model is stored in ``self.models`` under its class-style
        key ('AdaBoost', 'RandomForest', 'LogisticRegression').

        Raises:
            ValueError: If ``preprocess`` has not produced a training split.
        """
        if self.X_train is None or self.y_train is None:
            raise ValueError("Data not preprocessed. Please preprocess data first.")

        # (key in self.models, name used in the progress message, estimator)
        estimators = (
            ('AdaBoost', 'AdaBoost',
             AdaBoostClassifier(random_state=42)),
            ('RandomForest', 'Random Forest',
             RandomForestClassifier(random_state=42, n_estimators=100)),
            ('LogisticRegression', 'Logistic Regression',
             LogisticRegression(random_state=42, max_iter=1000)),
        )
        for key, label, estimator in estimators:
            estimator.fit(self.X_train, self.y_train)
            self.models[key] = estimator
            print(f"{label} training complete.")

    def test(self):
        """Tests all models and selects the one with the highest accuracy.

        Accuracy is measured on the held-out split; the best model is stored
        in ``self.selected_model``. On a tie the model trained first wins.

        Raises:
            ValueError: If no models have been trained, or no test split is
                available.
        """
        if not self.models:
            raise ValueError("Models not trained. Please train the models first.")
        if self.X_test is None or self.y_test is None:
            raise ValueError("Test data not available. Please preprocess data first.")

        model_accuracies = {}
        for model_name, model in self.models.items():
            predictions = model.predict(self.X_test)
            accuracy = accuracy_score(self.y_test, predictions)
            model_accuracies[model_name] = accuracy
            print(f"{model_name} Testing Results:")
            print(f"Accuracy: {accuracy:.2f}")

        # Select the model with the highest accuracy
        best_model_name = max(model_accuracies, key=model_accuracies.get)
        self.selected_model = self.models[best_model_name]
        print(f"{best_model_name} selected as the final model.")

    def predict(self, input_data):
        """Generates predictions for the provided input data using the selected model.

        DataFrame input may contain raw (unencoded) values: datetime columns
        and columns that were label-encoded during ``preprocess`` are
        converted on a copy before prediction. Already-numeric input is used
        as is.

        Returns:
            Whatever the selected model's ``predict`` returns. If the target
            was label-encoded during ``preprocess``, these are the encoded
            labels.

        Raises:
            ValueError: If no model has been selected yet.
        """
        if self.selected_model is None:
            raise ValueError("No model selected. Please run the test method first.")

        prepared = self._prepare_input(input_data)
        return self.selected_model.predict(prepared)


if __name__ == "__main__":
    # Location of the Excel workbook used for training.
    train_file_path = "/content/train_data.xlsx"

    # Build the pipeline, load the workbook, then run the remaining stages in order.
    pipeline = ModelPipeline()
    pipeline.load(train_file_path)
    for stage in (pipeline.preprocess, pipeline.train, pipeline.test):
        stage()

    # Demonstrate inference on the first five rows of the held-out split.
    example_data = pipeline.X_test.head(5)
    predictions = pipeline.predict(example_data)
    print("Predictions:", predictions)


# Output:
# Data loaded successfully.
# Data preprocessing complete.
#
# AdaBoost training complete.
# Random Forest training complete.
# Logistic Regression training complete.
# AdaBoost Testing Results:
# Accuracy: 0.77
# RandomForest Testing Results:
# Accuracy: 0.76
# LogisticRegression Testing Results:
# Accuracy: 0.74
# AdaBoost selected as the final model.
# Predictions: [1 0 1 1 1]
