# Classification Example

In [1]:
from app import MLApplication

# Instantiate the MLApplication class for the classification example.
# - csv_path: Path to the CSV data file.
# - target_column: The name of the target column in the dataset.
# - algorithm: The machine learning algorithm to use ('decision_tree' here).
# Note: no task_type argument is passed. The run log for this cell reports
# "Task type determined: classification", so the task type appears to be
# inferred automatically -- confirm against the MLApplication implementation.
app = MLApplication(
    csv_path='example.csv', 
    target_column='Transported', 
    algorithm='decision_tree'
)

# Load the data from the provided CSV file.
# This step separates the features (X) and the target (y).
app.load_data()

# Preprocess the data. This step typically includes:
# - Imputing missing values
# - Scaling numeric features
# - Encoding categorical variables
app.preprocess_data()

# Build the pipeline.
# This constructs a scikit-learn Pipeline that includes the preprocessing steps
# followed by the selected machine learning model.
app.build_pipeline()

# Train and evaluate the model using cross-validation.
# This prints performance metrics; for this classification task the run log
# shows a cross-validated accuracy.
app.train_and_evaluate()

# Save the trained pipeline to a .pkl file for later reuse or deployment.
app.save_pipeline('sample_pipeline.pkl')


Starting data loading...
Task type determined: classification
Starting preprocessing...
Preprocessing setup complete.
Building the ML pipeline...
Pipeline built successfully.
Starting model training and evaluation...
Cross-validated accuracy: 0.7695
Model training and evaluation complete.
Saving the pipeline to sample_pipeline.pkl...
Pipeline saved to sample_pipeline.pkl


# Regression Example

In [3]:
from app import MLApplication

# Instantiate the MLApplication class for the regression example.
# - csv_path: Path to the CSV data file.
# - target_column: The name of the target column in the dataset.
# - algorithm: The machine learning algorithm to use ('random_forest' here).
# Note: no task_type argument is passed. The run log for this cell reports
# "Task type determined: regression", so the task type appears to be
# inferred automatically -- confirm against the MLApplication implementation.
app = MLApplication(
    csv_path='housing.csv',
    target_column='price',
    algorithm='random_forest'
)

# Load the data from the provided CSV file.
# This step separates the features (X) and the target (y).
app.load_data()

# Preprocess the data. This step typically includes:
# - Imputing missing values
# - Scaling numeric features
# - Encoding categorical variables
app.preprocess_data()

# Build the pipeline.
# This constructs a scikit-learn Pipeline that includes the preprocessing steps
# followed by the selected machine learning model.
app.build_pipeline()

# Train and evaluate the model using cross-validation.
# This prints performance metrics; for this regression task the run log
# shows a cross-validated MSE rather than an accuracy.
app.train_and_evaluate()

# Save the trained pipeline to a .pkl file for later reuse or deployment.
# A dedicated filename is used so that running this example does not
# overwrite the 'sample_pipeline.pkl' written by the classification example.
app.save_pipeline('regression_pipeline.pkl')

Starting data loading...
Task type determined: regression
Starting preprocessing...
Preprocessing setup complete.
Building the ML pipeline...
Pipeline built successfully.
Starting model training and evaluation...
Cross-validated MSE: 2779970364407.9893
Model training and evaluation complete.
Saving the pipeline to sample_pipeline.pkl...
Pipeline saved to sample_pipeline.pkl
