<a href="https://colab.research.google.com/github/samgitnub/Python-Development/blob/main/5_Machine_Learning_Pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Step 1: Data Collection
def load_data(source=None):
    """Load the iris dataset into a DataFrame with named columns.

    Args:
        source: Anything ``pandas.read_csv`` accepts (URL, local path or
            file-like object) pointing at a header-less CSV with five
            columns. When omitted, the public iris CSV URL is used, which
            matches the previous hard-coded behaviour.

    Returns:
        A DataFrame whose columns are ``sepal_length``, ``sepal_width``,
        ``petal_length``, ``petal_width`` and ``species``.

    Raises:
        ValueError: If the parsed CSV does not have exactly five columns
            (raised by pandas when the column names are assigned).
    """
    if source is None:
        source = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/iris.csv"

    # The file ships without a header row, so the names are attached afterwards.
    data = pd.read_csv(source, header=None)
    data.columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
    print("Data Loaded Successfully")
    return data

# Step 2: Data Preprocessing
def preprocess_data(data):
    """Encode the target, split into train/test sets and standardise features.

    The input DataFrame is left untouched: the integer-encoded target is
    built as a separate Series instead of overwriting ``data['species']``.

    Args:
        data: DataFrame containing the feature columns plus a ``species``
            column holding the class labels.

    Returns:
        A tuple ``(X_train_scaled, X_test_scaled, y_train, y_test, scaler)``
        where the scaled feature sets are the outputs of the fitted
        ``StandardScaler`` and ``scaler`` is that fitted instance.
    """
    # Convert categorical target to numeric codes without mutating the
    # caller's frame; the name is kept so y still reads as 'species'.
    y = data['species'].astype('category').cat.codes.rename('species')
    X = data.drop(columns='species')

    # Split data into train and test sets (80/20, fixed seed for repeatability)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Scale features: statistics are fitted on the training split only and
    # then re-used for the test split.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    print("Data Preprocessing Completed")
    return X_train_scaled, X_test_scaled, y_train, y_test, scaler

# Step 3: Model Training
def train_model(X_train, y_train):
    """Fit a random-forest classifier (seed 42) on the training data and return it."""
    # fit() hands back the estimator itself, so construction and training chain.
    forest = RandomForestClassifier(random_state=42).fit(X_train, y_train)
    print("Model Training Completed")
    return forest

# Step 4: Model Evaluation
def evaluate_model(model, X_test, y_test):
    """Print accuracy and a classification report for ``model``; return the accuracy.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Feature set to predict on.
        y_test: True labels matching ``X_test``.

    Returns:
        The value computed by ``accuracy_score`` for the predictions.
    """
    predictions = model.predict(X_test)
    score = accuracy_score(y_test, predictions)

    print("Model Evaluation Results:")
    print("Accuracy:", score)

    print("Classification Report:")
    report = classification_report(y_test, predictions)
    print(report)

    return score

# Step 5: Automate Workflow
def save_artifacts(model, scaler):
    """Persist the model and scaler to ``model.pkl`` and ``scaler.pkl`` via joblib."""
    # Written in a fixed order: model first, then scaler.
    for artifact, filename in ((model, "model.pkl"), (scaler, "scaler.pkl")):
        joblib.dump(artifact, filename)
    print("Model and Scaler Saved")

def load_artifacts():
    """Read back ``model.pkl`` and ``scaler.pkl`` and return ``(model, scaler)``."""
    # NOTE(review): joblib.load unpickles the files, so only load artifacts
    # that come from a trusted location.
    restored = tuple(joblib.load(filename) for filename in ("model.pkl", "scaler.pkl"))
    print("Model and Scaler Loaded")
    return restored

# Main Pipeline
if __name__ == "__main__":
    # Run the five pipeline steps end to end: load -> preprocess -> train
    # -> evaluate -> persist.
    data = load_data()
    # The fitted scaler is kept so it can be saved alongside the model.
    X_train, X_test, y_train, y_test, scaler = preprocess_data(data)
    model = train_model(X_train, y_train)
    # The returned accuracy is not used here; evaluate_model prints the results.
    evaluate_model(model, X_test, y_test)
    save_artifacts(model, scaler)


Data Loaded Successfully
Data Preprocessing Completed
Model Training Completed
Model Evaluation Results:
Accuracy: 1.0
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Model and Scaler Saved
