In [None]:

from zenml import pipeline, step
from sklearn.datasets import load_digits
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd
from typing import Any, Tuple

@step
def load_data() -> pd.DataFrame:
    """Load the scikit-learn digits dataset into a single DataFrame.

    Returns:
        A DataFrame built from the dataset's ``data`` array (default integer
        column labels) with the dataset's ``target`` array appended as a
        ``target`` column.
    """
    digits = load_digits()
    # assign() appends the label column after the feature columns.
    return pd.DataFrame(digits.data).assign(target=digits.target)

@step
def clean_data(df: pd.DataFrame) -> pd.DataFrame:
    """Drop an unneeded column and fill in missing values.

    The column ``unnecessary_column`` is removed when present. Missing values
    are then replaced with the per-column median computed over numeric
    columns, and anything still missing afterwards is replaced with the
    string ``"no review"``.

    Args:
        df: The DataFrame to clean. It is not modified in place.

    Returns:
        The cleaned DataFrame.
    """
    droppable = 'unnecessary_column'
    cleaned = df.drop(columns=[droppable]) if droppable in df.columns else df

    # First pass: numeric gaps get the column median.
    numeric_medians = cleaned.median(numeric_only=True)
    cleaned = cleaned.fillna(numeric_medians)

    # Second pass: whatever is still missing gets a placeholder string.
    return cleaned.fillna("no review")

@step
def train_model(df: pd.DataFrame) -> Tuple[Any, pd.DataFrame, pd.DataFrame, pd.Series, pd.Series]:
    """Fit a linear regression on an 80/20 train/test split of ``df``.

    Args:
        df: Cleaned data containing a ``target`` column; every other column
            is used as a feature.

    Returns:
        A tuple ``(model, X_train, X_test, y_train, y_test)`` holding the
        fitted estimator followed by the feature and target splits.
    """
    target = df["target"]
    features = df.drop(columns=["target"])

    # Fixed random_state keeps the split reproducible between runs.
    features_train, features_test, target_train, target_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    # fit() returns the estimator itself, so construction and fitting chain.
    regressor = LinearRegression().fit(features_train, target_train)

    return regressor, features_train, features_test, target_train, target_test

@step
def evaluate_model(model: Any, X_test: pd.DataFrame, y_test: pd.Series) -> float:
    """Score a fitted model on held-out data with mean squared error.

    Args:
        model: A fitted estimator exposing ``predict``.
        X_test: Held-out feature rows.
        y_test: True target values for ``X_test``.

    Returns:
        The mean squared error between ``y_test`` and the model's
        predictions; the value is also printed.
    """
    mse = mean_squared_error(y_test, model.predict(X_test))
    print(f"Mean Squared Error: {mse}")
    return mse

@pipeline
def training_pipeline():
    """Chain the steps: load, clean, train, then evaluate on the test split."""
    cleaned_df = clean_data(load_data())
    # Only the model and the test split are needed downstream.
    model, _, X_test, _, y_test = train_model(cleaned_df)
    evaluate_model(model, X_test, y_test)

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    training_pipeline()


[1;35mInitiating a new run for the pipeline: [0m[1;36mtraining_pipeline[1;35m.[0m
[1;35mUsing user: [0m[1;36mdefault[1;35m[0m
[1;35mUsing stack: [0m[1;36mdefault[1;35m[0m
[1;35m  orchestrator: [0m[1;36mdefault[1;35m[0m
[1;35m  artifact_store: [0m[1;36mdefault[1;35m[0m
[1;35mYou can visualize your pipeline runs in the [0m[1;36mZenML Dashboard[1;35m. In order to try it locally, please run [0m[1;36mzenml login --local[1;35m.[0m
[1;35mUsing cached version of step [0m[1;36mload_data[1;35m.[0m
[evaluate_model] [1;35mStep [0m[1;36mclean_data[1;35m has started.[0m
[1;35mStep [0m[1;36mclean_data[1;35m has finished in [0m[1;36m1.537s[1;35m.[0m
[1;35mStep [0m[1;36mtrain_model[1;35m has started.[0m
[1;35mStep [0m[1;36mtrain_model[1;35m has finished in [0m[1;36m4.203s[1;35m.[0m
[1;35mStep [0m[1;36mevaluate_model[1;35m has started.[0m
[evaluate_model] Mean Squared Error: 3.2599689226111743
[1;35mStep [0m[1;36mevaluate_model[1;3