In [1]:
from kfp import dsl

In [2]:
@dsl.component(
    base_image='python:3.9',
    packages_to_install=['pandas == 1.2.4', 'numpy == 1.21.0', 'scikit-learn == 0.24.2']
)
def prepare_data() -> float:
    """Train a linear regression on the housing CSV and predict one held-out row.

    Downloads the dataset, uses the ``rooms`` and ``sqft`` columns as features
    and ``price`` as the target, holds out 20% of the rows for testing, fits a
    ``LinearRegression`` on the rest, and predicts the price of the first
    held-out row. The actual and predicted values are printed to the step log.

    The split uses a fixed ``random_state`` so repeated runs select the same
    held-out row and produce the same prediction.

    Returns:
        The predicted price for the first row of the test split, as a float.
    """
    # Imports are kept inside the function body: the component's source is
    # executed on its own inside the container image, not in this notebook.
    import pandas as pd
    from sklearn.linear_model import LinearRegression
    from sklearn.model_selection import train_test_split

    # Load dataset
    df = pd.read_csv("https://raw.githubusercontent.com/mrsddq/MLOps-Project/refs/heads/master/src/data/housing_1000.csv")

    # Prepare data: features are rooms and sqft, target is price
    X = df[['rooms', 'sqft']].values
    y = df['price'].values

    # Split data for training and testing; seeded for reproducibility
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Model training
    model = LinearRegression().fit(X_train, y_train)

    # predict() expects a 2-D array, so the single sample is reshaped to (1, n_features)
    predicted_rental = model.predict(X_test[0].reshape(1, -1))[0]

    print("Actual Rental Price for Property with rooms=",X_test[0][0],"and Area Sqft=",X_test[0][1],"is=",y_test[0])
    print("Predicted Rental Price for Property with rooms=",X_test[0][0],"and Area Sqft=",X_test[0][1],"is=",predicted_rental)

    # Convert the numpy scalar to a plain Python float to match the declared return type
    return float(predicted_rental)

In [3]:
# One-step pipeline: runs the prepare_data component and surfaces its float
# result as the pipeline's own output.
@dsl.pipeline(name='rental-price-prediction-pipeline')
def rental_price_prediction_pipeline() -> float:
    # Calling the component inside the pipeline function creates a task
    prediction_step = prepare_data()

    # The task's single return value is exposed through `.output`
    return prediction_step.output

In [4]:
from kfp import compiler

# compiler.Compiler().compile(rental_price_prediction_pipeline, package_path='rental_price_prediction_pipeline.yaml')

In [5]:
from kfp import compiler
from kfp import client as kfp

# Compile the pipeline function into a YAML package file
compiler.Compiler().compile(
    pipeline_func=rental_price_prediction_pipeline,
    package_path='rental_price_prediction_pipeline.yaml',
)

# Connect to the KFP server at the address below
# (note: `kfp` in this cell is the `kfp.client` module alias imported above)
host = 'http://localhost:8080'
client = kfp.Client(host=host)

# Create an experiment with the given name and print the object returned by the server
experiment_name = 'Predict Rental Price Experiment'
experiment = client.create_experiment(name=experiment_name)
print(f'Experiment created: {experiment}')



Experiment created: {'created_at': datetime.datetime(2025, 9, 4, 20, 35, 3, tzinfo=tzutc()),
 'description': None,
 'display_name': 'Predict Rental Price Experiment',
 'experiment_id': 'e44d363e-9dd3-4c10-a513-fbe682d71f4b',
 'last_run_created_at': datetime.datetime(1970, 1, 1, 0, 0, tzinfo=tzutc()),
 'namespace': None,
 'storage_state': 'AVAILABLE'}


In [6]:
# Submit the compiled YAML package as a run. Passing experiment_name files the
# run under the experiment created earlier in this notebook, matching the
# create_run_from_pipeline_func call below; without it the run is not attached
# to that experiment.
client.create_run_from_pipeline_package(
    pipeline_file='rental_price_prediction_pipeline.yaml',
    experiment_name=experiment_name,
)

RunPipelineResult(run_id=fa13bd66-f9e2-4558-8d3e-4e15a0e10961)

In [7]:
# Submit a run directly from the pipeline function (no pre-compiled package needed),
# giving it a display name and filing it under the named experiment
run_name = 'Run of Rental Price Prediction Pipeline'
run_result = client.create_run_from_pipeline_func(
    rental_price_prediction_pipeline,
    run_name=run_name,
    experiment_name=experiment_name,
)
print(f'Pipeline run submitted: {run_result}')

Pipeline run submitted: RunPipelineResult(run_id=bce71f6d-cea4-44af-ba2f-0d2a23107b3f)
