In [1]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature
import pandas as pd

# Load the dataset
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize the KNN regressor
knn = KNeighborsRegressor(n_neighbors=5)

# Fit the KNN model
knn.fit(X_train_scaled, y_train)

# Predict on the test set
y_pred_knn = knn.predict(X_test_scaled)

# Evaluate the KNN model
mse_knn = mean_squared_error(y_test, y_pred_knn)
print(f'Mean Squared Error (KNN): {mse_knn}')

# Create a Pipeline with RandomForestRegressor
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', RandomForestRegressor(random_state=42))
])

# Start MLflow Tracking
with mlflow.start_run() as run:
    # Train the RandomForestRegressor model
    pipeline.fit(X_train, y_train)
    
    # Make predictions
    y_pred_rf = pipeline.predict(X_test)
    
    # Evaluate the model
    mse_rf = mean_squared_error(y_test, y_pred_rf)
    print(f'Mean Squared Error (Random Forest): {mse_rf}')
    
    # Log metrics
    mlflow.log_metric('mse', mse_rf)
    
    # Log input example
    input_example = pd.DataFrame(X_test[:5], columns=diabetes.feature_names)
    
    # Infer model signature
    signature = infer_signature(X_train, pipeline.predict(X_train))
    
    # Log the model with MLflow
    mlflow.sklearn.log_model(
        sk_model=pipeline,
        artifact_path="random_forest_model",
        input_example=input_example,
        signature=signature
    )

# End of MLflow tracking (automatically handled by `with` block)



ModuleNotFoundError: No module named 'mlflow'

In [2]:
from joblib import dump

# Save the final Random Forest model
dump(pipeline, 'final_model.joblib')
print("Model saved as final_model.joblib")


NameError: name 'pipeline' is not defined

In [7]:
from flask import Flask, request, jsonify
from joblib import load

# Load the saved model
model = load('final_model.joblib')

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    data = request.json  # JSON input
    predictions = model.predict(data['input'])  # Use the model for predictions
    return jsonify({'predictions': predictions.tolist()})

if __name__ == '__main__':
    app.run(debug=True)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with stat


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [8]:
FROM python:3.9-slim

WORKDIR /app

# Install dependencies
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt

# Copy app files
COPY . .

# Expose port and run the app
EXPOSE 5000
CMD ["python", "app.py"]



SyntaxError: invalid syntax (2121543546.py, line 1)