<a href="https://colab.research.google.com/github/kashishbudhwani/ml-restapi-housing-predictor/blob/main/Predictive_ML_Model_as_a_REST_API.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install libraries for model (scikit-learn, pandas, joblib are usually pre-installed, but keep them here for safety)
# Install FastAPI and the necessary tools for running it in a notebook and exposing it (pyngrok, nest-asyncio)
!pip install scikit-learn pandas joblib fastapi uvicorn pyngrok nest-asyncio -q

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import fetch_california_housing
import joblib

print("Starting the training script...")

# 1. Load data
housing = fetch_california_housing()
X = pd.DataFrame(housing.data, columns=housing.feature_names)
y = pd.Series(housing.target, name='MedHouseVal')
features_to_use = ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population']
X = X[features_to_use]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Data prepared and split.")

# 2. Train model
model = RandomForestRegressor(n_estimators=100, random_state=42)
print("Training the RandomForestRegressor model...")
model.fit(X_train, y_train)
print("Model training complete.")

# 3. Save model
model_filename = 'california_housing_model.joblib'
joblib.dump(model, model_filename)
print(f"Model saved as {model_filename}.")

Starting the training script...
Data prepared and split.
Training the RandomForestRegressor model...
Model training complete.
Model saved as california_housing_model.joblib.


In [4]:
# Install the Cloudflare Tunnel client
!wget -q -nc https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64
!mv cloudflared-linux-amd64 cloudflared
!chmod +x cloudflared

# Install FastAPI and supporting libraries (if you haven't already)
!pip install fastapi uvicorn nest-asyncio -q


In [6]:
from fastapi import FastAPI
from pydantic import BaseModel
import joblib
import pandas as pd
import nest_asyncio
import uvicorn
import subprocess
import threading # Import threading for running Uvicorn in a separate thread
import time
import re # For regex to parse the URL from cloudflared output

# Your existing FastAPI app definition goes here
app = FastAPI(title="California Housing Price Prediction API")

# Load the trained model from the Colab file system
model = joblib.load('california_housing_model.joblib')

class HouseFeatures(BaseModel):
    MedInc: float
    HouseAge: float
    AveRooms: float
    AveBedrms: float
    Population: float

    class Config:
        schema_extra = {
            "example": {
                "MedInc": 8.3252, "HouseAge": 41.0, "AveRooms": 6.9841,
                "AveBedrms": 1.0238, "Population": 322.0
            }
        }

@app.post("/predict")
def predict_price(features: HouseFeatures):
    """Predicts the median house value based on input features."""
    input_data = pd.DataFrame([features.dict()])
    prediction = model.predict(input_data)
    predicted_value = prediction[0]
    return {"predicted_median_house_value": predicted_value}

@app.get("/")
def read_root():
    return {"message": "Welcome to the Housing Price Prediction API!"}

# Apply nest_asyncio to allow Uvicorn to run within the Jupyter/Colab event loop
nest_asyncio.apply()

# Function to run Uvicorn in a separate thread
def run_uvicorn():
    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="warning")

# Start Uvicorn in a separate thread
print("Starting FastAPI server in a background thread...")
uvicorn_thread = threading.Thread(target=run_uvicorn)
uvicorn_thread.start()

# 1. Start the cloudflared tunnel in the background and capture output
print("Starting Cloudflare tunnel...")
# Using `subprocess.Popen` to run in background and capture output
# cloudflared prints the public URL to stderr
cloudflared_process = subprocess.Popen(
    ["./cloudflared", "tunnel", "--url", "http://127.0.0.1:8000"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True # Decode stdout/stderr as text
)

public_url = ""
# Wait for the tunnel to start and print the URL
start_time = time.time()
while time.time() - start_time < 20: # Wait up to 20 seconds for the URL
    # Read from stderr, where cloudflared usually prints the Public URL
    line = cloudflared_process.stderr.readline()
    if "https://" in line and ".trycloudflare.com" in line:
        match = re.search(r"(https://[a-zA-Z0-9-]+\.trycloudflare\.com)", line)
        if match:
            public_url = match.group(1)
            break
    elif cloudflared_process.poll() is not None: # If process exited prematurely
        print("Cloudflare tunnel process exited prematurely.")
        break
    time.sleep(0.5)

if public_url:
    print(f"✅ Cloudflare Public API URL: {public_url}/docs")
else:
    print("⚠️ Could not find Public URL from cloudflared output. Check cloudflared status manually.")
    # If the URL is not found, try to read any remaining output or error for debugging
    stdout_output, stderr_output = cloudflared_process.communicate(timeout=1)
    print(f"Cloudflared stdout: {stdout_output}")
    print(f"Cloudflared stderr: {stderr_output}")

print("FastAPI server is running in a background thread.")
print("The cell execution is complete, but the API server is still active.")
print("To stop the server, interrupt the kernel or stop the Colab runtime.")


* 'schema_extra' has been renamed to 'json_schema_extra'


Starting FastAPI server in a background thread...
Starting Cloudflare tunnel...
✅ Cloudflare Public API URL: https://twisted-grateful-cameron-peripheral.trycloudflare.com/docs
FastAPI server is running in a background thread.
The cell execution is complete, but the API server is still active.
To stop the server, interrupt the kernel or stop the Colab runtime.
