In [11]:
from fastapi import FastAPI
import pandas as pd
import pickle
import re
import requests
import sklearn
import uvicorn

def bold(text):
    """Return ``text`` wrapped in the ANSI bold-on and reset escape sequences."""
    bold_on, reset = "\033[1m", "\033[0m"
    return f"{bold_on}{text}{reset}"

### Q1: UV version

In [2]:
# Shell escape: print the version of the `uv` executable found on PATH.
!uv --version

uv 0.9.5


### Q2: Scikit-learn version from uv installation

In [12]:
lock_file = "uv.lock"

# TOML documents are UTF-8 by specification, so decode explicitly instead of
# relying on the platform's locale-dependent default encoding.
with open(lock_file, "r", encoding="utf-8") as f:
    content = f.read()

# Step 1: Find the scikit-learn package block.
# The lazy `.*?` together with the lookahead stops the match right before the
# next `[[package]]` header, or at end of file when it is the last entry.
package_match = re.search(
    r'(\[\[package\]\]\s*name\s*=\s*"scikit-learn".*?)(?=\n\[\[package\]\]|\Z)',
    content,
    re.DOTALL
)

if package_match:
    package_block = package_match.group(0)

    # Print the cropped block so the extracted values can be checked by eye.
    print("=== Cropped package block ===")
    print(package_block)
    print("=============================")

    # Step 2: Extract version (first `version = "..."` entry inside the block).
    version_match = re.search(r'version\s*=\s*"([^"]+)"', package_block)
    version = version_match.group(1) if version_match else None

    # Step 3: Extract SHA from sdist (the `hash` key of the inline `sdist` table).
    sha_match = re.search(r'sdist\s*=\s*\{[^}]*hash\s*=\s*"([^"]+)"', package_block)
    sha = sha_match.group(1) if sha_match else None

    # A field that was not found is reported as the string "None".
    print("scikit-learn version:", bold(version))
    print("SHA:", bold(sha))
else:
    print("scikit-learn package not found in uv.lock")


=== Cropped package block ===
[[package]]
name = "scikit-learn"
version = "1.6.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
    { name = "joblib" },
    { name = "numpy" },
    { name = "scipy" },
    { name = "threadpoolctl" },
]
sdist = { url = "https://files.pythonhosted.org/packages/9e/a5/4ae3b3a0755f7b35a280ac90b28817d1f380318973cff14075ab41ef50d9/scikit_learn-1.6.1.tar.gz", hash = "sha256:b4fc2525eca2c69a59260f583c56a7557c6ccdf8deafdba6e060f94c1c59738e", size = 7068312, upload-time = "2025-01-10T08:07:55.348Z" }

scikit-learn version: [1m1.6.1[0m
SHA: [1msha256:b4fc2525eca2c69a59260f583c56a7557c6ccdf8deafdba6e060f94c1c59738e[0m


### Q3: Run the model

In [13]:
# Lead record that is scored by the pipeline in this notebook.
client = dict(
    lead_source="paid_ads",
    number_of_courses_viewed=2,
    annual_income=79276.0,
)

In [15]:
model_file = "pipeline_v1.bin"

# NOTE(review): unpickling executes code embedded in the file, so only load
# model files that come from a trusted source.
with open(model_file, mode="rb") as model_fh:
    pipeline = pickle.load(model_fh)

# Row 0 is the only record scored; column 1 is read as the "converted"
# probability (presumably the positive class; confirm against the pipeline).
converted_prob = pipeline.predict_proba(client)[0][1]

print("Converted probability:", bold(format(converted_prob, ".3f")))

Converted probability: [1m0.534[0m


### Q4: Load model and score record

In [26]:
# Second lead record (Q4).
client_2 = dict(
    lead_source="organic_search",
    number_of_courses_viewed=4,
    annual_income=80304.0,
)


In [22]:
response = !python3 client.py
print("Converted probability:", bold(response[0].split(": ")[1]))

Converted probability: [1m0.534[0m


### Q5: Docker usage

The Docker image provided by the course developer was pulled with the following command:

 `docker pull agrigorev/zoomcamp-model:2025` 

According to `docker images`, the image size is `121 MB`.

### Q6: Use developed docker image for prediction

In [28]:
response_docker = !python3 client_docker.py
print("Converted probability (Docker):", bold(response_docker[0].split(": ")[1]))

Converted probability (Docker): [1m0.993[0m
