In [4]:
import pandas as pd
import numpy as np

from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import warnings
warnings.filterwarnings('ignore')

# All plots can be displayed on notebooks:
%matplotlib inline

# Make better use of Jupyter Notebook cell width

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:99% !important; }</style>"))
pd.options.display.float_format = '{:.2f}'.format

# Question 1

- Install Pipenv
- What's the version of pipenv you installed?
- Use --version to find out

In [5]:
!pip install pipenv

Collecting pipenv
  Downloading pipenv-2022.11.5-py2.py3-none-any.whl (2.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting virtualenv-clone>=0.2.5
  Using cached virtualenv_clone-0.5.7-py3-none-any.whl (6.6 kB)
Collecting virtualenv
  Downloading virtualenv-20.16.6-py3-none-any.whl (8.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.8/8.8 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
[?25hCollecting distlib<1,>=0.3.6
  Using cached distlib-0.3.6-py2.py3-none-any.whl (468 kB)
Collecting platformdirs<3,>=2.4
  Downloading platformdirs-2.5.3-py3-none-any.whl (14 kB)
Collecting filelock<4,>=3.4.1
  Using cached filelock-3.8.0-py3-none-any.whl (10 kB)
Installing collected packages: distlib, virtualenv-clone, platformdirs, filelock, virtualenv, pipenv
Successfully installed distlib-0.3.6 filelock-3.8.0 pipenv-2022.11.5 platformdirs-2.5.3 virtualenv

In [6]:
# Print the installed pipenv version (the answer to Question 1).
!pipenv --version

[1mpipenv[0m, version 2022.11.5
[0m

# Question 2
- Use Pipenv to install Scikit-Learn version 1.0.2
- What's the first hash for scikit-learn you get in Pipfile.lock?
- Note: you should create an empty folder for homework and do it there.

In [8]:
# Install the pinned scikit-learn release; pipenv adds it to the Pipfile and writes Pipfile.lock.
!pipenv install scikit-learn==1.0.2

[32m[1mInstalling scikit-learn==1.0.2...[0m
[K[1mAdding[0m [32m[1mscikit-learn[0m [1mto Pipfile's[0m [33m[1m[packages][0m[1m...[0m
[K[?25h✔ Installation Succeeded[0m 
[1mPipfile.lock not found, creating...[0m
Locking[0m [33m[packages][0m dependencies...[0m
[KBuilding requirements...
[KResolving dependencies...
[K[?25h[32m[22m✔ Success![39m[22m[0m 
Locking[0m [33m[dev-packages][0m dependencies...[0m
[1mUpdated Pipfile.lock (c71abaddf023d2d7029f36beec9b8afb73c91860ff0002758fa66f8ea365dad0)![0m
[1mInstalling dependencies from Pipfile.lock (65dad0)...[0m
To activate this project's virtualenv, run [33mpipenv shell[0m.
Alternatively, run a command inside the virtualenv with [33mpipenv run[0m.
[0m

# Question 3

- Let's use these models!

- Write a script for loading these models with pickle
- Score this client:

In [12]:
import pickle 

def load(filename):
    """Read a pickled object from ``filename`` and return it.

    The file is opened in binary mode and is closed again once unpickling
    has finished.

    NOTE: unpickling can execute arbitrary code, so only pass files that
    come from a trusted source.
    """
    with open(filename, 'rb') as source:
        loaded = pickle.load(source)
    return loaded

In [16]:
# Unpickle the two artifacts from disk (presumably the fitted
# DictVectorizer and the trained model — confirm against how they were saved).
dv = load('dv.bin')
model = load('model1.bin')

# The client to score, as given in Question 3.
client = {
    "reports": 0,
    "share": 0.001694,
    "expenditure": 0.12,
    "owner": "yes",
}

# Encode the single client as a one-row feature matrix, then keep the value
# in row 0, column 1 of the predicted probabilities.
X = dv.transform([client])
y_pred = model.predict_proba(X)[0, 1]

In [18]:
# Predicted probability that this client will get a credit card.
y_pred

0.16213414434326598

# Question 4

- Now let's serve this model as a web service

- Install Flask and gunicorn (or waitress, if you're on Windows)
- Write Flask code for serving the model
- Now score this client using requests:
- url = "YOUR_URL"
- client = {"reports": 0, "share": 0.245, "expenditure": 3.438, "owner": "yes"}
- requests.post(url, json=client).json()
- What's the probability that this client will get a credit card?

In [19]:
# Add Flask to the project's Pipfile (no version is pinned here).
!pipenv install flask

[32m[1mInstalling flask...[0m
[K[1mAdding[0m [32m[1mflask[0m [1mto Pipfile's[0m [33m[1m[packages][0m[1m...[0m
[K[?25h✔ Installation Succeeded[0m 
[33m[1mPipfile.lock (65dad0) out of date, updating to (3db960)...[0m
Locking[0m [33m[packages][0m dependencies...[0m
[KBuilding requirements...
[KResolving dependencies...
[K[?25h[32m[22m✔ Success![39m[22m[0m 
Locking[0m [33m[dev-packages][0m dependencies...[0m
[1mUpdated Pipfile.lock (8a607cd054c77add253b3926dfe9a7c6142096e72df3a548b7b4a959523db960)![0m
[1mInstalling dependencies from Pipfile.lock (3db960)...[0m
To activate this project's virtualenv, run [33mpipenv shell[0m.
Alternatively, run a command inside the virtualenv with [33mpipenv run[0m.
[0m

In [20]:
# Add gunicorn to the project's Pipfile (no version is pinned here).
!pipenv install gunicorn

[32m[1mInstalling gunicorn...[0m
[K[1mAdding[0m [32m[1mgunicorn[0m [1mto Pipfile's[0m [33m[1m[packages][0m[1m...[0m
[K[?25h✔ Installation Succeeded[0m 
[33m[1mPipfile.lock (3db960) out of date, updating to (b0a961)...[0m
Locking[0m [33m[packages][0m dependencies...[0m
[KBuilding requirements...
[KResolving dependencies...
[K[?25h[32m[22m✔ Success![39m[22m[0m 
Locking[0m [33m[dev-packages][0m dependencies...[0m
[1mUpdated Pipfile.lock (870144b5655fd9f58ae2d53dac36b52a25d66ce3f657776912c2ac027ab0a961)![0m
[1mInstalling dependencies from Pipfile.lock (b0a961)...[0m
To activate this project's virtualenv, run [33mpipenv shell[0m.
Alternatively, run a command inside the virtualenv with [33mpipenv run[0m.
[0m

# probability from test-q4:

- {'get_card': True, 'get_card_probability': 0.9282218018527452}

# Docker

- Install Docker. We will use it for the next two questions.

- For these questions, we prepared a base image: svizor/zoomcamp-model:3.9.12-slim. You'll need to use it (see Question 5 for an example).

- This image is based on python:3.9.12-slim and has a logistic regression model (a different one) as well as a dictionary vectorizer inside.

- This is what the Dockerfile for this image looks like:

- FROM python:3.9.12-slim
- WORKDIR /app
- COPY ["model2.bin", "dv.bin", "./"]
- We already built it and then pushed it to svizor/zoomcamp-model:3.9.12-slim.

- Note: You don't need to build this docker image, it's just for your reference.

- Question 5
- Download the base image svizor/zoomcamp-model:3.9.12-slim. You can easily get it by using the docker pull command.

- So what's the size of this base image?

- 15 Mb
- 125 Mb
- 275 Mb
- 415 Mb
- You can get this information when running docker images - it'll be in the "SIZE" column.

# We can check the size of the downloaded image either with the 'docker image ls' command or from the image's properties in Docker Desktop

- Answer is 125 Mb

# Dockerfile

- Now create your own Dockerfile based on the image we prepared.

- It should start like this:

- FROM svizor/zoomcamp-model:3.9.12-slim


- Now complete it:

- Install all the dependencies from the Pipenv file
- Copy your Flask script
- Run it with Gunicorn
- After that, you can build your docker image.

