# Unit Testing for Slice-Validation Demo

foo.py

In [None]:
def foo():
    """Return the fixed greeting string used by the demo test."""
    greeting = "Hello world!"
    return greeting

test_foo.py

In [None]:
from .foo import foo

def test_foo():
    """Verify that ``foo`` returns the expected greeting."""
    assert foo() == "Hello world!"

test_slice.py

In [None]:
import pandas as pd
import pytest


@pytest.fixture
def data():
    """Build a small three-row DataFrame of fake data for the tests below."""
    columns = {
        "id": [1, 2, 3],
        "numeric_feat": [3.14, 2.72, 1.62],
        "categorical_feat": ["dog", "dog", "cat"],
    }
    return pd.DataFrame(columns)


def test_data_shape(data):
    """Check that dropping null rows leaves the shape unchanged (data is assumed null-free)."""
    shape_without_nulls = data.dropna().shape
    assert data.shape == shape_without_nulls, "Dropping null changes shape."


def test_slice_averages(data):
    """Check that the mean of ``numeric_feat`` per categorical slice lies strictly between 1.5 and 2.5.

    Args:
        data: DataFrame with ``categorical_feat`` and ``numeric_feat`` columns.
    """
    # Define the bounds once so the check and the failure message cannot drift apart.
    lower, upper = 1.5, 2.5
    for cat_feat in data["categorical_feat"].unique():
        avg_value = data[data["categorical_feat"] == cat_feat]["numeric_feat"].mean()
        assert (
            upper > avg_value > lower
        ), f"For {cat_feat}, average of {avg_value} not between {lower} and {upper}."

## Exercise Solution: Data Slicing

In [None]:
import pandas as pd

df = pd.read_csv("./iris.csv")


def slice_iris(df, feature):
    """Print the mean and standard deviation of ``feature`` for every class in ``df``.

    One three-line report is printed per distinct value of the ``class``
    column, followed by a single blank line after the last class.
    """
    labels = df["class"]
    for cls in labels.unique():
        values = df.loc[labels == cls, feature]
        report = (
            f"Class: {cls}",
            f"{feature} mean: {values.mean():.4f}",
            f"{feature} stddev: {values.std():.4f}",
        )
        print("\n".join(report))
    print()


slice_iris(df, "septal_length")
slice_iris(df, "septal_width")
slice_iris(df, "petal_length")
slice_iris(df, "petal_width")

## aequitas_demo.ipynb

In [None]:
import sys
!{sys.executable} -m pip install aequitas==0.42 pandas==1.2.3

In [None]:
import pandas as pd
from aequitas.group import Group
from aequitas.bias import Bias
from aequitas.fairness import Fairness
import aequitas.plot as ap

# Enable Pandas to display dataframes without restriction.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [None]:
# Load in the data and take an initial look at it.
df = pd.read_csv("data/compas_for_aequitas.csv")
print(df.shape)
df.head()

In [None]:
# Race is our protected class that we will be exploring.
df["race"].value_counts()

In [None]:
# Remove the races that have very little data in this dataset.
df = df[~df["race"].isin(["Asian", "Native American"])]

### Create Crosstab

Create the crosstab that forms the basis for all the subsequent analyses

In [None]:
# Build the crosstab from the filtered dataframe.
group = Group()
# get_crosstabs returns two values; only the first (the crosstab) is kept here.
xtab, _ = group.get_crosstabs(df)

# Preview the first 10 rows of the crosstab.
xtab.head(10)

### Compute Bias

We calculate the bias vs. a predefined group we manually set.

In [None]:
# Compute disparities against a manually chosen reference group for each
# attribute (race, sex, age_cat), passed via ref_groups_dict.
# NOTE(review): alpha=0.05 and mask_significance=True presumably control the
# significance testing of the disparities — confirm against the Aequitas docs.
bias = Bias()
bias_df = bias.get_disparity_predefined_groups(xtab,
                                               original_df=df,
                                               ref_groups_dict={"race": "Caucasian", "sex": "Male", "age_cat": "25 - 45"},
                                               alpha=0.05,
                                               mask_significance=True)
# Preview the first 10 rows of the disparity table.
bias_df.head(10)

In [None]:
# Same crosstab, but no reference groups are passed in.
# NOTE(review): presumably get_disparity_major_group uses each attribute's
# largest group as the reference — confirm against the Aequitas docs.
# The result is only previewed (first 10 rows), not stored.
bias.get_disparity_major_group(xtab,
                               original_df=df,
                               alpha=0.05,
                               mask_significance=True).head(10)

In [None]:
# Derive group-level fairness results from the disparity table computed with
# the predefined reference groups (bias_df).
fairness = Fairness()
fairness_df = fairness.get_group_value_fairness(bias_df)
# Preview the first 10 rows.
fairness_df.head(10)

In [None]:
# Summarize the group-level fairness table into an overall result and print it.
overall_fairness = fairness.get_overall_fairness(fairness_df)
print(overall_fairness)

In [None]:
# Metrics to include in the summary plot.
# NOTE(review): presumably false positive rate, false negative rate and
# false omission rate — confirm against the Aequitas docs.
metrics = ['fpr', 'fnr', 'for']
# Threshold passed to the plot as fairness_threshold.
disparity_tolerance = 1.25

ap.summary(bias_df, metrics, fairness_threshold=disparity_tolerance)

## Exercise: Aequitas

In this exercise, you will use Aequitas to investigate the potential bias in a model/data set.

* We'll use the Car Evaluation Data Set from the UCI Machine Learning Repository. A notebook that trains a logistic regression model to determine the car's acceptability is provided.
* Using Aequitas, determine if the model contains bias. For simplicity, from Aequitas' Fairness class obtain the results of the get_overall_fairness method which returns a dictionary with Yes/No result for "Unsupervised Fairness", "Supervised Fairness" and "Overall Fairness".
* Lastly, use the aequitas.plotting.Plot module and compute the summary on fpr, fnr, and for with a 1.25 fairness_threshold.
* You can draw inspiration from examples present here: https://github.com/dssg/aequitas/blob/master/docs/source/examples/compas_demo.ipynb

The data from this exercise comes from the UCI Machine Learning Repository: https://archive.ics.uci.edu/ml/datasets/Car+Evaluation For more details on the data set see the included documentation.

In [None]:
# Ensure that Aequitas dependency is installed
import sys
!{sys.executable} -m pip install aequitas==0.42 pandas==1.2.3

In [None]:
# Imports
from aequitas.plotting import Plot
ap = Plot()
import pandas as pd

from aequitas.group import Group
from aequitas.bias import Bias 
from aequitas.fairness import Fairness

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder, label_binarize, LabelBinarizer
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [None]:
# I manually added the headers to the data set.
df = pd.read_csv("./car.csv")

# We'll modify the data to make it a binary problem of acceptable or unacceptable car.
df = df.where(df != 'good', 'acc')
df = df.where(df != 'vgood', 'acc')

y = df.pop('car')
X = df

df.head()

In [None]:
# Hold out 25% of the rows as a test set; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=23)
print("X_train shape: ", X_train.shape)
print("X_test shape: ", X_test.shape)

# Use this later to construct the DataFrame Aequitas requires.
# The copy is taken before encoding, so it keeps the original (un-encoded) test features.
df_aq = X_test.copy()

# One-hot encode the features: fit on the training set only, then apply the
# same encoding to the test set.
ohe = OneHotEncoder(handle_unknown="ignore", sparse=False)
X_train = ohe.fit_transform(X_train.values)
X_test = ohe.transform(X_test.values)

# NOTE(review): `lb` is not used in the visible code; the labels are binarized
# with the label_binarize function below instead.
lb = LabelBinarizer()
# Binarize the labels using the class order ['unacc', 'acc'], then flatten to 1-D with ravel().
y_train = label_binarize(y_train.values, classes=['unacc', 'acc']).ravel()
y_test = label_binarize(y_test.values, classes=['unacc', 'acc']).ravel()

In [None]:
# Fit a logistic regression on the encoded training data.
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, y_train)

# Predicted probabilities and hard predictions for the test set.
# NOTE(review): `scores` is not used again in this cell.
scores = lr.predict_proba(X_test)
pred = lr.predict(X_test)

# Evaluate the hard predictions with the F1 score.
f1 = f1_score(y_test, pred)
print(f"F1 score: {f1:.4f}")

* Construct the dataframe that Aequitas will use.
* You can draw inspiration from examples present here: https://github.com/dssg/aequitas/blob/master/docs/source/examples/compas_demo.ipynb

In [None]:
# Run Aequitas.
# Summarize: Aequitas classes provide a few functions that give a high-level summary of fairness and disparity, such as
# plot_fairness_group_all()
# plot_fairness_disparity_all()
# plot_disparity_all()

# Write a model card

Model Details: 
Justin C Smith created the model. It is logistic regression using the default hyperparameters in scikit-learn 0.24.2.

Intended Use: 
This model should be used to predict the acceptability of a car based off a handful of attributes. The users are prospective car buyers.

Metrics: 
The model was evaluated using F1 score. The value is 0.8960.

Data: 
The data was obtained from the UCI Machine Learning Repository (https://archive.ics.uci.edu/ml/datasets/Car+Evaluation). The target class was modified from four categories down to two: "unacc" and "acc", where "good" and "vgood" were mapped to "acc".

The original data set has 1728 rows, and a 75-25 split was used to break this into a train and test set. No stratification was done. To use the data for training a One Hot Encoder was used on the features and a label binarizer was used on the labels.

Bias: 
According to Aequitas, bias is present at the unsupervised and supervised level. This implies an unfairness in the underlying data and also unfairness in the model. From the Aequitas summary plot we see that bias is present in only some of the features and is not consistent across metrics.

# Data and Model Versioning

DVC's commands are designed to be very similar to Git's.

A project is initialized using git init or dvc init.

To add code or data use git add or dvc add, respectively. Typically, after a dvc add it will then prompt you to git commit the corresponding .dvc file that has been generated. There is a dvc commit but it is not used in the same way as with git commit.

Lastly, there are git push and git pull and their equivalents, dvc push and dvc pull. DVC's push and pull are for uploading and downloading data from the remote store specified in the dvc configuration, whereas git's are for sending changes to your remote repository or bringing in any changes.

## Tracking Data with DVC

To create a local remote, simply make the folder and tell DVC it is your remote:

In [None]:
mkdir /local/remote
dvc remote add -d localremote /local/remote

## Tracking Data Locally with DVC

Set up the repository and local remote:

In [None]:
git init
dvc init
mkdir ../local_remote
dvc remote add -d localremote ../local_remote

The code to generate a csv, saved as exercise_func.py:

In [None]:
import sys
import pandas as pd


def create_ids(id_count: str) -> None:
    """Generate a list of sequential IDs and save it as a csv.

    Args:
        id_count: Number of IDs to generate, given as a string (for example a
            command-line argument); it is converted with ``int``.

    The IDs ``0`` through ``int(id_count) - 1`` are written as a single column
    to ``./id.csv`` in the current working directory, without the DataFrame
    index.
    """
    # list(range(...)) builds the list directly; no identity comprehension needed.
    ids = list(range(int(id_count)))
    pd.DataFrame(ids).to_csv("./id.csv", index=False)


if __name__ == "__main__":
    create_ids(sys.argv[1])

And then push the data and changes:

In [None]:
# Generate id.csv, track it with DVC, commit the DVC metadata to git, then upload the data.
python ./exercise_func.py 10
dvc add id.csv
git add .gitignore id.csv.dvc
git commit -m "Initial commit of tracked id.csv"
dvc push

## Tracking Data Remotely with DVC

Install the Google Drive dependencies for DVC using

In [None]:
conda install -c conda-forge dvc-gdrive

Add the Google Drive remote using the unique identifier found in the URL of your Drive folder:

In [None]:
dvc remote add driveremote gdrive://UNIQUE_IDENTIFIER

At this point you will receive a pop up to authenticate with Google Drive. Complete the authentication in the browser and copy the provided code into the command line prompt.

Then push using

In [None]:
dvc push --remote driveremote

or you can now set the Google Drive remote as your default:

In [None]:
# Make the Google Drive remote added above (driveremote) the default, then push to it.
dvc remote default driveremote
dvc push

## Pipelines with DVC

Modify train.py as follows:

In [None]:
import yaml
from yaml import CLoader as Loader
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

# Read the hyperparameters from params.yaml in the working directory.
with open("./params.yaml", "rb") as f:
    params = yaml.load(f, Loader=Loader)

# Load the features and labels from text files.
X = np.loadtxt("X.csv")
y = np.loadtxt("y.csv")

# Hold out 25% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=23
)

# The regularization parameter C comes from params.yaml.
lr = LogisticRegression(C=params["C"])
# reshape(-1, 1) turns the features into a single-column 2-D array.
# NOTE(review): assumes X.csv holds one feature value per row — confirm.
lr.fit(X_train.reshape(-1, 1), y_train)

# Score the held-out rows with the F1 metric.
preds = lr.predict(X_test.reshape(-1, 1))
f1 = f1_score(y_test, preds)
print(f"F1 score: {f1:.4f}")

This assumes a params.yaml in the working directory with a single line of C: 1.0, or whichever value you choose.

To create the prepare stage:

In [None]:
dvc run -n prepare -d fake_data.csv -d prepare.py -o X.csv -o y.csv python ./prepare.py

And to create the train stage:

In [None]:
dvc run -n train -d X.csv -d y.csv -d train.py -p C python ./train.py

Now that we have a robust and reproducible pipeline built with DVC we are almost ready to use it to track experiments. But first we need to add metrics to our pipeline so that we have something to compare across experiments.

For example:

In [None]:
dvc run -n evaluate \
          -d validate.py -d model.pkl \
          -M validation.json \
          python validate.py model.pkl validation.json

where we have now included a metric in our stage. A similar process can be used to include plots as part of a stage.

We are now ready to set up experiments. Simply use dvc exp run and specify the relevant parameters to run an experiment. Experiments can be compared using dvc exp diff or dvc exp show. Each experiment is given a unique name that can be used for management and ultimately choosing which experiment to keep as we prepare our model for deployment. Don't forget to commit the best experiment!

# CI/CD

## Setting up GitHub Actions

The first action I've chosen is the Python Application Action, part of which we saw in the lesson. This Action installs Python and the requirements for your application (if there are any). Lastly it runs flake8 and pytest -- the build fails if either a test fails or certain flake8 errors are hit.

In principle, one should run flake8 and pytest before you commit your code since that can be faster than waiting on the automated build process but this piece of automation ensures any contributors to the code also pass flake8 and pytest and they get checked in case you don't run them yourself. It also ensures that both of these pass when the code is built in a clean environment.

python-app.yaml

In [None]:
# This workflow will install Python dependencies, run tests and lint with a single version of Python
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions

name: Python application

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  build:

    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v2
    - name: Set up Python 3.9
      uses: actions/setup-python@v2
      with:
        # Quote the version so YAML keeps it a string; an unquoted 3.10 would be read as the number 3.1.
        python-version: "3.9"
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install flake8 pytest
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
    - name: Test with pytest
      run: |
        pytest

The second Action is a scheduled job that uses a Unix time-based job scheduler called cron. This Action simply marks issues and pull requests as stale if they have no activity. If you go to stale.yaml's repo then you will see that there are many more options than shown in the default YAML file.

This Action is particularly useful for large projects, e.g. open-source projects, since it helps with the maintenance of issues and pull requests. It makes it easier to track what is current or not, and what may need attention without manually combing through the issues and pull requests.

stale.yaml

In [None]:
name: Mark stale issues and pull requests

on:
  schedule:
  - cron: "30 1 * * *"

jobs:
  stale:

    runs-on: ubuntu-latest
    permissions:
      issues: write
      pull-requests: write

    steps:
    - uses: actions/stale@v3
      with:
        repo-token: ${{ secrets.GITHUB_TOKEN }}
        stale-issue-message: 'Stale issue message'
        stale-pr-message: 'Stale pull request message'
        stale-issue-label: 'no-issue-activity'
        stale-pr-label: 'no-pr-activity'

## Heroku Fundamentals

### Setting up CD

* Navigate to the Heroku dashboard and select the button for "New" then select Create new app.
* After creating an app you will be brought to the deploy screen. Select GitHub as the deployment method, then search for a repository on your GitHub to connect. In the automatic deploys section select "Wait for CI to pass before deploy" and then click the "Enable Automatic Deploys" button.

# API Deployment with FastAPI

We can define a very simple API where each input is simply an int and it contains a path, query and body as inputs:

In [None]:
# bar.py

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


# Request-body model with a single integer field, ``value``.
class Value(BaseModel):
    value: int


# POST endpoint that takes one input from each location and echoes all three back.
@app.post("/{path}")
async def exercise_function(path: int, query: int, body: Value):
    # ``path`` matches the ``{path}`` segment of the route. ``query`` is not in
    # the route template, so it is presumably read from the query string.
    # ``body`` is the parsed ``Value`` payload.
    return {"path": path, "query": query, "body": body}

We can use test_bar.py to test the code in bar.py.

In [None]:
# test_bar.py

import json
from fastapi.testclient import TestClient

from bar import app

client = TestClient(app)


def test_post():
    """POST a path value, a query value and a JSON body, and check each is echoed back."""
    # ``json=`` lets the test client serialize the body and set the JSON
    # content type itself, rather than passing a pre-encoded string via ``data=``.
    r = client.post("/42?query=5", json={"value": 10})
    assert r.status_code == 200
    # Parse the response once and reuse it for every assertion.
    payload = r.json()
    assert payload["path"] == 42
    assert payload["query"] == 5
    assert payload["body"] == {"value": 10}

## Local API Testing

Stylistically, I split each test into separate functions. Some people will put all tests of a single function/method in a single test function, others will break it out. I find that the approach below facilitates rapid identification of what exactly is failing when a test breaks. Assuming the app is located in foo.py then for test_foo.py I have:

In [None]:
# test_foo.py

from fastapi.testclient import TestClient

from foo import app

client = TestClient(app)


def test_get_path():
    """A request with only the item id succeeds and reports one fetched item."""
    response = client.get("/items/42")
    expected = {"fetch": "Fetched 1 of 42"}
    assert response.status_code == 200
    assert response.json() == expected


def test_get_path_query():
    """A request with an item id and ``count=5`` succeeds and reports five fetched items."""
    response = client.get("/items/42?count=5")
    expected = {"fetch": "Fetched 5 of 42"}
    assert response.status_code == 200
    assert response.json() == expected


def test_get_malformed():
    """A request without an item id must not return 200."""
    response = client.get("/items")
    status = response.status_code
    assert status != 200

Running Local API Tests

To run the tests, invoke pytest at the command line. With the supplied foo.py and test_foo.py it should yield these results:

## Exploring Heroku's CLI

Steps
* Install the Heroku CLI. I used the Ubuntu installation instructions: curl https://cli-assets.heroku.com/install.sh | sh
* Type 'heroku' to see the full list of commands.
* To create an app with a specific name and to specify it as Python app use: heroku create name-of-the-app --buildpack heroku/python
    * Note that manually specifying it as a Python app is not necessary. Heroku will automatically try and detect the language of your app. In the case of Python it searches for either a requirements.txt or setup.py.
* We can view our app's buildpacks using heroku buildpacks --app name-of-the-app.
* Now we'll initiate our folder as a git repository and commit it so we can connect it to our new Heroku app.
    * git init
    * git add *
    * git commit -m "Initial commit."
* Connect the repo to our new app: heroku git:remote --app name-of-the-app.
    * The app will launch after a few moments.
* Enter into the Heroku VM using: heroku run bash --app name-of-the-app.
    * There is not much to see here. Explore the various Unix commands such as pwd and ls. Doing 'ls ..' reveals many of the standard folders one would expect in a Unix environment. Note this is very lightweight, not even vi is included!

## Live API Testing with the requests Module

We will test our API one final time now that it is fully deployed. To do this we will use the requests module. This module makes it painless to POST, GET, DELETE, etc. with any API.

Using a few lines of code we can POST to an endpoint and retrieve both the status code and the resulting JSON in the response object:

In [None]:
import requests

response = requests.post('/url/to/query/')

print(response.status_code)
print(response.json())

The requests module is expansive, but a few helpful additions to the bare HTTP methods include authentication and passing in data as JSON objects using the auth and data parameters, respectively. E.g.:



In [None]:
r = requests.post('/url/to/query/', auth=('usr', 'pass'), data=json.dumps(data))

Trying out Live APIs

A fun way to get experience with the requests module is to query a live API! There is an extensive list of free and public APIs here. Many of these do not even require an authorization key, though getting access to one that needs a key is also good practice!

Note that not all APIs are created equally and some have significantly more documentation than others. For instance, the Art Institute of Chicago's API has extensive documentation. Some APIs even have their documentation using Swagger like you saw with FastAPI!