## Visually Compare Wine Quality in a Summary Drift Report

In [1]:
# Load the red-wine quality dataset (semicolon-separated CSV).
import pandas as pd
# Kept from the original notebook; the explicit `.copy()` calls below already
# make the later column assignments operate on independent frames.
pd.options.mode.chained_assignment = None  # Disabling false positive warning

url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
wine = pd.read_csv(url, sep=";")

# Add some missing values to `citric acid`, to see how this is reflected in
# the profile visualizer.
# This has to happen BEFORE the split: boolean `.loc` selection returns
# copies, so nulls written to `wine` after the split would never show up in
# `wine_reference` / `wine_target` (the frames that actually get profiled).
ixs = wine.iloc[100:110].index
wine.loc[ixs, 'citric acid'] = None

# Split the wines into two groups by alcohol content:
# alcohol at or below 11 is the reference sample...
cond_reference = (wine['alcohol'] <= 11)
wine_reference = wine.loc[cond_reference].copy()
# ...and alcohol above 11 is the target dataset.
cond_target = (wine['alcohol'] > 11)
wine_target = wine.loc[cond_target].copy()

# Transform the numeric `quality` feature into a categorical one.
# With pd.cut's default right-closed intervals the bins are
# (2, 6.5] -> 'bad' and (6.5, 8] -> 'good'.
bins = (2, 6.5, 8)
group_names = ['bad', 'good']

wine_reference['quality'] = pd.cut(wine_reference['quality'], bins=bins, labels=group_names)
wine_target['quality'] = pd.cut(wine_target['quality'], bins=bins, labels=group_names)

# whylogs provides both the profiling entry point and the notebook visualizer
import whylogs as why
from whylogs.viz import NotebookProfileVisualizer

# Profile the reference dataframe and keep its profile view
result_ref = why.log(pandas=wine_reference)
prof_view_ref = result_ref.view()

# Do the same for the target dataframe
result = why.log(pandas=wine_target)
prof_view = result.view()

# Build the NotebookProfileVisualizer and hand it both profile views
visualization = NotebookProfileVisualizer()
visualization.set_profiles(
    target_profile_view=prof_view,
    reference_profile_view=prof_view_ref,
)

# Produce the summary drift report (last expression, so the notebook displays it)
visualization.summary_drift_report()