In [None]:
pip install shap

In [None]:
import pandas as pd
# Local cache name and remote location of the white-wine quality CSV.
file_name = 'wine.csv'
url = (
    'https://archive.ics.uci.edu/ml/'
    'machine-learning-databases/'
    'wine-quality/winequality-white.csv'
)

In [None]:
# Use the local copy when it exists; otherwise fetch the data once and cache it.
try:
    wine = pd.read_csv(file_name)
except FileNotFoundError:
    print(f'Downloading {file_name} from {url}...')
    # The remote file is read with a semicolon delimiter.
    wine = pd.read_csv(url, sep=';')
    # Written with the default delimiter so the plain read_csv above can load it.
    wine.to_csv(file_name, index=False)
    print('Download complete!')

from tabulate import tabulate
# Descriptive statistics with one row per column, rounded to 2 decimals,
# with the "count" statistic removed.
summary = wine.describe().T.round(2).drop(columns="count")
# Render the summary as a pipe-format (markdown) table and print it.
markdown_table = tabulate(summary, headers='keys', tablefmt='pipe')
print(markdown_table)

In [None]:
from sklearn.model_selection import train_test_split
# Features are every column except 'quality'; 'quality' itself is the target.
X = wine.drop(columns='quality')
y = wine['quality']
# Hold out 20% of the rows for testing, with a fixed random_state.
splits = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = splits

In [None]:
from sklearn.linear_model import LinearRegression
# Create the linear regression and fit it on the training split;
# fit() returns the fitted estimator, which is kept as `model`.
model = LinearRegression().fit(X_train, y_train)

In [None]:
from sklearn.metrics import mean_absolute_error
# Predict on the held-out features and report the mean absolute error
# against the held-out targets.
y_pred = model.predict(X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
print(f"MAE: {mae:.2f}")

In [None]:
import numpy as np
# Table pairing each feature name with its fitted coefficient (3 decimals).
rounded_coefs = np.round(model.coef_, 3)
coefs = pd.DataFrame(
    {'feature': X.columns.values, 'coefficient': rounded_coefs}
)
print(coefs.to_markdown(index=False))

In [None]:
import shap
# Build a SHAP LinearExplainer from the fitted model and the training features.
explainer = shap.LinearExplainer(model, X_train)
# Apply the explainer to the test features to obtain their SHAP values.
shap_values = explainer(X_test)
# Print the raw array stored in the result's .values attribute.
print(shap_values.values)

In [None]:
# Waterfall plot for the first entry of shap_values (index 0).
shap.plots.waterfall(shap_values[0])

In [None]:
# Waterfall plot for the second entry of shap_values (index 1), called through
# the shap.plots namespace like the notebook's other plotting cells.
shap.plots.waterfall(shap_values[1])

In [None]:
# Beeswarm plot built from the full shap_values object.
shap.plots.beeswarm(shap_values)

In [None]:
# Scatter plot of the 'alcohol' column of shap_values across all rows.
shap.plots.scatter(shap_values[:, 'alcohol'])

In [None]:
# Show the fitted coefficient of a single feature.
feature = 'alcohol'
# Column position of the feature among the test features.
ind = X_test.columns.get_loc(feature)
# `ind` is a position, so use .iloc rather than label-based [] indexing,
# which only works while coefs keeps its default integer index.
coefs['coefficient'].iloc[ind]