In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pipeline import Pipeline
from collections import Counter
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, SVR
from sklearn.model_selection import KFold, GridSearchCV, train_test_split, cross_val_score
from sklearn.metrics import r2_score
import shap

# Global seaborn styling: white theme with the 'Set1' colour palette
sns.set_theme(style='white', palette='Set1')
# Turn off the bottom x-axis tick marks; keep the left y-axis tick marks
plt.rcParams['xtick.bottom'] = False
plt.rcParams['ytick.left'] = True
# plt.rcParams.update({
#     "figure.facecolor": (0.0, 0.0, 0.0, 0.0),
#     "axes.facecolor": (0.0, 0.0, 0.0, 0.0),
#     "legend.facecolor": (0.0, 0.0, 0.0, 0.0),
#     "savefig.facecolor": (0.0, 0.0, 0.0, 0.0),
# })

ModuleNotFoundError: No module named 'matplotlib'

## Model social contingency using Temporal Response Functions (TRFs)

#### Instantiate pipeline

In [None]:
# Analysis pipeline for the 'va' modality using the 'auditory_nerve' audio type.
# TRF settings passed through: direction 1, lags from 0 to 3, regularization 1.
pipeline_an = Pipeline(
    trf_direction=1,
    trf_min_lag=0,
    trf_max_lag=3,
    regularization=1,
    modality='va',
    audio_type='auditory_nerve',
)

In [None]:
df_an = pipeline_an.make_main_df()

#### DataFrame with participant responses

In [None]:
df_responses = pipeline_an.make_response_df()
df_responses.to_csv('./df_responses.csv')

In [None]:
# Split the main frame by condition: rows labelled 'TRUE' vs. every other row,
# each re-indexed from 0.
is_true_trial = df_an['Condition'] == 'TRUE'
df1_an = df_an[is_true_trial].reset_index(drop=True)
df2_an = df_an[~is_true_trial].reset_index(drop=True)

#### Train TRF on true trials

In [None]:
trfs = pipeline_an.train_model(df1_an)

#### Predict responses to both True & Fake trials

In [None]:
# Predict responses to True trials
true_data = pipeline_an.predict_response(df1_an, trfs)
df_trueCorrs = pipeline_an.make_trf_df(true_data)

# Predict responses to Fake trials
fake_data = pipeline_an.predict_response(df2_an, trfs)
df_fakeCorrs = pipeline_an.make_trf_df(fake_data)

#### DataFrame with TRF predictions for trials

In [None]:
# Stack the True- and Fake-trial results into one frame. ignore_index=True gives
# the result a fresh unique RangeIndex; without it the two inputs' index labels
# are repeated, and duplicate labels break label-based alignment downstream.
df_correlations = pd.concat([df_trueCorrs, df_fakeCorrs], ignore_index=True)

In [None]:
def get_category(cond, resp):
    """Map a (condition, response) pair to a signal-detection category.

    Returns 'hit' for ('true', 'g'), 'miss' for ('true', 'h'), 'fa' for
    ('fake', 'g') and 'cr' for ('fake', 'h'). Any other pair yields None.
    """
    category_by_pair = {
        ('true', 'g'): 'hit',
        ('true', 'h'): 'miss',
        ('fake', 'g'): 'fa',
        ('fake', 'h'): 'cr',
    }
    return category_by_pair.get((cond, resp))
    
# Take an explicit copy so the column assignments below write to df_test itself
# rather than to a slice of df_correlations (avoids SettingWithCopyWarning).
df_test = df_correlations[['listener_au', 'trial', 'condition', 'r']].copy()

# Most frequent participant response per video (first mode on ties), computed
# once per distinct video instead of once per df_test row. A video without any
# response still raises, because .mode() is then empty.
mode_response_by_video = {
    video: df_responses.loc[df_responses['VideoPath'] == video, 'Resp'].mode()[0]
    for video in df_test['trial'].unique()
}
df_test['response'] = df_test['trial'].map(mode_response_by_video)

# Signal-detection category of each row (None for unexpected label pairs)
df_test['sdt'] = [
    get_category(cond, resp)
    for cond, resp in zip(df_test['condition'], df_test['response'])
]

# Drop the 'Pitch' rows; copy again so the new columns are added to an
# independent frame and not to a filtered view.
df_test = df_test[df_test['listener_au'] != 'Pitch'].copy()
# AU43 is labelled 'eye'; every other remaining AU is labelled 'mouth'
df_test['region'] = np.where(df_test['listener_au'] == 'AU43', 'eye', 'mouth')
# 1 when the modal response matches the condition ('true'/'g' or 'fake'/'h'),
# i.e. when the SDT category is a hit or a correct rejection; otherwise 0.
df_test['accuracy'] = df_test['sdt'].isin(['hit', 'cr']).astype(int)

In [None]:
def fit_to_resp(stim, df):
    """Return `pipeline_an.aus` together with the 'r' values for one stimulus.

    Arguments:
        stim: Value matched against the 'trial' column of `df`.
        df (pandas.DataFrame): Frame with 'trial' and 'r' columns.

    Returns:
        tuple: (pipeline_an.aus, list of 'r' values from the rows where 'trial' == stim).
    """
    r_for_stim = df.loc[df['trial'] == stim, 'r']
    return pipeline_an.aus, r_for_stim.tolist()

# Responses from the 'va' block only, re-indexed from 0
in_va_block = df_responses['Block'] == 'va'
df_for_log = df_responses[in_va_block].reset_index(drop=True)

# Recode the condition labels: 'TRUE' -> 'true', anything else -> 'fake'
df_for_log['Condition'] = df_for_log['Condition'].eq('TRUE').map({True: 'true', False: 'fake'})

# Attach, per response row, the list of AUs and the list of TRF fits for its video,
# then unpack both lists so each (response, AU) pair gets its own row.
# NOTE(review): explode on two columns needs both lists to have the same length per row — confirm.
df_for_log[['listener_au', 'model_performance']] = df_for_log.apply(
    lambda response_row: fit_to_resp(response_row['VideoPath'], df_correlations),
    axis=1,
    result_type='expand',
)
df_for_log = df_for_log.explode(['listener_au', 'model_performance'])

df_for_log.to_csv('./df_for_log.csv')

In [None]:
# Mean r per SDT category, restricted to the AU25 and AU43 rows
au25_au43_rows = df_test[df_test['listener_au'].isin(['AU25', 'AU43'])]
sns.barplot(au25_au43_rows, x='sdt', y='r', hue='listener_au', gap=0.1)

In [None]:
# sns.barplot(df_test, x='sdt', y='r', hue='region', gap=0.1)

In [None]:
fig, axs = plt.subplots(ncols=2, sharex=True, sharey=True, figsize=(10, 4))
plt.tight_layout(w_pad=4)

# One panel per action unit. Both accuracy levels share the panel colour and
# are told apart by marker ("o"/"s") and line style ('--'/'-').
panel_colours = {'AU25': '#c10001', 'AU43': '#1b6097'}
for panel, (au_name, colour) in zip(axs, panel_colours.items()):
    sns.pointplot(
        data=df_test[df_test['listener_au'] == au_name],
        x='condition',
        y='r',
        hue='accuracy',
        palette=[colour, colour],
        markers=["o", "s"],
        dodge=True,
        linestyles=['--', '-'],
        ax=panel,
    )
    panel.set_xlabel('Condition')
    panel.set_title(au_name)

# Only the left panel gets the descriptive y label
axs[0].set_ylabel('Average TRF fit')

In [None]:
import pingouin as pg

# Two-way between-subjects ANOVA of r on condition and response, run separately
# for every AU except the last entry of pipeline_an.aus. An AU is printed when
# the third row of its ANOVA table has an uncorrected p below 0.1.
# NOTE(review): the third row is presumably the condition x response interaction — confirm.
for au in pipeline_an.aus[:-1]:
    au_rows = df_test[df_test['listener_au'] == au]
    anova_table = pg.anova(data=au_rows, dv='r', between=['condition', 'response'])
    third_row_p = anova_table.iloc[2]['p-unc']
    if third_row_p < 0.1:
        print(au)
        print(anova_table)

#### Plot TRF for each AU

In [None]:
def plot_trf(
    direction,
    trf,
    channel=None,
    feature=None,
    axes=None,
    show=True,
    kind="line",
):
    """
    Plot the weights of a TRF model across time lags for a select channel or feature.

    Arguments:
        direction (int): Direction of the model. If -1 (decoder), the transposed weights are plotted and a warning is printed; any other value plots the weights as stored.
        trf: Fitted model object exposing `weights` (indexed here as [feature, lag, channel]) and `times`.
        channel (None | int | str): Channel selection. If None, all channels will be used. If an integer, the channel at that index will be used. If 'avg' or 'gfp' , the average or standard deviation across channels will be computed.
        feature (None | int | str): Feature selection. If None, all features will be used. If an integer, the feature at that index will be used. If 'avg' , the average across features will be computed.
        axes (matplotlib.axes.Axes): Axis to plot to. If None is provided (default) generate a new plot.
        show (bool): If True (default), show the plot after drawing.
        kind (str): Type of plot to draw. If 'line' (default), plot the standardized (z-scored) weights against the time lags, if 'image' draw a features-by-times plot where the weights are color-coded.

    Returns:
        fig (matplotlib.figure.Figure): If no axes was provided and a new figure is created, it is returned.

    Raises:
        ModuleNotFoundError: If matplotlib is unavailable (`plt` is None).
        ValueError: If neither a channel nor a feature is selected (and neither dimension has size 1), or if `feature` / `channel` is not one of the accepted values.
    """
    if plt is None:
        raise ModuleNotFoundError("Need matplotlib to plot TRF!")
    # Read the weights first so that the decoder transpose below is the version
    # that actually gets plotted instead of being replaced by the raw weights.
    weights = trf.weights
    if direction == -1:
        weights = weights.T
        print(
            "WARNING: decoder weights are hard to interpret, consider using the `to_forward()` method"
        )
    if axes is None:
        fig, ax = plt.subplots(figsize=(6, 6))
    else:
        fig, ax = None, axes  # dont create a new figure
    # select channel and or feature; a dimension of size 1 is selected automatically
    if weights.shape[0] == 1:
        feature = 0
    if weights.shape[-1] == 1:
        channel = 0
    if channel is None and feature is None:
        raise ValueError("You must specify a subset of channels or features!")
    if feature is not None:
        image_ylabel = "channel"
        if isinstance(feature, int):
            weights = weights[feature, :, :]
        elif feature == "avg":
            weights = weights.mean(axis=0)
        else:
            raise ValueError('Argument `feature` must be an integer or "avg"!')
    if channel is not None:
        image_ylabel = "feature"
        if isinstance(channel, int):
            # channels live on the last axis: transpose, index, transpose back
            weights = weights.T[channel].T
        elif channel == "avg":
            weights = weights.mean(axis=-1)
        elif channel == "gfp":
            weights = weights.std(axis=-1)
        else:
            raise ValueError(
                'Argument `channel` must be an integer, "avg" or "gfp"'
            )
        weights = weights.T  # transpose so first dimension is time
    # plot the result
    scaler = StandardScaler()
    # normalizer = MinMaxScaler()
    if kind == "line":
        # All selected weights are flattened into one column and z-scored together;
        # the line gets slightly thinner as the last weight dimension grows.
        ax.plot(
            trf.times.flatten(), scaler.fit_transform(weights.reshape(-1, 1)), linewidth=2 - 0.01 * weights.shape[-1]
        )
        ax.set(
            xlabel="Time lag[s]",
            ylabel="Amplitude [a.u.]",
            xlim=(trf.times.min(), trf.times.max()),
        )
    elif kind == "image":
        # Draw in sample units first, then rescale the x extent by
        # (max time / number of lags) so the axis is expressed in time.
        scale = trf.times.max() / len(trf.times)
        im = ax.imshow(
            weights.T,
            origin="lower",
            aspect="auto",
            extent=[0, weights.shape[0], 0, weights.shape[1]],
        )
        extent = np.asarray(im.get_extent(), dtype=float)
        extent[:2] *= scale
        im.set_extent(extent)
        ax.set(
            xlabel="Time lag [s]",
            ylabel=image_ylabel,
            xlim=(trf.times.min(), trf.times.max()),
        )
    if show is True:
        plt.show()
    if fig is not None:
        return fig

In [None]:
plt.figure(figsize=(15, 10))
plt.subplots_adjust(hspace=1, wspace=0.5)

# Positions in pipeline_an.aus whose TRF line is drawn in green; all others are black.
# NOTE(review): index 6 was written twice in the condition this set replaces —
# confirm that no other index was intended.
green_panels = {0, 6, 7, 8, 10}

# One panel per AU on a 4 x 3 grid (which holds at most 12 AUs)
for i, au_id in enumerate(pipeline_an.aus):
    ax = plt.subplot(4, 3, i + 1)
    plot_trf(direction=1, trf=trfs[i], axes=ax, show=False)
    ax.set_title(f'TRF for {au_id}')
    trf_line = ax.get_lines()[0]
    trf_line.set_color("g" if i in green_panels else "k")
    trf_line.set_linewidth(2)
    ax.set_ylabel('')
    ax.set_ylim(-2.5, 2.5)
    ax.axhline(y=0, color='k', linestyle='--')

#### Declare which similarity metric to use for further analysis

In [None]:
# Options: r, r2, mae, mse, rmse
similarity_measure = 'r'

#### Compare TRF prediction accuracy for True & Fake trials

In [None]:
# Box plot of the chosen similarity measure per AU, split by condition, with the
# individual trials overlaid as faint dodged points on the same axes.
shared_plot_kws = dict(data=df_correlations, x='listener_au', y=similarity_measure, hue='condition')
ax = sns.boxplot(gap=0.2, fill=False, **shared_plot_kws)
sns.stripplot(ax=ax, dodge=True, alpha=0.1, legend=False, **shared_plot_kws)
plt.xticks(rotation=90)
plt.xlabel('Action Unit')
# Place the legend outside the axes, anchored at the top-right corner
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))

# Ridge plot
# with sns.axes_style(rc={"axes.facecolor": (0, 0, 0, 0)}):
#     g = sns.FacetGrid(df_correlations, row="listener_au", hue="condition", aspect=10, height=0.5, palette='Set2', legend_out=True)
#     g.map(sns.kdeplot, similarity_measure, bw_adjust=.5, clip_on=False, fill=True, lw=1, alpha=0.5)
#     g.map(sns.kdeplot, similarity_measure, bw_adjust=.5, clip_on=False, lw=0.5)
#     g.refline(x=0, lw=0.5, alpha=1, linestyle="--", clip_on=False)
#     g.refline(y=0, lw=1, alpha=1, linestyle="-", clip_on=False)
#     for i, ax in enumerate(g.axes.flat):
#         ax.text(0, .2, pipeline_an.aus[i], fontsize=8, fontweight="bold", ha="left", va="center", color=ax.lines[-1].get_color(), transform=ax.transAxes)
#     g.figure.subplots_adjust(hspace=-.3)
#     g.set_titles("")
#     g.set(yticks=[], ylabel="")
#     g.despine(left=True, bottom=True)
#     g.set_axis_labels(similarity_measure, '')

#### t-test between TRF prediction accuracy for True & Fake trials

In [None]:
# Run the True-vs-Fake t-tests once per similarity metric; each call yields a
# (t, p) pair that is unpacked into the per-metric variables below.
metric_names = ['r', 'r2', 'mae', 'mse', 'rmse']
(t_r, p_r), (t_r2, p_r2), (t_mae, p_mae), (t_mse, p_mse), (t_rmse, p_rmse) = [
    pipeline_an.ttests(df_correlations, metric_name) for metric_name in metric_names
]

# Row counts per condition divided by the number of AUs
n_aus = len(pipeline_an.aus)
num_trues = int((df_correlations['condition'] == 'true').sum()) // n_aus
num_fakes = int((df_correlations['condition'] == 'fake').sum()) // n_aus

# Long-format table: the AU list (and its images) is tiled once per metric and
# each metric name is repeated once per AU, matching the concatenated t / p values.
df_ttest = pd.DataFrame({
    'listener_au': np.tile(pipeline_an.aus, len(metric_names)),
    'metric': np.repeat(metric_names, n_aus),
    't': np.concatenate((t_r, t_r2, t_mae, t_mse, t_rmse), axis=None),
    'p': np.concatenate((p_r, p_r2, p_mae, p_mse, p_rmse), axis=None),
    'image': np.tile(pipeline_an.au_gifs, len(metric_names)),
})

In [None]:
# t statistic per AU for the 'r' metric only; green bars mark p < 0.05.
df_ttest_r = df_ttest[df_ttest['metric'] == 'r']
cols = ['green' if p < 0.05 else 'black' for p in df_ttest_r['p']]
# seaborn >= 0.13 deprecates `palette` without `hue`. Assigning the x variable
# to hue (with the legend switched off) applies one palette entry per bar.
sns.barplot(data=df_ttest_r, x='listener_au', y='t', hue='listener_au', palette=cols, legend=False)

In [None]:
# sns.barplot(data=df_ttest, x='listener_au', y='t', hue='metric')
def _barplot_by_significance(data, x, y, **kwargs):
    """Draw one facet's bars, coloured by that facet's own p values.

    FacetGrid.map_dataframe passes the facet's rows as `data`. Building the
    palette here, rather than once from the whole table, keeps every metric's
    bars coloured by its own p values instead of by the first metric's.
    """
    bar_colours = ['green' if p < 0.05 else 'black' for p in data['p']]
    sns.barplot(data=data, x=x, y=y, hue=x, palette=bar_colours, legend=False, ax=plt.gca())

g = sns.FacetGrid(df_ttest, col='metric', aspect=2, sharex=True, sharey=True, despine=False, col_wrap=2)
g.map_dataframe(_barplot_by_significance, x='listener_au', y='t')
# Grid-level calls so every facet is affected, not only the last-drawn axes
g.tick_params(axis='x', labelrotation=90)
g.set_xlabels('Action Unit')

In [None]:
# alt.renderers.enable("mimetype")

# alt.Chart(df_ttest).mark_bar().encode(
#     y = alt.Y('t', aggregate='mean').axis(
#         title="t_statistic",
#         titleAngle=0,
#         titleAlign="left",
#         titleY=-2,
#         titleX=0,
#     ),
#     x = alt.X('listener_au', axis=alt.Axis(labelAngle=-90)).title('Action Unit'),
#     tooltip = ['p', 'image'],
#     row = 'metric',
#     color = alt.condition(alt.datum.p < 0.05, alt.value('green'), alt.value('white'))
# ).configure_axis(grid=False).configure_view(stroke=None).properties(width=350, height=150)

## Compare model accuracy with participant ratings of genuineness

In [None]:
# Column order of the regression table
regression_columns = [
    'trial',
    'condition',
    'listener_au',
    'duration',
    'dyad',
    'times_presented',
    'model_performance',
    'subject_accuracy',
    'confidence',
    'genuineness',
]

# One record per df_correlations row whose video has at least one participant response
regression_records = []
for _, corr_row in df_correlations.iterrows():
    ab = df_responses.loc[df_responses['VideoPath'] == corr_row['trial']]
    n_presentations = len(ab)
    if n_presentations == 0:
        continue

    regression_records.append({
        'trial': corr_row['trial'],
        'condition': corr_row['condition'],
        'listener_au': corr_row['listener_au'],
        'duration': corr_row['duration'],
        'dyad': corr_row['displayed_dyad'],
        'times_presented': n_presentations,
        'model_performance': corr_row['r'],
        # share of responses to this video marked accurate
        'subject_accuracy': Counter(ab['Accuracy'])[True] / n_presentations,
        # mean Likert response for this video
        'confidence': sum(ab['LikertResp']) / n_presentations,
        # share of 'g' responses for this video
        'genuineness': Counter(ab['Resp'])['g'] / n_presentations,
    })

df_regression = pd.DataFrame(regression_records, columns=regression_columns)
df_regression.to_csv('./df_regression.csv')

#### Regression plots of model 'performance' & participant ratings of genuineness

In [None]:
# sns.lmplot(data=df_regression, y='model_performance', x='subject_accuracy', hue='condition', col='listener_au', col_wrap=2, height=3, aspect=1)

In [None]:
sns.lmplot(data=df_regression, y='genuineness', x='model_performance', hue='condition', col='listener_au', col_wrap=4, height=3, aspect=1)

In [None]:
g = sns.lmplot(data=df_regression, x='model_performance', y='genuineness', col='listener_au', col_wrap=4, height=3, aspect=1, scatter_kws=dict(color="k", alpha=0.3), line_kws=dict(color="k"))

def annotate(data, **kws):
	"""Write r2 and the p value of the genuineness ~ model_performance correlation on the current axes.

	Arguments:
		data (pandas.DataFrame): Rows of one facet, with 'genuineness' and 'model_performance' columns.
		**kws: Extra keyword arguments supplied by FacetGrid.map_dataframe (unused).
	"""
	r, p = stats.pearsonr(data['genuineness'], data['model_performance'])
	# For a simple linear fit the coefficient of determination equals the squared
	# Pearson r. sklearn's r2_score(y_true, y_pred) would instead score the raw
	# model_performance values as if they were direct predictions of genuineness.
	r2 = r ** 2
	ax = plt.gca()
	ax.text(.01, .8, 'r2={:.2f}, \np={:.2g}'.format(r2, p), transform=ax.transAxes)
	
g.map_dataframe(annotate)
plt.show()

#### SVM

In [None]:
def get_training_data(metric):
    """Build the per-trial feature matrix and label vector from `df_correlations`.

    Each trial (one group of the 'trial' column) contributes one feature row made
    of its `metric` values and one label taken from its first 'condition' entry.
    The features are standardized column-wise over all trials.

    Arguments:
        metric (str): Name of the df_correlations column used as features.

    Returns:
        tuple: (scaled feature array, label array), one entry per trial.
    """
    per_trial_features = []
    per_trial_labels = []

    for _, trial_rows in df_correlations.groupby('trial'):
        per_trial_labels.append(trial_rows['condition'].iloc[0])
        per_trial_features.append(trial_rows[metric].tolist())

    scaled_features = StandardScaler().fit_transform(np.asarray(per_trial_features))
    return scaled_features, np.asarray(per_trial_labels)

In [None]:
def nested_cv(features, targets, n_trials=10):
	"""Repeated nested cross-validation of a linear-kernel SVC.

	Each repetition tunes C with a 5-fold inner grid search and scores the tuned
	model with a 3-fold outer cross-validation; both splitters are shuffled with
	the repetition index as seed.

	Arguments:
		features (array-like): Feature matrix, one row per sample.
		targets (array-like): Class label of each sample.
		n_trials (int): Number of repetitions (default 10).

	Returns:
		numpy.ndarray: Mean outer-fold score of each repetition, shape (n_trials,).
	"""
	scores = np.zeros(n_trials)

	svc = SVC(probability=True)
	param_grid = [
		{'C': np.logspace(-5, 3, 9), 'kernel':['linear']},
		# {'C': np.logspace(-5, 3, 9), 'gamma': np.logspace(-5, 2, 8), 'kernel':['rbf']}
	]

	for i in range(n_trials):
		inner_cv = KFold(n_splits=5, shuffle=True, random_state=i)
		outer_cv = KFold(n_splits=3, shuffle=True, random_state=i)

		model = GridSearchCV(estimator=svc, param_grid=param_grid, cv=inner_cv, n_jobs=-1)

		# cross_val_score clones `model` and refits it on every outer training fold,
		# so fitting it on the full data beforehand would only be discarded work.
		score = cross_val_score(model, features, targets, cv=outer_cv, n_jobs=-1)
		scores[i] = score.mean()

	return scores

#### Get best models for each similarity metric

In [None]:
# Nested-CV scores per similarity metric (one column each)
df_metrics = pd.DataFrame({})

# Final classifier per similarity metric, stored in a one-element list
models = {metric_name: [] for metric_name in ('r', 'r2', 'mae', 'mse', 'rmse')}

for metric in models:
    features, targets = get_training_data(metric)
    X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.5, random_state=1)

    # Repeated nested-CV accuracy on the full data set
    df_metrics[metric] = nested_cv(features, targets)

    # Tune C for a linear SVC on the training half only
    model = GridSearchCV(
        estimator=SVC(probability=True),
        param_grid=[{'C': np.logspace(-5, 3, 9), 'kernel': ['linear']}],
        cv=KFold(n_splits=5, shuffle=True, random_state=1),
        n_jobs=-1,
    )
    model.fit(X_train, y_train)
    optimal_params = model.best_params_

    # Refit a fresh classifier with the selected hyper-parameters and keep it
    final_clf = SVC(C=optimal_params.get('C'), kernel=optimal_params.get('kernel'), probability=True)
    final_clf.fit(X_train, y_train)
    models[metric].append(final_clf)


In [None]:
dfm = df_metrics.melt()
dfm.groupby('variable').mean()

#### Plot average accuracy (over K folds) for each similarity metric

In [None]:
sns.boxplot(x='variable', y='value', data=dfm[dfm['variable']=='r'], fill=False, width=0.25)
plt.ylim(0, 1)
plt.xlabel('')
plt.ylabel('SVM Accuracy')
plt.xticks([])
plt.axhline(y=0.5, color='k', ls='--')

In [None]:
sns.boxplot(x='variable', y='value', data=dfm, fill=True)
plt.xlabel('Similarity Metric')
plt.ylabel('Average SVM accuracy')

#### t-tests for above chance accuracy for each similarity metric

In [None]:
# One-sample t-test of the nested-CV accuracies against chance level (0.5),
# one test per similarity metric, in the order listed here.
metric_order = ['r', 'r2', 'mae', 'mse', 'rmse']
stat_results = [
    stats.ttest_1samp(dfm.loc[dfm['variable'] == metric_name, 'value'], 0.5)
    for metric_name in metric_order
]

for metric_name, test_result in zip(metric_order, stat_results):
    print(metric_name + ": ", test_result)

#### Shapley values on SVM

In [None]:
shap.initjs()

features, targets = get_training_data('r')
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.5, random_state=1)

In [None]:
# Key into the `models` dict selecting which similarity metric's classifier
# to explain. Valid keys:
#   'r', 'r2', 'mae', 'mse', 'rmse'
# (Despite the `_idx` suffix this is a string key, not an integer position.)
model_metric_idx = 'r'

# Positions of the 'true' and 'fake' labels within the classifier's classes_ array
model_labels = models[model_metric_idx][0].classes_
true_label_idx = np.argwhere(model_labels=='true')[0][0]
fake_label_idx = np.argwhere(model_labels=='fake')[0][0]

In [None]:
explainer = shap.Explainer(models[model_metric_idx][0].predict_proba, X_train, feature_names=pipeline_an.aus)
shap_values = explainer(features)

In [None]:
shap.plots.beeswarm(shap_values[:, :, true_label_idx], max_display=11, show=True)

In [None]:
shap.plots.beeswarm(shap_values[:, :, fake_label_idx], max_display=11, show=True)

#### SVR

In [None]:
# A single linear SVR instance, refit for every (AU, condition) pair below
regr = SVR(kernel='linear', C=0.1, epsilon=0.2)

plt.figure(figsize=(15, 10))
plt.subplots_adjust(hspace=1, wspace=0.5)

# One panel per AU on a 4 x 3 grid. Within a panel, each condition's model
# performance is standardized on its own, the SVR is fit to predict genuineness
# from it, and the SVR predictions are drawn with a regression line.
for i, au_id in enumerate(pipeline_an.aus):
    au_rows = df_regression[df_regression['listener_au'] == au_id]
    ax = plt.subplot(4, 3, i+1)

    for condition_name, legend_label in (('true', 'True'), ('fake', 'Fake')):
        condition_rows = au_rows[au_rows['condition'] == condition_name]
        performance = condition_rows['model_performance'].to_numpy().reshape(-1, 1)
        performance_scaled = StandardScaler().fit_transform(performance)
        genuineness_values = condition_rows['genuineness'].to_numpy()

        predicted = regr.fit(performance_scaled, genuineness_values).predict(performance_scaled)
        sns.regplot(y=predicted, x=performance_scaled, ax=ax, label=legend_label)

    ax.set_title(au_id)
    ax.set_xlabel('model performance')
    ax.set_ylabel('genuineness')
    ax.legend(frameon=False)

plt.show()

In [None]:
def get_training_data_svr():
    """Build the per-trial feature matrix and regression target from `df_regression`.

    Each trial (one group of the 'trial' column) contributes one feature row made
    of its 'model_performance' values and one target taken from its first
    'genuineness' entry. The features are standardized column-wise over all
    trials; the targets are returned unscaled.

    Returns:
        tuple: (scaled feature array, target array), one entry per trial.
    """
    per_trial_features = []
    per_trial_targets = []

    for _, trial_rows in df_regression.groupby('trial'):
        per_trial_targets.append(trial_rows['genuineness'].iloc[0])
        per_trial_features.append(trial_rows['model_performance'].tolist())

    scaled_features = StandardScaler().fit_transform(np.asarray(per_trial_features))
    return scaled_features, np.asarray(per_trial_targets)

In [None]:
def nested_cv_svr(features, targets, n_trials=20):
	"""Repeated nested cross-validation of an SVR (linear and RBF kernels).

	Each repetition tunes the hyper-parameters with a 5-fold inner grid search and
	scores the tuned model with a 3-fold outer cross-validation; both splitters
	are shuffled with the repetition index as seed. Only `features` and `targets`
	are used, so the function does not depend on notebook-level variables.

	Arguments:
		features (array-like): Feature matrix, one row per sample.
		targets (array-like): Regression target of each sample.
		n_trials (int): Number of repetitions (default 20).

	Returns:
		numpy.ndarray: Mean outer-fold score of each repetition, shape (n_trials,).
	"""
	scores = np.zeros(n_trials)

	svr = SVR()
	param_grid = [
		{'C': np.logspace(-5, 3, 9), 'kernel':['linear']},
		{'C': np.logspace(-5, 3, 9), 'gamma': np.logspace(-5, 2, 8), 'kernel':['rbf']}
	]

	for i in range(n_trials):
		inner_cv = KFold(n_splits=5, shuffle=True, random_state=i)
		outer_cv = KFold(n_splits=3, shuffle=True, random_state=i)

		model = GridSearchCV(estimator=svr, param_grid=param_grid, cv=inner_cv, n_jobs=-1)

		# cross_val_score clones `model` and refits it on every outer training fold,
		# so no separate fit is needed before scoring.
		test_score = cross_val_score(model, features, targets, cv=outer_cv, n_jobs=-1)
		scores[i] = test_score.mean()

	return scores

In [None]:
features, targets = get_training_data_svr()
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.5, random_state=1)

scores = nested_cv_svr(features, targets)

In [None]:
scores

In [None]:
plt.boxplot(scores)

In [None]:
svc = SVR()
param_grid = [{'C': np.logspace(-5, 3, 9), 'kernel':['linear']}]
cv = KFold(n_splits=5, shuffle=True, random_state=1)
model = GridSearchCV(estimator=svc, param_grid=param_grid, cv=cv, n_jobs=-1)
model.fit(X_train, y_train)
best_model = model.best_estimator_

In [None]:
final_clf = SVR(C=model.best_params_.get('C'), kernel='linear')
final_clf.fit(X_train, y_train)
final_clf.score(X_test, y_test)

In [None]:
explainer = shap.Explainer(best_model.predict, X_train, feature_names=pipeline_an.aus)
shap_values = explainer(features)

In [None]:
shap.plots.beeswarm(shap_values, max_display=11)