# Example usage

To use `audinter` in a project:

In [1]:
import audinter
# Print the installed audinter version so the outputs recorded below can be tied to a specific release.
print(audinter.__version__)

0.1.0


In [2]:
# function imports
from audinter.metrics import algorithm_class_score
from audinter.metrics import correlated_features_score
from audinter.metrics import model_size
from audinter.metrics import feature_importance_score
from audinter.metrics import cv_shap_score

  from .autonotebook import tqdm as notebook_tqdm


Dataset Generation and Test Split

In [3]:
from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import train_test_split

# Classification task: build a synthetic dataset (1000 samples, 20 features)
# and hold out 20% of it as the test split.
X_class, y_class = make_classification(
    n_samples=1000,
    n_features=20,
    random_state=42,
)
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X_class,
    y_class,
    test_size=0.2,
    random_state=42,
)

# Regression task: same sample count, feature count, seed and split ratio.
X_reg, y_reg = make_regression(
    n_samples=1000,
    n_features=20,
    random_state=42,
)
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg,
    y_reg,
    test_size=0.2,
    random_state=42,
)

Usage examples:

- Random Forest Classifier 

In [4]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest on the classification training split (fit() returns the estimator).
rf_model = RandomForestClassifier(random_state=42).fit(X_train_class, y_train_class)

# Report every audinter metric for the fitted model; the data-based metrics
# (correlated features, SHAP) are evaluated on the held-out test features.
print("=== Random Forest Classifier Scores ===")
print("Classifier Algorithm Class Score:", algorithm_class_score(rf_model))
print("Model Size:", model_size(rf_model))
print("Correlated Features Score:", correlated_features_score(X_test_class))
print("Feature Importance Score:", feature_importance_score(rf_model))
print("CV SHAP Score:", cv_shap_score(rf_model, X_test_class))

=== Random Forest Classifier Scores ===
Classifier Algorithm Class Score: (4, 'Level of explainability based on on literature research and qualitative analysis of each learning technique.')
Model Size: (12826, 'Returned number of total nodes.')
Correlated Features Score: (0.84, 'Proportion of non-highly correlated features, correlation lesser than avg_corr + std_corr.')
Feature Importance Score: (0.05, 'Percentage of features that concentrate distri_threshold (0.5) percent of the total importance.')
CV SHAP Score: (2.375, 'Coefficient of variation of absolute SHAP values.')


- Support Vector Classifier (SVC)

In [5]:
from sklearn.svm import SVC

# Fit a support vector classifier with default settings on the classification training split.
svc_model = SVC().fit(X_train_class, y_train_class)

# Report every audinter metric for the fitted model; the data-based metrics
# (correlated features, SHAP) are evaluated on the held-out test features.
print("=== Support Vector Classifier Scores ===")
print("Classifier Algorithm Class Score:", algorithm_class_score(svc_model))
print("Model Size:", model_size(svc_model))
print("Correlated Features Score:", correlated_features_score(X_test_class))
print("Feature Importance Score:", feature_importance_score(svc_model))
print("CV SHAP Score:", cv_shap_score(svc_model, X_test_class))

=== Support Vector Classifier Scores ===
Classifier Algorithm Class Score: (2, 'Level of explainability based on on literature research and qualitative analysis of each learning technique.')
Model Size: (20, 'Returned number of features seen during fit.')
Correlated Features Score: (0.84, 'Proportion of non-highly correlated features, correlation lesser than avg_corr + std_corr.')
Feature Importance Score: (None, 'Classifier type is not applicable!')
CV SHAP Score: (1.555, 'Coefficient of variation of absolute SHAP values.')


- Linear Regression

In [6]:
from sklearn.linear_model import LinearRegression

# Fit an ordinary least-squares model on the regression training split.
linreg_model = LinearRegression().fit(X_train_reg, y_train_reg)

# Report every audinter metric for the fitted model; the data-based metrics
# (correlated features, SHAP) are evaluated on the held-out regression test features.
print("=== Linear Regression Scores ===")
print("Classifier Algorithm Class Score:", algorithm_class_score(linreg_model))
print("Model Size:", model_size(linreg_model))
print("Correlated Features Score:", correlated_features_score(X_test_reg))
print("Feature Importance Score:", feature_importance_score(linreg_model))
print("CV SHAP Score:", cv_shap_score(linreg_model, X_test_reg))

=== Linear Regression Scores ===
Classifier Algorithm Class Score: (3.5, 'Level of explainability based on on literature research and qualitative analysis of each learning technique.')
Model Size: (20, 'Returned number of features seen during fit.')
Correlated Features Score: (0.32, 'Proportion of non-highly correlated features, correlation lesser than avg_corr + std_corr.')
Feature Importance Score: (0.1, 'Percentage of features that concentrate distri_threshold (0.5) percent of the total importance.')
CV SHAP Score: (1.696, 'Coefficient of variation of absolute SHAP values.')


- XGBoost Classifier

In [7]:
import xgboost as xgb

# Fit a gradient-boosted classifier on the classification training split,
# using log loss as the evaluation metric.
xgb_model = xgb.XGBClassifier(
    random_state=42,
    eval_metric='logloss',
).fit(X_train_class, y_train_class)

# Report every audinter metric for the fitted model; the data-based metrics
# (correlated features, SHAP) are evaluated on the held-out test features.
print("=== XGBoost Classifier Scores ===")
print("Classifier Algorithm Class Score:", algorithm_class_score(xgb_model))
print("Model Size:", model_size(xgb_model))
print("Correlated Features Score:", correlated_features_score(X_test_class))
print("Feature Importance Score:", feature_importance_score(xgb_model))
print("CV SHAP Score:", cv_shap_score(xgb_model, X_test_class))

=== XGBoost Classifier Scores ===
Classifier Algorithm Class Score: (1, 'Level of explainability based on on literature research and qualitative analysis of each learning technique.')
Model Size: (20, 'Returned number of features seen during fit.')
Correlated Features Score: (0.84, 'Proportion of non-highly correlated features, correlation lesser than avg_corr + std_corr.')
Feature Importance Score: (None, 'Classifier type is not applicable!')
CV SHAP Score: (2.474, 'Coefficient of variation of absolute SHAP values.')


- Neural Network

Note: This example uses the Keras library, but the package also supports other libraries such as PyTorch.

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input

# Declare the input shape with an explicit Input layer. Passing `input_dim` to the
# first Dense layer of a Sequential model is discouraged in Keras 3 and emits a
# UserWarning. The feature count is read from the training data instead of being
# hard-coded, so the cell keeps working if the dataset shape changes.
keras_model = Sequential([
    Input(shape=(X_train_class.shape[1],)),
    Dense(10, activation='relu'),
    Dense(1, activation='sigmoid')
])
keras_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# verbose=0 keeps the per-epoch training log out of the notebook output.
keras_model.fit(X_train_class, y_train_class, epochs=10, batch_size=32, verbose=0)

# Report every audinter metric for the trained network; the data-based metrics
# (correlated features, SHAP) are evaluated on the held-out test features.
print("=== Keras Neural Network ===")
print(f"Classifier Algorithm Class Score: {algorithm_class_score(keras_model)}")
print(f"Model Size: {model_size(keras_model)}")
print(f"Correlated Features Score: {correlated_features_score(X_test_class)}")
print(f"Feature Importance Score: {feature_importance_score(keras_model)}")
print(f"CV SHAP Score: {cv_shap_score(keras_model, X_test_class)}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


=== Keras Neural Network ===
Classifier Algorithm Class Score: (1, 'Level of explainability based on on literature research and qualitative analysis of each learning technique.')
Model Size: (221, 'Returned number of parameters.')
Correlated Features Score: (0.84, 'Proportion of non-highly correlated features, correlation lesser than avg_corr + std_corr.')
Feature Importance Score: (None, 'Classifier type is not applicable!')
4/4 ━━━━━━━━━━━━━━━━━━━━ 0s 16ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 38ms/step
6525/6525 ━━━━━━━━━━━━━━━━━━━━ 9s 1ms/step
CV SHAP Score: (1.147, 'Coefficient of variation of absolute SHAP values.')
