In [None]:
# Initialization
%matplotlib widget

from evaluator import evaluate, get_considered_prs
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn import tree
from definitions import Repository, PullRequest, Commit
from configuration import ProjectConfiguration
import db
import smells
import metrics
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import ipywidgets as widgets
import itertools
from sqlalchemy.orm import joinedload, subqueryload
from sqlalchemy import select, column
from scipy import stats



# Default size of every figure below, given as (width, height) in inches
plt.rcParams.update({"figure.figsize": (13, 6)})

In [None]:
config = ProjectConfiguration()

db.prepare(config.connstr)
dbsession = db.get_session()

# Resolve every configured project name to its Repository row (None when no row matches)
repositories = [
    dbsession.query(Repository).filter(Repository.full_name == repository_name).first()
    for repository_name in config.projects
]
if None in repositories:
    raise LookupError("One of repositories does not exist in the database")
# Keep only the repositories for which get_considered_prs yields at least one pull request
repositories = [
    repo
    for repo in repositories
    if len(get_considered_prs(repo, dbsession).all()) > 0
]
    
def avg(lst):
    """Return the arithmetic mean of the values in ``lst``.

    ``lst`` must be a sized collection of numbers; an empty collection
    raises ``ZeroDivisionError``.
    """
    total = sum(lst)
    count = len(lst)
    return total / count

# Model based solely on data from _A Large Dataset for Just-In-Time Defect Prediction_

In [None]:
# data preparation
# Commit attributes that are aggregated per pull request. For each of them the
# frame gets three columns: <name>_min, <name>_avg and <name>_max.
commit_metric_names = ["la", "ld", "nf", "nd", "ns", "ent", "ndev", "age", "nuc", "aexp", "arexp", "asexp"]
feature_columns = [
    f"{name}_{aggregate}"
    for name in commit_metric_names
    for aggregate in ("min", "avg", "max")
]

# Collect one plain record per pull request and build the DataFrame once.
# Growing a frame with pd.concat inside a loop copies it on every step and,
# when it starts from an empty frame, depends on pandas' empty-entry dtype rules.
records = []
for repo in repositories:
    for pr in get_considered_prs(repo, dbsession).all():
        commits = pr.commits
        # a pull request counts as buggy as soon as any of its commits is buggy
        record = {"pull_id": pr.id, "buggy": any(c.buggy for c in commits)}
        for name in commit_metric_names:
            values = [getattr(c, name) for c in commits]
            record[f"{name}_min"] = min(values)
            # "avg" is scipy's trimmed mean with proportiontocut=0.1
            record[f"{name}_avg"] = stats.trim_mean(values, 0.1)
            record[f"{name}_max"] = max(values)
        records.append(record)

df = pd.DataFrame(records, columns=["pull_id", "buggy"] + feature_columns)
df = df.set_index("pull_id")
labels = np.array(df["buggy"])
features = df.drop("buggy", axis = 1)
feature_list = list(features.columns)
features = np.array(features)

## Selected training and testing sets

In [None]:
# Preparing training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.25, random_state=0
)

# Report the shape of every split
for caption, split in (
    ('Training features shape:\t', train_features),
    ('Training labels shape:\t\t', train_labels),
    ('Testing features shape:\t\t', test_features),
    ('Testing labels shape:\t\t', test_labels),
):
    print(caption, split.shape)

In [None]:
# Training
# fit() returns the fitted estimator, so construction and training are chained
rf = RandomForestRegressor(random_state=0).fit(train_features, train_labels)

In [None]:
# Predicting
# One predicted value per row of the held-out test features
predictions = rf.predict(X=test_features)

## Prediction errors

In [None]:
# Calculate mean absolute, mean squared and root mean squared error
residuals = predictions - test_labels
mean_absolute_error = np.mean(abs(residuals))
mean_squared_error = np.mean(residuals**2)
print(f"Mean absolute error:\t\t{round(mean_absolute_error, 2)}")
print(f"Mean squared error:\t\t{round(mean_squared_error, 2)}")
print(f"Root Mean squared error:\t{round(mean_squared_error**0.5, 2)}")

# Model based on our metrics and smells

## Smells
### Share of smelly pulls

In [None]:
# Smells that are evaluated one by one
chosen_simple_tests = [
    smells.lack_of_review,
    smells.missing_description,
    #smells.large_changesets,
    smells.sleeping_reviews,
    smells.review_buddies,
    smells.ping_pong,
]

# Smells handed to the combining tests; unlike the list above this includes large_changesets
_combined_smells = (
    smells.lack_of_review,
    smells.missing_description,
    smells.large_changesets,
    smells.sleeping_reviews,
    smells.review_buddies,
    smells.ping_pong,
)
# Each entry is (combining test, list of smells it combines); every entry gets its own list
chosen_complex_tests = [
    (combinator, list(_combined_smells))
    for combinator in (smells.union, smells.intersection)
]

In [None]:
# Per repository: results of the simple tests followed by results of the complex tests
smells_evaluations = {}

for repository in repositories:
    simple_results = [
        evaluate(repository.full_name, simple_test)
        for simple_test in chosen_simple_tests
    ]
    complex_results = [
        evaluate(repository.full_name, combinator, combined)
        for combinator, combined in chosen_complex_tests
    ]
    smells_evaluations[repository.full_name] = simple_results + complex_results

In [None]:
# display results as a text
print("Smell / repository".ljust(30), end="\t")
column_width = max(len(repo.name) for repo in repositories) + 1
print(*(repo.name.ljust(column_width) for repo in repositories), sep="\t")

# Simple and complex results are stored back to back in each repository's list,
# so one pass over all positions prints the simple rows first, then the complex ones.
for i in range(len(chosen_simple_tests) + len(chosen_complex_tests)):
    # the row caption comes from the first repository's result at this position
    row_caption = next(iter(smells_evaluations.values()))[i].evaluator_name
    print(row_caption.ljust(30), end="\t")
    cells = [
        f"{round(tests_results[i].percentage*100,2)}%".rjust(column_width)
        for tests_results in smells_evaluations.values()
    ]
    print(*cells, sep="\t")

In [None]:
# display results as a plot
# Tick captions are kept in their own name: `labels` holds the model's target
# vector in the modelling cells and must not be overwritten by a plotting cell.
tick_labels = [
    evaluation.evaluator_name
    for evaluation in next(iter(smells_evaluations.values()))
]
x = np.arange(len(tick_labels))
width = 0.35  # total width shared by all repositories' bars at one tick

fig, ax = plt.subplots()
bar_width = width / len(smells_evaluations)
for position, (repository, evaluations) in enumerate(smells_evaluations.items()):
    # centre the bars of repository number `position` inside its slot of the group
    ax.bar(
        x - width/2 + bar_width*(position + 0.5),
        [evaluation.percentage for evaluation in evaluations],
        bar_width,
        label=repository,
    )
ax.set_xticks(x, tick_labels, rotation="vertical")
ax.set_ylim([0, 1])
ax.legend(bbox_to_anchor =(1, 1))
fig.tight_layout()
plt.show()

## Metrics

In [None]:
# Metrics from the `metrics` module that are evaluated for every repository
calculated_metrics = [
    metrics.review_window_metric,
    metrics.review_window_per_line_metric,
    metrics.review_chars,
    metrics.review_chars_code_lines_ratio,
    metrics.no_of_reviewers,
    metrics.no_of_reviewers_diff_than_author,
    metrics.reviewed_lines_per_hour,
    metrics.no_of_reviews,
    metrics.ping_pong,
]

In [None]:
# Per repository: one evaluation result for each entry of calculated_metrics
metrics_evaluations = {}

for repository in repositories:
    # setdefault registers the repository even when calculated_metrics is empty
    repository_results = metrics_evaluations.setdefault(repository.full_name, [])
    for metric in calculated_metrics:
        repository_results.append(evaluate(repository.full_name, metric))

# create dataframes
# One frame per repository: pull_id first, one column per metric, `buggy` last.
metrics_evaluations_df = {}
for repository in repositories:
    repository_results = metrics_evaluations[repository.full_name]

    # ids of the pulls selected by the first metric's `evaluated` query
    id_rows = dbsession.execute(
        select(column("id")).select_from(repository_results[0].evaluated.subquery())
    ).all()

    df = pd.DataFrame()
    df["pull_id"] = [float(r[0]) if r[0] is not None else None for r in id_rows]
    for m in repository_results:
        df[m.metric_name] = m.to_list(dbsession)

    # add buggy
    # Built as one column in a single pass instead of concatenating a one-row
    # frame per pull request. A pull is buggy when any of its commits is buggy.
    df["buggy"] = [
        any(commit.buggy for commit in dbsession.query(PullRequest).get(pull_id).commits)
        for pull_id in df["pull_id"]
    ]

    metrics_evaluations_df[repository.full_name] = df


overall_metrics_df = pd.concat(metrics_evaluations_df.values())

In [None]:
# calculate correlation
def calc_correlations(method):
    """Compute correlation matrices of the metric frames, ignoring ``pull_id``.

    ``method`` is forwarded to ``DataFrame.corr``.

    Returns a tuple ``(correlations, overall_correlations)``: a dict mapping
    each repository's full name to its correlation matrix, and the matrix
    computed over ``overall_metrics_df``.
    """
    correlations = {
        repository.full_name: metrics_evaluations_df[repository.full_name]
        .drop(columns="pull_id")
        .corr(method=method)
        for repository in repositories
    }
    overall_correlations = overall_metrics_df.drop(columns="pull_id").corr(method=method)
    return correlations, overall_correlations

### Correlation between metrics

In [None]:
# display correlation heatmaps
@widgets.interact(corr_method=["pearson", "kendall", "spearman"],per_project=False)
def display_corr_plots(corr_method="spearman",per_project=False):
    """Show correlation heatmaps for the chosen correlation method.

    With ``per_project`` set, one heatmap per repository is shown first; the
    heatmap over all repositories is always shown last.
    """
    def show_heatmap(matrix, title):
        # Draw one annotated heatmap with underscores in the tick labels replaced by spaces
        fig, ax = plt.subplots()
        hm = sns.heatmap(matrix, annot = True, ax=ax)
        hm.set(title = title)
        fig.subplots_adjust(left=0.2, bottom=0.4)
        x_captions = [label.get_text().replace('_',' ') for label in ax.get_xticklabels()]
        ax.set_xticklabels(x_captions, rotation="vertical")
        y_captions = [label.get_text().replace('_',' ') for label in ax.get_yticklabels()]
        ax.set_yticklabels(y_captions)
        plt.show()

    correlations, overall_correlations = calc_correlations(corr_method)
    if per_project:
        for repository in repositories:
            show_heatmap(
                correlations[repository.full_name],
                f"Correlation matrix of reviews metrics and bugginess for {repository.full_name}\n",
            )
    show_heatmap(
        overall_correlations,
        f"Correlation matrix of reviews metrics and bugginess for all repositories\n",
    )

### Boxplots for different metrics

In [None]:
# display boxplots
@widgets.interact(outlier=False,per_project=False)
def display_boxplots(outlier,per_project):
    """Show, for every metric column, boxplots of non-buggy versus buggy pulls.

    ``outlier`` toggles drawing of outlier points. With ``per_project`` set, a
    boxplot per repository precedes the all-repositories boxplot of each metric.
    Metric columns are all columns of ``overall_metrics_df`` except the first
    and the last one.
    """
    def show_boxplot(frame, column_name, title):
        # One figure with two boxes: rows with buggy == 0 and rows with buggy == 1
        fig, ax = plt.subplots()
        ax.boxplot(
            [frame[frame.buggy == 0][column_name], frame[frame.buggy == 1][column_name]],
            notch=False,
            sym='k+' if outlier else '',  # an empty symbol hides the outliers
        )
        ax.set_xticklabels(["nonbuggy", "buggy"], fontsize=8)
        ax.set_title(title)
        plt.show()

    for column_name in overall_metrics_df.columns[1:-1]:
        if per_project:
            for repository in repositories:
                show_boxplot(
                    metrics_evaluations_df[repository.full_name],
                    column_name,
                    f"Boxplot for {column_name.replace('_',' ')} in repository {repository.full_name}",
                )
        show_boxplot(
            overall_metrics_df,
            column_name,
            f"Boxplot for {column_name.replace('_',' ')} in all repositories",
        )

## Model

In [None]:
df = overall_metrics_df.copy()
labels = np.array(df["buggy"])
features = df.drop("buggy", axis = 1)

# For every simple smell collect, over all repositories, the ids of the pull
# requests it considered and the ids of those it flagged as smelly.
smells_results = {}
for smell in chosen_simple_tests:
    evaluations = [evaluate(repository.full_name, smell) for repository in repositories]
    considered = [pr.id for e in evaluations for pr in e.considered.all()]
    smelly = [pr.id for e in evaluations for pr in e.smelly.all()]
    smells_results[smell.__name__] = (considered, smelly)

# Add one boolean column per smell, matched to the metric rows through pull_id.
for smell, (considered_ids, smelly_ids) in smells_results.items():
    smelly_lookup = set(smelly_ids)  # constant-time membership test
    # pull_id must come from THIS smell's considered list; using the variable
    # left over from the loop above would pair the flags with the last smell's ids
    temp_df = pd.DataFrame({smell: [pull_id in smelly_lookup for pull_id in considered_ids],
                            "pull_id": considered_ids})
    features = features.join(temp_df.set_index("pull_id"), on="pull_id", lsuffix="_metric", rsuffix="_smell")
features = features.set_index("pull_id")
feature_list = list(features.columns)
features = np.array(features)

### Selected training and testing sets

In [None]:
# Preparing training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.25, random_state=0
)

# Report the shape of every split
for caption, split in (
    ('Training features shape:\t', train_features),
    ('Training labels shape:\t\t', train_labels),
    ('Testing features shape:\t\t', test_features),
    ('Testing labels shape:\t\t', test_labels),
):
    print(caption, split.shape)

In [None]:
# training
# fit() returns the fitted estimator, so construction and training are chained
rf = RandomForestRegressor(random_state=0).fit(train_features, train_labels)

In [None]:
# Apply the fitted forest to the held-out test features
predictions = rf.predict(X=test_features)

### Prediction errors

In [None]:
# Calculate mean absolute, mean squared and root mean squared error
residuals = predictions - test_labels
mean_absolute_error = np.mean(abs(residuals))
mean_squared_error = np.mean(residuals**2)
print(f"Mean absolute error:\t\t{round(mean_absolute_error, 2)}")
print(f"Mean squared error:\t\t{round(mean_squared_error, 2)}")
print(f"Root Mean squared error:\t{round(mean_squared_error**0.5, 2)}")

### Metrics importance

In [None]:
# Get numerical feature importances
importances = list(rf.feature_importances_)
# Pair every feature name with its importance rounded to two decimals,
# ordered from the most to the least important
feature_importances = sorted(
    ((feature, round(importance, 2)) for feature, importance in zip(feature_list, importances)),
    key=lambda pair: pair[1],
    reverse=True,
)
# Print out the feature and importances
print("Metrics                        Importance")
for feature, importance in feature_importances:
    print(f"{feature.replace('_',' ').ljust(30,' ')} {importance}")

In [None]:
# Bar chart of the feature importances, captions shown with spaces instead of underscores
readable_names = [name.replace('_',' ') for name in feature_list]

fig, ax = plt.subplots()
ax.bar(feature_list, importances)
ax.set_xticklabels(readable_names, rotation="vertical")
ax.set_ylim(0, 1)
# leave room below the axes for the vertical captions
fig.subplots_adjust(bottom=0.5)
plt.show()

### Example of a tree

In [None]:
# Pull out one tree from the forest
@widgets.interact_manual(tree_number=widgets.IntSlider(min=0, max=rf.get_params(deep=False)["n_estimators"]-1, step=1, value=0), max_depth=widgets.IntSlider(min=0, max=30, step=1, value=2))
def plot_tree(tree_number, max_depth):
    """Draw tree number ``tree_number`` of the forest ``rf`` down to ``max_depth``.

    The full depth of the selected tree is written into the figure as text.
    """
    estimator = rf.estimators_[tree_number]
    _, ax = plt.subplots()
    # No class_names here: the forest is a RandomForestRegressor, so there are
    # no classes to name, and `labels` is the per-sample target array rather
    # than a list of class names.
    tree.plot_tree(estimator,
                   feature_names = feature_list,
                   filled = True,
                   ax = ax,
                   max_depth=max_depth)
    ax.text(0,1, f"depth = {estimator.get_depth()}")
    plt.show()

# Combined model

In [None]:
features = overall_metrics_df.copy()

# For every simple smell collect, over all repositories, the ids of the pull
# requests it considered and the ids of those it flagged as smelly.
smells_results = {}
for smell in chosen_simple_tests:
    evaluations = [evaluate(repository.full_name, smell) for repository in repositories]
    considered = [pr.id for e in evaluations for pr in e.considered.all()]
    smelly = [pr.id for e in evaluations for pr in e.smelly.all()]
    smells_results[smell.__name__] = (considered, smelly)

# Add one boolean column per smell, matched to the metric rows through pull_id.
for smell, (considered_ids, smelly_ids) in smells_results.items():
    smelly_lookup = set(smelly_ids)  # constant-time membership test
    # pull_id must come from THIS smell's considered list; using the variable
    # left over from the loop above would pair the flags with the last smell's ids
    temp_df = pd.DataFrame({smell: [pull_id in smelly_lookup for pull_id in considered_ids],
                            "pull_id": considered_ids})
    features = features.join(temp_df.set_index("pull_id"), on="pull_id", lsuffix="_metric", rsuffix="_smell")
features = features.set_index("pull_id")


# Commit attributes that are aggregated per pull request. For each of them the
# frame gets three columns: <name>_min, <name>_avg and <name>_max.
commit_metric_names = ["la", "ld", "nf", "nd", "ns", "ent", "ndev", "age", "nuc", "aexp", "arexp", "asexp"]
feature_columns = [
    f"{name}_{aggregate}"
    for name in commit_metric_names
    for aggregate in ("min", "avg", "max")
]

# Collect one plain record per pull request and build the DataFrame once.
# Growing a frame with pd.concat inside a loop copies it on every step and,
# when it starts from an empty frame, depends on pandas' empty-entry dtype rules.
records = []
for repo in repositories:
    for pr in get_considered_prs(repo, dbsession).all():
        commits = pr.commits
        record = {"pull_id": pr.id}
        for name in commit_metric_names:
            values = [getattr(c, name) for c in commits]
            record[f"{name}_min"] = min(values)
            # "avg" is scipy's trimmed mean with proportiontocut=0.1
            record[f"{name}_avg"] = stats.trim_mean(values, 0.1)
            record[f"{name}_max"] = max(values)
        records.append(record)

df = pd.DataFrame(records, columns=["pull_id"] + feature_columns)
df = df.set_index("pull_id")

# Keep only the pull requests present in both frames (index-based inner join)
features = features.join(df, how="inner")
# pop() removes the target column from the joined frame and returns it
labels = np.array(features.pop("buggy"))
feature_list = list(features.columns)
features = np.array(features)

## Selected training and testing sets

In [None]:
# Split into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.25, random_state=0
)

# Report the shape of every split
for caption, split in (
    ('Training Features Shape:', train_features),
    ('Training Labels Shape:', train_labels),
    ('Testing Features Shape:', test_features),
    ('Testing Labels Shape:', test_labels),
):
    print(caption, split.shape)

In [None]:
# training
# fit() returns the fitted estimator, so construction and training are chained
rf = RandomForestRegressor(random_state=0).fit(train_features, train_labels)

In [None]:
# Apply the fitted forest to the held-out test features
predictions = rf.predict(X=test_features)

## Prediction errors

In [None]:
# Calculate mean absolute, mean squared and root mean squared error
residuals = predictions - test_labels
mean_absolute_error = np.mean(abs(residuals))
mean_squared_error = np.mean(residuals**2)
print(f"Mean absolute error:\t\t{round(mean_absolute_error, 2)}")
print(f"Mean squared error:\t\t{round(mean_squared_error, 2)}")
print(f"Root Mean squared error:\t{round(mean_squared_error**0.5, 2)}")

## Metrics importance

In [None]:
# Get numerical feature importances
importances = list(rf.feature_importances_)
# Pair every feature name with its importance rounded to two decimals,
# ordered from the most to the least important
feature_importances = sorted(
    ((feature, round(importance, 2)) for feature, importance in zip(feature_list, importances)),
    key=lambda pair: pair[1],
    reverse=True,
)
# Print out the feature and importances
print("Metrics                        Importance")
for feature, importance in feature_importances:
    print(f"{feature.replace('_',' ').ljust(30,' ')} {importance}")

In [None]:
# Bar chart of the feature importances, captions shown with spaces instead of underscores
readable_names = [name.replace('_',' ') for name in feature_list]

fig, ax = plt.subplots()
ax.bar(feature_list, importances)
ax.set_xticklabels(readable_names, rotation="vertical")
ax.set_ylim(0, 1)
# leave room below the axes for the vertical captions
fig.subplots_adjust(bottom=0.5)
plt.show()

## Example of a tree

In [None]:
# Pull out one tree from the forest
@widgets.interact_manual(tree_number=widgets.IntSlider(min=0, max=rf.get_params(deep=False)["n_estimators"]-1, step=1, value=0), max_depth=widgets.IntSlider(min=0, max=30, step=1, value=2))
def plot_tree(tree_number, max_depth):
    """Draw tree number ``tree_number`` of the forest ``rf`` down to ``max_depth``.

    The full depth of the selected tree is written into the figure as text.
    """
    estimator = rf.estimators_[tree_number]
    _, ax = plt.subplots()
    # No class_names here: the forest is a RandomForestRegressor, so there are
    # no classes to name, and `labels` is the per-sample target array rather
    # than a list of class names.
    tree.plot_tree(estimator,
                   feature_names = feature_list,
                   filled = True,
                   ax = ax,
                   max_depth=max_depth)
    ax.text(0,1, f"depth = {estimator.get_depth()}")
    plt.show()