In [24]:
import numpy as np

np.random.seed(42)

n_samples = 100
X = np.random.uniform(-5, 5, size=n_samples).reshape(-1, 1)
y = 2*X + 1 + np.random.normal(scale=0.5, size=n_samples).reshape(-1, 1)


In [25]:
from sklearn.tree import DecisionTreeRegressor

clf = DecisionTreeRegressor(random_state=42)
clf.fit(X, y)


In [26]:
def conformal_prediction(clf, X_train, y_train, X_test, significance):
    # Train the model on the training set
    clf.fit(X_train, y_train)

    # Get the predicted values and residuals for the training set
    y_train_pred = clf.predict(X_train)
    residuals = y_train - y_train_pred

    # Initialize an array to store the p-values for the test set
    n_test = X_test.shape[0]
    npc_scores = np.zeros((n_test, len(significance)))

    # Loop over each test instance and compute the p-values
    for i in range(n_test):
        # Get the predicted value for this test instance
        y_test_pred = clf.predict(X_test[i].reshape(1, -1))

        # Compute the residual and absolute residual for this test instance
        residual = y_test - y_test_pred
        abs_residual = np.abs(residual)

        # Compute the p-values for this test instance
        p_values = np.zeros(len(significance))
        for j, alpha in enumerate(significance):
            k = int(alpha * (len(X_train) + 1))
            threshold = np.partition(abs_residual, k)[k]
            p_values[j] = np.mean(abs_residual > threshold)

        # Store the p-values for this test instance
        npc_scores[i] = p_values

    # Compute the conformal prediction intervals for the test set
    npc_prediction = np.zeros(n_test)
    for i in range(n_test):
        k_values = np.zeros(len(significance))
        for j, alpha in enumerate(significance):
            k_values[j] = int(alpha * (len(X_train) + 1))
        npc_prediction[i] = np.partition(residuals, k_values)[-1]

    return npc_prediction, npc_scores


In [27]:
import matplotlib.pyplot as plt

# Set the significance levels
significance = [0.05, 0.1, 0.2]

# Compute the conformal prediction intervals and p-values for the test set
npc_prediction, npc_scores = conformal_prediction(clf, X_train, y_train, X_test, significance)

# Plot the data points, true function, and predicted function
plt.figure(figsize=(8, 6))
plt.scatter(X_train, y_train, color='b', label='Training Data')
plt.scatter(X_test, y_test, color='r', label='Test Data')
plt.plot(X, 2*X + 1, color='g', label='True Function')
plt.plot(X_test, clf.predict(X_test), color='orange', label='Predicted Function')

# Plot the conformal prediction intervals
for i, alpha in enumerate(significance):
    lower = npc_prediction - npc_scores[:, i] * np.std(y_train)
    upper = npc_prediction + npc_scores[:, i] * np.std(y_train)
    plt.fill_between(X_test.ravel(), lower.ravel(), upper.ravel(), alpha=0.1, color='gray')
    plt.text(X_test[-1], upper[-1], f'{int(alpha*100)}% Significance', ha='right', va='center', fontsize=10)

plt.xlabel('Input (x)')
plt.ylabel('Output (y)')
plt.title('Conformal Prediction Intervals')
plt.legend(loc='upper left')
plt.show()


KeyError: ignored