In [None]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

## Create the toy datasets

In [None]:
# Seed NumPy's legacy global RNG: the dataset generators below draw from
# np.random.uniform / np.random.shuffle, so a fixed seed makes a fresh
# "Restart & Run All" reproduce the same points.
SEED = 0
np.random.seed(SEED)

In [None]:
def generate_polynomial_dataset(coeffs, width, samples, samples_on_edge):
    """Build and plot a 2-D, two-class toy dataset split by a polynomial curve.

    The boundary is ``y = sum(coeffs[k] * x**k)`` evaluated for ``x`` in
    ``[-10, 10]``.  Points are only kept when they lie outside the margin
    band of half-width ``width`` around that curve: class 0 above the upper
    margin, class 1 below the lower margin.

    Side effects: draws the boundary, both margins and the sampled points on
    the current pyplot axes, and consumes the global ``np.random`` state.

    Parameters
    ----------
    coeffs : sequence of float
        Polynomial coefficients in ascending power order (``coeffs[0]`` is
        the constant term).
    width : float
        Vertical distance between the boundary and each margin line.
    samples : int
        Minimum number of points per class.  Sampling continues until *both*
        classes hold at least this many points, so the class that fills up
        first keeps receiving points and may end up larger.
        NOTE(review): possibly unintended, kept to preserve existing output.
    samples_on_edge : int
        Number of class-0 points placed exactly on the upper margin.  Class 1
        always receives exactly one point on the lower margin, regardless of
        this value.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 3)`` with columns ``x``, ``y``, ``label``; all
        class-0 rows come first, followed by the class-1 rows.
    """
    def _calculate_value(x_i):
        # Evaluate the polynomial at a single x: sum_k coeffs[k] * x_i**k.
        return np.sum(np.multiply(np.power(np.repeat(x_i, len(coeffs)), range(len(coeffs))), coeffs))

    x = np.linspace(-10, 10, 100)
    boundary = np.array([_calculate_value(x_i) for x_i in x])
    upper_margin = boundary + width
    lower_margin = boundary - width

    plt.plot(x, boundary, 'r')
    plt.plot(x, upper_margin, 'r', alpha = 0.3)
    plt.plot(x, lower_margin, 'r', alpha = 0.3)

    # Vertical extent of the sampling window; loop-invariant, so computed once
    # instead of rescanning the margins on every rejection-sampling iteration.
    y_low = lower_margin.min()
    y_high = upper_margin.max()

    # Observations lying exactly on the edge of the margin.
    x_1 = []
    while len(x_1) < samples_on_edge:
        value = np.random.uniform(-10, 10)
        x_1.append([value, _calculate_value(value) + width])
    value = np.random.uniform(-10, 10)
    x_2 = [[value, _calculate_value(value) - width]]

    # Rejection-sample the remaining observations: candidates falling inside
    # the margin band are discarded.
    while len(x_1) < samples or len(x_2) < samples:
        value_x = np.random.uniform(-10, 10)
        value_y = np.random.uniform(y_low, y_high)
        centre = _calculate_value(value_x)
        if value_y > centre + width:
            x_1.append([value_x, value_y])
        elif value_y < centre - width:
            x_2.append([value_x, value_y])

    # Output label: 0 for points above the curve, 1 for points below it.
    labels = np.concatenate((np.repeat(0, len(x_1)), np.repeat(1, len(x_2))))

    # Final version of the dataset: [x, y, label] rows, class 0 first.
    dataset = np.concatenate((np.array(x_1 + x_2), labels[:, None]), axis = 1)

    # Boolean masks give 1-D coordinate arrays, i.e. a single marker-only
    # artist per class rather than one Line2D per point.
    for label, colour in ((0, 'tab:blue'), (1, 'tab:orange')):
        members = dataset[dataset[:, 2] == label]
        plt.plot(members[:, 0], members[:, 1], 'o', color = colour)

    return dataset

In [None]:
# Linear boundary y = -3 + 1.7x with a margin half-width of 2.
dataset_1 = generate_polynomial_dataset(
    coeffs=[-3, 1.7],
    width=2,
    samples=10,
    samples_on_edge=1,
)

In [None]:
# Linear boundary y = 1 - 2.3x with a margin half-width of 3.
dataset_2 = generate_polynomial_dataset(
    coeffs=[1, -2.3],
    width=3,
    samples=15,
    samples_on_edge=4,
)

In [None]:
# Quartic boundary y = 0.2 - 0.5x + 0.3x^2 + 0.08x^3 - 0.003x^4,
# margin half-width of 5.
dataset_3 = generate_polynomial_dataset(
    coeffs=[0.2, -0.5, 0.3, 0.08, -0.003],
    width=5,
    samples=40,
    samples_on_edge=4,
)

In [None]:
def generate_gaussian_dataset(params, width, samples, samples_on_edge):
    """Build and plot a 2-D, two-class toy dataset made of circular regions.

    Not really Gaussian data: it is a toy dataset designed to be used with
    an RBF kernel.  Every entry of ``params`` describes a circle; points
    inside any circle form class 0, points outside all circles form class 1,
    and no sampled point lies strictly within ``width`` of any circle's
    boundary.

    Side effects: draws the region boundary, the inner/outer margin contours
    and the sampled points on the current pyplot axes, and consumes the
    global ``np.random`` state.

    Parameters
    ----------
    params : sequence of [centre_x, centre_y, radius]
        One entry per circle; only the first three items of each entry are
        read.
    width : float
        Half-width of the empty margin band around each circle's boundary.
    samples : int
        Sampling stops as soon as *either* class holds this many points, so
        the other class generally ends up with fewer.
        NOTE(review): the polynomial generator in this notebook waits for
        both classes instead; confirm which behavior is intended.
    samples_on_edge : int
        Maximum number of class-0 points taken from the vertices of each
        connected piece of the inner margin contour (radius - width).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, 3)`` with columns ``x``, ``y``, ``label``; all
        class-0 rows come first, followed by the class-1 rows.
    """
    circles = [(p[0], p[1], p[2]) for p in params]

    axis = np.linspace(-10, 10, 500)
    xv, yv = np.meshgrid(axis, axis)

    # Distance from every grid node to every centre.  Computed once with
    # array arithmetic and shared by the three contour fields below; the
    # previous np.vectorize'd Python function was re-evaluated per node.
    grid_dists = [np.sqrt((cx - xv) ** 2 + (cy - yv) ** 2) for cx, cy, _ in circles]

    def region(w):
        # 1 inside any circle shrunk by w, 0 exactly on a shrunk boundary
        # (and inside none), -1 everywhere else.
        inside = np.zeros(xv.shape, dtype = bool)
        on_edge = np.zeros(xv.shape, dtype = bool)
        for dist, (_, _, radius) in zip(grid_dists, circles):
            inside |= dist < radius - w
            on_edge |= dist == radius - w
        return np.where(inside, 1, np.where(on_edge, 0, -1))

    plt.contour(xv, yv, region(0), levels = [0], colors = ['r'])
    inner = plt.contour(xv, yv, region(width), levels = [0], colors = ['r'], alpha = 0.3)
    plt.contour(xv, yv, region(-width), levels = [0], colors = ['r'], alpha = 0.3)

    # Observations on the inner edge of the margin.  ContourSet.allsegs is
    # used instead of ContourSet.collections, which was deprecated in
    # Matplotlib 3.8 and later removed; allsegs[0] lists the vertex arrays
    # of each connected piece of the single requested level.
    x_1 = []
    for segment in inner.allsegs[0]:
        vertices = np.asarray(segment)
        order = list(range(vertices.shape[0]))
        np.random.shuffle(order)
        x_1.extend([vertices[j, 0], vertices[j, 1]] for j in order[:samples_on_edge])

    # Rejection-sample the remaining observations; candidates inside the
    # margin band of any circle are discarded.
    x_2 = []
    while len(x_1) < samples and len(x_2) < samples:
        value_x = np.random.uniform(-10, 10)
        value_y = np.random.uniform(-10, 10)
        inside_any = False
        in_margin = False
        for cx, cy, radius in circles:
            dist = math.sqrt((cx - value_x) ** 2 + (cy - value_y) ** 2)
            if dist < radius:
                inside_any = True
            if radius - width < dist < radius + width:
                in_margin = True
        if in_margin:
            continue
        if inside_any:
            x_1.append([value_x, value_y])
        else:
            x_2.append([value_x, value_y])

    # Output label: 0 for points inside a circle, 1 for points outside.
    labels = np.concatenate((np.repeat(0, len(x_1)), np.repeat(1, len(x_2))))

    # Final version of the dataset.  reshape keeps a (0, 2) shape when no
    # point was generated so the concatenation below still works.
    points = np.array(x_1 + x_2, dtype = float).reshape(-1, 2)
    dataset = np.concatenate((points, labels[:, None]), axis = 1)

    # Boolean masks give 1-D coordinate arrays, i.e. a single marker-only
    # artist per class rather than one Line2D per point.
    for label, colour in ((0, 'tab:blue'), (1, 'tab:orange')):
        members = dataset[dataset[:, 2] == label]
        plt.plot(members[:, 0], members[:, 1], 'o', color = colour)

    return dataset

In [None]:
# One row per circular region: [centre_x, centre_y, radius].
params = [
    [2, 3, 6],
    [-2, -4, 3],
    [-9, 7.5, 3],
]

dataset_4 = generate_gaussian_dataset(
    params=params,
    width=0.3,
    samples=100,
    samples_on_edge=5,
)