In [None]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

## Create the toy datasets

In [None]:
# Seed NumPy's global random state: the dataset generators in this notebook
# draw from np.random, so a fixed seed makes a fresh "Run All" reproducible.
np.random.seed(seed=0)

In [None]:
def generate_linear_dataset(slope, intercept, width, samples, samples_on_edge):
    """Generate and plot a two-class toy dataset separated by a straight line.

    The decision boundary is ``y = slope * x + intercept`` for ``x`` in
    ``[-10, 10]``; the two margin edges are the boundary shifted by ``+width``
    and ``-width``. The boundary, the margin edges and the generated
    observations are plotted with ``plt.plot``. All random draws come from
    the global ``np.random`` state.

    Parameters
    ----------
    slope, intercept : float
        Coefficients of the boundary line.
    width : float
        Vertical distance between the boundary and each margin edge.
    samples : int
        Number of observations per class. Each class is filled up to exactly
        this many rows; edge observations count towards the total and are
        never removed, so a class only exceeds ``samples`` when its edge
        observations alone already do.
    samples_on_edge : int
        Number of class-0 observations placed exactly on the upper margin
        edge. Class 1 always gets exactly one observation on the lower edge.
        NOTE(review): this asymmetry is kept from the original code -
        confirm whether class 1 should honour ``samples_on_edge`` as well.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, 3)`` with columns ``x``, ``y``, ``label``.
        Class-0 rows (above the margin) come first, then class-1 rows (below).
    """
    # Plot the decision boundary and both margin edges
    x_grid = np.linspace(-10, 10, 100)
    boundary = x_grid * slope + intercept
    upper_margin = boundary + width
    lower_margin = boundary - width
    plt.plot(x_grid, boundary, 'r')
    plt.plot(x_grid, upper_margin, 'r', alpha=0.5)
    plt.plot(x_grid, lower_margin, 'r', alpha=0.5)

    # Observations lying exactly on the margin edges
    class_0 = []
    while len(class_0) < samples_on_edge:
        value = np.random.uniform(-10, 10)
        class_0.append([value, value * slope + intercept + width])
    value = np.random.uniform(-10, 10)
    class_1 = [[value, value * slope + intercept - width]]

    # Vertical extent of the box from which candidate points are drawn
    y_low = lower_margin.min()
    y_high = upper_margin.max()
    if upper_margin.max() == upper_margin.min():
        # Flat boundary: the box coincides with the margin band, so no
        # candidate could ever fall outside it and the loop below would never
        # terminate. Open up room above and below the band.
        pad = abs(width) or 1.0
        y_low, y_high = min(y_low, y_high) - pad, max(y_low, y_high) + pad

    # Rejection sampling: keep candidates strictly outside the margin, and
    # stop adding to a class once it holds `samples` rows so that the classes
    # come out balanced instead of the first-filled class overshooting.
    while len(class_0) < samples or len(class_1) < samples:
        value_x = np.random.uniform(-10, 10)
        value_y = np.random.uniform(y_low, y_high)
        line_y = value_x * slope + intercept
        if value_y > line_y + width:
            if len(class_0) < samples:
                class_0.append([value_x, value_y])
        elif value_y < line_y - width:
            if len(class_1) < samples:
                class_1.append([value_x, value_y])

    # Assemble (x, y, label) rows: label 0 above the margin, label 1 below
    points = np.array(class_0 + class_1, dtype=float).reshape(-1, 2)
    labels = np.concatenate((np.zeros(len(class_0)), np.ones(len(class_1))))
    dataset = np.column_stack((points, labels))

    # Boolean masks give 1-D coordinate arrays, i.e. one line object per class
    for label, color in ((0, 'tab:blue'), (1, 'tab:orange')):
        members = dataset[dataset[:, 2] == label]
        plt.plot(members[:, 0], members[:, 1], 'o', color=color)

    return dataset

In [None]:
# First linear toy dataset: rising boundary, narrow margin, 10 samples per class
dataset_1 = generate_linear_dataset(
    slope=1.7,
    intercept=-3,
    width=2,
    samples=10,
    samples_on_edge=1,
)

In [None]:
# Second linear toy dataset: falling boundary, wider margin, 4 points on the upper edge
dataset_2 = generate_linear_dataset(
    slope=-2.3,
    intercept=1,
    width=3,
    samples=15,
    samples_on_edge=4,
)

In [None]:
def generate_polynomial_dataset(coeffs, width, samples):
    """Generate and plot a two-class toy dataset separated by a polynomial curve.

    The decision boundary is ``y = sum(coeffs[k] * x**k)`` for ``x`` in
    ``[-10, 10]``; the two margin edges are the boundary shifted by ``+width``
    and ``-width``. The boundary, the margin edges and the generated
    observations are plotted with ``plt.plot``. All random draws come from
    the global ``np.random`` state.

    Parameters
    ----------
    coeffs : sequence of float
        Polynomial coefficients ordered from the constant term upwards
        (``coeffs[k]`` multiplies ``x**k``). An empty sequence is treated as
        the zero polynomial.
    width : float
        Vertical distance between the boundary and each margin edge.
    samples : int
        Number of observations per class. Each class holds one observation
        exactly on its margin edge and is then filled up to exactly
        ``samples`` rows.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n, 3)`` with columns ``x``, ``y``, ``label``.
        Class-0 rows (above the margin) come first, then class-1 rows (below).
    """
    coeff_array = np.asarray(coeffs, dtype=float)
    if coeff_array.size == 0:
        coeff_array = np.zeros(1)

    def _calculate_value(x_values):
        # polyval expects coefficients in ascending order of degree, which is
        # the order `coeffs` uses; it accepts scalars and arrays alike.
        return np.polynomial.polynomial.polyval(x_values, coeff_array)

    # Plot the decision boundary and both margin edges
    x_grid = np.linspace(-10, 10, 100)
    boundary = _calculate_value(x_grid)
    upper_margin = boundary + width
    lower_margin = boundary - width
    plt.plot(x_grid, boundary, 'r')
    plt.plot(x_grid, upper_margin, 'r', alpha=0.5)
    plt.plot(x_grid, lower_margin, 'r', alpha=0.5)

    # One observation exactly on each margin edge
    value = np.random.uniform(-10, 10)
    class_0 = [[value, _calculate_value(value) + width]]
    value = np.random.uniform(-10, 10)
    class_1 = [[value, _calculate_value(value) - width]]

    # Vertical extent of the box from which candidate points are drawn
    y_low = lower_margin.min()
    y_high = upper_margin.max()
    if upper_margin.max() == upper_margin.min():
        # Constant boundary: the box coincides with the margin band, so no
        # candidate could ever fall outside it and the loop below would never
        # terminate. Open up room above and below the band.
        pad = abs(width) or 1.0
        y_low, y_high = min(y_low, y_high) - pad, max(y_low, y_high) + pad

    # Rejection sampling: keep candidates strictly outside the margin, and
    # stop adding to a class once it holds `samples` rows so that the classes
    # come out balanced even when the areas above and below the curve differ.
    while len(class_0) < samples or len(class_1) < samples:
        value_x = np.random.uniform(-10, 10)
        value_y = np.random.uniform(y_low, y_high)
        curve_y = _calculate_value(value_x)
        if value_y > curve_y + width:
            if len(class_0) < samples:
                class_0.append([value_x, value_y])
        elif value_y < curve_y - width:
            if len(class_1) < samples:
                class_1.append([value_x, value_y])

    # Assemble (x, y, label) rows: label 0 above the margin, label 1 below
    points = np.array(class_0 + class_1, dtype=float).reshape(-1, 2)
    labels = np.concatenate((np.zeros(len(class_0)), np.ones(len(class_1))))
    dataset = np.column_stack((points, labels))

    # Boolean masks give 1-D coordinate arrays, i.e. one line object per class
    for label, color in ((0, 'tab:blue'), (1, 'tab:orange')):
        members = dataset[dataset[:, 2] == label]
        plt.plot(members[:, 0], members[:, 1], 'o', color=color)

    return dataset

In [None]:
# Non-linear toy dataset: quartic boundary (coefficients from the constant term upwards)
dataset_3 = generate_polynomial_dataset(
    coeffs=[0.2, -0.5, 0.3, 0.08, -0.003],
    width=10,
    samples=40,
)