# Imports

In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from ipywidgets import Layout, interact
import ipywidgets as widgets

# Constants

In [2]:
# Lower and upper bound of the x axis; both range sliders use them as min/max.
X_min = -30
X_max = 30
# Total number of generated points (half of them per class).
ROWS = 40
# Share of the rows intended for the training set; the rest is used for testing.
TRAIN_TEST_SPLIT_RATIO = 0.7

# Generate points

In [3]:
def generate_np_points(class_a_range, class_b_range):
    """Draw uniformly distributed x values and constant labels for two classes.

    Each class receives ``ROWS // 2`` points. Class A is labelled 1 and
    class B is labelled 0.

    Parameters
    ----------
    class_a_range, class_b_range : sequence
        Element 0 is the lower bound and element 1 the upper bound of the
        uniform distribution the x values of that class are drawn from.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_class_a, X_class_b, y_class_a, y_class_b)``, each of length
        ``ROWS // 2``.
    """
    per_class = ROWS // 2

    # Class A is sampled before class B, so the global NumPy random stream
    # is always consumed in the same order.
    a_low, a_high = class_a_range[0], class_a_range[1]
    a_points = np.random.uniform(low=a_low, high=a_high, size=(per_class,))

    b_low, b_high = class_b_range[0], class_b_range[1]
    b_points = np.random.uniform(low=b_low, high=b_high, size=(per_class,))

    a_labels = np.ones(per_class)
    b_labels = np.zeros(per_class)

    return a_points, b_points, a_labels, b_labels


def to_pandas_df(X_class_a, X_class_b, y_class_a, y_class_b):
    """Combine the points of both classes into one shuffled DataFrame.

    Parameters
    ----------
    X_class_a, X_class_b : array-like
        x values of class A and class B.
    y_class_a, y_class_b : array-like
        Labels belonging to the x values of class A and class B.

    Returns
    -------
    pandas.DataFrame
        Columns ``'x'`` and ``'y'``; rows are in random order and the index
        is renumbered from 0.
    """
    features = np.concatenate((X_class_a, X_class_b))
    labels = np.concatenate((y_class_a, y_class_b))
    ordered = pd.DataFrame(data={'x': features, 'y': labels})

    # frac=1 samples every row exactly once, i.e. a random permutation.
    shuffled = ordered.sample(frac=1)
    return shuffled.reset_index(drop=True)
    

def generate_points(class_a_range, class_b_range):
    """Generate random points for both classes and return them as a DataFrame.

    Parameters
    ----------
    class_a_range, class_b_range : sequence
        Passed unchanged to ``generate_np_points``.

    Returns
    -------
    pandas.DataFrame
        The result of ``to_pandas_df`` applied to the generated arrays.
    """
    X_a, X_b, y_a, y_b = generate_np_points(class_a_range, class_b_range)
    return to_pandas_df(X_a, X_b, y_a, y_b)

# Train, test split

In [4]:
def train_test_split(df, ratio=None):
    """Randomly split ``df`` into a training and a test frame of fixed size.

    Exactly ``round(len(df) * ratio)`` rows go to the training frame and the
    remaining rows to the test frame. Drawing an independent random number
    per row instead would make the split sizes themselves random, which on a
    small frame can leave the test set nearly empty.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split.
    ratio : float, optional
        Share of rows assigned to the training frame, between 0 and 1.
        Defaults to ``TRAIN_TEST_SPLIT_RATIO``.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(train, test)``; both keep the row order and index of ``df``.

    Raises
    ------
    ValueError
        If ``ratio`` is outside ``[0, 1]``.
    """
    if ratio is None:
        ratio = TRAIN_TEST_SPLIT_RATIO
    if not 0 <= ratio <= 1:
        raise ValueError('ratio must be between 0 and 1, got {!r}'.format(ratio))

    n_rows = len(df)
    n_train = int(round(n_rows * ratio))

    # Mark exactly n_train randomly chosen positions as training rows; a
    # boolean mask keeps the original row order inside both frames.
    in_train = np.zeros(n_rows, dtype=bool)
    in_train[np.random.permutation(n_rows)[:n_train]] = True

    train = df[in_train]
    test = df[~in_train]

    return train, test

# Visualization

In [5]:
def calc_logistic_curve(a, b, x):
    """Evaluate the logistic (sigmoid) curve ``1 / (1 + exp(-(a + b * x)))``.

    Parameters
    ----------
    a : float or array-like
        Intercept of the linear term.
    b : float or array-like
        Slope of the linear term.
    x : float or array-like
        Point(s) at which the curve is evaluated.

    Returns
    -------
    float or numpy.ndarray
        Curve values, broadcast over ``a``, ``b`` and ``x``.
    """
    return 1 / (1 + np.exp(-(a + b * x)))

In [6]:
def draw_plt(df, log_curve):
    """Plot the points of both classes together with a logistic curve.

    Rows with ``y == 1`` are drawn as green dots, all other rows as red
    dots. The curve is evaluated from ``X_min`` up to (but excluding)
    ``X_max`` in steps of 0.5.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs an ``'x'`` and a ``'y'`` column.
    log_curve : dict
        ``'intercept'`` and ``'coef'`` entries of the curve. Each may be a
        scalar or an array of any shape; only its first element is used.
    """
    condition = df['y'] == 1

    X_class_a = df.loc[condition, 'x']
    X_class_b = df.loc[~condition, 'x']
    y_class_a = df.loc[condition, 'y']
    y_class_b = df.loc[~condition, 'y']

    plt.plot(X_class_a, y_class_a, 'og')
    plt.plot(X_class_b, y_class_b, 'or')

    # Reduce both parameters to plain scalars so the curve is 1-D whatever
    # shape they arrive in (scalar, 1-D or 2-D array).
    intercept = np.ravel(log_curve['intercept'])[0]
    coef = np.ravel(log_curve['coef'])[0]

    # Use the shared axis bounds so the curve follows the configured x range.
    logistic_curve_x = np.arange(X_min, X_max, 0.5)
    logistic_curve_y = calc_logistic_curve(intercept, coef, logistic_curve_x)
    plt.plot(logistic_curve_x, logistic_curve_y)

# Training & testing

In [7]:
def training(logr, X, y):
    """Fit ``logr`` in place on features ``X`` and targets ``y``.

    Parameters
    ----------
    logr : object
        Model exposing a ``fit(X, y)`` method.
    X, y
        Passed unchanged to ``logr.fit``.

    Returns
    -------
    None
        The value returned by ``fit`` is discarded.
    """
    fit = logr.fit
    fit(X, y)

In [8]:
def testing(logr, X):
    return logr.predict_proba(X)[:, 1]

# Main function

In [9]:
def range_changed(green_range, red_range):
    """Regenerate the data, refit the model, plot it and print the log loss.

    Parameters
    ----------
    green_range : sequence
        Lower and upper x bound of the points labelled 1.
    red_range : sequence
        Lower and upper x bound of the points labelled 0.
    """
    df = generate_points(green_range, red_range)
    train, test = train_test_split(df)

    logr = LogisticRegression(solver='lbfgs')
    training(logr, train[['x']], train['y'])
    draw_plt(df, {'intercept': logr.intercept_, 'coef': logr.coef_})

    y_pred = testing(logr, test[['x']])
    # Name both labels explicitly: the random test split may contain only one
    # class, in which case log_loss cannot infer them and raises ValueError.
    print('Log loss: ', log_loss(test['y'], y_pred, labels=[0.0, 1.0]))

# Widgets

In [10]:
# Slider selecting the x interval from which the green (label 1) points are drawn.
green_range = widgets.FloatRangeSlider(
    value=[10, X_max],
    min=X_min,
    max=X_max,
    step=0.1,
    description='Green (True class):',
    # One decimal matches the 0.1 step; a 'd' format would round the readout
    # to whole numbers and hide the selected fraction.
    readout_format='.1f',
    # Let the label take the width it needs instead of being truncated.
    style={'description_width': 'initial'},
    layout=Layout(width='600px')
)

In [11]:
# Slider selecting the x interval from which the red (label 0) points are drawn.
red_range = widgets.FloatRangeSlider(
    value=[X_min, -10],
    min=X_min,
    max=X_max,
    step=0.1,
    description='Red (False class):',
    # One decimal matches the 0.1 step; a 'd' format would round the readout
    # to whole numbers and hide the selected fraction.
    readout_format='.1f',
    # Let the label take the width it needs instead of being truncated.
    style={'description_width': 'initial'},
    layout=Layout(width='600px')
)

In [12]:
# Re-run range_changed whenever a slider moves. The trailing semicolon keeps
# the cell from also echoing the returned function object.
interact(range_changed, green_range=green_range, red_range=red_range);

interactive(children=(FloatRangeSlider(value=(10.0, 30.0), description='Green (True class):', layout=Layout(wi…

<function __main__.range_changed(green_range, red_range)>