In [None]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

## Install libraries

```bash
conda create -n edu4 python=3.11 jupyter matplotlib
```

```bash 
! pip install -U -r requirements.txt
```

```bash
! pip install -U numpy
! pip install -U scikit-learn
```

## Update repository

In [None]:
! git pull

## Add import path

In [None]:
import os
import sys
import gc

In [None]:
# Put the parent directory on sys.path (once) so modules located there can be imported.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [None]:
del module_path

## Organize imports

In [None]:
import multiprocessing
from pathlib import Path

In [None]:
import seaborn as sns

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
import plotly.express as px

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_iris

In [None]:
from scipy import stats
from scipy.interpolate import interp1d

#### Number of CPU cores

In [None]:
workers = multiprocessing.cpu_count()
workers

## Initialize Path

PATH = Path.home() / 'git' / 'my_papers' / 'latticems'/ 'algebraic_theory_of_model_fitting_posets_covers'
images_path = PATH / 'images'
images_path.mkdir(exist_ok=True)

In [None]:
# All generated figures are written below a directory relative to the working directory.
PATH = Path('data')
images_path = PATH.joinpath('images')
images_path.mkdir(parents=True, exist_ok=True)

## Initialize simple dataset

In [None]:
# Seed NumPy's global RNG before the first random draw so that the noisy
# samples (and the figures built from them) are the same on every
# "Restart Kernel -> Run All".
np.random.seed(0)
X = np.random.uniform(low=-1.0, high=1.0, size=100)

In [None]:
def add_noise(target, range=0.04):
    """Return ``target`` plus zero-mean Gaussian noise, one draw per element.

    Parameters
    ----------
    target : array_like
        Clean values. Lists and scalars are accepted; they are converted
        with ``np.asarray`` before the noise is added.
    range : float, default 0.04
        Standard deviation of the noise (passed as ``scale`` to
        ``np.random.normal``). The name shadows the built-in ``range``
        inside this function; it is kept because callers pass it by keyword.

    Returns
    -------
    Noisy values with the same shape as ``target``.
    """
    # Plain Python lists have no ``.shape``; normalise the input first.
    target = np.asarray(target)
    return target + np.random.normal(0, range, size=target.shape)

In [None]:
def funct_ln(x: float, b: float = 1.4) -> float:
    """Quadratic target: ``x`` squared, shifted up by the offset ``b``.

    The notebook also calls this with NumPy arrays, in which case the
    arithmetic is applied element-wise.
    """
    squared = x**2
    return squared + b

# Dense grid and the noise-free quadratic evaluated on it.
X = np.linspace(-1, 1, num=20)
y = funct_ln(X)

# Three hand-picked sample sets on [-1, 1].
C = [-0.85, -0.2, 0.2, 0.75, 0.95]
B = [-0.9, -0.6, -0.4, -0.2, 0.2, 0.4, 0.6, 0.8]
A = [-1.0, -0.8, -0.5, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.5, 0.9, 1.0]

# Noisy targets; the noise level differs per set (C: 0.8, B: 0.6, A: 0.4).
# The draw order C, B, A is kept so the random stream is consumed as before.
F_C = add_noise(funct_ln(np.asarray(C)), range=0.8)
F_B = add_noise(funct_ln(np.asarray(B)), range=0.6)
F_A = add_noise(funct_ln(np.asarray(A)), range=0.4)

In [None]:
# Scatter a noisy copy of the curve together with the three sample sets, then save.
plt.figure(figsize=(6, 4))
plt.scatter(X, add_noise(y, range=0.4), label='$(X, Y)$', color='gray', marker='.')
for points, values, colour, symbol, legend_entry in (
    (A, F_A, 'blue', 'x', '$(A, Y_{A})$'),
    (B, F_B, 'red', 's', '$(B, Y_{B})$'),
    (C, F_C, 'green', 'o', '$(C, Y_{C})$'),
):
    plt.scatter(points, values, color=colour, label=legend_entry, marker=symbol)
plt.grid(False)
plt.legend()
plt.xlabel('$X$')
plt.ylabel('$Y$')
plt.savefig(images_path / 'x_lattice_1.png')

In [None]:
def funct_ln(x: float, b: float = 1.4) -> float:
    """Linear target with slope 2 and intercept ``b``.

    Note: this re-uses the name of the quadratic ``funct_ln`` defined in an
    earlier cell and replaces it from this point on. Works element-wise on
    NumPy arrays as well as on scalars.
    """
    doubled = 2 * x
    return doubled + b

# Dense grid and the noise-free linear target evaluated on it.
X = np.linspace(-1, 1, num=20)
y = funct_ln(X)

# Same three sample sets as for the quadratic example.
C = [-0.85, -0.2, 0.2, 0.75, 0.95]
B = [-0.9, -0.6, -0.4, -0.2, 0.2, 0.4, 0.6, 0.8]
A = [-1.0, -0.8, -0.5, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.5, 0.9, 1.0]

# Here all three sets share one noise level (0.4); draw order C, B, A is kept.
F_C = add_noise(funct_ln(np.asarray(C)), range=0.4)
F_B = add_noise(funct_ln(np.asarray(B)), range=0.4)
F_A = add_noise(funct_ln(np.asarray(A)), range=0.4)

In [None]:
# Scatter a noisy copy of the linear target together with the three sample sets, then save.
plt.figure(figsize=(6, 4))
plt.scatter(X, add_noise(y, range=0.4), label='$(X, Y)$', color='gray', marker='.')
for points, values, colour, symbol, legend_entry in (
    (A, F_A, 'blue', 'x', '$(A, Y_{A})$'),
    (B, F_B, 'red', 's', '$(B, Y_{B})$'),
    (C, F_C, 'green', 'o', '$(C, Y_{C})$'),
):
    plt.scatter(points, values, color=colour, label=legend_entry, marker=symbol)
plt.grid(False)
plt.legend()
plt.xlabel('$X$')
plt.ylabel('$Y$')
plt.savefig(images_path / 'x_lattice_2.png')

In [None]:
def funct_wv(x: float, b: float = 1.4) -> float:
    """Sine wave ``sin(12 * pi * x)``.

    ``b`` is accepted so the signature matches ``funct_ln`` but it is not
    used in the computation. Works element-wise on NumPy arrays.
    """
    angular_frequency = 12 * np.pi
    return np.sin(angular_frequency * x)

# Dense grid, a random 40-point subset of it, and the noise-free wave on the grid.
X = np.linspace(-1, 1, num=800)
A = np.random.choice(X, size=40, replace=False)
y = funct_wv(X)

# B is a fixed, evenly spaced set; A is then enlarged so that it contains B.
B = [-1.0, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1.0]
A = np.array(list(set(A.tolist()) | set(B)))
print(A)

# C is a random 20-point subset of the enlarged A.
C = np.random.choice(A, size=20, replace=False)

# Noisy targets (draw order C, B, A is kept).
F_C = add_noise(funct_wv(np.asarray(C)), range=0.004)
F_B = add_noise(funct_wv(np.asarray(B)), range=0.004)
F_A = add_noise(funct_wv(np.asarray(A)), range=0.004)

In [None]:
# Scatter a noisy copy of the wave together with the three sample sets, then save.
plt.figure(figsize=(6, 4))
plt.scatter(X, add_noise(y, range=0.004), label='$(X, Y)$', color='gray', marker='.')
for points, values, colour, symbol, legend_entry in (
    (A, F_A, 'blue', 'x', '$(A, Y_{A})$'),
    (B, F_B, 'red', 's', '$(B, Y_{B})$'),
    (C, F_C, 'green', 'o', '$(C, Y_{C})$'),
):
    plt.scatter(points, values, color=colour, label=legend_entry, marker=symbol)
plt.ylim(-1.1, 1.1)
# Thin reference axes through the origin.
plt.axhline(0, color='black', linewidth=0.5)
plt.axvline(0, color='black', linewidth=0.5)
plt.grid(False)
plt.legend()
plt.xlabel('$X$')
plt.ylabel('$Y$')
plt.savefig(images_path / 'x_lattice_3.png')

## Simple approximations

In [None]:
# Draw `n_ln` straight lines that all pass through one shared point P.
plt.figure(figsize=(6, 4))
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
plt.xlim(0, 1)
plt.ylim(0, 1)

# Coordinates of the shared point P.
P_X, P_Y = 0.82, 0.64

# One slope, colour, name and legend label per line.
# Raw strings keep the LaTeX backslashes intact without relying on Python's
# tolerance for invalid escape sequences such as '\p' or '\{'.
n_ln = 4
func_names = [r'$\phi$', r'$\psi$', r'$\eta$', r'$\mu$']
# Each label is a single balanced math expression; the stray '$' after
# \psi, \eta and \mu in the earlier version left math mode unbalanced.
func_pairs = [r'$(\phi, \{x\})$', r'$(\psi, \{x\})$',
              r'$(\eta, \{x\})$', r'$(\mu, \{x\})$']
colors = ['gray', 'blue', 'green', 'red']
slopes = np.linspace(-0.2, 0.4, n_ln)

# x position at which each line gets its name printed.
X_text = 0.2

# Every line is drawn across the full visible x range.
X_range = np.array([0, 1])

for slope, color, func_name, func_pair in zip(
        slopes, colors, func_names, func_pairs):
    # Intercept that makes the line y = slope * x + b pass through P.
    b = P_Y - slope * P_X

    y_range = slope * X_range + b
    y_text = slope * X_text + b

    # NOTE: the label is attached to the line, but this cell never calls
    # plt.legend(), so only the plt.text annotation is visible.
    plt.plot(X_range, y_range, color, label=func_pair)
    plt.text(
        X_text, y_text,
        func_name,
        verticalalignment='bottom',
        horizontalalignment='right')

# Mark the shared point ('o' = circle marker in the next default colour).
plt.plot(P_X, P_Y, 'o')
plt.text(
    P_X, P_Y,
    '$P$',
    verticalalignment='bottom',
    horizontalalignment='center')

plt.grid(False)
plt.savefig(images_path / 'k_1_points_1.png')
plt.show()

In [None]:
# Three observations; A uses the first two of them, B the last two.
X_t = np.array([0.2, 0.5, 0.8], dtype=float)
y_t = np.array([0.4, 0.84, 0.24], dtype=float)

# Zero heights, used to draw the members of A and B on the x axis.
y_z = np.zeros(2, dtype=float)

X_1, y_1 = X_t[:2], y_t[:2]
X_2, y_2 = X_t[1:], y_t[1:]

In [None]:
# Slope (a_*) and intercept (b_*) of the straight line through each pair of points.
rise_1, run_1 = y_1[1] - y_1[0], X_1[1] - X_1[0]
a_1 = rise_1 / run_1
b_1 = y_1[0] - a_1 * X_1[0]

rise_2, run_2 = y_2[1] - y_2[0], X_2[1] - X_2[0]
a_2 = rise_2 / run_2
b_2 = y_2[0] - a_2 * X_2[0]

In [None]:
X_1, y_1, X_2, y_2

In [None]:
# Calculate the linear regression
slope, intercept, r_value, p_value, std_err = stats.linregress(X_t, y_t)

# Create the line of best fit
line = slope * X_t + intercept

In [None]:
# Lower and upper end of the plotted range (used for both axes further down).
a, b = 0, 1

In [None]:
# Dense x grid on [a, b] and the regression line evaluated on it.
X_line = np.linspace(a, b, num=400)
y_line = intercept + slope * X_line
# Linear interpolant through the fitted values at the three observed points.
line_inter = interp1d(X_t, line, kind='linear')

In [None]:
# The two exact two-point lines evaluated on the dense grid.
line_1, line_2 = (
    gradient * X_line + offset
    for gradient, offset in ((a_1, b_1), (a_2, b_2))
)

In [None]:
meet_label = r'$(K_{A \lor B}, A \lor B) = (K_{A}, A) \land (K_{B}, B)$'

In [None]:
meet_label

In [None]:
# Three observations, the two exact two-point lines and the joint regression line.
plt.figure(figsize=(6, 4))

# The three observed points.
plt.plot(X_t, y_t, 'o', label='Original data', markersize=10)

# Members of A and B, drawn at height 0 on the x axis.
# Raw strings: '\{' is not a valid Python escape sequence.
plt.plot(X_1, y_z, 'x', label=r'$A = \{x_{1}, x_{2}\}$', markersize=10)
plt.plot(X_2, y_z, '^', label=r'$B = \{x_{2}, x_{3}\}$', markersize=10)

# Exact line through the two points of A, and through the two points of B.
plt.plot(X_line, line_1, 'b', linestyle='-', label='$(K_A, A)$')
plt.plot(X_line, line_2, 'g', linestyle='-', label='$(K_{B}, B)$')

# Least-squares line fitted to all three points.
plt.plot(X_line, y_line, 'r', label=r"$(K_{C}, C)$")

plt.legend()
plt.xlim(a, b)
plt.ylim(a, b)
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(False)
plt.savefig(images_path / 'k_3_points_1.png')
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Fix the global NumPy seed so the noisy scatter below is repeatable.
np.random.seed(0)

# Step 1: 20 evenly spaced sample positions covering [-1, 1].
X = np.linspace(-1, 1, num=20)

# Step 2: Define a simple function for y and add some random noise
def simple_function(x):
    """One full sine period per unit of ``x``: ``sin(2 * pi * x)``."""
    two_pi = 2 * np.pi
    return np.sin(two_pi * x)

# Step 2: noise-free function values. Noise is NOT added to `y` itself; it is
# only added to the copy that is scattered below.
y = simple_function(X)

# Step 3: scatter a noisy copy of the values.
plt.scatter(X, add_noise(y, range=0.4), color='red', label='Noisy Data Points')

# Step 4: piecewise linear approximation through the noise-free values.
# A single polyline joins consecutive points, so no per-segment loop is needed.
plt.plot(X, y, 'blue')

plt.title('Piecewise Linear Approximation with Noise')
plt.xlabel('x')
plt.ylabel('y')
plt.xlim(-1, 1)
plt.ylim(-1, 1)
plt.grid(True)
plt.legend()
plt.show()

## Join operations

In [None]:
# The single point shared by both samples.
x_inter, y_inter = 0.24, 0.2
A_land_B = np.array([x_inter])
y_A_land_y_B = np.array([y_inter])

# A ends at the shared point, B starts at it.
A = np.array([-0.2, 0.12, x_inter])
y_A = np.array([0.28, 0.4, y_inter])
B = np.array([x_inter, 0.62, 0.8])
y_B = np.array([y_inter, 0.48, 0.312])

In [None]:
# Least-squares lines for A and for B (scikit-learn expects a 2-D feature matrix).
K_A = LinearRegression().fit(A.reshape(-1, 1), y_A)
K_B = LinearRegression().fit(B.reshape(-1, 1), y_B)

# Two hand-specified lines, each defined by a point at x = 0 and a point at x = x_inter:
# K_C goes exactly through (x_inter, y_inter), f_C passes 0.02 above it.
K_C = LinearRegression().fit(np.array([[0.0], [x_inter]]), np.array([0.14, y_inter]))
f_C = LinearRegression().fit(np.array([[0.0], [x_inter]]), np.array([0.1, y_inter + 0.02]))

In [None]:
K_A.coef_, K_A.intercept_

In [None]:
K_B.coef_, K_B.intercept_

In [None]:
K_C.coef_, K_C.intercept_

In [None]:
f_C.coef_, f_C.intercept_

In [None]:
K_C_coef_ = np.array([1.0])
K_C_intercept_ = 0.02

In [None]:
A.tolist()

In [None]:
# Pad both samples with x = -0.5 and x = 1.0 so the lines span the whole plot.
pad_left, pad_right = [-0.5], [1.0]
A_ext = np.concatenate([pad_left, A, pad_right])
B_ext = np.concatenate([pad_left, B, pad_right])

# Evaluate each fitted line (intercept + slope * x) on the padded positions.
y_delta_A = K_A.intercept_ + K_A.coef_ * A_ext
y_delta_B = K_B.intercept_ + K_B.coef_ * B_ext
y_delta_C = K_C.intercept_ + K_C.coef_ * A_ext
f_delta_C = f_C.intercept_ + f_C.coef_ * A_ext

In [None]:
# Raw string: '\l' is not a valid Python escape sequence, so spell the LaTeX literally.
label_A_land_B = r'$(A \land B, y_{A \land B})$'

In [None]:
# Samples A and B, their shared point, and the four lines evaluated above.
# LaTeX labels are raw strings so backslashes such as '\l' and '\p' are not
# treated as (invalid) Python escape sequences.
plt.figure(figsize=(6, 4))
plt.rc('text', usetex=True)
plt.xlim(-0.5, 1)
plt.ylim(0, 0.5)

# Observations.
plt.scatter(A, y_A, color='blue', label='$(A, Y_{A})$')
plt.scatter(B, y_B, color='red', label='$(B, Y_{B})$')
plt.scatter(A_land_B, y_A_land_y_B, color='brown', label=r'$(A \land B, Y_{A \land B})$')

# Lines.
plt.plot(A_ext, y_delta_A, color='gray', label='$(K_{A}, A)$')
plt.plot(B_ext, y_delta_B, color='green', label='$(K_{B}, B)$')
plt.plot(A_ext, y_delta_C, color='blue', label=r'$(K_{A \land B}, A \land B)$')
plt.plot(A_ext, f_delta_C, color='red', label=r'$(\phi, A \land B)$')

plt.legend()
plt.xlabel('X')
plt.ylabel('Y')
plt.grid(False)
plt.savefig(images_path / 'k_lor_1.png')
plt.show()

## Piecewise and stepwise approximations

In [None]:
# Six observations used for the piecewise / stepwise examples.
A_piec = np.array([-0.4, -0.2, -0.02, 0.2, 0.32, 0.4])
y_A_piec = np.array([0.2, 0.32, 0.34, 0.28, 0.26, 0.18])

# Scatter of the raw observations only.
# NOTE: the next plotting cell saves to the same file name and overwrites this image.
plt.figure(figsize=(6, 4))
plt.rc('text', usetex=True)
plt.scatter(A_piec, y_A_piec, color='blue', label='$(A, Y_{A})$')
plt.xlim(-0.6, 0.6)
plt.ylim(0, 0.5)
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.grid(False)
plt.savefig(images_path / 'k_picwise_1.png')
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import os
os.environ['PYTHONIOENCODING'] = 'utf-8'

# Given points
X = np.array([-0.4, -0.2, -0.02, 0.2, 0.32, 0.4])
y = np.array([0.2, 0.32, 0.34, 0.28, 0.26, 0.18])

# x positions of the two outer intervals, left and right of the data.
X_conc = np.concatenate([[-0.6], X, [0.6]])
X_con1 = np.array([-0.6, -0.4])
X_con2 = np.array([0.4, 0.6])

# First continuation (drawn green): values on the left / right outer interval.
y_con11 = np.array([0.16, 0.2])
y_con12 = np.array([0.18, 0.01])

# Second continuation (drawn red): values on the left / right outer interval.
y_con21 = np.array([0.26, 0.2])
y_con22 = np.array([0.18, 0.12])

# NOTE(review): built from X rather than y, which looks unintended; it is not
# used anywhere in this cell.
y_con2 = np.concatenate([[0.12], X, [0.24]])


def piecewise_linear(x, X_data, y_data):
    """Linearly interpolate the points ``(X_data, y_data)`` at positions ``x``."""
    return np.interp(x, X_data, y_data)


# Evaluate the piecewise linear interpolant on a dense grid.
# The previous version passed `y_con1` here and plotted `y_new1`; neither name
# is defined anywhere in the notebook, so the cell raised NameError. The
# interpolant is therefore built from the observations themselves and drawn
# only over the observed x range.
x_new = np.linspace(X.min(), X.max(), 100)
y_new = piecewise_linear(x_new, X, y)

# Plotting
plt.figure(figsize=(6, 4))
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
plt.plot(X, y, 'o', label='$(A, Y_{A})$')
# Raw strings keep the LaTeX backslashes ('\p', '\m', '\e') literal.
plt.plot(x_new, y_new, '-', label=r"$(\phi, A)$")

plt.plot(X_con1, y_con11, '--', color='green', label=r'$(\mu, A)$')
plt.plot(X_con2, y_con12, '--', color='green')

plt.plot(X_con1, y_con21, '--', color='red', label=r'$(\eta, A)$')
plt.plot(X_con2, y_con22, '--', color='red')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.grid(False)
plt.savefig(images_path / 'k_picwise_1.png')
plt.show()

In [None]:
from scipy.interpolate import interp1d

# Cubic interpolant through the current (X, y) observations.
f = interp1d(X, y, kind='cubic')

# Dense grid restricted to the observed x range, and the interpolant on it.
x_dense = np.linspace(X.min(), X.max(), num=300)
y_dense = f(x_dense)

# Observations plus the smooth curve.
plt.figure(figsize=(8, 4))
plt.plot(X, y, 'o', label='Data points')
plt.plot(x_dense, y_dense, label='Cubic spline interpolation')
plt.title('Cubic Spline Interpolation')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Six observations.
X = np.array([-0.4, -0.2, -0.02, 0.2, 0.32, 0.4])
y = np.array([0.2, 0.32, 0.34, 0.28, 0.26, 0.18])

plt.figure(figsize=(8, 4))

# Step curve: with where='post' each value is held until the next x position.
plt.step(X, y, where='post', label='Stepwise Function')

# The observations themselves, as red dots.
plt.plot(X, y, 'o', color='red', label='Data Points')

plt.title('Stepwise Function from Data Points')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Twelve observations: six levels, each level given at two x positions.
X = np.array([-0.4, -0.3, -0.2, -0.1, -0.02, -0.01, 0.2, 0.24, 0.32, 0.36, 0.38, 0.4])
y = np.array([0.2, 0.2, 0.32, 0.32, 0.34, 0.34, 0.28, 0.28, 0.26, 0.26, 0.18, 0.18])

plt.figure(figsize=(6, 8))
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

# One horizontal segment per consecutive pair of x positions, drawn at the
# height of the left point; no vertical connectors are drawn.
for x_start, x_stop, level in zip(X[:-1], X[1:], y[:-1]):
    plt.plot([x_start, x_stop], [level, level], 'b-', linewidth=2)

# Closing segment at the last x position: a vertical line from the
# second-to-last to the last y value.
plt.plot([X[-1], X[-1]], [y[-2], y[-1]], 'b-', linewidth=2)

# Observations as red dots.
plt.plot(X, y, 'ro', label='Data Points')

plt.xlabel('X')
plt.ylabel('Y')
plt.title('Custom Stepwise Function Without Vertical Lines')
plt.grid(True)
plt.legend()
plt.show()


In [None]:
def generate_steps(nums=None):
    """Build sample points of a six-level step function.

    Each of the six fixed x intervals is sampled with ``np.linspace`` and
    every sample in an interval receives that interval's constant y level.

    Parameters
    ----------
    nums : sequence of int, optional
        Number of samples for each of the six intervals, in order. When
        omitted (or empty) eight samples per interval are used.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_array, y_array)``: the concatenated x positions and the
        matching y levels.
    """
    counts = nums if nums else [8, 8, 8, 8, 8, 8]
    intervals = (
        (-0.4, -0.2),
        (-0.2, -0.02),
        (-0.02, 0.2),
        (0.2, 0.32),
        (0.32, 0.38),
        (0.38, 0.4),
    )
    levels = np.array([0.2, 0.32, 0.34, 0.28, 0.26, 0.18])

    x_parts = [
        np.linspace(start, stop, num=counts[k])
        for k, (start, stop) in enumerate(intervals)
    ]
    y_parts = [
        np.repeat(level, repeats=counts[k])
        for k, level in enumerate(levels)
    ]

    return np.concatenate(x_parts), np.concatenate(y_parts)

In [None]:
# Samples per outer interval. The following cell assigns the same value; it is
# defined here as well so this cell runs on a fresh kernel.
num = 4

# x positions of the left and right outer intervals.
# (The right interval used to be assigned to X_con1 as well, overwriting the left one.)
X_con1 = np.linspace(-0.6, -0.4, num=num)
X_con2 = np.linspace(0.4, 0.6, num=num)

# Constant levels of the first continuation (left / right).
y_con11 = np.repeat(0.12, repeats=num)
y_con12 = np.repeat(0.24, repeats=num)

# Constant levels of the second continuation (left / right).
y_con21 = np.repeat(0.42, repeats=num)
y_con22 = np.repeat(0.14, repeats=num)

X_con1, y_con11

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Step-function samples: four points on each of the first four intervals and
# two on each of the last two.
num = 4
nums = [num, num, num, num, 2, 2]
X, y = generate_steps(nums=nums)

# x positions of the left and right outer intervals.
X_con1 = np.linspace(-0.6, -0.4, num=num)
X_con2 = np.linspace(0.4, 0.6, num=num)

# Constant levels of three different continuations (left / right).
y_con11 = np.repeat(0.12, repeats=num)
y_con12 = np.repeat(0.24, repeats=num)

y_con21 = np.repeat(0.42, repeats=num)
y_con22 = np.repeat(0.14, repeats=num)

y_con31 = np.repeat(0.16, repeats=num)
y_con32 = np.repeat(0.1088, repeats=num)

# Create a new figure for plotting
plt.figure(figsize=(6, 4))
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

# Plot the data points as dots
plt.plot(X, y, 'ro', label='$(A, Y_{A})$')

# One horizontal segment per consecutive pair of samples, at the height of the
# left sample; no vertical connectors are drawn.
for i in range(len(X) - 1):
    plt.plot([X[i], X[i+1]], [y[i], y[i]], 'b-', linewidth=2)

# Closing segment at the last x position; it also carries the legend entry for
# the blue step curve. Raw strings keep the LaTeX backslashes literal.
plt.plot([X[-1], X[-1]], [y[-2], y[-1]], 'b-', linewidth=2, label=r'$(\phi, A)$')

# Continuations on the two outer intervals.
plt.plot(X_con1, y_con11, '--', color='green', label=r'$(\mu, A)$')
plt.plot(X_con2, y_con12, '--', color='green')

plt.plot(X_con1, y_con21, '--', color='red', label=r'$(\eta, A)$')
plt.plot(X_con2, y_con22, '--', color='red')

plt.plot(X_con1, y_con31, '-', color='blue')
plt.plot(X_con2, y_con32, '-', color='blue')

plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.grid(False)
plt.savefig(images_path / 'k_stepwise_1.png')
plt.show()

## Linear regressions on subspaces

In [None]:
# Three x ranges and their union (X2 and X3 overlap on part of their range).
X1 = np.linspace(-0.4, -0.12, num=14)
X2 = np.linspace(-0.102, 0.2, num=12)
X3 = np.linspace(0.0012, 0.4, num=18)
X4 = np.concatenate([X1, X2, X3])

# A different underlying line on each range.
f1 = 0.44 * X1 + 0.1
f2 = 0.0002 * X2 + 0.12
f3 = -0.64 * X3 + 0.04

# Noisy observations (drawn in the order 1, 2, 3) and their union.
y1 = add_noise(f1, range=0.042)
y2 = add_noise(f2, range=0.024)
y3 = add_noise(f3, range=0.026)
y4 = np.concatenate([y1, y2, y3])

# One least-squares line per range, plus one for the union.
f_t1 = LinearRegression().fit(X1.reshape(-1, 1), y1)
f_t2 = LinearRegression().fit(X2.reshape(-1, 1), y2)
f_t3 = LinearRegression().fit(X3.reshape(-1, 1), y3)
f_t4 = LinearRegression().fit(X4.reshape(-1, 1), y4)

# Pad every x set with -0.6 and 0.6 so each line spans the full plot width.
X_conc1 = np.concatenate([[-0.6], X1, [0.6]])
X_conc2 = np.concatenate([[-0.6], X2, [0.6]])
X_conc3 = np.concatenate([[-0.6], X3, [0.6]])
X_conc4 = np.concatenate([[-0.6], X4, [0.6]])

# Fitted lines evaluated on the padded positions.
y_t1 = f_t1.intercept_ + f_t1.coef_ * X_conc1
y_t2 = f_t2.intercept_ + f_t2.coef_ * X_conc2
y_t3 = f_t3.intercept_ + f_t3.coef_ * X_conc3
y_t4 = f_t4.intercept_ + f_t4.coef_ * X_conc4

In [None]:
# Observations of the three subsets, their individual regression lines and the
# regression line fitted to the union.
plt.figure(figsize=(6, 4))
plt.rc('text', usetex=True)
plt.rc('font', family='serif')

# Observations.
plt.scatter(X1, y1, color='blue', label='$(A, Y_{A})$')
plt.scatter(X2, y2, color='green', label='$(B, Y_{B})$')
plt.scatter(X3, y3, color='red', label='$(C, Y_{C})$')

# Per-subset regression lines, in the colour of their subset.
plt.plot(X_conc1, y_t1, color='blue', label='$(K_{A}, A)$')
plt.plot(X_conc2, y_t2, color='green', label='$(K_{B}, B)$')
plt.plot(X_conc3, y_t3, color='red', label='$(K_{C}, C)$')

# Regression line of the union. Raw string: '\l' is not a valid Python escape.
plt.plot(
    X_conc4, y_t4,
    color='brown',
    label=r'$(K_{A \lor B \lor C}, A \lor B \lor C)$')

plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.grid(False)
plt.savefig(images_path / 'k_regs_1.png')
plt.show()

## Compare functions

In [None]:
# Same three x ranges as in the previous section.
X1, X2, X3 = (
    np.linspace(start, stop, num=count)
    for start, stop, count in (
        (-0.4, -0.12, 14),
        (-0.102, 0.2, 12),
        (0.0012, 0.4, 18),
    )
)

# Ranges 1 and 3 padded with -0.6 and 0.6.
X_conc1 = np.concatenate([[-0.6], X1, [0.6]])
X_conc3 = np.concatenate([[-0.6], X3, [0.6]])

# Two hand-specified lines. NOTE: f1 and f3 re-use names from the previous
# section and replace the values stored there.
f1 = 0.12 * X_conc1 - 0.2
f3 = -0.2 * X_conc3 - 0.04

In [None]:
# Two hand-specified lines drawn next to one set of observations.
# NOTE: `y2` is not defined in this section; it is the noisy sample created in
# the "Linear regressions on subspaces" section.
plt.figure(figsize=(6, 4))
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
plt.scatter(X2, y2, color='green', label='$(A, Y_{A})$')
# Raw strings keep the LaTeX backslashes ('\p', '\m') literal.
plt.plot(X_conc1, f1, color='blue', label=r'$(\phi, A)$')
plt.plot(X_conc3, f3, color='red', label=r'$(\mu, A)$')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.grid(False)
plt.savefig(images_path / 'phi_2_regs_1.png')
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import os
os.environ['PYTHONIOENCODING'] = 'utf-8'

# Given points
X = np.array([-0.38, -0.24, -0.018, 0.212, 0.324, 0.3898])
y = np.array([0.22, 0.312, 0.324, 0.282, 0.226, 0.182])

# x positions of the two outer intervals, left and right of the data.
X_conc = np.concatenate([[-0.6], X, [0.6]])
X_con1 = np.array([-0.6, -0.4])
X_con2 = np.array([0.4, 0.6])

# First continuation (drawn green): values on the left / right outer interval.
y_con11 = np.array([0.1602, 0.202])
y_con12 = np.array([0.182, 0.204])

# Second continuation (drawn red): values on the left / right outer interval.
y_con21 = np.array([0.2602, 0.202])
y_con22 = np.array([0.182, 0.1204])

# NOTE(review): built from X rather than y, which looks unintended; it is not
# used anywhere in this cell.
y_con2 = np.concatenate([[0.1204], X, [0.2402]])


# A second, random sample located on the outer intervals (4 points left, 2 right).
X_left = np.random.uniform(
    low=-0.58, high=-0.38, size=4)

X_right = np.random.uniform(
    low=0.42, high=0.56, size=2)

y_left = add_noise(
    np.random.uniform(
        low=0.26, high=0.36, size=4),
    range=0.002,
)

y_right = add_noise(
    np.random.uniform(
        low=0.26, high=0.28, size=2),
    range=0.002
)


def piecewise_linear(x, X_data, y_data):
    """Linearly interpolate the points ``(X_data, y_data)`` at positions ``x``."""
    return np.interp(x, X_data, y_data)


# Evaluate the piecewise linear interpolant on a dense grid.
# The previous version passed `y_con1` here and plotted `y_new1`; neither name
# is defined anywhere in the notebook, so the cell raised NameError. The
# interpolant is therefore built from the observations themselves and drawn
# only over the observed x range.
x_new = np.linspace(X.min(), X.max(), 100)
y_new = piecewise_linear(x_new, X, y)

# Plotting
plt.figure(figsize=(6, 4))
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
plt.plot(X_left, y_left, 'o', color='red', label='$(A, Y_{A})$')
plt.plot(X_right, y_right, 'o', color='red')
plt.plot(X, y, 'o', color='blue', label='$(B, Y_{B})$')
# Raw strings keep the LaTeX backslashes ('\p', '\m', '\e') literal.
plt.plot(x_new, y_new, '-', label=r"$(\phi, A)$")

plt.plot(X_con1, y_con11, '--', color='green', label=r'$(\mu, A)$')
plt.plot(X_con2, y_con12, '--', color='green')

plt.plot(X_con1, y_con21, '--', color='red', label=r'$(\eta, A)$')
plt.plot(X_con2, y_con22, '--', color='red')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.grid(False)
plt.savefig(images_path / 'phi_picwise_on_sample_1.png')
plt.show()

In [None]:
# Two observations.
X = np.linspace(-0.48, 0.28, num=2)
y = np.array([0.032, 0.046])

# Two shifted copies of the targets: y1 moves the first value down and the
# second up by 0.012, y2 does the opposite.
y_shift = np.array([-0.012, 0.012])
y1, y2 = y + y_shift, y - y_shift

# Observations padded with -0.6 and 0.6.
X_conc = np.concatenate([[-0.6], X, [0.6]])

# NOTE(review): these two lines are built from X_conc1 / X_conc3 of the
# "Compare functions" section, not from the X_conc defined just above, and
# neither f1 nor f2 is read again below; they look like copy-paste leftovers.
f1 = 0.12 * X_conc1 - 0.2
f2 = -0.2 * X_conc3 - 0.04

In [None]:
def line_params(X, y):
    """Return ``(slope, intercept)`` of the line through the first two points.

    Only ``X[0]``, ``X[1]``, ``y[0]`` and ``y[1]`` are read.
    """
    x0, x1 = X[0], X[1]
    y0, y1 = y[0], y[1]

    a = (y1 - y0) / (x1 - x0)
    return a, y0 - a * x0

In [None]:
# Slope and intercept of the line through each shifted pair of targets.
(a1, b1), (a2, b2) = (line_params(X, targets) for targets in (y1, y2))

In [None]:
# Values of both lines at the plot borders x = -0.6 and x = 0.6.
x_lo, x_hi = -0.6, 0.6

y_11, y_12 = x_lo * a1 + b1, x_hi * a1 + b1
y_21, y_22 = x_lo * a2 + b2, x_hi * a2 + b2

In [None]:
# y values matching X_conc: left border value, the two targets, right border value.
y_conc1, y_conc2 = (
    np.concatenate([[left], inner, [right]])
    for left, inner, right in ((y_11, y1, y_12), (y_21, y2, y_22))
)

In [None]:
# The two observations and the two lines through the shifted targets.
plt.figure(figsize=(6, 4))
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
plt.xlim(-0.6, 0.6)
plt.ylim(0, 0.05)
plt.scatter(X, y, color='green', label='$(A, Y_{A})$')
# Raw strings keep the LaTeX backslashes ('\p', '\m') literal.
plt.plot(X_conc, y_conc1, color='blue', label=r'$(\phi, A)$')
plt.plot(X_conc, y_conc2, color='red', label=r'$(\mu, A)$')
plt.xlabel('X')
plt.ylabel('Y')
plt.legend()
plt.grid(False)
plt.savefig(images_path / 'phi_2_regs_equiv_1.png')
plt.show()

## Classification models

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Generate a one-feature binary classification data set.
# random_state pins the generated data so the split, both fits and the saved
# figure are identical on every run (the split and the models were already seeded).
X, y = make_classification(n_features=1, n_redundant=0, n_informative=1,
                           n_clusters_per_class=1, n_samples=200, flip_y=0.05,
                           random_state=42)

# Hold out 30% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Two logistic regressions that differ in regularisation strength and solver.
model1 = LogisticRegression(C=1.0, solver='liblinear', random_state=42)
model2 = LogisticRegression(C=0.1, solver='saga', max_iter=5000, random_state=42)

# Fit both models on the same training data.
model1.fit(X_train, y_train)
model2.fit(X_train, y_train)

# Accuracy of each model on the held-out samples.
accuracy1 = accuracy_score(y_test, model1.predict(X_test))
accuracy2 = accuracy_score(y_test, model2.predict(X_test))

# Dense grid over the observed feature range, as a column vector.
X_range = np.linspace(X.min(), X.max(), 300).reshape(-1, 1)

# Predicted probability of class 1 on the grid.
y_pred1 = model1.predict_proba(X_range)[:, 1]
y_pred2 = model2.predict_proba(X_range)[:, 1]

In [None]:
y_pred1 = model1.predict_proba(X_range)[:, 1]
y_pred2 = model2.predict_proba(X_range)[:, 1]

In [None]:
# All samples (train and test) and the two predicted-probability curves.
plt.figure(figsize=(6, 4))
plt.rc('text', usetex=True)
plt.rc('font', family='serif')
plt.scatter(X_train.ravel(), y_train, color='gray', zorder=20, label='$(A, Y_{A})$')
plt.scatter(X_test.ravel(), y_test, color='gray', zorder=20)
# Raw strings keep the LaTeX backslashes ('\p', '\m') literal.
plt.plot(X_range.ravel(), y_pred1, color='blue', linewidth=2, label=r'$(\phi, A)$')
plt.plot(X_range.ravel(), y_pred2, color='red', linestyle='--', linewidth=2, label=r'$(\mu, A)$')
plt.xlabel('$X$')
plt.ylabel('$P(y=1|X)$')
plt.legend()
plt.grid(False)
plt.savefig(images_path / 'phi_2_log_regs_equiv_1.png')
plt.show()

In [None]:
from scipy.special import kl_div
from sklearn.metrics import log_loss

# Predicted probability of class 1 on the training samples.
# These used to be hard labels from .predict(), which made y_pred* identical
# to y_hat* and fed 0/1 values into both divergence measures below.
y_pred1 = model1.predict_proba(X_train)[:, 1]
y_pred2 = model2.predict_proba(X_train)[:, 1]

# Hard class labels on the training samples.
y_hat1 = model1.predict(X_train)
y_hat2 = model2.predict(X_train)

# Element-wise KL terms between the true labels and the predicted
# probabilities (scipy's kl_div returns one value per sample, not a sum).
kl_1 = kl_div(y_train, y_pred1)
kl_2 = kl_div(y_train, y_pred2)

# Cross-entropy (log loss) of the predicted probabilities.
ce_1 = log_loss(y_train, y_pred1)
ce_2 = log_loss(y_train, y_pred2)

In [None]:
y_pred1

In [None]:
y_pred2

In [None]:
np.all(y_pred1 == y_pred2)

In [None]:
np.all(y_hat1 == y_hat2)

In [None]:
kl_1

In [None]:
kl_2

In [None]:
ce_1

In [None]:
ce_2