#### DivideFromHere

# Experiment 4: DQN-Based Abel, Its Derandomization, and KAN Integration

## Step 1: Overall Design
### Objective:
To implement and analyze the equivalence between the probabilistic and deterministic versions of Abel, a Deep Q-Network (DQN)-based chess AI, using Kolmogorov-Arnold Networks (KAN).

### Steps:
1. Data Preparation:

- Train the DQN-based Abel in both its probabilistic and deterministic versions.
- Collect performance metrics such as material count, mobility count, piece-square score, and center control count.
2. Training and Evaluation:

- Define a custom KAN model architecture using PyTorch.
- Train the KAN model on the collected data.
- Evaluate the model's performance and track the equivalence score during training.
3. Visualization:

- Visualize the dataset.
- Plot the equivalence curve to show the relationship between the deterministic and probabilistic versions.
- Extract and visualize the symbolic formula from the trained KAN model.
- Plot the model's structure and equivalence data points.
- Visualize the weights and biases of the trained KAN model.

## Step 2: Define the DQN for Abel
### 2.1: Define the Neural Network for DQN

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import chess
from collections import deque
import random
from IPython.display import display, clear_output, SVG
import time

# Deep Q-Network model for chess evaluation
class AbelDQN:
    """Small fully connected value network that scores a chess position.

    The network maps the 64-element board encoding produced by
    ``board_to_input`` to one scalar through two 64-unit ReLU layers. It is
    compiled with the Adam optimizer and a mean-squared-error loss.
    """

    def __init__(self, seed):
        """Build the Keras model; *seed* is forwarded to ``build_model``."""
        self.model = self.build_model(seed)

    def build_model(self, seed):
        """Seed NumPy and TensorFlow with *seed* and return the compiled model."""
        # Seeding happens right before layer creation, presumably so that two
        # instances built with the same seed start from identical weights.
        np.random.seed(seed)
        tf.random.set_seed(seed)
        model = Sequential([
            Dense(64, activation='relu', input_dim=64),
            Dense(64, activation='relu'),
            Dense(1, activation='linear')
        ])
        model.compile(optimizer='adam', loss='mse')
        return model

    def evaluate_board(self, board):
        """Return the network's scalar evaluation of *board* as a Python float."""
        board_state = tf.convert_to_tensor(self.board_to_input(board), dtype=tf.float32)
        # This method is called once per candidate move, i.e. with a single
        # row. Keras recommends calling the model directly for inputs that fit
        # in one batch; `Model.predict` adds per-call overhead that dominates
        # the runtime when invoked in a tight loop.
        prediction = self.model(board_state, training=False)
        return float(prediction[0][0])

    def board_to_input(self, board):
        """Encode *board* as a ``(1, 64)`` array indexed by square.

        Each occupied square holds the piece's ``piece_type``; the value is
        negated for non-white pieces. Empty squares stay at 0.
        """
        board_state = np.zeros(64)
        for i, piece in board.piece_map().items():
            board_state[i] = piece.piece_type if piece.color == chess.WHITE else -piece.piece_type
        return np.array([board_state])


### 2.2: Derandomization functions

In [None]:
def set_random_seed(seed):
    """Seed every random number generator the notebook draws from.

    Besides NumPy and TensorFlow this also seeds Python's ``random`` module:
    the training loop samples replay batches with ``random.sample``, so leaving
    it unseeded makes runs with the same *seed* diverge.
    """
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

def softmax_policy(q_values, legal_moves, temperature):
    """Return the legal move with the highest softmax probability.

    The scores are turned into a softmax distribution at the given
    temperature and the most probable move is returned. No sampling takes
    place, so the choice is deterministic for fixed inputs.

    Args:
        q_values: one score per entry of *legal_moves*, in the same order.
        legal_moves: indexable sequence of candidate moves.
        temperature: positive softmax temperature.

    Returns:
        The element of *legal_moves* whose score has the largest probability
        (the first one on ties).

    Raises:
        ValueError: if *q_values* is empty or *temperature* is not positive.
    """
    scores = np.asarray(q_values, dtype=float)
    if scores.size == 0:
        raise ValueError("softmax_policy requires at least one legal move")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    scaled = scores / temperature
    # Shift by the maximum before exponentiating: the softmax is unchanged, but
    # large scores (or a small temperature) no longer overflow to inf, which
    # used to turn every probability into NaN and silently select index 0.
    scaled -= scaled.max()
    exp_values = np.exp(scaled)
    probabilities = exp_values / exp_values.sum()
    return legal_moves[int(np.argmax(probabilities))]


## Step 3: Training Functions
### 3.1: Training the Original DQN

In [None]:
def train_dqn(env, num_episodes, seed, temperature):
    """Train an ``AbelDQN`` by self-play with a replay buffer and target network.

    Args:
        env: environment exposing ``reset``, ``legal_moves``, ``simulate_move``
            and ``step`` (``ChessEnv`` in this notebook). States must provide
            ``copy()``, as ``ChessEnv.simulate_move`` already assumes.
        num_episodes: number of games to play.
        seed: seed passed to ``set_random_seed`` and to both networks.
        temperature: softmax temperature forwarded to ``softmax_policy``.

    Returns:
        The trained online network.
    """
    set_random_seed(seed)
    dqn_abel = AbelDQN(seed)
    target_network = AbelDQN(seed)
    replay_buffer = deque(maxlen=2000)
    gamma = 0.99
    batch_size = 32
    target_update_freq = 10

    for episode in range(num_episodes):
        state = env.reset()
        done = False
        while not done:
            legal_moves = env.legal_moves(state)
            q_values = [dqn_abel.evaluate_board(env.simulate_move(state, move)) for move in legal_moves]
            action = softmax_policy(q_values, legal_moves, temperature)

            # ChessEnv hands out its single mutable board from both reset() and
            # step(). Storing that object would make every transition in the
            # buffer point at the current position, so freeze copies instead.
            state_snapshot = state.copy()
            next_state, reward, done = env.step(state, action)
            replay_buffer.append((state_snapshot, action, reward, next_state.copy(), int(done)))
            state = next_state

            if len(replay_buffer) > batch_size:
                batch = random.sample(replay_buffer, batch_size)
                update_dqn(dqn_abel, target_network, batch, gamma)

        # Sync the target network every `target_update_freq` episodes
        # (including episode 0).
        if episode % target_update_freq == 0:
            target_network.model.set_weights(dqn_abel.model.get_weights())

    return dqn_abel

def update_dqn(dqn_abel, target_network, batch, gamma, verbose=False):
    """Run one gradient step of the online network on a replay batch.

    The regression target for each transition is
    ``reward + (1 - done) * gamma * V_target(next_state)``.

    Args:
        dqn_abel: online network; must expose ``board_to_input`` and ``model``.
        target_network: network used to value the next states.
        batch: iterable of ``(state, action, reward, next_state, done)`` tuples.
        gamma: discount factor.
        verbose: when true, print the input and target shapes (this used to be
            printed unconditionally on every update).
    """
    if not batch:
        return  # nothing to learn from; zip(*batch) below would fail to unpack

    states, _actions, rewards, next_states, dones = zip(*batch)

    # Value all next states with a single batched forward pass instead of one
    # predict call per transition.
    next_inputs = np.array([target_network.board_to_input(board)[0] for board in next_states])
    next_values = np.asarray(target_network.model.predict(next_inputs, verbose=0)).reshape(-1)

    rewards = np.asarray(rewards, dtype=float)
    not_terminal = 1.0 - np.asarray(dones, dtype=float)
    targets = rewards + not_terminal * gamma * next_values

    state_inputs = np.array([dqn_abel.board_to_input(board)[0] for board in states])
    if verbose:
        print(f"States shape: {state_inputs.shape}, Targets shape: {targets.shape}")
    dqn_abel.model.train_on_batch(state_inputs, targets)


### 3.2: Training the Derandomized DQN

In [None]:
def train_abel_dqn_derandomized(env, num_episodes, seed, temperature):
    """Train the "derandomized" Abel DQN.

    The training procedure is the same as ``train_dqn`` (the previous body was
    a line-for-line copy of it), so this simply delegates to keep the two from
    drifting apart.

    Args:
        env: environment passed through to ``train_dqn``.
        num_episodes: number of games to play.
        seed: seed for the random number generators and both networks.
        temperature: softmax temperature forwarded to the move-selection policy.

    Returns:
        The trained online network.
    """
    return train_dqn(env, num_episodes, seed, temperature)


## Step 4: Simulation Functions
### 4.1: Environment and Simulation Setup

In [None]:
# Environment simulation
class ChessEnv:
    """Minimal single-board chess environment built on ``chess.Board``.

    The environment owns one mutable board, ``self.board``. Both ``reset`` and
    ``step`` return that same object, so a caller that needs a frozen position
    has to copy it.
    """

    def __init__(self):
        # Create the board up front so `step` works even when `reset` has not
        # been called yet (previously that raised AttributeError).
        self.board = chess.Board()

    def reset(self):
        """Start a new game and return the fresh board."""
        self.board = chess.Board()
        return self.board

    def step(self, state, action):
        """Push *action* onto the internal board.

        *state* is accepted but not used; the move is always applied to
        ``self.board``.

        Returns:
            ``(next_state, reward, done)``: the internal board after the move,
            its ``evaluate_board`` score, and the board's ``is_game_over()``.
        """
        self.board.push(action)
        reward = self.evaluate_board(self.board)
        done = self.board.is_game_over()
        next_state = self.board
        return next_state, reward, done

    def legal_moves(self, state):
        """Return the legal moves of *state* as a list."""
        return list(state.legal_moves)

    def simulate_move(self, state, move):
        """Return a copy of *state* with *move* applied; *state* is untouched."""
        board_copy = state.copy()
        board_copy.push(move)
        return board_copy

    def evaluate_board(self, board):
        """Return the white piece count minus the count of all other pieces.

        Every piece counts as 1 regardless of its type.
        """
        return sum(1 if piece.color == chess.WHITE else -1 for piece in board.piece_map().values())

# Function to calculate additional metrics
def calculate_metrics(board):
    """Compute four simple position metrics for *board*.

    Returns:
        ``(material_count, mobility_count, piece_square_score,
        center_control_count)`` where

        * material_count is +1 per white piece and -1 per other piece,
        * mobility_count is the number of legal moves,
        * piece_square_score is computed by the same formula as
          material_count,
        * center_control_count is the number of pieces standing on D4, E4,
          D5 or E5 (either colour).
    """
    pieces_by_square = board.piece_map()

    signed_piece_total = 0
    for piece in pieces_by_square.values():
        signed_piece_total += 1 if piece.color == chess.WHITE else -1

    central_squares = [chess.D4, chess.E4, chess.D5, chess.E5]
    occupied_center = sum(1 for square in pieces_by_square if square in central_squares)

    material_count = signed_piece_total
    mobility_count = len(list(board.legal_moves))
    piece_square_score = signed_piece_total
    center_control_count = occupied_center
    return material_count, mobility_count, piece_square_score, center_control_count

def calculate_additional_metrics(board, move_scores, current_depth, is_exploratory):
    """Derive search-style statistics from a position and its move scores.

    Returns:
        ``(evaluation_score, branching_factor, depth_of_search, move_diversity,
        exploration_vs_exploitation)``: the mean of *move_scores* (0 when it is
        empty), the number of legal moves on *board*, *current_depth*
        unchanged, the variance of *move_scores* (0 when empty), and 1 or 0
        depending on the truthiness of *is_exploratory*.
    """
    if move_scores:
        mean_score = sum(move_scores) / len(move_scores)
        score_variance = np.var(move_scores)
    else:
        mean_score = 0
        score_variance = 0

    legal_move_total = len(list(board.legal_moves))
    exploration_flag = int(bool(is_exploratory))
    return mean_score, legal_move_total, current_depth, score_variance, exploration_flag


### 4.2: Simulate Games using Probabilistic DQN

In [None]:
# Function to play the game with probabilistic DQN
def play_game_dqn_probabilistic(dqn_abel, env, max_moves=55, max_runtime=600, temperature=1.0):
    """Play one self-play game with the softmax policy and log per-move metrics.

    The board is rendered after every move, followed by a one-second pause and
    a progress line.

    NOTE(review): ``softmax_policy`` returns the arg-max of the softmax
    distribution rather than sampling from it, so for a fixed network this
    "probabilistic" player is deterministic - confirm that this is intended.

    Args:
        dqn_abel: network exposing ``evaluate_board``.
        env: environment (``ChessEnv``); the game is played on ``env.board``.
        max_moves: maximum number of moves to play.
        max_runtime: wall-clock budget in seconds (display pauses count).
        temperature: softmax temperature forwarded to ``softmax_policy``.

    Returns:
        A DataFrame with one row per move played.
    """
    # `import chess` alone does not load the SVG renderer sub-module, so
    # `chess.svg.board` could fail with AttributeError. Import it explicitly.
    import chess.svg

    history = {
        'Step': [],
        'Time': [],
        'Move': [],
        'Material Count': [],
        'Mobility Count': [],
        'Piece-Square Score': [],
        'Center Control Count': [],
        'Evaluation Score': [],
        'Branching Factor': [],
        'Depth of Search': [],
        'Move Diversity': [],
        'Exploration vs Exploitation': [],
    }
    step_number = 1
    state = env.reset()
    start_time = time.time()

    while not env.board.is_game_over() and step_number <= max_moves and (time.time() - start_time) <= max_runtime:
        move_start_time = time.time()
        legal_moves = env.legal_moves(state)  # generated once, reused below
        q_values = [dqn_abel.evaluate_board(env.simulate_move(state, move)) for move in legal_moves]
        action = softmax_policy(q_values, legal_moves, temperature)
        next_state, reward, _done = env.step(state, action)
        move_time = time.time() - move_start_time

        material_count, mobility_count, piece_square_score, center_control_count = calculate_metrics(env.board)

        # With ChessEnv, `state` is the environment's own board and has already
        # been advanced by step(), so these scores describe the position after
        # the move that was just played.
        move_scores = [dqn_abel.evaluate_board(env.simulate_move(state, move)) for move in env.legal_moves(state)]
        evaluation_score, branching_factor, depth_of_search, move_diversity, exploration_vs_exploitation = calculate_additional_metrics(
            env.board, move_scores, 3, False)  # Depth = 3, is_exploratory = False as placeholder

        history['Step'].append(step_number)
        history['Time'].append(move_time)
        history['Move'].append(action.uci())
        history['Material Count'].append(material_count)
        history['Mobility Count'].append(mobility_count)
        history['Piece-Square Score'].append(piece_square_score)
        history['Center Control Count'].append(center_control_count)
        history['Evaluation Score'].append(evaluation_score)
        history['Branching Factor'].append(branching_factor)
        history['Depth of Search'].append(depth_of_search)
        history['Move Diversity'].append(move_diversity)
        history['Exploration vs Exploitation'].append(exploration_vs_exploitation)

        clear_output(wait=True)
        display(SVG(chess.svg.board(board=env.board, size=350)))
        time.sleep(1)
        # Report the step that was just played; the counter is advanced only
        # afterwards (the message used to show the already-incremented value).
        print(f"Move: {action}, Step: {step_number}, Time: {move_time}, Material: {reward}")

        state = next_state
        step_number += 1

    df = pd.DataFrame(history)
    print(f"Result: {env.board.result()}")
    return df

# Initialize the environment and the network
# `env` is a module-level object that the later cells reuse for every game.
env = ChessEnv()
# Short training run: 5 self-play episodes with a fixed seed.
dqn_abel_probabilistic = train_dqn(env, num_episodes=5, seed=42, temperature=1.0)

# Simulate and run the probabilistic game
# NOTE(review): training above uses temperature=1.0 while this game is played
# with temperature=0.5 - confirm that the mismatch is intended.
print("Running probabilistic game...")
probabilistic_dqn_results = play_game_dqn_probabilistic(dqn_abel_probabilistic, env, temperature=0.5)


### 4.3: Simulate Games using Derandomized DQN

In [None]:
# Function to play the game with derandomized DQN
def play_game_dqn_derandomized(dqn_abel, env, max_moves=55, max_runtime=600):
    """Play one self-play game with the derandomized policy and log metrics.

    Moves are chosen with ``softmax_policy`` at a fixed temperature of 1.0.
    The board is rendered after every move, followed by a 0.1-second pause.

    Args:
        dqn_abel: network exposing ``evaluate_board``.
        env: environment (``ChessEnv``); the game is played on ``env.board``.
        max_moves: maximum number of moves to play.
        max_runtime: wall-clock budget in seconds (display pauses count).

    Returns:
        A DataFrame with one row per move played.
    """
    # `import chess` alone does not load the SVG renderer sub-module, so
    # `chess.svg.board` could fail with AttributeError. Import it explicitly.
    import chess.svg

    history = {
        'Step': [],
        'Time': [],
        'Move': [],
        'Material Count': [],
        'Mobility Count': [],
        'Piece-Square Score': [],
        'Center Control Count': [],
        'Evaluation Score': [],
        'Branching Factor': [],
        'Depth of Search': [],
        'Move Diversity': [],
        'Exploration vs Exploitation': [],
    }
    step_number = 1
    state = env.reset()
    start_time = time.time()

    while not env.board.is_game_over() and step_number <= max_moves and (time.time() - start_time) <= max_runtime:
        move_start_time = time.time()
        legal_moves = env.legal_moves(state)  # generated once, reused below
        q_values = [dqn_abel.evaluate_board(env.simulate_move(state, move)) for move in legal_moves]
        # softmax_policy returns the arg-max of the distribution, which is what
        # makes this player deterministic; the temperature is fixed at 1.0.
        action = softmax_policy(q_values, legal_moves, 1.0)
        next_state, _reward, _done = env.step(state, action)
        move_time = time.time() - move_start_time

        material_count, mobility_count, piece_square_score, center_control_count = calculate_metrics(env.board)

        # With ChessEnv, `state` is the environment's own board and has already
        # been advanced by step(), so these scores describe the position after
        # the move that was just played.
        move_scores = [dqn_abel.evaluate_board(env.simulate_move(state, move)) for move in env.legal_moves(state)]
        evaluation_score, branching_factor, depth_of_search, move_diversity, exploration_vs_exploitation = calculate_additional_metrics(
            env.board, move_scores, 3, False)  # Depth = 3, is_exploratory = False as placeholder

        history['Step'].append(step_number)
        history['Time'].append(move_time)
        history['Move'].append(action.uci())
        history['Material Count'].append(material_count)
        history['Mobility Count'].append(mobility_count)
        history['Piece-Square Score'].append(piece_square_score)
        history['Center Control Count'].append(center_control_count)
        history['Evaluation Score'].append(evaluation_score)
        history['Branching Factor'].append(branching_factor)
        history['Depth of Search'].append(depth_of_search)
        history['Move Diversity'].append(move_diversity)
        history['Exploration vs Exploitation'].append(exploration_vs_exploitation)

        state = next_state
        step_number += 1

        clear_output(wait=True)
        display(SVG(chess.svg.board(board=env.board, size=350)))
        time.sleep(0.1)  # Reduced sleep time for faster execution

    df = pd.DataFrame(history)
    print(f"Result: {env.board.result()}")
    return df

# Initialize the derandomized network
# Same episode count, seed and temperature as the probabilistic training run;
# the shared `env` from the earlier cell is reused.
dqn_abel_derandomized = train_abel_dqn_derandomized(env, num_episodes=5, seed=42, temperature=1.0)

# Simulate and run the derandomized game
print("Running derandomized game...")
derandomized_dqn_results = play_game_dqn_derandomized(dqn_abel_derandomized, env)


## Step 5: Collect and Analyze Performance Metrics

In [None]:
def aggregate_dqn_metrics(results):
    """Stack per-game metric tables and summarise their numeric columns.

    Args:
        results: list of per-game DataFrames as returned by the play functions.

    Returns:
        ``(mean_metrics, std_metrics, aggregated_data)``: the column-wise mean
        and standard deviation of the nine metric columns over all rows, and
        the concatenated frame (re-indexed from 0) with every original column.
    """
    aggregated_data = pd.concat(results, ignore_index=True)
    metric_values = aggregated_data[[
        'Material Count',
        'Mobility Count',
        'Piece-Square Score',
        'Center Control Count',
        'Evaluation Score',
        'Branching Factor',
        'Depth of Search',
        'Move Diversity',
        'Exploration vs Exploitation',
    ]]
    return metric_values.mean(), metric_values.std(), aggregated_data

# Aggregate the results
# Each call receives a one-element list, so the statistics below are taken over
# the moves of a single game per player.
probabilistic_dqn_mean, probabilistic_dqn_std, probabilistic_dqn_data = aggregate_dqn_metrics([probabilistic_dqn_results])
derandomized_dqn_mean, derandomized_dqn_std, derandomized_dqn_data = aggregate_dqn_metrics([derandomized_dqn_results])

# Display the aggregated metrics
print("Probabilistic DQN Mean Metrics:\n", probabilistic_dqn_mean)
print("Probabilistic DQN Std Metrics:\n", probabilistic_dqn_std)
print("\nDerandomized DQN Mean Metrics:\n", derandomized_dqn_mean)
print("Derandomized DQN Std Metrics:\n", derandomized_dqn_std)

# Display move sequences and non-numeric data
# 'Move' holds the UCI string of every move played.
print("\nProbabilistic Moves:\n", probabilistic_dqn_data['Move'])
print("\nDerandomized Moves:\n", derandomized_dqn_data['Move'])


## Step 6: Generate and Analyze Equivalence Curves

In [None]:
def plot_dqn_equivalence_curve(derandomized_mean, derandomized_std, probabilistic_mean, probabilistic_std):
    """Plot per-metric means with error bars and shaded +/- one std bands.

    All four arguments are Series indexed by metric name; the index of
    *derandomized_mean* provides the x-axis labels. The derandomized player is
    drawn in blue and the probabilistic player in green.
    """
    metrics = derandomized_mean.index
    x = range(len(metrics))
    series = [
        ('Derandomized', 'blue', derandomized_mean, derandomized_std),
        ('Probabilistic', 'green', probabilistic_mean, probabilistic_std),
    ]

    fig, ax = plt.subplots(figsize=(14, 7))

    # Error bars first, shaded bands second, so the artists are added in the
    # same order as before.
    for label, color, mean, std in series:
        ax.errorbar(x, mean, yerr=std, fmt='o-', label=label, color=color, capsize=5)
    for _label, color, mean, std in series:
        ax.fill_between(x, mean - std, mean + std, color=color, alpha=0.2)

    ax.set_title('Equivalence Curve for Derandomized and Probabilistic DQN')
    ax.set_xlabel('Metrics')
    ax.set_ylabel('Values')
    ax.set_xticks(x)
    ax.set_xticklabels(metrics, rotation=45, ha='right')

    ax.legend()
    plt.tight_layout()
    plt.show()

# Plot equivalence curve
# Uses the single-game means and standard deviations computed in Step 5.
plot_dqn_equivalence_curve(derandomized_dqn_mean, derandomized_dqn_std, probabilistic_dqn_mean, probabilistic_dqn_std)


## Step 7: Verification and Conclusion
### 7.1: Simulate Multiple Games

In [None]:
def compare_dqn_versions(derandomized_dqn, probabilistic_dqn, env, games=5, max_moves=55, max_runtime=600, temperature=0.5):
    """Play *games* rounds, each a derandomized game followed by a probabilistic one.

    Both players share *env*. *temperature* is only passed to the
    probabilistic player.

    Returns:
        ``(derandomized_results, probabilistic_results)``: two lists holding
        one per-game DataFrame per round.
    """
    derandomized_results, probabilistic_results = [], []

    for _round in range(games):
        derandomized_results.append(
            play_game_dqn_derandomized(derandomized_dqn, env, max_moves, max_runtime)
        )
        probabilistic_results.append(
            play_game_dqn_probabilistic(probabilistic_dqn, env, max_moves, max_runtime, temperature)
        )

    return derandomized_results, probabilistic_results

# Compare the derandomized and probabilistic DQN versions over multiple games
# Runs with the function defaults: 5 games per player, temperature 0.5 for the
# probabilistic player.
derandomized_dqn_results_multiple, probabilistic_dqn_results_multiple = compare_dqn_versions(dqn_abel_derandomized, dqn_abel_probabilistic, env)


### 7.2: Collect and Aggregate Performance Metrics

In [None]:
# Aggregate the results
# The statistics are computed over every move of every game in each list.
derandomized_dqn_mean_multiple, derandomized_dqn_std_multiple, derandomized_dqn_data_multiple = aggregate_dqn_metrics(derandomized_dqn_results_multiple)
probabilistic_dqn_mean_multiple, probabilistic_dqn_std_multiple, probabilistic_dqn_data_multiple = aggregate_dqn_metrics(probabilistic_dqn_results_multiple)

# Display the aggregated metrics
print("Derandomized DQN Mean Metrics (Multiple Games):\n", derandomized_dqn_mean_multiple)
print("Derandomized DQN Std Metrics (Multiple Games):\n", derandomized_dqn_std_multiple)
print("\nProbabilistic DQN Mean Metrics (Multiple Games):\n", probabilistic_dqn_mean_multiple)
print("Probabilistic DQN Std Metrics (Multiple Games):\n", probabilistic_dqn_std_multiple)


### 7.3: Generate and Analyze Equivalence Curves

In [None]:
# Plot equivalence curve
# Same plot as in Step 6, now fed with the multi-game statistics from 7.2.
plot_dqn_equivalence_curve(derandomized_dqn_mean_multiple, derandomized_dqn_std_multiple, probabilistic_dqn_mean_multiple, probabilistic_dqn_std_multiple)


### 7.4: Run Additional Games to Gather More Data

In [None]:
# Run additional games to gather more data
additional_games = 100
derandomized_dqn_results_additional, probabilistic_dqn_results_additional = compare_dqn_versions(dqn_abel_derandomized, dqn_abel_probabilistic, env, games=additional_games, temperature=0.5)

# Aggregate the additional data
derandomized_dqn_mean_additional, derandomized_dqn_std_additional, derandomized_dqn_data_additional = aggregate_dqn_metrics(derandomized_dqn_results_additional)
probabilistic_dqn_mean_additional, probabilistic_dqn_std_additional, probabilistic_dqn_data_additional = aggregate_dqn_metrics(probabilistic_dqn_results_additional)

# Combine the original and additional data
combined_derandomized_dqn_data = pd.concat([derandomized_dqn_data_multiple, derandomized_dqn_data_additional], ignore_index=True)
combined_probabilistic_dqn_data = pd.concat([probabilistic_dqn_data_multiple, probabilistic_dqn_data_additional], ignore_index=True)

# Recalculate the means and standard deviations
# The combined frames still contain the string-valued 'Move' column. On
# pandas >= 2.0 a plain DataFrame.mean()/std() raises TypeError for it, so
# restrict the reduction to numeric columns explicitly.
combined_derandomized_dqn_mean = combined_derandomized_dqn_data.mean(numeric_only=True)
combined_derandomized_dqn_std = combined_derandomized_dqn_data.std(numeric_only=True)
combined_probabilistic_dqn_mean = combined_probabilistic_dqn_data.mean(numeric_only=True)
combined_probabilistic_dqn_std = combined_probabilistic_dqn_data.std(numeric_only=True)

# Display the combined metrics
print("Combined Derandomized DQN Mean Metrics:\n", combined_derandomized_dqn_mean)
print("Combined Derandomized DQN Std Metrics:\n", combined_derandomized_dqn_std)
print("\nCombined Probabilistic DQN Mean Metrics:\n", combined_probabilistic_dqn_mean)
print("Combined Probabilistic DQN Std Metrics:\n", combined_probabilistic_dqn_std)

# Plot combined equivalence curve
plot_dqn_equivalence_curve(combined_derandomized_dqn_mean, combined_derandomized_dqn_std, combined_probabilistic_dqn_mean, combined_probabilistic_dqn_std)


## Step 8: Perform Statistical Tests

In [None]:
from scipy.stats import ttest_ind, f_oneway

def perform_statistical_tests(derandomized_metrics, probabilistic_metrics):
    """Compare the two players metric by metric.

    For every numeric column present in both inputs this runs Welch's t-test
    (``ttest_ind`` with ``equal_var=False``) and a one-way ANOVA (``f_oneway``)
    on the per-move samples.

    Args:
        derandomized_metrics: per-move DataFrame, one column per metric. A
            Series is also accepted and treated as a single observation per
            metric, in which case the statistics are not meaningful.
        probabilistic_metrics: same layout for the probabilistic player.

    Returns:
        ``{metric: {'t_stat', 'p_value_t', 'f_stat', 'p_value_f'}}``. Metrics
        with no variation yield NaN statistics.
    """
    def _numeric_frame(metrics):
        # Accept a Series of values for backward compatibility.
        if isinstance(metrics, pd.Series):
            metrics = metrics.to_frame().T
        return metrics.select_dtypes(include='number')

    derandomized_numeric = _numeric_frame(derandomized_metrics)
    probabilistic_numeric = _numeric_frame(probabilistic_metrics)

    results = {}
    for metric in derandomized_numeric.columns:
        if metric not in probabilistic_numeric.columns:
            continue
        derandomized_sample = derandomized_numeric[metric].dropna()
        probabilistic_sample = probabilistic_numeric[metric].dropna()
        t_stat, p_value_t = ttest_ind(derandomized_sample, probabilistic_sample, equal_var=False)
        f_stat, p_value_f = f_oneway(derandomized_sample, probabilistic_sample)
        results[metric] = {
            't_stat': t_stat,
            'p_value_t': p_value_t,
            'f_stat': f_stat,
            'p_value_f': p_value_f
        }
    return results

# Perform statistical tests
# The tests need the per-move samples, not the already-averaged Series: with
# one mean per metric each "sample" has a single value and no test statistic
# can be computed.
statistical_results = perform_statistical_tests(combined_derandomized_dqn_data, combined_probabilistic_dqn_data)

# Display the results
for metric, result in statistical_results.items():
    print(f"{metric}: t-statistic = {result['t_stat']}, p-value (t-test) = {result['p_value_t']}")
    print(f"{metric}: f-statistic = {result['f_stat']}, p-value (F-test) = {result['p_value_f']}\n")

# Plot statistical analysis results
def plot_statistical_analysis(statistical_results):
    """Draw bar charts of the t-statistic and t-test p-value for each metric.

    Args:
        statistical_results: mapping of metric name to a dict with at least
            the keys ``'t_stat'`` and ``'p_value_t'``.

    The lower chart carries a dashed red line at p = 0.05.
    """
    metrics = list(statistical_results.keys())
    t_stats = [result['t_stat'] for result in statistical_results.values()]
    p_values_t = [result['p_value_t'] for result in statistical_results.values()]
    # Place bars and ticks at explicit numeric positions. Passing the label
    # strings to set_xticks relies on categorical unit conversion of tick
    # locations, which is not guaranteed across matplotlib versions.
    positions = list(range(len(metrics)))

    fig, axs = plt.subplots(2, 1, figsize=(14, 10))

    # Plot t-statistics
    axs[0].bar(positions, t_stats, color='blue')
    axs[0].set_xticks(positions)
    axs[0].set_xticklabels(metrics, rotation=45, ha='right')
    axs[0].set_ylabel('t-statistic')
    axs[0].set_title('t-statistic of Each Metric')

    # Plot p-values (t-test)
    axs[1].bar(positions, p_values_t, color='green')
    axs[1].set_xticks(positions)
    axs[1].set_xticklabels(metrics, rotation=45, ha='right')
    axs[1].axhline(y=0.05, color='r', linestyle='--')
    axs[1].set_ylabel('p-value (t-test)')
    axs[1].set_title('p-value (t-test) of Each Metric')

    plt.tight_layout()
    plt.show()

# Plot statistical analysis results
# Only the t-test values are charted; the F-test values are printed above.
plot_statistical_analysis(statistical_results)


## Step 9: Integrate KAN for Interpretation
### 9.1: Define the KAN Model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class KANModel(nn.Module):
    """Three-layer fully connected network used as the notebook's "KAN" model.

    NOTE(review): despite the name, this is a plain ReLU multilayer perceptron
    (Linear -> ReLU -> Linear -> ReLU -> Linear); it has no learnable
    univariate edge functions.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Attribute names and creation order are kept: other cells read `fc1`,
        # and the order determines how the initial weights are drawn.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a ``(batch, input_size)`` tensor to ``(batch, output_size)``."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        return self.fc3(hidden)

def train_kan_model(kan_model, data_loader, criterion, optimizer, num_epochs=100):
    """Train *kan_model* and return the mean loss of every epoch.

    Args:
        kan_model: the module to train.
        data_loader: iterable of ``(inputs, targets)`` batches.
        criterion: loss function called as ``criterion(outputs, targets)``.
        optimizer: optimizer bound to the model's parameters.
        num_epochs: number of passes over *data_loader*.

    Returns:
        A list with one mean batch loss per epoch (NaN for an epoch that saw
        no batches). The list can be passed to ``plot_kan_training_loss``.
    """
    kan_model.train()  # evaluate_kan_model switches the model to eval mode
    epoch_losses = []
    for epoch in range(num_epochs):
        batch_losses = []
        for inputs, targets in data_loader:
            inputs, targets = inputs.float(), targets.float()
            optimizer.zero_grad()
            outputs = kan_model(inputs)
            # Targets arrive as (N,) while the model emits (N, 1). Without
            # aligning the shapes the loss broadcasts both to (N, N) and
            # compares every prediction with every target.
            if targets.shape != outputs.shape and targets.numel() == outputs.numel():
                targets = targets.reshape(outputs.shape)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            batch_losses.append(loss.item())

        mean_loss = sum(batch_losses) / len(batch_losses) if batch_losses else float('nan')
        epoch_losses.append(mean_loss)
        if epoch % 10 == 0:
            print(f"Epoch [{epoch}/{num_epochs}], Loss: {mean_loss:.4f}")
    return epoch_losses

def evaluate_kan_model(kan_model, data_loader, criterion):
    """Return the mean per-batch loss of *kan_model* over *data_loader*.

    The model is switched to eval mode and stays in it. Gradients are disabled
    while the batches are scored.

    Raises:
        ZeroDivisionError: if *data_loader* has no batches.
    """
    kan_model.eval()
    total_loss = 0
    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs, targets = inputs.float(), targets.float()
            outputs = kan_model(inputs)
            # Align (N,) targets with (N, 1) outputs so the loss is computed
            # element-wise instead of over a broadcast (N, N) grid.
            if targets.shape != outputs.shape and targets.numel() == outputs.numel():
                targets = targets.reshape(outputs.shape)
            total_loss += criterion(outputs, targets).item()
    return total_loss / len(data_loader)


### 9.2: Prepare Data for KAN

In [None]:
from torch.utils.data import TensorDataset, DataLoader

def prepare_kan_data(data_frame, target_column, batch_size=32):
    """Wrap a metrics table in a shuffling ``DataLoader``.

    Args:
        data_frame: table with the target column and the feature columns.
        target_column: name of the column to predict.
        batch_size: number of rows per batch.

    Returns:
        A ``DataLoader`` over ``(features, target)`` pairs with
        ``shuffle=True``. Features are all numeric columns other than
        *target_column*; non-numeric columns such as the UCI move strings are
        left out because they cannot be converted to a tensor.
    """
    targets = data_frame[target_column].values
    feature_frame = data_frame.drop(columns=[target_column]).select_dtypes(include='number')
    inputs = feature_frame.values
    tensor_inputs = torch.tensor(inputs)
    tensor_targets = torch.tensor(targets)
    dataset = TensorDataset(tensor_inputs, tensor_targets)
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    return data_loader


### 9.3: Train and Evaluate KAN Model

In [None]:
# Prepare data for KAN model
target_column = 'Evaluation Score'
# Train on numeric columns only: the combined frame also holds the UCI move
# strings, which can neither be converted to a tensor nor counted as an input
# feature.
kan_features_frame = combined_derandomized_dqn_data.select_dtypes(include='number')
kan_data_loader = prepare_kan_data(kan_features_frame, target_column)

# Initialize and train KAN model
input_size = kan_features_frame.shape[1] - 1  # Exclude target column
hidden_size = 64
output_size = 1

kan_model = KANModel(input_size, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(kan_model.parameters(), lr=0.001)

# Train KAN model
train_kan_model(kan_model, kan_data_loader, criterion, optimizer, num_epochs=100)

# Evaluate KAN model
# The loss is measured on the same loader that was used for training, so it
# is a training-set loss rather than a held-out estimate.
kan_evaluation_loss = evaluate_kan_model(kan_model, kan_data_loader, criterion)
print(f"KAN Model Evaluation Loss: {kan_evaluation_loss:.4f}")


## Step 10: Visualize KAN Model and Results
### 10.1: Visualize KAN Model Weights and Biases

In [None]:
def visualize_kan_weights_biases(kan_model):
    """Print the name and values of every trainable parameter of *kan_model*.

    Parameters with ``requires_grad`` set to false are skipped.
    """
    trainable = (
        (name, param)
        for name, param in kan_model.named_parameters()
        if param.requires_grad
    )
    for name, param in trainable:
        print(f"{name}: {param.data}")

# Dump every trainable parameter tensor of the trained model to stdout.
visualize_kan_weights_biases(kan_model)


### 10.2: Visualize the KAN Model Training Process

In [None]:
import matplotlib.pyplot as plt

def plot_kan_training_loss(losses):
    """Plot a sequence of loss values against its index as a line chart."""
    figure = plt.figure(figsize=(10, 5))
    axes = figure.gca()
    axes.plot(losses, label='Training Loss')
    axes.set_xlabel('Epochs')
    axes.set_ylabel('Loss')
    axes.set_title('KAN Model Training Loss')
    axes.legend()
    plt.show()

# Assuming `losses` is a list of loss values collected during training
# plot_kan_training_loss(losses)


## Step 11: Extract and Interpret the Equivalence Formula
### 11.1: Extract Symbolic Formula

In [None]:
def extract_equivalence_formula(kan_model):
    """Render the first unit of the model's first layer as a linear formula.

    Only row 0 of ``fc1.weight`` and element 0 of ``fc1.bias`` are used, which
    gives a string of the form ``"w0*x0 + w1*x1 + ... + b"``. The remaining
    hidden units, the activations and the later layers are not represented.
    """
    first_layer = kan_model.fc1
    weights = first_layer.weight.data.numpy()
    biases = first_layer.bias.data.numpy()

    terms = [f"{coefficient}*x{index}" for index, coefficient in enumerate(weights[0])]
    return " + ".join(terms) + f" + {biases[0]}"

# The formula text is kept in a module-level name because the summary cell at
# the end of the notebook reads it.
equivalence_formula = extract_equivalence_formula(kan_model)
print(f"Equivalence Formula: {equivalence_formula}")


### 11.2: Generate and Plot Equivalence Curves

In [None]:
def generate_equivalence_curve(kan_model, data_loader):
    """Collect the model's predictions for every sample in *data_loader*.

    Targets are ignored. Each list element is the detached output row for one
    sample, as a NumPy array, in the order the loader yields the samples.
    """
    scores = []
    for batch_inputs, _targets in data_loader:
        predictions = kan_model(batch_inputs.float())
        scores += list(predictions.detach().numpy())
    return scores

# kan_data_loader is built with shuffle=True, so the order of the scores (and
# therefore the shape of the plotted curve) changes from run to run.
equivalence_scores = generate_equivalence_curve(kan_model, kan_data_loader)

def plot_equivalence_curve(equivalence_scores):
    """Plot the given scores against their sample index as a line chart."""
    figure = plt.figure(figsize=(10, 5))
    axes = figure.gca()
    axes.plot(equivalence_scores, label='Equivalence Scores')
    axes.set_xlabel('Samples')
    axes.set_ylabel('Equivalence Score')
    axes.set_title('Equivalence Curve')
    axes.legend()
    plt.show()

# Draw the model outputs collected above, one point per sample.
plot_equivalence_curve(equivalence_scores)


## Step 12: Summarize Results and Insights

In [None]:
def summarize_results(equivalence_score_dqn, kan_evaluation_loss, equivalence_formula):
    """Print the three headline values followed by two fixed closing remarks.

    The closing sentences are static text; they do not depend on the values
    passed in.
    """
    report_lines = (
        f"Equivalence Score for DQN: {equivalence_score_dqn}",
        f"KAN Model Evaluation Loss: {kan_evaluation_loss}",
        f"Extracted Equivalence Formula: {equivalence_formula}",
        "The equivalence score and KAN model insights suggest that the derandomized and probabilistic versions of the DQN-based Abel are closely related.",
        "The extracted formula provides a mathematical representation of this relationship, further validating the potential equivalence between deterministic and probabilistic AI models in the context of chess.",
    )
    for line in report_lines:
        print(line)

# NOTE(review): `equivalence_score_dqn` is not assigned in any cell visible in
# this notebook, so this call raises NameError unless the name is defined
# elsewhere - decide how the score should be computed and assign it first.
summarize_results(equivalence_score_dqn, kan_evaluation_loss, equivalence_formula)


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=f6f51e1a-d40a-494a-8398-36807e7a81cb' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>