In [15]:
import numpy as np
import matplotlib.pyplot as plt

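# Data loading is disabled in this cell: `true_labels`, `predictions` and `pred_prob`
# must already exist in the kernel (e.g. loaded by an earlier cell) before it is run.
# To load them here instead, uncomment the following lines: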
# true_labels = np.load('true_label.npy', allow_pickle=True)
# predictions = np.load('prediction.npy', allow_pickle=True)
# pred_prob = np.load('pred_prob.npy', allow_pickle=True)


def inspect_array(name, arr):
    """Print a short diagnostic summary of an array.

    Parameters
    ----------
    name : str
        Label shown in the header line of the report.
    arr : array-like exposing ``shape``, ``dtype``, ``ndim`` and slicing
        The array to describe (e.g. a ``numpy.ndarray``).

    Returns
    -------
    None
        The report is written to standard output only.
    """
    report = (
        f"Details for {name}:",
        f"Shape: {arr.shape}",
        f"Data Type: {arr.dtype}",
        f"Number of Dimensions: {arr.ndim}",
        f"First few elements: {arr[:2]}",  # widen the slice to preview more items
        "--------------------------------------------------",
    )
    for line in report:
        print(line)



def calculate_transition_matrix(predictions, n_states=2):
    """Estimate a first-order state transition matrix from label sequences.

    Every adjacent pair ``(sequence[i], sequence[i + 1])`` in every sequence is
    counted, and each row of the count matrix is normalised to sum to one.

    Parameters
    ----------
    predictions : iterable of integer sequences
        Each item is one sequence of state labels in ``range(n_states)``.
        Sequences with fewer than two elements contribute no transitions.
    n_states : int, optional
        Number of distinct states (default 2, the original behaviour).

    Returns
    -------
    numpy.ndarray, shape (n_states, n_states)
        ``result[a, b]`` is the observed fraction of transitions leaving state
        ``a`` that go to state ``b``. A state that never appears as the source
        of a transition gets a uniform row instead of the NaN row that a plain
        0/0 division would produce, so the result is always row-stochastic.
    """
    transition_counts = np.zeros((n_states, n_states), dtype=int)

    for sequence in predictions:
        states = np.asarray(sequence).astype(int).ravel()
        # np.add.at accumulates repeated (from, to) pairs; a plain fancy-indexed
        # "+=" would only count each distinct pair once.
        np.add.at(transition_counts, (states[:-1], states[1:]), 1)

    row_totals = transition_counts.sum(axis=1, keepdims=True)

    # Start from the uniform fallback and overwrite only rows that have data.
    transition_probs = np.full((n_states, n_states), 1.0 / n_states)
    np.divide(transition_counts, row_totals, out=transition_probs, where=row_totals > 0)

    return transition_probs


def calculate_initial_probabilities(predictions, n_states=2):
    """Estimate the distribution of the first state across sequences.

    Parameters
    ----------
    predictions : iterable of integer sequences
        Each item is one sequence of state labels in ``range(n_states)``; only
        its first element is used. Empty sequences are skipped because they
        have no first state.
    n_states : int, optional
        Number of distinct states (default 2, the original behaviour).

    Returns
    -------
    numpy.ndarray, shape (n_states,)
        Fraction of non-empty sequences starting in each state. When there is
        no non-empty sequence at all, a uniform distribution is returned
        instead of the NaN vector a 0/0 division would give.
    """
    initial_counts = np.zeros(n_states, dtype=int)

    for sequence in predictions:
        if len(sequence) == 0:
            continue
        initial_counts[sequence[0]] += 1

    # Normalise by the number of sequences actually counted so the result
    # still sums to one when empty sequences were skipped.
    counted = initial_counts.sum()
    if counted == 0:
        return np.full(n_states, 1.0 / n_states)

    return initial_counts / counted


# Forward algorithm
def forward(sequence, initial_prob, transition_matrix, emission_probs):
    """Compute the (unscaled) forward variables of a hidden Markov model.

    Parameters
    ----------
    sequence : sized
        The observation sequence; only its length is used here.
    initial_prob : array-like, shape (n_states,)
        Probability of each state at the first time step.
    transition_matrix : array-like, shape (n_states, n_states)
        ``transition_matrix[i, j]`` is the probability of moving from state
        ``i`` to state ``j``.
    emission_probs : array-like, shape (len(sequence), n_states)
        ``emission_probs[t, j]`` is the weight of state ``j`` at time ``t``.

    Returns
    -------
    numpy.ndarray, shape (len(sequence), n_states)
        ``alpha[t, j]``: joint weight of the observations up to ``t`` and being
        in state ``j`` at ``t``. An empty sequence yields an empty
        ``(0, n_states)`` array instead of raising ``IndexError``.

    Notes
    -----
    The recursion is not rescaled, so values shrink with sequence length.
    NOTE(review): very long sequences may underflow to zero — confirm whether
    per-step scaling is needed for the data at hand.
    """
    initial_prob = np.asarray(initial_prob, dtype=float)
    transition_matrix = np.asarray(transition_matrix, dtype=float)

    n_steps = len(sequence)
    alpha = np.zeros((n_steps, initial_prob.shape[0]))
    if n_steps == 0:
        return alpha

    alpha[0] = initial_prob * emission_probs[0]

    for t in range(1, n_steps):
        # Column j of the product holds alpha[t-1, i] * A[i, j]; summing over
        # the source state i gives the predicted weight of each target state.
        predicted = np.sum(alpha[t - 1][:, None] * transition_matrix, axis=0)
        alpha[t] = predicted * emission_probs[t]

    return alpha

# Backward algorithm
def backward(sequence, transition_matrix, emission_probs):
    """Compute the (unscaled) backward variables of a hidden Markov model.

    Parameters
    ----------
    sequence : sized
        The observation sequence; only its length is used here.
    transition_matrix : array-like, shape (n_states, n_states)
        ``transition_matrix[i, j]`` is the probability of moving from state
        ``i`` to state ``j``.
    emission_probs : array-like, shape (len(sequence), n_states)
        ``emission_probs[t, j]`` is the weight of state ``j`` at time ``t``.

    Returns
    -------
    numpy.ndarray, shape (len(sequence), n_states)
        ``beta[t, j]``: weight of the observations after ``t`` given state
        ``j`` at ``t``; the last row is all ones. An empty sequence yields an
        empty ``(0, n_states)`` array instead of raising ``IndexError``.
    """
    transition_matrix = np.asarray(transition_matrix, dtype=float)

    n_steps = len(sequence)
    beta = np.zeros((n_steps, transition_matrix.shape[0]))
    if n_steps == 0:
        return beta

    beta[-1] = 1

    for t in range(n_steps - 2, -1, -1):
        # Row j of the product holds A[j, k] * e[t+1, k] * beta[t+1, k];
        # summing over the successor state k gives beta[t, j].
        beta[t] = np.sum(transition_matrix * emission_probs[t + 1] * beta[t + 1], axis=1)

    return beta

# Predict labels using Forward-Backward algorithm
def predict_labels(sequence, initial_prob, transition_matrix, emission_probs):
    """Return the most probable state at every time step of one sequence.

    The forward and backward variables are multiplied element-wise and divided
    by the total of the final forward row; the state with the largest resulting
    value is chosen independently for each time step.

    Parameters
    ----------
    sequence : sized
        Observation sequence, passed through to ``forward`` and ``backward``.
    initial_prob, transition_matrix, emission_probs
        HMM parameters, passed through to ``forward`` / ``backward``.

    Returns
    -------
    numpy.ndarray, shape (len(sequence),)
        Index of the highest-scoring state for each time step.
    """
    fwd = forward(sequence, initial_prob, transition_matrix, emission_probs)
    bwd = backward(sequence, transition_matrix, emission_probs)

    # Total weight of the whole sequence, read off the last forward row.
    likelihood = np.sum(fwd[-1])
    posterior = (fwd * bwd) / likelihood

    return posterior.argmax(axis=1)

# Calculate accuracy
def calculate_accuracy(predicted_labels, true_labels):
    """Return the fraction of positions where prediction and truth agree.

    Both inputs are converted to flat NumPy arrays first. Without that, two
    plain lists would compare as a single boolean (giving 0.0 or 1.0), and a
    ``(T,)`` vs ``(T, 1)`` pair would silently broadcast to a ``(T, T)`` grid.

    Parameters
    ----------
    predicted_labels : array-like
        Predicted labels; flattened before comparison.
    true_labels : array-like
        Reference labels; flattened before comparison.

    Returns
    -------
    numpy.float64
        Share of matching positions, or NaN when both inputs are empty.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of labels.
    """
    predicted = np.asarray(predicted_labels).ravel()
    actual = np.asarray(true_labels).ravel()

    if predicted.size != actual.size:
        raise ValueError(
            f"Label count mismatch: {predicted.size} predicted vs {actual.size} true"
        )
    if predicted.size == 0:
        # Same value np.mean would give for empty input, without the warning.
        return np.float64(np.nan)

    return np.mean(predicted == actual)




# Shuffle the sequences and hold out a validation split.
# The global NumPy RNG is seeded so the split is reproducible on re-run.
SEED = 42
TRAIN_FRACTION = 0.7
np.random.seed(SEED)

indices = np.arange(len(predictions))
np.random.shuffle(indices)

train_size = int(TRAIN_FRACTION * len(predictions))
train_indices = indices[:train_size]
valid_indices = indices[train_size:]

train_predictions = predictions[train_indices]
train_true_labels = true_labels[train_indices]
valid_predictions = predictions[valid_indices]
valid_true_labels = true_labels[valid_indices]
valid_pred_prob = pred_prob[valid_indices]

# Estimate the HMM parameters from the training split only, so the validation
# sequences scored below play no part in fitting them (this matches the
# per-trial estimation done later in this cell).
transition_matrix = calculate_transition_matrix(train_predictions)
initial_probabilities = calculate_initial_probabilities(train_predictions)

# Predict labels and accumulate the per-sequence accuracy over the validation set
total_accuracy = 0
for i in range(len(valid_predictions)):
    predicted_labels = predict_labels(valid_predictions[i], initial_probabilities, transition_matrix, valid_pred_prob[i])
    accuracy = calculate_accuracy(predicted_labels, valid_true_labels[i].flatten())
    total_accuracy += accuracy

def calculate_originaccuracy(predictions, true_labels):
    """Pooled per-element accuracy of raw predictions over many sequences.

    Parameters
    ----------
    predictions : iterable of sequences
        One predicted-label sequence per item.
    true_labels : iterable of numpy.ndarray
        Matching reference labels; each array is flattened with ``ravel()``
        before being compared with its prediction sequence.

    Returns
    -------
    float
        Total number of matching elements divided by the total number of
        predicted elements across all sequences.
    """
    matches_per_sequence = []
    lengths = []

    for guess, truth in zip(predictions, true_labels):
        matches_per_sequence.append(np.sum(guess == truth.ravel()))
        lengths.append(len(guess))

    return sum(matches_per_sequence) / sum(lengths)

# Mean of the per-sequence accuracies accumulated over the validation sequences,
# and the pooled accuracy of the raw predictions over every sequence.
average_accuracy = total_accuracy / len(valid_predictions)
accuracy = calculate_originaccuracy(predictions=predictions, true_labels=true_labels)

print(f"Average Accuracy: {average_accuracy * 100:.2f}%")
print(f"Accuracy of original predictions: {accuracy * 100:.2f}%")


def evaluate_predictions(predicted_labels, true_labels):
    """Print and return binary classification metrics (positive class = 1).

    Both inputs are converted to flat NumPy arrays first. Without that, plain
    lists would make ``labels == 1`` a single ``False`` and every count would
    silently be zero, and column-vector labels would broadcast against a flat
    prediction vector.

    Parameters
    ----------
    predicted_labels : array-like of 0/1
        Predicted labels; flattened before use.
    true_labels : array-like of 0/1
        Reference labels; flattened before use.

    Returns
    -------
    dict
        Keys ``"TP"``, ``"TN"``, ``"FP"``, ``"FN"`` (ints) and ``"precision"``,
        ``"recall"``, ``"f1_score"``. A ratio whose denominator is zero is
        reported as 0.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of labels.
    """
    predicted = np.asarray(predicted_labels).ravel()
    actual = np.asarray(true_labels).ravel()
    if predicted.size != actual.size:
        raise ValueError(
            f"Label count mismatch: {predicted.size} predicted vs {actual.size} true"
        )

    TP = int(np.sum((predicted == 1) & (actual == 1)))
    TN = int(np.sum((predicted == 0) & (actual == 0)))
    FP = int(np.sum((predicted == 1) & (actual == 0)))
    FN = int(np.sum((predicted == 0) & (actual == 1)))

    precision = TP / (TP + FP) if TP + FP != 0 else 0
    recall = TP / (TP + FN) if TP + FN != 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall != 0 else 0

    print(f"True Positives (TP): {TP}")
    print(f"True Negatives (TN): {TN}")
    print(f"False Positives (FP): {FP}")
    print(f"False Negatives (FN): {FN}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1_score:.4f}")

    return {
        "TP": TP,
        "TN": TN,
        "FP": FP,
        "FN": FN,
        "precision": precision,
        "recall": recall,
        "f1_score": f1_score,
    }



# Metrics for the forward-backward output: smooth every validation sequence,
# then pool all time steps into one flat vector per side before scoring.
smoothed_per_sequence = []
for seq, prob in zip(valid_predictions, valid_pred_prob):
    smoothed_per_sequence.append(
        predict_labels(seq, initial_probabilities, transition_matrix, prob)
    )
all_valid_predicted_labels = np.concatenate(smoothed_per_sequence)

flat_valid_truth = [label.ravel() for label in valid_true_labels]
all_valid_true_labels = np.concatenate(flat_valid_truth)

evaluate_predictions(all_valid_predicted_labels, all_valid_true_labels)


# Metrics for the raw predictions, pooled over every sequence
flat_truth = [label.ravel() for label in true_labels]
all_predicted_labels = np.concatenate(predictions)
all_true_labels = np.concatenate(flat_truth)

print("Original Predictions:")
evaluate_predictions(all_predicted_labels, all_true_labels)

# ... [rest of the imports and functions]

# Repeat forward-backward smoothing N_ITERATIONS times, re-estimating the HMM
# parameters from the smoothed labels after each pass, and record the accuracy.
N_ITERATIONS = 10
accuracies = []
all_valid_true_labels = np.concatenate([label.ravel() for label in valid_true_labels])

for iteration in range(N_ITERATIONS):
    # Smooth each validation sequence separately and keep them separate:
    # the re-estimation below needs the sequence boundaries.
    smoothed_sequences = [
        predict_labels(seq, initial_probabilities, transition_matrix, prob)
        for seq, prob in zip(valid_predictions, valid_pred_prob)
    ]
    all_valid_predicted_labels = np.concatenate(smoothed_sequences)

    # Calculate accuracy for this iteration over all pooled time steps
    accuracy = calculate_accuracy(all_valid_predicted_labels, all_valid_true_labels)
    accuracies.append(accuracy)

    # Re-estimate from the per-sequence labels. Passing one concatenated
    # pseudo-sequence would count transitions across sequence boundaries and
    # reduce the initial distribution to a single observed start state.
    transition_matrix = calculate_transition_matrix(smoothed_sequences)
    initial_probabilities = calculate_initial_probabilities(smoothed_sequences)

# Plot the accuracy changes over the iterations
plt.plot(accuracies)
plt.xlabel('Iteration')
plt.ylabel('Accuracy')
plt.title(f'Accuracy Change Over {N_ITERATIONS} Iterations')
plt.grid(True)
plt.show()

trial_accuracies = []

# Run 100 independent trials: each draws a fresh random 70/30 split, fits the
# HMM parameters on the training part and scores the validation part.
for trial in range(100):
    # Random permutation of the sequence indices for this trial
    indices = np.arange(len(predictions))
    np.random.shuffle(indices)

    train_size = int(0.7 * len(predictions))
    train_indices, valid_indices = indices[:train_size], indices[train_size:]

    train_predictions, valid_predictions = predictions[train_indices], predictions[valid_indices]
    train_true_labels, valid_true_labels = true_labels[train_indices], true_labels[valid_indices]
    valid_pred_prob = pred_prob[valid_indices]

    # HMM parameters come from the training sequences only
    transition_matrix = calculate_transition_matrix(train_predictions)
    initial_probabilities = calculate_initial_probabilities(train_predictions)

    # Sum the per-sequence accuracies over the validation split
    total_accuracy = 0
    for sequence, probabilities, labels in zip(valid_predictions, valid_pred_prob, valid_true_labels):
        predicted_labels = predict_labels(sequence, initial_probabilities, transition_matrix, probabilities)
        accuracy = calculate_accuracy(predicted_labels, labels.flatten())
        total_accuracy += accuracy

    average_accuracy = total_accuracy / len(valid_predictions)
    trial_accuracies.append(average_accuracy)

# One point per trial: mean validation accuracy of that trial
plt.plot(trial_accuracies)
plt.xlabel('Trial')
plt.ylabel('Accuracy')
plt.title('Accuracy for 100 Trials of Forward-Backward Algorithm')
plt.grid(True)
plt.show()


The output of smoothing filter: 
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


ValueError: could not broadcast input array from shape (2,3) into shape (2,2)

In [7]:
gamma

array([[0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5, 0.5],
       [0.5,