In [1]:
# importing necessary libraries
import numpy as np
from scipy.stats import norm

In [32]:
# data storage
data = np.array ([
    [1, 1, 35, 1, 0, 'normal'],
    [2, 1, 30, 1, 1, 'normal'],
    [3, 0, 32, 1, 0, 'tumor'],
    [4, -1, 20, 1, 0, 'tumor'],
    [5, -1, 15, 0, 0, 'tumor'],
    [6, -1, 13, 0, 1, 'normal'],
    [7, 0, 11, 0, 1, 'tumor'],
    [8, 1, 22, 1, 0, 'normal'],
    [9, 1, 14, 0, 0, 'tumor'],
    [10, -1, 24, 0, 0, 'tumor'],
    [11, 1, 23, 0, 1, 'tumor'],
    [12, 0, 25, 1, 1, 'tumor'],
    [13, 0, 30, 0, 0, 'tumor'],
    [14, -1, 21, 1, 1, 'normal'],
    [15, 1, 12, 1, 1, 'unknown']
])

In [33]:
# Separate data and labels
X = data[:, 1:-1].astype(float)  # Features excluding Sample and Class columns
y = data[:, -1]

In [34]:
# Separate data for prediction (row 15)
X_pred = X[-1].reshape(1, -1)

In [36]:
# Remove the row for prediction from the training data
X = X[:-1]
y = y[:-1]

In [37]:
# Create a dictionary to store counts of feature values for each class
feature_counts_by_class = {}
class_labels = np.unique(y)

# skip continuous features
continuous_feature_indices = [1] 

# Initialize the dictionary with zeros
for class_label in class_labels:
    feature_counts_by_class[class_label] = {}

for feature_index in range(X.shape[1]):  # Loop over each feature
    for class_label in class_labels:  # Loop over each class
        feature_counts_by_class[class_label][feature_index] = {}

        # Count occurrences of each feature value for the current class
        for feature_value in np.unique(X[:, feature_index]):
            count = np.sum((X[:, feature_index] == feature_value) & (y == class_label))
            feature_counts_by_class[class_label][feature_index][feature_value] = count

# Display the counts for each feature value given each class
for class_label, feature_counts in feature_counts_by_class.items():
    print(f"Class: {class_label}")
    for feature_index, value_counts in feature_counts.items():
        print(f"  Feature {feature_index}: {value_counts}")



Class: normal
  Feature 0: {-1.0: 1, 0.0: 0, 1.0: 3}
  Feature 1: {11.0: 0, 13.0: 1, 14.0: 0, 15.0: 0, 20.0: 0, 22.0: 1, 23.0: 0, 24.0: 0, 25.0: 0, 30.0: 1, 32.0: 0, 35.0: 1}
  Feature 2: {0.0: 1, 1.0: 3}
  Feature 3: {0.0: 2, 1.0: 2}
Class: tumor
  Feature 0: {-1.0: 3, 0.0: 4, 1.0: 2}
  Feature 1: {11.0: 1, 13.0: 0, 14.0: 1, 15.0: 1, 20.0: 1, 22.0: 0, 23.0: 1, 24.0: 1, 25.0: 1, 30.0: 1, 32.0: 1, 35.0: 0}
  Feature 2: {0.0: 6, 1.0: 3}
  Feature 3: {0.0: 6, 1.0: 3}


In [41]:

continuous_data = feature_counts_by_class['normal'][1]
continuous_normal = [k for k,v in continuous_data.items() if v == 1]
continuous_tumor = [k for k,v in continuous_data.items() if v == 0]
continuous_data

{11.0: 0,
 13.0: 1,
 14.0: 0,
 15.0: 0,
 20.0: 0,
 22.0: 1,
 23.0: 0,
 24.0: 0,
 25.0: 0,
 30.0: 1,
 32.0: 0,
 35.0: 1}

In [39]:
continuous_tumor

[11.0, 14.0, 15.0, 20.0, 23.0, 24.0, 25.0, 32.0]

In [40]:
continuous_normal

[13.0, 22.0, 30.0, 35.0]