#### __Most plots related to datasets with a categorical output variable are shown in the KNN experiment__
#### __They should also be used in other relevant experiments, such as Naive Bayes and Logistic Regression__

### __***PCA***__

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Load the balance-scale data; the 'class name' column is the categorical target.
df = pd.read_csv('data/balance-scale.csv')
print("Shape:", df.shape)
display(df.head())

X = df.drop(columns='class name')
y = df['class name']

# Standardize the features, then keep the first two principal components.
scaled = StandardScaler().fit_transform(X)
pca = PCA(n_components=2).fit_transform(scaled)

# Put the two components next to the target so they can be plotted by class.
final = pd.DataFrame(pca, columns=['PC1', 'PC2'])
final['target'] = y.to_numpy()
# A bare `final.head()` in the middle of a cell is never rendered,
# so show it explicitly.
display(final.head())

# Class balance of the target.
sns.countplot(df, x='class name', hue='class name')
plt.show()
# Pairwise correlations between the features.
sns.heatmap(X.corr(), cmap='Blues')
plt.show()
sns.pairplot(X)
plt.show()
# Samples projected onto the two principal components, coloured by class.
sns.scatterplot(final, x='PC1', y='PC2', hue='target')
plt.show()

### __***KNN***__

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay
from mlxtend.plotting import plot_decision_regions
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial import Voronoi, voronoi_plot_2d

# Load the data; 'ca_cervix' is the target column.
df = pd.read_csv("data/sobar-72.csv")
print("Shape:", df.shape)
display(df.head())

X = df.drop(columns='ca_cervix')
y = df['ca_cervix']

# Standardize, then project onto two principal components so the
# decision regions can be drawn in a plane.
X_scaled = StandardScaler().fit_transform(X)
X_pca = PCA(n_components=2).fit_transform(X_scaled)
X_train, X_test, y_train, y_test = train_test_split(X_pca, y,
                                                    test_size=0.4,
                                                    random_state=4)

# metric can be one of 'euclidean', 'manhattan', 'minkowski'
knn = KNeighborsClassifier(n_neighbors=15, metric='euclidean')
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Class balance of the target.
sns.countplot(df, x='ca_cervix', hue='ca_cervix')
plt.show()

# Pairwise relationships between the first four columns.
sns.pairplot(df.iloc[:, :4])
plt.show()

# NOTE: mlxtend may not be installed in the lab. If it is missing, drop this
# plot (and its import) and rely on the contourf decision surface drawn at
# the end of this cell instead.
# ========================================================
plot_decision_regions(X_train, y_train.values, clf=knn, legend=2)
plt.xlabel('X')
plt.ylabel('Y')
# Read K and the metric from the fitted model so the title cannot go stale
# (it used to say K=5 while the model used 15 neighbours).
plt.title(f'KNN with K={knn.n_neighbors} using {knn.metric.title()} Distance')
plt.show()
# ========================================================
print(classification_report(y_test, y_pred))
print("Accuracy", accuracy_score(y_test, y_pred))
ConfusionMatrixDisplay(confusion_matrix(y_test, y_pred)).plot()
plt.show()

# Elbow plot: cross-validated error on the training split for k = 1..20.
k_values = range(1, 21)
errors = [1 - cross_val_score(KNeighborsClassifier(n_neighbors=k),
                              X_train, y_train).mean() for k in k_values]
plt.plot(k_values, errors, marker='o')
plt.title("Elbow Method for Optimal k")
plt.xlabel("k")
plt.ylabel("Error")
plt.show()

# Voronoi diagram of all projected samples, coloured by class.
vor = Voronoi(X_pca)
voronoi_plot_2d(vor, show_vertices=False)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y)
plt.show()

# Decision boundary without mlxtend: classify every point of a dense grid
# that covers the projected data (with a margin of 1 on each side).
x_min, x_max = X_pca[:, 0].min() - 1, X_pca[:, 0].max() + 1
y_min, y_max = X_pca[:, 1].min() - 1, X_pca[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))

Z = knn.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

plt.figure()
plt.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8)
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, cmap=plt.cm.coolwarm, s=20,
            edgecolors='k')
plt.title('Decision surface')
plt.show()

### __***LDA***__

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import accuracy_score

# Forward slashes work on every OS; the previous backslash path was
# Windows-only and inconsistent with the other cells.
df = pd.read_csv("data/doctor-visits.csv")
print("Shape:", df.shape)
display(df.head())

X = df.drop(columns=['Number of Doctors Visited'])
y = df['Number of Doctors Visited']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,
                                                    random_state=4)

# Learn the 2-component projection from the training split only, then apply
# that same projection to the test split. Calling fit_transform on the test
# data would refit on test labels and put train/test on different axes.
lda = LDA(n_components=2)
X_train = lda.fit_transform(X_train, y_train)
X_test = lda.transform(X_test)

# Classify in the reduced space with a separate estimator so the fitted
# projection in `lda` is not overwritten.
clf = LDA()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print ("Accuracy:", accuracy_score(y_test, y_pred))

# Training samples in the 2-D discriminant space, coloured by class.
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap='viridis')
plt.title('LDA Dimensionality Reduction')
plt.xlabel('LDA Component 1')
plt.ylabel('LDA Component 2')
plt.show()

### __***Linear Regression***__

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error

# seaborn is used for the heatmap below but is not imported by this cell;
# import it here so the cell also runs on a fresh kernel.
import seaborn as sns

df = pd.read_excel("data/real-estate.xlsx")
print("Shape:", df.shape)
display(df.head())

# Simple (single-feature) regression: latitude -> house price per unit area.
X = df[['X5 latitude']].values
y = df['Y house price of unit area'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

model = LinearRegression().fit(X_train, y_train)
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Correlation between all columns of the dataset.
sns.heatmap(df.corr(), annot=True)
plt.show()

# Train and test points together with the fitted line.
plt.figure(figsize=(10,5))
plt.scatter(X_train, y_train, label='Train Data Points', edgecolor='k')
plt.scatter(X_test, y_test, label='Test Data Points', edgecolor='k')
plt.plot(X_train, y_train_pred, color='red',
         label='Linear Regression Line')
plt.legend()
plt.grid()
plt.show()

# Test points, the fitted line, and one dashed vertical segment per test
# point showing its residual (actual value vs. prediction).
plt.figure(figsize=(10,5))
plt.scatter(X_test, y_test, label='Test Data Points', edgecolor='k')
plt.plot(X_test, y_test_pred, color='red',
         label='Linear Regression Line')
plt.vlines(X_test[:, 0], y_test, y_test_pred, colors='blue', linestyles='--')
plt.legend()
plt.grid()
plt.show()

print("Train MSE:", mean_squared_error(y_train, y_train_pred))
print("Train R2 Score:", r2_score(y_train, y_train_pred))
print("Test MSE:", mean_squared_error(y_test, y_test_pred))
print("Test R2 Score:", r2_score(y_test, y_test_pred))

### __***Logistic Regression***__

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, auc
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, roc_curve

# The file has no header row; column 10 holds the class label ('g' or 'h').
df = pd.read_csv('data/magic04.data', header=None)
display(df.head())

# Scale the features to [0, 1] and reduce them to a single principal
# component so the fitted sigmoid can be drawn against one axis.
X = MinMaxScaler().fit_transform(df.drop(columns=[10]))
X = PCA(n_components=1).fit_transform(X)
# LabelEncoder sorts the labels, so 'g' -> 0 and 'h' -> 1.
encoder = LabelEncoder()
y = encoder.fit_transform(df[10])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    stratify=y)
lr = LogisticRegression(max_iter=5000, random_state=0)
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
# Take the tick labels from the encoder so they always match the encoding.
ConfusionMatrixDisplay(cm, display_labels=encoder.classes_).plot()
plt.show()

# Column 1 of predict_proba is the probability of the encoded class 1 ('h').
y_pred_proba = lr.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_pred_proba, pos_label=1)
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], 'k--', label='No Skill')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend()
plt.show()

# The curve may not look like a proper S-curve when model accuracy is low.
# ====================================================
plt.figure(figsize=(10, 6))
# Labels corrected to match the encoding above (g = 0, h = 1).
plt.scatter(X, y, color='red', label='Data points (g = 0, h = 1)')
x_values = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
y_values = lr.predict_proba(x_values)[:, 1]
plt.plot(x_values, y_values, color='blue', label='Logistic Regression S-Curve')
plt.axhline(0.5, color='green', linestyle='--', label='Threshold (0.5)')
plt.title('Logistic Regression: X(PCA Transformed) vs Probability of h')
plt.legend()
plt.grid()
plt.show()

### __***Naive Bayes***__

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, roc_curve
from sklearn.metrics import ConfusionMatrixDisplay, auc, confusion_matrix

# The file has no header row; column 0 is the class label and every other
# column is a categorical feature.
df = pd.read_csv("data/agaricus-lepiota.data", header=None)
display(df.head())

# Integer-encode each column independently (the encoder is refit per column).
column_encoder = LabelEncoder()
df = df.apply(column_encoder.fit_transform)
y = df[0]
X = df.drop(columns=[0])

# Stratified split with the default test size.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, stratify=y
)

nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred = nb.predict(X_test)

# Headline metrics followed by the confusion matrix.
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(classification_report(y_test, y_pred))
matrix = confusion_matrix(y_test, y_pred)
ConfusionMatrixDisplay(matrix).plot()
plt.show()

# ROC curve built from the predicted probability of the encoded class 1.
y_pred_proba = nb.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)
plt.plot(fpr, tpr, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], 'k--', label='No Skill')
plt.title('ROC Curve for Agaricus-Lepiota Classification')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.show()

### __***SVM (Linear & Non-Linear)***__

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import ConfusionMatrixDisplay, roc_curve, auc
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from mlxtend.plotting import plot_decision_regions
from scipy.io import arff

# loadarff returns (records, metadata); only the records are needed here.
data = pd.DataFrame(arff.loadarff("data/rice.arff")[0])
# The class values are mapped from their byte-string form to 0/1.
class_codes = {b'Cammeo': 0, b'Osmancik': 1}
data['Class'] = data['Class'].map(class_codes)

# Reduce the features to two principal components so the boundary is plottable.
feature_frame = data.drop('Class', axis=1)
X = PCA(n_components=2).fit_transform(feature_frame)
y = data['Class']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then reuse it for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# kernel options: 'linear', 'rbf', 'poly', 'sigmoid'
kernel = 'linear'

# gamma is passed as 'auto' for the linear kernel and 'scale' for the others
# ('scale' or 'auto' are both valid choices for rbf, poly and sigmoid).
gamma = 'scale' if kernel != 'linear' else 'auto'
model = SVC(kernel=kernel, C=1, degree=5, gamma=gamma, probability=True)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
print(f"\nSVM ({kernel}) Accuracy: {test_accuracy:.2f}")
print(classification_report(y_test, y_pred))

# Decision regions over the (scaled) training points.
plot_decision_regions(X_train, y_train.values, clf=model, legend=2)
plt.title(f'Decision Boundary ({kernel})')
plt.show()

ConfusionMatrixDisplay.from_predictions(y_test, y_pred, display_labels=[0, 1])
plt.title(f'Confusion Matrix ({kernel})')
plt.show()

# ROC curve from the predicted probability of class 1.
proba = model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, proba)
area = auc(fpr, tpr)
plt.plot(fpr, tpr, label=f'{kernel} (AUC = {area:.2f})')
plt.plot([0, 1], [0, 1], 'k--')
plt.title(f'ROC Curve ({kernel})')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.show()

### __***Feed Forward - Classification (Output Categorical)***__

In [None]:
import warnings
# Install a global "ignore" filter so that no Python warnings are printed
# from here on (presumably to keep the training output readable).
warnings.filterwarnings('ignore')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score 
from sklearn.metrics import ConfusionMatrixDisplay, roc_curve, auc
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import regularizers

# Load the data; 'ca_cervix' is the target column.
df = pd.read_csv("data/sobar-72.csv")
print("Shape:",df.shape)
display(df.head())

X = df.drop(columns=['ca_cervix'])
# One-hot encode the two classes to match the 2-unit softmax output layer.
y = tf.keras.utils.to_categorical(df['ca_cervix'], num_classes=2)

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# Change l2 (Ridge) to l1 for Lasso regularization; drop the
# kernel_regularizer arguments entirely to train without regularization.
model = Sequential([
    # Derive the input width from the data instead of hard-coding 19, so the
    # cell keeps working if the number of feature columns changes.
    Dense(128, activation='relu', input_shape=(X.shape[1],),
          kernel_regularizer=regularizers.l2(0.001)),
    Dropout(0.3),
    Dense(64, activation='relu',
          kernel_regularizer=regularizers.l2(0.001)),
    Dropout(0.3),
    Dense(2, activation='softmax') # softmax output makes it classification
])

model.compile(optimizer=Adam(), loss='categorical_crossentropy',
              metrics=['accuracy'])

# 20% of the training split is held out for validation during training.
history = model.fit(x_train, y_train, epochs=50, batch_size=32,
                    validation_split=0.2, verbose=1)

def plot_history(history):
    """Plot training and validation loss/accuracy curves side by side.

    Args:
        history: Object whose ``history`` attribute is a dict holding the
            per-epoch sequences ``'loss'``, ``'val_loss'``, ``'accuracy'``
            and ``'val_accuracy'`` (e.g. the value returned by ``model.fit``
            when validation data and the accuracy metric are enabled).
    """
    plt.figure(figsize=(12, 5))
    panels = (('loss', 'Loss'), ('accuracy', 'Accuracy'))
    for position, (metric, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[metric], label=f'Training {title}')
        plt.plot(history.history[f'val_{metric}'], label=f'Validation {title}')
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(title)
        # Without this call the curve labels above are never rendered.
        plt.legend()
    plt.tight_layout()
    plt.show()

plot_history(history)

# Evaluate while y_test is still one-hot encoded.
loss, accuracy = model.evaluate(x_test, y_test)
print(f'Test accuracy: {accuracy:.4f}, Test loss: {loss:.4f}')

# Run inference once and reuse the class probabilities for both the hard
# predictions and the ROC curve (previously predict() was called twice).
y_proba = model.predict(x_test)
y_pred = np.argmax(y_proba, axis=1)
# Convert the one-hot test labels back to class indices for sklearn metrics.
y_test = np.argmax(y_test, axis=1)

print("Classification Report:")
print(classification_report(y_test, y_pred))
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
ConfusionMatrixDisplay.from_predictions(y_test, y_pred, cmap='Blues')
plt.show()

# ROC curve from the predicted probability of class 1.
y_pred_proba = y_proba[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
plt.plot(fpr, tpr, label=f'ROC curve (area = {auc(fpr, tpr):.2f})')
plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.show()

### __***Feed Forward - Regression (Output Numerical)***__

In [None]:
import warnings
# Install a global "ignore" filter so that no Python warnings are printed
# from here on (presumably to keep the training output readable).
warnings.filterwarnings('ignore')

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# pandas is used below but is not imported by this cell; import it here so
# the cell also runs on a fresh kernel.
import pandas as pd

df = pd.read_csv('data/california.csv')
display(df.head())

X = df.drop(columns='target')
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

# Fit the scaler on the training split only, then reuse it for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Simple FNN/MLP model
model = Sequential([
    Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dense(1) # single linear output unit makes it regression
])

model.compile(optimizer='adam', loss='mse')

# 20% of the training split is held out for validation during training.
history = model.fit(X_train, y_train, epochs=25, validation_split=0.2, verbose=1)

# predict() returns shape (n, 1); flatten to compare against y_test.
y_pred = model.predict(X_test).flatten()
mse = mean_squared_error(y_test, y_pred)
print(f"Regression MSE on California Housing dataset: {mse:.4f}")

plt.plot(history.history['loss'], label='Train Loss')
plt.plot(history.history['val_loss'], label='Val Loss')
plt.title('Regression Loss')
plt.xlabel('Epochs')
plt.ylabel('MSE Loss')
plt.legend()

plt.tight_layout()
plt.show()

### __***MLP (use the FNN code written from scratch above, or learn this one)***__

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.neural_network import MLPClassifier

# matplotlib is used below but is not imported by this cell; import it here
# so the cell also runs on a fresh kernel.
import matplotlib.pyplot as plt

df = pd.read_csv('data/HeartDiseaseTrain-Test.csv')
display(df.head())

# Integer-encode every feature column independently.
X = df.drop('target', axis=1)
X = X.apply(LabelEncoder().fit_transform)

# Normalize each feature by its maximum. A column whose maximum is 0 (a
# constant column after encoding) is divided by 1 instead, so it stays 0
# rather than turning into NaN and breaking the classifier.
X = X / X.max().replace(0, 1)
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=1)

# Using 'relu' activation; others to try: 'identity', 'tanh', 'logistic'
activation = 'relu'
hidden_layers = (8, 8, 8)

print(f"\nActivation: {activation}, Hidden Layers: {hidden_layers}")
model = MLPClassifier(hidden_layer_sizes=hidden_layers, activation=activation,
                      solver='adam', max_iter=500, random_state=42)
model.fit(X_train, y_train)

# Report on the training split first, then on the held-out test split.
preds_train = model.predict(X_train)
print("\nTrain Results:")
print(classification_report(y_train, preds_train, zero_division=0))
ConfusionMatrixDisplay.from_predictions(y_train, preds_train)
plt.show()

preds_test = model.predict(X_test)
print("\nTest Results:")
print(classification_report(y_test, preds_test, zero_division=0))
ConfusionMatrixDisplay.from_predictions(y_test, preds_test)
plt.show()

### __***CNN***__

In [None]:
import warnings
# Install a global "ignore" filter so that no Python warnings are printed
# from here on (presumably to keep the training output readable).
warnings.filterwarnings('ignore')

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from keras.datasets import cifar10
from keras.utils import to_categorical
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.preprocessing import label_binarize

# CIFAR-10 images with their integer class labels.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Convert pixels to float32 and divide by 255.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

# One-hot encode the 10 classes for categorical cross-entropy.
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Two conv/pool stages followed by a softmax classifier.
cnn_layers = [
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=(32, 32, 3)),
    layers.MaxPooling2D(),
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(10, activation="softmax"),
]
model = models.Sequential(cnn_layers)

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

history = model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

# Turn probabilities / one-hot vectors back into class indices.
y_pred = model.predict(X_test)
y_pred_classes = tf.argmax(y_pred, axis=1)
y_test_classes = tf.argmax(y_test, axis=1)

conf_matrix = confusion_matrix(y_test_classes, y_pred_classes)
matrix_display = ConfusionMatrixDisplay(conf_matrix)
matrix_display.plot(cmap='Blues')
plt.title('Confusion Matrix')
plt.show()

# Accuracy (left) and loss (right) curves for training vs. validation.
plt.figure(figsize=(12, 5))
curves = (('accuracy', 'Accuracy'), ('loss', 'Loss'))
for position, (metric, caption) in enumerate(curves, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label=f'Training {caption}')
    plt.plot(history.history[f'val_{metric}'], label=f'Validation {caption}')
    plt.title(f'{caption} Curve')
    plt.xlabel('Epochs')
    plt.ylabel(caption)
    plt.legend()

plt.tight_layout()
plt.show()

### __***CNN Another Example***__

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing import image
import os

# matplotlib is used below but is not imported by this cell; import it here
# so the cell also runs on a fresh kernel.
import matplotlib.pyplot as plt

dataset_path = 'Pistachio_Image_Dataset'

img_height, img_width = 180, 180
batch_size = 32

# Same seed and validation_split for both calls so the training and
# validation subsets do not overlap.
train_ds = tf.keras.utils.image_dataset_from_directory(
    dataset_path,
    validation_split=0.2,
    subset="training",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)

val_ds = tf.keras.utils.image_dataset_from_directory(
    dataset_path,
    validation_split=0.2,
    subset="validation",
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size)

# Take the class names from the dataset (inferred from the sub-directory
# names) so that label indices and names can never drift apart.
class_names = train_ds.class_names

model = models.Sequential([
    layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
    layers.Conv2D(32, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    # One logit per class; softmax is applied by the loss (from_logits=True).
    layers.Dense(len(class_names))
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

epochs = 5
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs
)

# Classify a single image, resized to the resolution the model was trained on.
img_path = os.path.join(dataset_path, 'Siirt_Pistachio', 'siirt (11).jpg')
img = image.load_img(img_path, target_size=(img_height, img_width))

plt.imshow(img)
plt.title("Input Image")
plt.axis("off")
plt.show()

img_array = image.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)  # add the batch dimension

# The model outputs logits; convert them to probabilities for reporting.
predictions = model.predict(img_array)
score = tf.nn.softmax(predictions[0])

predicted_class = class_names[np.argmax(score)]
confidence = 100 * np.max(score)

print(f"Image most likely belongs to '{predicted_class}' with {confidence:.2f}% confidence.")

### __***HMM - Viterbi***__

In [None]:
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from hmmlearn.hmm import CategoricalHMM

# Model definition: observable symbols, hidden states and their probabilities.
visibleStates = ['early', 'mid', 'late']
hiddenStates = ['Genuine User', 'Intruder']
hiddenInitial = [0.9, 0.1]

# Row i holds the transition probabilities out of hiddenStates[i].
hiddenTransition = np.array([[0.7, 0.3],
                             [0.4, 0.6]])

# Row i holds the emission probabilities of hiddenStates[i] over visibleStates.
emissionMatrix = np.array([[0.8, 0.1, 0.1],
                           [0.1, 0.3, 0.6]])

# Draw the model as a weighted directed graph: hidden -> hidden edges carry
# transition probabilities, hidden -> visible edges carry emission probabilities.
graph = nx.DiGraph()
graph.add_nodes_from(visibleStates + hiddenStates)

for source, transitionRow in zip(hiddenStates, hiddenTransition):
    for target, probability in zip(hiddenStates, transitionRow):
        graph.add_edge(source, target, weight=probability)

for source, emissionRow in zip(hiddenStates, emissionMatrix):
    for symbol, probability in zip(visibleStates, emissionRow):
        graph.add_edge(source, symbol, weight=probability)

pos = nx.circular_layout(graph)
nx.draw(graph, pos, with_labels=True, node_size=1500)
edgeWeights = nx.get_edge_attributes(graph, 'weight')
nx.draw_networkx_edge_labels(graph, pos, edge_labels=edgeWeights)
plt.show()

# Encode the observation sequence as a column of symbol indices.
observations = ['early', 'early', 'late', 'mid', 'early', 'late']
observationMap = {symbol: index for index, symbol in enumerate(visibleStates)}
encoded = [observationMap[o] for o in observations]
mappedSequence = np.array(encoded).reshape(-1, 1)

# Set the parameters directly instead of estimating them with fit().
model = CategoricalHMM(n_components=2)
model.startprob_ = hiddenInitial
model.transmat_ = hiddenTransition
model.emissionprob_ = emissionMatrix

# Viterbi decoding: most likely hidden-state path and its log probability.
logValue, bestPath = model.decode(mappedSequence, algorithm="viterbi")

decodedPath = [hiddenStates[state] for state in bestPath]

result = pd.DataFrame({
    'Observation': observations,
    'Predicted State': decodedPath
})

print("\nDecoded Path with Observations:")
display(result)

print("\nLog Probability of Best Path:", logValue)