In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow import keras
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix, matthews_corrcoef
from keras.models import Sequential
from keras.layers import Dense

from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import BatchNormalization, Flatten, Dense
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import regularizers

In [None]:
# Load the raw dataset from a CSV file located in the working directory.
# Later cells read columns named 'x<N>' / 'y<N>' from this frame to compute joint angles.
df = pd.read_csv('8taijiquan.csv')


In [None]:
# Print the DataFrame summary (columns, non-null counts, dtypes) to spot missing data
df.info()

In [None]:
# Drop every row that contains at least one missing value.
# inplace=True mutates `df` itself, so outputs shown by earlier cells become stale after this runs.
df.dropna(inplace=True)

In [None]:
# Per-column count of remaining nulls; every count should be 0 once the rows with missing values are dropped
df.isnull().sum()

## Calculate Angle Keypoints

In [None]:
def get_angle(frame, keypoint_1, keypoint_2, keypoint_3, keypoint_4):
    """Return the angle in degrees at ``keypoint_2`` between ``keypoint_1`` and ``keypoint_3``.

    The angle is measured between the 2-D vectors ``keypoint_2 -> keypoint_1``
    and ``keypoint_2 -> keypoint_3`` and always lies in ``[0, 180]``.

    Args:
        frame: Mapping-like object (e.g. a DataFrame row or a dict) that exposes
            coordinates under the keys ``'x<N>'`` and ``'y<N>'``.
        keypoint_1: Index of the first end point.
        keypoint_2: Index of the vertex at which the angle is measured.
        keypoint_3: Index of the second end point.
        keypoint_4: Accepted for backward compatibility only; it never
            influenced the result and is not read.

    Returns:
        The angle in degrees as a float, or NaN when either vector has zero
        length (the angle is undefined) or a coordinate is NaN.
    """
    def _point(index):
        # Build the (x, y) coordinate of one keypoint as a float vector.
        return np.array([frame['x' + str(index)], frame['y' + str(index)]], dtype=float)

    vertex = _point(keypoint_2)
    ba = _point(keypoint_1) - vertex
    bc = _point(keypoint_3) - vertex

    norm_product = np.linalg.norm(ba) * np.linalg.norm(bc)
    if norm_product == 0:
        # Coincident keypoints: the angle is undefined. Return NaN explicitly
        # instead of dividing by zero and emitting a runtime warning.
        return math.nan

    # Floating-point rounding can push the ratio marginally outside [-1, 1],
    # which would make arccos return NaN for (anti-)parallel vectors.
    cosine_angle = np.clip(np.dot(ba, bc) / norm_product, -1.0, 1.0)

    # arccos yields [0, pi], so the result is already within [0, 180] degrees.
    return math.degrees(np.arccos(cosine_angle))


In [None]:
# Derive one angle column per keypoint quadruple, evaluated row by row.
angle_keypoints = {
    'angle1': (11, 13, 23, 24),
    'angle2': (25, 26, 27, 28),
}
for angle_column, keypoints in angle_keypoints.items():
    # Bind `keypoints` as a default argument so each lambda keeps its own tuple.
    df[angle_column] = df.apply(lambda row, kps=keypoints: get_angle(row, *kps), axis=1)


In [None]:
# Display the frame, which now carries the derived 'angle1' and 'angle2' columns
df

## Model

In [None]:
# Persist the cleaned frame, including the derived angle columns, to CSV.
# index=False keeps the DataFrame row index out of the file.
df.to_csv('taijiquan_dataset.csv', index=False)

In [None]:
# Reload the CSV written by the previous cell as `dataset`, the frame used by all modelling cells,
# and display it.
dataset = pd.read_csv('taijiquan_dataset.csv')
dataset

In [None]:
# Tally the missing entries in every column of the reloaded dataset and print the result
missing_values = dataset.isna().sum()
print("Missing Values:\n", missing_values)

In [None]:
# Drop any row that still contains a missing value.
# inplace=True mutates `dataset` directly rather than returning a new frame.
dataset.dropna(inplace=True)

In [None]:
# Plot how many samples each stance class has.

# Styling has to be configured before the figure is created; applying the grid
# style after plotting would leave this figure unaffected.
sns.set_style("whitegrid")
sns.set_palette('pastel')

# Create the histogram of the 'class' column on an explicit Axes
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(dataset['class'], bins=20, kde=True, ax=ax)

# The plotted variable is the stance class, not an angle
ax.set_xlabel('Stance', fontsize=14)
ax.set_ylabel('Frequency', fontsize=14)
ax.set_title('Distribution of Taijiquan', fontsize=16)

# Annotate each bar with the number of samples it represents
for bar in ax.patches:
    ax.text(bar.get_x() + bar.get_width() / 2., bar.get_height(), f'{int(bar.get_height())}',
            ha='center', va='bottom', fontsize=12, color='black')

# Remove the top and right spines for a cleaner look
sns.despine(ax=ax)

# Show the plot
plt.show()

In [None]:
# Count the samples per stance and draw them as an annotated bar chart
labels_count = dataset['class'].value_counts()

fig, ax = plt.subplots(figsize=(12, 6))
ax.bar(labels_count.index, labels_count.values, color='skyblue')

ax.set_xlabel('Stance', fontsize=14)
ax.set_ylabel('Count', fontsize=14)
ax.set_title('Stance Distribution', fontsize=16)

# Tilt the stance names so long labels do not overlap
ax.tick_params(axis='x', labelrotation=45, labelsize=12)

# Write each bar's count just above it
for stance, count in labels_count.items():
    ax.text(stance, count, str(count), ha='center', va='bottom', fontsize=12, color='black')

plt.show()

In [None]:
# Select model inputs and targets.
# The positional slice 1:-2 drops the first column and the last two columns.
# NOTE(review): presumably the first column is the class label and the last two are
# 'angle1'/'angle2' (they are appended last before saving) - confirm the CSV column order.
features = dataset.iloc[:, 1:-2]  # Keep every column except the first and the last two
labels = dataset['class']

In [None]:
# (number of samples, number of feature columns) fed to the model
print(features.shape)

In [None]:
# List the feature column names to verify the positional slice kept the intended columns
print(features.columns)

In [None]:
# Encode the class labels as integer ids.
# label_encoder.classes_ keeps the id -> original label lookup used later for reporting.
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

### Data Splitting: 11,220
Use `X_train` and `y_train_encoded` to train the model, and `X_test` and `y_test_encoded` to evaluate its performance on unseen data.

In [None]:
# Hold out 20% of the samples as the test set.
# random_state=42 makes the shuffle, and therefore the split, reproducible.
X_train, X_test, y_train_encoded, y_test_encoded = train_test_split(features, labels_encoded, test_size=0.2, random_state=42)


In [None]:
# Show the distinct class labels present in the dataset
dataset['class'].unique()

In [None]:
# Report the shapes of the feature matrices produced by the split
print("Size of X_train:", X_train.shape)
print("Size of X_test:", X_test.shape)

In [None]:
# Report the shapes of the encoded label arrays; their lengths should match the X shapes above
print("Size of y_train_encoded:", y_train_encoded.shape)
print("Size of y_test_encoded:", y_test_encoded.shape)


In [None]:
# Standardize the features
from sklearn.preprocessing import StandardScaler 

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no test-set information leaks in.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Number of distinct encoded classes across the whole dataset
num_classes = len(np.unique(labels_encoded))
print(num_classes)

### Feedforward Neural Network 

In [None]:
# Feedforward classifier: two L2-regularised hidden layers, each followed by
# dropout, and a softmax output with one unit per encoded class.
model = keras.Sequential([
    layers.InputLayer(input_shape=(X_train.shape[1],)),
    layers.Dense(64, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    layers.Dropout(0.7),
    layers.Dense(32, activation='relu', kernel_regularizer=regularizers.l2(0.01)),
    layers.Dropout(0.7),
    layers.Dense(len(label_encoder.classes_), activation='softmax'),
])

# Sparse categorical cross-entropy accepts the integer class ids directly.
optimizer = Adam(learning_rate=0.001)  # Adjust the learning rate as needed
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [None]:
# Re-compile the model with RMSprop driven by an exponentially decaying
# learning rate; this replaces the optimizer chosen when the model was built.
initial_learning_rate = 0.001
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=10000,
    decay_rate=0.9,
)
optimizer = keras.optimizers.RMSprop(learning_rate=lr_schedule)
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)


In [None]:
# Train for 10 epochs with mini-batches of 32 samples.
# validation_split=0.2 holds out 20% of the training data for per-epoch validation;
# `history` records the per-epoch loss/accuracy used by the plotting cell.
history = model.fit(X_train_scaled, y_train_encoded, epochs=10, batch_size=32, validation_split=0.2)

In [None]:
# Evaluate the model on the held-out test set.
# The test features are re-scaled with the scaler fitted on the training split
# (recomputes the same X_test_scaled produced by the standardization cell).
X_test_scaled = scaler.transform(X_test)
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test_encoded)
print(f'Test accuracy: {test_accuracy * 100:.2f}%')

In [None]:
# Plot the per-epoch training and validation curves recorded by model.fit().
# The test set is deliberately not re-evaluated here: the result was never used
# by this plot and only repeated the work of the evaluation cell.
training_loss = history.history['loss']
training_accuracy = history.history['accuracy']
validation_loss = history.history['val_loss']
validation_accuracy = history.history['val_accuracy']

# One (metric name, training values, validation values) entry per subplot
curves = [
    ('Loss', training_loss, validation_loss),
    ('Accuracy', training_accuracy, validation_accuracy),
]

fig, axes = plt.subplots(1, 2, figsize=(12, 6))
for ax, (metric, train_values, val_values) in zip(axes, curves):
    # Epochs are numbered from 1 on the x-axis
    ax.plot(range(1, len(train_values) + 1), train_values, label=f'Training {metric}')
    ax.plot(range(1, len(val_values) + 1), val_values, label=f'Validation {metric}')
    ax.set_xlabel('Epochs')
    ax.set_ylabel(metric)
    ax.set_title(f'Training and Validation {metric}')
    ax.legend()

fig.tight_layout()
plt.show()

In [None]:
# Predict on the scaled test set; the result holds the model's softmax output per sample.
# NOTE(review): `y_pred` is not referenced again in the visible notebook - the metrics
# cell recomputes the predictions under a different name.
y_pred = model.predict(X_test_scaled)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Turn the per-class probabilities into hard predictions (index of the largest probability)
y_pred_probabilities = model.predict(X_test_scaled)
y_pred_classes = np.argmax(y_pred_probabilities, axis=1)

# Calculate accuracy
accuracy = accuracy_score(y_test_encoded, y_pred_classes)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Support-weighted precision, recall and F1 over all classes.
# zero_division=0 matches the classification report below: a class that is never
# predicted scores 0 without emitting an UndefinedMetricWarning.
precision = precision_score(y_test_encoded, y_pred_classes, average='weighted', zero_division=0)
recall = recall_score(y_test_encoded, y_pred_classes, average='weighted', zero_division=0)
f1 = f1_score(y_test_encoded, y_pred_classes, average='weighted', zero_division=0)

print(f'Precision: {precision:.2f}')
print(f'Recall: {recall:.2f}')
print(f'F1 Score: {f1:.2f}')

# Per-class breakdown, labelled with the original class names from the encoder
class_names = label_encoder.classes_
report = classification_report(y_test_encoded, y_pred_classes, target_names=class_names, zero_division=0)
print('Classification Report:')
print(report)


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Human-readable names for the tick labels (change as needed). They are only
# valid if there is exactly one name per encoded class, listed in encoder order.
display_names = ["Bow-Arrow", "False Stance", "Four-Six", "Golden Rooster", "Horse Stance", "Sitting", "Taijiquan", "Tame"]
encoder_classes = list(label_encoder.classes_)
if len(display_names) == len(encoder_classes):
    class_names = display_names
else:
    # Fall back to the encoder's own labels rather than drawing misaligned ticks
    class_names = [str(name) for name in encoder_classes]

# Passing `labels` pins one row/column per encoded class, so the matrix keeps its
# full shape even when a class is missing from both the test labels and predictions.
confusion = confusion_matrix(y_test_encoded, y_pred_classes, labels=np.arange(len(encoder_classes)))

# Create a Seaborn heatmap for the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(confusion, annot=True, fmt='d', cmap='YlGnBu', cbar=True,
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')

# Display the confusion matrix
plt.show()


In [None]:
# Matthews correlation coefficient between the true and predicted class ids.
# Binary-case formula, for reference:
# MCC = (TP * TN - FP * FN) / sqrt((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
# NOTE(review): the labels here are multi-class, so the value printed is presumably
# scikit-learn's multi-class generalisation rather than this binary formula - confirm.

from sklearn.metrics import confusion_matrix, matthews_corrcoef

mcc = matthews_corrcoef(y_test_encoded, y_pred_classes)
print("MCC:", mcc)


In [None]:
from sklearn.model_selection import KFold
import numpy as np
from tensorflow import keras


def _build_cv_model(n_features, n_classes):
    """Return a fresh, uncompiled network for one cross-validation fold."""
    return keras.Sequential([
        keras.layers.InputLayer(input_shape=(n_features,)),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.7),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.7),
        keras.layers.Dense(32, activation='relu'),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.3),
        keras.layers.Dense(n_classes, activation='softmax'),
    ])


# Define the number of folds (e.g., k = 10 for 10-fold cross-validation)
k = 10

# The output size is fixed by the label encoder, not by whichever classes happen
# to appear in the training split, and it does not change between folds.
num_classes = len(label_encoder.classes_)

# Validation accuracy of each fold
accuracy_per_fold = []

# Create a KFold object
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Iterate over the folds (indices are positional into the training split)
for train_index, val_index in kf.split(X_train):
    # Fit the scaler on this fold's training part only, so the validation part
    # does not contribute to the scaling statistics.
    fold_scaler = StandardScaler()
    X_train_fold = fold_scaler.fit_transform(X_train.iloc[train_index])
    X_val_fold = fold_scaler.transform(X_train.iloc[val_index])
    y_train_fold, y_val_fold = y_train_encoded[train_index], y_train_encoded[val_index]

    # Fold-local names keep the notebook's main `model` and `history` intact
    fold_model = _build_cv_model(X_train.shape[1], num_classes)
    fold_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train the model on the current fold
    fold_model.fit(X_train_fold, y_train_fold, epochs=100, batch_size=32,
                   validation_data=(X_val_fold, y_val_fold), verbose=0)

    # Evaluate the model on the validation set for this fold
    val_loss, val_accuracy = fold_model.evaluate(X_val_fold, y_val_fold)
    accuracy_per_fold.append(val_accuracy)

# Calculate and print the average accuracy across all folds
average_accuracy = float(np.mean(accuracy_per_fold))
print(f'Average Accuracy: {average_accuracy * 100:.2f}%')


### Percentage of Keypoints

`X_train, X_test, y_train_encoded, y_test_encoded = train_test_split(features, labels_encoded, test_size=0.2, random_state=42)`
After this split, `X_test` holds the test-set input features and `y_test_encoded` holds the corresponding ground-truth labels. Use `X_test` as the model input and `y_test_encoded` as the ground truth when computing accuracy or any other evaluation metric.

In [None]:

# Define a function to calculate accuracy for a single keypoint
def calculate_keypoint_accuracy(predicted_keypoint, ground_truth_keypoint, threshold):
    """Return 1 if the prediction lies strictly within ``threshold`` of the ground truth, else 0.

    The distance is the Euclidean norm of the difference between the two inputs.
    """
    offset = np.asarray(predicted_keypoint) - np.asarray(ground_truth_keypoint)
    within_threshold = np.linalg.norm(offset) < threshold
    return 1 if within_threshold else 0

# Define a function to calculate accuracy for all 33 landmarks
def calculate_overall_accuracy(predicted_landmarks, ground_truth_landmarks, threshold):
    """Return the percentage of predictions lying strictly within ``threshold`` of their ground truth.

    Args:
        predicted_landmarks: Sequence of predictions; each item is either a
            coordinate vector or a scalar.
        ground_truth_landmarks: Sequence of ground-truth items with the same
            shape as ``predicted_landmarks``.
        threshold: Maximum (exclusive) Euclidean distance that counts as accurate.

    Returns:
        Percentage in ``[0, 100]`` as a float; ``0.0`` when the input is empty.

    Raises:
        ValueError: If the two inputs do not have the same shape.
    """
    predicted = np.asarray(predicted_landmarks, dtype=float)
    ground_truth = np.asarray(ground_truth_landmarks, dtype=float)
    if predicted.shape != ground_truth.shape:
        raise ValueError(
            f"Shape mismatch: predictions {predicted.shape} vs ground truth {ground_truth.shape}"
        )

    num_landmarks = len(predicted)
    if num_landmarks == 0:
        # Nothing to score; avoid dividing by zero.
        return 0.0

    # Flatten each item to a vector so scalars and [x, y] pairs share one code path,
    # then measure all per-item Euclidean distances in a single vectorised call.
    distances = np.linalg.norm((predicted - ground_truth).reshape(num_landmarks, -1), axis=1)
    num_accurate = int(np.count_nonzero(distances < threshold))
    return num_accurate / num_landmarks * 100

# Usage
# NOTE(review): despite the "landmark" naming, the values passed in below are integer
# class ids (argmax predictions and encoded test labels), not [x, y] coordinates.
# With a threshold of 0.5 an item counts as accurate only when the two ids are equal,
# so the printed value is the classification accuracy in percent.
threshold = 0.5  # Distance threshold; below 1 so that only identical class ids count as a match
predicted_landmarks = y_pred_classes  # Predicted class id per test sample
ground_truth_landmarks = y_test_encoded     # True encoded class id per test sample

# This rebinds `accuracy`, which the metrics cell defined as a 0-1 fraction, to a percentage.
accuracy = calculate_overall_accuracy(predicted_landmarks, ground_truth_landmarks, threshold)
print(f"Overall Accuracy: {accuracy:.2f}%")


# Model Save


In [None]:
"""
# Save the model architecture to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
    
# Save the model weights to HDF5
model.save_weights("model_weights.h5")
"""

In [None]:
# Disabled snippet, kept as a bare string so it is not executed: rebuilds a model
# from model.json, loads the weights from model_weights.h5 and compiles it.
'''
from keras.models import model_from_json

# Load the model architecture from JSON
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# Load the model weights from HDF5
loaded_model.load_weights("model_weights.h5")

# Compile the loaded model 
loaded_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
'''

### Statistical Testing


In [None]:
""""
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Data Preparation
dataset['label'] = pd.Categorical(dataset['label'])

# Perform the One-Way ANOVA
model = ols('angle1 ~ label', data=dataset).fit()
anova_table = sm.stats.anova_lm(model, typ=2)

# Post Hoc Analysis (Turkey's HSD Test)
tukey = pairwise_tukeyhsd(endog=dataset['angle1'], groups=dataset['label'], alpha=0.05)
print(tukey.summary())
"""