## Data preprocessing

In [79]:
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset


In [80]:

# Load your dataset
df = pd.read_csv('dataset.csv')

In [81]:
# Check for missing values
missing_values = df.isnull().sum()


In [82]:
# Check for White Spaces
df.columns = df.columns.str.strip()

In [83]:
# Label Encoding
from sklearn.preprocessing import LabelEncoder

# Initialize the encoder
label_encoder = LabelEncoder()

# Encode the labels
df['label'] = label_encoder.fit_transform(df['label'])


In [84]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler

# Initialize the scaler
scaler = StandardScaler()

# Scale the landmark coordinates (x and y)
df.iloc[:, 1:] = scaler.fit_transform(df.iloc[:, 1:])


In [85]:
# Function to apply data augmentation
def augment_data(x, y):
    # Combine x and y into landmark pairs
    landmarks = list(zip(x, y))

    # Random rotation (angle in degrees)
    angle = np.random.uniform(-10, 10)
    rotation_matrix = cv2.getRotationMatrix2D((0, 0), angle, 1)
    rotated_landmarks = cv2.transform(np.array(landmarks)[None, ...], rotation_matrix)[0]

    # Random translation
    x_translation = np.random.uniform(-5, 5)
    y_translation = np.random.uniform(-5, 5)
    translated_landmarks = rotated_landmarks + [x_translation, y_translation]

    return list(zip(*translated_landmarks))

# Apply data augmentation to your dataset
augmented_landmarks = []

for _, row in df.iterrows():
    x = row[1:34].tolist()  # Extract x coordinates
    y = row[34:].tolist()    # Extract y coordinates
    augmented_x, augmented_y = augment_data(x, y)
    augmented_landmarks.append(augmented_x + augmented_y)

# Create a new DataFrame with augmented data
augmented_df = pd.DataFrame(augmented_landmarks, columns=df.columns[1:])

# Concatenate the original and augmented DataFrames
augmented_dataset = pd.concat([df, augmented_df])



In [86]:
print(df)

    label        x1        y1        x2        y2        x3        y3  \
0       7 -0.183421  0.069315 -0.162536  0.061732 -0.155372  0.063275   
1       0 -0.556643 -0.568918 -0.570981 -0.473468 -0.583750 -0.473002   
2       0 -0.384387 -0.535327 -0.356010 -0.580508 -0.341002 -0.580257   
3       0  0.484072  0.119702  0.403554  0.168772  0.372961  0.152654   
4       0 -0.807850  1.060256 -0.778786  1.060773 -0.783660  1.064325   
..    ...       ...       ...       ...       ...       ...       ...   
58      4  2.199458  3.881919  2.180646  3.950854  2.179288  3.924469   
59      4  0.132382  0.069315  0.081097 -0.080988  0.065957 -0.079732   
60      4 -0.047051 -0.552122 -0.026388 -0.723228 -0.026859 -0.723264   
61      5  0.749634  0.002133  0.776170 -0.080988  0.779920 -0.079732   
62      6 -0.161889 -0.518531 -0.169702 -0.544828 -0.162512 -0.526630   

          x4        y4        x5  ...       x29       y29       x30       y30  \
0  -0.148053  0.063435 -0.198172  ... -0.3

## Data splitting

In [87]:
from sklearn.model_selection import train_test_split

# Assuming 'df' is your DataFrame containing the dataset
X = df.iloc[:, 1:]  # Extract features from all columns except the first (which is 'label')
y = df['label']  # Extract the 'label' column as the target

# Split the data into training (70%), validation (15%), and test (15%) sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# You can print the shapes of the resulting sets to verify the split
print("X_train shape:", X_train.shape)
print("X_val shape:", X_val.shape)
print("X_test shape:", X_test.shape)


X_train shape: (44, 66)
X_val shape: (9, 66)
X_test shape: (10, 66)


In [88]:
# Count the number of unique classes in the 'label' column
num_classes = len(df['label'].unique())

print("Number of classes:", num_classes)

Number of classes: 8


## Feedforward Neural Network Model

In [89]:
data = df

data.head()

Unnamed: 0,label,x1,y1,x2,y2,x3,y3,x4,y4,x5,...,x29,y29,x30,y30,x31,y31,x32,y32,x33,y33
0,7,-0.183421,0.069315,-0.162536,0.061732,-0.155372,0.063275,-0.148053,0.063435,-0.198172,...,-0.352796,0.790793,0.536681,0.694568,-0.381126,0.754676,0.542694,0.617171,-0.264514,0.719053
1,0,-0.556643,-0.568918,-0.570981,-0.473468,-0.58375,-0.473002,-0.589165,-0.474196,-0.512117,...,0.053367,-1.27504,-0.653026,-1.272766,0.061121,-1.264021,-0.806617,-1.271993,0.064952,-1.298262
2,0,-0.384387,-0.535327,-0.35601,-0.580508,-0.341002,-0.580257,-0.325921,-0.599644,-0.366096,...,-0.277581,0.240684,0.583109,0.173308,-0.213377,0.265639,0.306161,0.123653,-0.449015,0.194443
3,0,0.484072,0.119702,0.403554,0.168772,0.372961,0.152654,0.342861,0.15304,0.480827,...,-0.37536,-0.315277,-0.008843,-0.353556,-0.548875,-0.371245,0.177143,-0.470691,-0.145906,-0.330167
4,0,-0.80785,1.060256,-0.778786,1.060773,-0.78366,1.064325,-0.781262,1.067013,-0.731149,...,-0.601007,1.036586,1.442019,1.030864,-0.487875,1.135669,1.144777,1.062929,-0.78507,0.957021


In [90]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

model = RandomForestClassifier()

model.fit(X_train, y_train)

# Step 5: Evaluate on the validation set
y_val_pred = model.predict(X_val)
val_accuracy = accuracy_score(y_val, y_val_pred)
print("Validation Accuracy:", val_accuracy)

Validation Accuracy: 0.5555555555555556


In [91]:
param_grid = {'n_estimators': [50, 100, 200], 'max_depth': [None, 10, 20]}
grid_search = GridSearchCV(model, param_grid, cv=3)
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_



In [92]:
y_test_pred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
print("Test Accuracy:", test_accuracy)

Test Accuracy: 0.5
