## Data preprocessing

In [1]:
import cv2
import numpy as np
import pandas as pd


In [2]:

# Read the dataset from the CSV file in the working directory
csv_path = 'dataset.csv'
df = pd.read_csv(csv_path)

In [3]:
# Check for missing values.
# Count the NaN entries per column and report the columns that contain any;
# computing the counts without showing them would let missing data slip
# through unnoticed into scaling and training.
missing_values = df.isnull().sum()
print("Total missing values:", int(missing_values.sum()))
print(missing_values[missing_values > 0])


In [4]:
# Remove leading/trailing whitespace from the column names so that later
# lookups such as df['label'] match the header exactly
stripped_names = df.columns.str.strip()
df.columns = stripped_names

In [5]:
# Label Encoding
from sklearn.preprocessing import LabelEncoder

# Create the encoder that maps each distinct label value to an integer id
label_encoder = LabelEncoder()

# Fit on the 'label' column and overwrite it with the integer codes
encoded_labels = label_encoder.fit_transform(df['label'])
df['label'] = encoded_labels


In [6]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler

# Create the standardizer (zero mean, unit variance per column)
scaler = StandardScaler()

# Standardize every column after the first one (the first column is the label)
# and write the result back in place.
# NOTE(review): the scaler is fitted on the full frame before the
# train/validation/test split further down, so held-out rows influence the
# scaling statistics - confirm whether that is intended.
feature_block = df.iloc[:, 1:]
df.iloc[:, 1:] = scaler.fit_transform(feature_block)


In [7]:
# Function to apply data augmentation
def augment_data(x, y, max_angle=10.0, max_shift=5.0, rng=None):
    """Randomly rotate and translate one set of 2-D landmarks.

    The landmarks are rotated about the origin (0, 0) by an angle drawn
    uniformly from [-max_angle, max_angle] degrees, then shifted by offsets
    drawn uniformly from [-max_shift, max_shift] independently for each axis.
    The rotation uses the same matrix that
    ``cv2.getRotationMatrix2D((0, 0), angle, 1)`` produces:
    ``x' = cos(a)*x + sin(a)*y`` and ``y' = -sin(a)*x + cos(a)*y``.

    Args:
        x: Sequence of x coordinates, one per landmark.
        y: Sequence of y coordinates, same length as ``x``.
        max_angle: Largest rotation magnitude in degrees (default 10).
        max_shift: Largest translation magnitude, in the same units as the
            coordinates (default 5).
        rng: Optional random source exposing ``uniform(low, high)``, e.g.
            ``np.random.default_rng(seed)``. When omitted, the global
            ``np.random`` state is used.

    Returns:
        A list ``[xs, ys]`` of two tuples holding the augmented x and y
        coordinates, in the same landmark order as the input.

    Raises:
        ValueError: If ``x`` and ``y`` have different lengths.

    NOTE(review): in this notebook the function is called on coordinates that
    were already standardized, so the default shift of up to 5 spans several
    standard deviations - confirm the intended magnitude.
    """
    xs = np.asarray(x, dtype=float).ravel()
    ys = np.asarray(y, dtype=float).ravel()
    if xs.shape[0] != ys.shape[0]:
        raise ValueError(
            f"x and y must have the same length, got {xs.shape[0]} and {ys.shape[0]}"
        )

    random_source = np.random if rng is None else rng

    # Draw order (angle, x shift, y shift) is kept fixed so that a seeded
    # random source always yields the same augmentation.
    angle = random_source.uniform(-max_angle, max_angle)
    x_translation = random_source.uniform(-max_shift, max_shift)
    y_translation = random_source.uniform(-max_shift, max_shift)

    # One landmark per row: column 0 is x, column 1 is y
    landmarks = np.column_stack((xs, ys))

    # Rotation about the origin (angle given in degrees)
    theta = np.deg2rad(angle)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotation_matrix = np.array([[cos_t, sin_t], [-sin_t, cos_t]])
    rotated_landmarks = landmarks @ rotation_matrix.T

    # Translation, broadcast over every landmark
    translated_landmarks = rotated_landmarks + np.array([x_translation, y_translation])

    # Explicit column split so that empty input still yields two (empty) tuples
    return [tuple(translated_landmarks[:, 0]), tuple(translated_landmarks[:, 1])]

# Apply data augmentation to your dataset.
#
# The feature columns are interleaved (label, x1, y1, x2, y2, ...), so the
# x and y coordinates are picked with a stride of 2 instead of being treated
# as two contiguous halves, and the augmented values are written back in the
# same interleaved order so they line up with the column names.
feature_columns = df.columns[1:]
x_columns = feature_columns[0::2]
y_columns = feature_columns[1::2]
if len(x_columns) != len(y_columns):
    raise ValueError("Expected an even number of coordinate columns (x/y pairs)")

x_values = df[x_columns].to_numpy(dtype=float)
y_values = df[y_columns].to_numpy(dtype=float)

augmented_landmarks = []

for x_row, y_row in zip(x_values, y_values):
    augmented_x, augmented_y = augment_data(x_row.tolist(), y_row.tolist())
    # Re-interleave as x1, y1, x2, y2, ... to match feature_columns
    interleaved = [value for pair in zip(augmented_x, augmented_y) for value in pair]
    augmented_landmarks.append(interleaved)

# Create a new DataFrame with augmented data; every augmented row keeps the
# label of the row it was derived from (otherwise the labels would be NaN
# after concatenation)
augmented_df = pd.DataFrame(augmented_landmarks, columns=feature_columns)
augmented_df.insert(0, 'label', df['label'].to_numpy())

# Concatenate the original and augmented DataFrames; ignore_index avoids
# duplicated row labels in the combined frame
augmented_dataset = pd.concat([df, augmented_df], ignore_index=True)



In [8]:
# Print the preprocessed frame (encoded labels and scaled coordinates).
# This shows `df`, not `augmented_dataset`, so augmented rows are not included.
print(df)

    label        x1        y1        x2        y2        x3        y3  \
0       7 -0.183421  0.069315 -0.162536  0.061732 -0.155372  0.063275   
1       0 -0.556643 -0.568918 -0.570981 -0.473468 -0.583750 -0.473002   
2       0 -0.384387 -0.535327 -0.356010 -0.580508 -0.341002 -0.580257   
3       0  0.484072  0.119702  0.403554  0.168772  0.372961  0.152654   
4       0 -0.807850  1.060256 -0.778786  1.060773 -0.783660  1.064325   
..    ...       ...       ...       ...       ...       ...       ...   
58      4  2.199458  3.881919  2.180646  3.950854  2.179288  3.924469   
59      4  0.132382  0.069315  0.081097 -0.080988  0.065957 -0.079732   
60      4 -0.047051 -0.552122 -0.026388 -0.723228 -0.026859 -0.723264   
61      5  0.749634  0.002133  0.776170 -0.080988  0.779920 -0.079732   
62      6 -0.161889 -0.518531 -0.169702 -0.544828 -0.162512 -0.526630   

          x4        y4        x5  ...       x29       y29       x30       y30  \
0  -0.148053  0.063435 -0.198172  ... -0.3

## Data splitting

In [9]:
from sklearn.model_selection import train_test_split

# Features are every column after the first; the first column is 'label'.
# NOTE(review): the split is taken from `df`, so the rows in
# `augmented_dataset` are not part of any split - confirm that is intended.
X = df.iloc[:, 1:]
y = df['label']

# Two-stage split: hold out 30% first, then halve the hold-out,
# giving 70% training / 15% validation / 15% test
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Report the shape of each feature split as a sanity check
for caption, split in (
    ("X_train shape:", X_train),
    ("X_val shape:", X_val),
    ("X_test shape:", X_test),
):
    print(caption, split.shape)


X_train shape: (44, 66)
X_val shape: (9, 66)
X_test shape: (10, 66)


In [10]:
# Number of distinct values in the 'label' column
# (dropna=False keeps the count identical to len(unique()))
num_classes = df['label'].nunique(dropna=False)

print("Number of classes:", num_classes)

Number of classes: 8


## Feedforward Neural Network Model

In [18]:
import torch
import torch.nn as nn

class NeuralNetwork(nn.Module):
    """Feedforward classifier with one hidden layer: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output scores (one per class).
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Layers are registered in forward order: fc1, relu, fc2
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Return the raw class scores for ``x``; no softmax is applied here."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Initialize the model.
# The layer sizes are derived from the prepared data instead of being
# hard-coded: the feature matrix has one column per x and per y coordinate
# (66 columns for 33 landmarks), and the label column holds 8 encoded classes.
# Hard-coding 33 inputs / 2 outputs makes the first forward pass or loss
# computation fail with a shape or target-range error.
input_size = X_train.shape[1]  # one input per feature column
hidden_size = 32  # You can adjust this
num_classes = df['label'].nunique()  # one output score per encoded class

model = NeuralNetwork(input_size, hidden_size, num_classes)


## Datasets and loss function

In [19]:
import torch.optim as optim
from torch.utils.data import DataLoader, random_split

In [21]:
import torch
from torch.utils.data import Dataset, DataLoader

# Define your custom dataset class
class MyDataset(Dataset):
    """Dataset that serves (features, label) pairs from pandas objects.

    Args:
        df: Object exposing ``.values`` with the feature values; converted
            to a float32 tensor.
        labels: Object exposing ``.values`` with the targets; converted to
            an int64 (long) tensor.
    """

    def __init__(self, df, labels):
        feature_array = df.values
        label_array = labels.values
        self.features = torch.tensor(feature_array, dtype=torch.float32)
        self.labels = torch.tensor(label_array, dtype=torch.long)

    def __len__(self):
        # Number of samples equals the size of the first feature dimension
        return self.features.shape[0]

    def __getitem__(self, idx):
        # One sample as a dict with the keys 'features' and 'label'
        sample = {}
        sample['features'] = self.features[idx]
        sample['label'] = self.labels[idx]
        return sample


# Wrap the training and validation splits in MyDataset instances
train_dataset = MyDataset(df=X_train, labels=y_train)
val_dataset = MyDataset(df=X_val, labels=y_val)


In [22]:
# Loss for multi-class classification: cross-entropy computed on the model's
# raw scores, averaged over the batch (the default reduction, spelled out)
criterion = nn.CrossEntropyLoss(reduction='mean')