# Training the Custom Model

## Read in Collected Data and Process

In [4]:
## Remove image_path column and any empty rows

import pandas as pd

# Specify the path to your CSV file
csv_path = 'fer2013_landmarks_nopathsfixed.csv'

# Read the CSV file into a DataFrame
df = pd.read_csv(csv_path)

# Specify the name of the column you want to remove
column_to_remove = 'image_path'

# Check if the column exists in the DataFrame
if column_to_remove in df.columns:
    # Drop the specified column
    df = df.drop(columns=[column_to_remove])

    # Save the modified DataFrame to a new CSV file
    new_csv_path = 'fer2013_landmarks_nopathsfixed.csv'
    df.to_csv(new_csv_path, index=False)

    print(f"Column '{column_to_remove}' removed and saved to '{new_csv_path}'.")
else:
    print(f"Column '{column_to_remove}' not found in the DataFrame.")

Column 'image_path' removed and saved to 'fer2013_landmarks_nopathsfixed.csv'.


In [22]:
# Load the CSV file
df = pd.read_csv('fer2013_landmarks_nopathsfixed.csv')

# Drop rows where 'x1' column has NaN values
df = df.dropna(subset=['x1'])

df.to_csv('fer2013_landmarks_nopathsfixed.csv', index=False)
# Now, df does not contain rows where 'x1' has NaN values

In [8]:
%pip install scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Load the CSV file
df = pd.read_csv('fer2013_landmarks_nopathsfixed.csv')
df.head()
df.tail()

X = df.drop('emotion', axis=1) # features
y = df['emotion'] # target value
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1234)
y_test

5660         fear
22427         sad
24193    surprise
7332         fear
20720         sad
           ...   
18855     neutral
249         angry
17011     neutral
5637         fear
26386    surprise
Name: emotion, Length: 7959, dtype: object

## Train Machine Learning Classification Model

In [3]:
# X = df.drop('emotion', axis=1).values
# y = df['emotion'].values

from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler 

from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [4]:
pipelines = {
    'lr':make_pipeline(StandardScaler(), LogisticRegression(solver='lbfgs', max_iter=2000)),
    'rc':make_pipeline(StandardScaler(), RidgeClassifier()),
    'rf':make_pipeline(StandardScaler(), RandomForestClassifier()),
    'gb':make_pipeline(StandardScaler(), GradientBoostingClassifier()),
}

In [None]:
fit_models = {}
for algo, pipeline in pipelines.items():
    print(algo, pipeline)
    model = pipeline.fit(X_train, y_train)
    fit_models[algo] = model

lr Pipeline(steps=[('standardscaler', StandardScaler()),
                ('logisticregression', LogisticRegression(max_iter=2000))])


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


rc Pipeline(steps=[('standardscaler', StandardScaler()),
                ('ridgeclassifier', RidgeClassifier())])
rf Pipeline(steps=[('standardscaler', StandardScaler()),
                ('randomforestclassifier', RandomForestClassifier())])
gb Pipeline(steps=[('standardscaler', StandardScaler()),
                ('gradientboostingclassifier', GradientBoostingClassifier())])


In [None]:
fit_models

In [None]:
fit_models['rc'].predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score # Accuracy metrics 
import pickle 

In [None]:
for algo, model in fit_models.items():
    yhat = model.predict(X_test)
    print(algo, accuracy_score(y_test, yhat))

In [None]:
fit_models['rf'].predict(X_test)

In [None]:
y_test

In [None]:
with open('emotions.pkl', 'wb') as f:
    pickle.dump(fit_models['rf'], f)

## Encode Labels

In [None]:
# from sklearn.preprocessing import LabelEncoder

# label_encoder = LabelEncoder()
# y_encoded = label_encoder.fit_transform(y)
# # Split the data
# from sklearn.model_selection import train_test_split

# X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)


## Define the model

In [None]:
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense

# model = Sequential()
# model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
# model.add(Dense(32, activation='relu'))
# model.add(Dense(len(label_encoder.classes_), activation='softmax'))

# model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# # Train the model
# model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)
# # Evaluate the model
# model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)
# # Make predictions
# predictions = model.predict(X_test)

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# Load the data
data = pd.read_csv("fer2013_landmarks_nopathsfixed.csv")

# Data Preprocessing
# Assuming your CSV has columns 'emotion' and 'facial_coordinates'
# Handle missing values
data = data.dropna()
# Split the data into features (X) and labels (y)
# X = data['facial_coordinates'].apply(lambda x: np.fromstring(x[1:-1], sep=',', dtype=np.float32))
# y = data['emotion']
# Separate features (X) and target value (y)
X = data.drop('emotion', axis=1)
y = data['emotion']

# Label encode the emotions
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
# Convert to one-hot encoding
y_one_hot = to_categorical(y, num_classes=len(label_encoder.classes_))

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_one_hot, test_size=0.3, random_state=1234)

# Build the model
model = Sequential()
model.add(Dense(128, input_shape=(X_train.shape[1],), activation='relu'))
model.add(Dense(len(label_encoder.classes_), activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, callbacks=[EarlyStopping(patience=3)])

# Evaluate the model on the test set
y_pred = model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)
y_test_labels = np.argmax(y_test, axis=1)

# Print classification report and confusion matrix
print(classification_report(y_test_labels, y_pred_labels))
print(confusion_matrix(y_test_labels, y_pred_labels))





NameError: name 'df' is not defined