In [2]:
import pandas as pd  # Pandas for data manipulation and analysis
import numpy as np   # NumPy for numerical operations
import cv2 as cv     # OpenCV for image processing
import os            # OS module to interact with the operating system
import tensorflow as tf  # TensorFlow for machine learning models

# Additional imports
from tensorflow.keras.preprocessing.image import ImageDataGenerator  # For image data augmentation
from tensorflow.keras.models import load_model  # To load a saved Keras model
from keras.models import Sequential  # For creating a sequential model
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten  # Layers for model building
from keras.optimizers import Adam  # Optimizer for training the model

In [3]:
import warnings
import sys
if not sys.warnoptions:
    warnings.simplefilter("ignore")  # Ignore simple warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)  # Ignore deprecation warnings

In [4]:
ckpixelset = pd.read_csv('ckextended.csv')
ckpixelset

Unnamed: 0,emotion,pixels,Usage
0,6,36 39 35 25 19 11 8 7 3 13 15 9 21 57 75 90 10...,Training
1,6,88 74 19 4 5 5 3 12 8 21 15 21 15 18 24 29 32 ...,Training
2,6,9 2 4 7 1 1 1 0 7 29 49 76 115 141 156 169 177...,Training
3,6,104 106 108 104 95 50 60 61 58 83 126 133 139 ...,Training
4,6,68 72 67 67 6 2 1 1 1 1 1 14 24 24 38 65 79 94...,Training
...,...,...,...
915,5,87 86 88 92 92 127 231 248 251 253 254 254 254...,PrivateTest
916,5,21 24 26 28 27 28 30 8 0 0 0 0 0 0 1 4 37 42 4...,PrivateTest
917,5,76 40 31 38 28 34 38 36 41 36 46 38 44 26 45 5...,PrivateTest
918,5,114 87 16 29 17 25 30 34 37 35 45 93 63 80 73 ...,PrivateTest


In [5]:
ckpixelset['emotion'].value_counts()

emotion
6    593
5     83
3     69
1     59
0     45
4     28
2     25
7     18
Name: count, dtype: int64

In [6]:
training = ckpixelset[ckpixelset['Usage'] == 'Training']
training = training[training['emotion'] != 7]
training.drop('Usage', axis = 1, inplace = True)
training.reset_index(drop = True, inplace = True)
training

Unnamed: 0,emotion,pixels
0,6,36 39 35 25 19 11 8 7 3 13 15 9 21 57 75 90 10...
1,6,88 74 19 4 5 5 3 12 8 21 15 21 15 18 24 29 32 ...
2,6,9 2 4 7 1 1 1 0 7 29 49 76 115 141 156 169 177...
3,6,104 106 108 104 95 50 60 61 58 83 126 133 139 ...
4,6,68 72 67 67 6 2 1 1 1 1 1 14 24 24 38 65 79 94...
...,...,...
715,5,171 163 167 165 53 29 24 21 22 35 41 49 61 63 ...
716,5,35 37 34 38 39 37 38 39 17 0 0 1 4 6 1 14 43 9...
717,5,45 16 11 17 7 3 11 0 9 5 2 23 35 5 8 15 18 15 ...
718,5,12 7 6 9 8 11 14 16 17 17 20 26 33 34 43 36 33...


In [7]:
test = ckpixelset[ckpixelset['Usage'] == 'PrivateTest']
test = test[test['emotion'] != 7]
test.drop('Usage', axis = 1, inplace = True)
test.reset_index(drop = True, inplace = True)
test

Unnamed: 0,emotion,pixels
0,6,72 72 65 70 58 48 23 21 17 28 42 34 50 82 108 ...
1,6,50 36 9 4 3 4 9 13 20 55 91 123 148 158 174 18...
2,6,0 0 0 0 0 0 0 0 5 13 9 1 0 0 2 14 10 13 4 1 8 ...
3,6,19 21 21 15 10 3 26 30 32 29 10 18 48 58 50 56...
4,6,64 68 65 70 69 69 67 30 11 17 17 13 19 21 49 6...
...,...,...
88,5,87 86 88 92 92 127 231 248 251 253 254 254 254...
89,5,21 24 26 28 27 28 30 8 0 0 0 0 0 0 1 4 37 42 4...
90,5,76 40 31 38 28 34 38 36 41 36 46 38 44 26 45 5...
91,5,114 87 16 29 17 25 30 34 37 35 45 93 63 80 73 ...


In [8]:
def process_pixels(data):
    # Keep a copy of the emotion column
    emotion = data['emotion'].copy()
    
    # Check the type of the first element in the 'pixels' column to see if it's already a list
    if isinstance(data['pixels'].iloc[0], list):
        # If the pixels are already lists, just create a new DataFrame
        data = pd.DataFrame(data['pixels'].to_list(), columns=[f'pixel{i}' for i in range(len(data['pixels'].iloc[0]))])
    else:
        # If the pixels are strings, split them into lists of integers
        data['pixels'] = data['pixels'].apply(lambda x: [int(pixel) for pixel in x.split()])
        data = pd.DataFrame(data['pixels'].to_list(), columns=[f'pixel{i}' for i in range(len(data['pixels'].iloc[0]))])
    
    # Add the emotion column back to the data
    data['emotion'] = emotion
    return data

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Use the function to process the pixel data
training = process_pixels(training)

# Now proceed with the rest of your code
X = training.drop('emotion', axis=1)
y = training['emotion']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
# Initialize the StandardScaler
scaler = StandardScaler()

# Fit the scaler to the training data and transform it
X_train = scaler.fit_transform(X_train)

# Transform the test data using the same scaler
X_test = scaler.transform(X_test)

In [11]:
svc = SVC()
svc.fit(X_train, y_train)

In [12]:
y_pred = svc.predict(X_test)

# Print the accuracy
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')

Accuracy: 0.7708333333333334


In [13]:
def process_pixels1(data):
    # Checking if the first element in the 'pixels' column is a list
    # This is important to determine if the pixel data needs further processing
    if not isinstance(data['pixels'].iloc[0], list):
        # If the pixels are stored as strings, split them into lists of integers
        # This step converts the string representation of pixels into a format that can be used for analysis
        data['pixels'] = data['pixels'].apply(lambda x: [int(pixel) for pixel in x.split()])
    
    # Convert the list of pixels to a 2D array format
    # This step is crucial for creating a structured, tabular format where each pixel value becomes a separate column
    # It facilitates easier manipulation and analysis of pixel data
    pixel_data = pd.DataFrame(data['pixels'].to_list(), columns=[f'pixel{i}' for i in range(len(data['pixels'].iloc[0]))])
    
    # Returning the processed pixel data and corresponding emotion labels
    return pixel_data, data['emotion']

# Process the pixel data from the 'test' dataset
testingX, testingY = process_pixels1(test)

# Creating an instance of StandardScaler
scaler1 = StandardScaler()
# Applying the scaler to the testing data
# Why: Standardizing the data to ensure that the model receives data with consistent scaling during prediction
testingX = scaler1.fit_transform(testingX)

# Predicting emotions using the trained model 'svc'
test_predict = svc.predict(testingX)

# Printing the accuracy of the predictions
# Why: To evaluate the performance of the model on the test dataset
print(f'Accuracy: {accuracy_score(testingY, test_predict)}')

Accuracy: 0.7311827956989247


In [14]:
from sklearn.metrics import confusion_matrix, classification_report
import plotly.figure_factory as ff

# Assuming test_predict contains your model's predictions on the test dataset
# Generating a classification report
# Why: The classification report shows the main classification metrics precision, recall and f1-score on a per-class basis.
# This gives a deeper intuition of the classifier's performance, especially in imbalanced datasets.
report = classification_report(testingY, test_predict)
print(report)

# Generating a confusion matrix
# Why: A confusion matrix is a table used to evaluate the performance of a classification model.
# It provides insight into not only the accuracy but also the types of errors made by the classifier.
cm = confusion_matrix(testingY, test_predict)

# Converting the confusion matrix to a DataFrame for easier plotting
# Why: DataFrames provide a convenient structure for data manipulation and are compatible with many plotting libraries.
cm_df = pd.DataFrame(cm, index=[str(i) for i in range(cm.shape[0])], columns=[str(i) for i in range(cm.shape[1])])

# Creating a heatmap for the confusion matrix using Plotly
# Why: A heatmap is a graphical representation of data where individual values are represented as colors.
# It provides a visually intuitive way to understand the confusion matrix.
fig = ff.create_annotated_heatmap(
    z=cm_df.values,  # The confusion matrix values
    x=list(cm_df.columns),  # Predicted labels (as column names)
    y=list(cm_df.index),  # True labels (as index)
    annotation_text=cm_df.values,  # Text to show on the heatmap (confusion matrix values)
    colorscale='Blues'  # Color scale of the heatmap
)

# Updating the layout of the plot to add titles and axis labels
fig.update_layout(
    title='Confusion Matrix',
    xaxis_title='Predicted Label',
    yaxis_title='True Label'
)

# Displaying the plot
fig.show()

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           1       0.00      0.00      0.00         6
           2       0.00      0.00      0.00         3
           3       1.00      0.43      0.60         7
           4       0.00      0.00      0.00         3
           5       1.00      0.56      0.71         9
           6       0.71      1.00      0.83        60

    accuracy                           0.73        93
   macro avg       0.39      0.28      0.31        93
weighted avg       0.63      0.73      0.65        93



In [15]:
from sklearn.linear_model import LogisticRegression

log_reg = LogisticRegression(random_state=42)
log_reg.fit(X_train, y_train)
y_pred_log_reg = log_reg.predict(X_test)
print(f'Logistic Regression Accuracy: {accuracy_score(y_test, y_pred_log_reg)}')

Logistic Regression Accuracy: 0.875


In [16]:
from sklearn.linear_model import LogisticRegression

log_reg = LogisticRegression(random_state=42)
log_reg.fit(X_train, y_train)

# Predictions
log_reg_test_predict = log_reg.predict(testingX)

# Print the accuracy
print(f'Logistic Regression Accuracy: {accuracy_score(testingY, log_reg_test_predict)}')

# Classification report and confusion matrix
report_log_reg = classification_report(testingY, log_reg_test_predict)
print(report_log_reg)

cm_log_reg = confusion_matrix(testingY, log_reg_test_predict)
cm_df_log_reg = pd.DataFrame(cm_log_reg, index=[str(i) for i in range(cm_log_reg.shape[0])], columns=[str(i) for i in range(cm_log_reg.shape[1])])

fig_log_reg = ff.create_annotated_heatmap(
    z=cm_df_log_reg.values,
    x=list(cm_df_log_reg.columns),
    y=list(cm_df_log_reg.index),
    annotation_text=cm_df_log_reg.values,
    colorscale='Blues'
)
fig_log_reg.update_layout(title='Logistic Regression Confusion Matrix', xaxis_title='Predicted Label', yaxis_title='True Label')
fig_log_reg.show()

Logistic Regression Accuracy: 0.8709677419354839
              precision    recall  f1-score   support

           0       0.50      0.40      0.44         5
           1       1.00      0.33      0.50         6
           2       0.67      0.67      0.67         3
           3       0.88      1.00      0.93         7
           4       1.00      0.67      0.80         3
           5       1.00      0.89      0.94         9
           6       0.88      0.97      0.92        60

    accuracy                           0.87        93
   macro avg       0.85      0.70      0.74        93
weighted avg       0.87      0.87      0.86        93



In [17]:
import tensorflow as tf

cnn_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(48,48,1)),
    tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(7, activation='softmax')  # Assuming 7 emotion classes
])

cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# You will need to reshape X_train and X_test to match the input shape and convert y_train and y_test to categorical
X_train_cnn = X_train.reshape(-1, 48, 48, 1)
X_test_cnn = X_test.reshape(-1, 48, 48, 1)
y_train_cnn = tf.keras.utils.to_categorical(y_train)
y_test_cnn = tf.keras.utils.to_categorical(y_test)

history = cnn_model.fit(X_train_cnn, y_train_cnn, validation_data=(X_test_cnn, y_test_cnn), epochs=29, batch_size=64)

Epoch 1/29
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step - accuracy: 0.3956 - loss: 2.3562 - val_accuracy: 0.7222 - val_loss: 1.1786
Epoch 2/29
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.7600 - loss: 1.1007 - val_accuracy: 0.7639 - val_loss: 0.8071
Epoch 3/29
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.7878 - loss: 0.7150 - val_accuracy: 0.7986 - val_loss: 0.6559
Epoch 4/29
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.8360 - loss: 0.5505 - val_accuracy: 0.8472 - val_loss: 0.5753
Epoch 5/29
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.8889 - loss: 0.3804 - val_accuracy: 0.8542 - val_loss: 0.5190
Epoch 6/29
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.9143 - loss: 0.2945 - val_accuracy: 0.8611 - val_loss: 0.4644
Epoch 7/29
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━

In [19]:
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import plotly.figure_factory as ff

# Define and compile the CNN model
cnn_model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(48,48,1)),
    tf.keras.layers.MaxPooling2D(pool_size=(2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(7, activation='softmax')  # Assuming 7 emotion classes
])
cnn_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Prepare data
X_train_cnn = X_train.reshape(-1, 48, 48, 1)
X_test_cnn = testingX.reshape(-1, 48, 48, 1)
y_train_cnn = tf.keras.utils.to_categorical(y_train)
y_test_cnn = tf.keras.utils.to_categorical(testingY)

# Train the CNN model
history = cnn_model.fit(X_train_cnn, y_train_cnn, validation_data=(X_test_cnn, y_test_cnn), epochs=30, batch_size=64)

# Predictions
cnn_test_predict = cnn_model.predict(X_test_cnn).argmax(axis=1)

# Print the accuracy
accuracy = accuracy_score(testingY, cnn_test_predict)
print(f'CNN Accuracy: {accuracy}')

# Classification report
report_cnn = classification_report(testingY, cnn_test_predict)
print(report_cnn)

# Confusion matrix
cm_cnn = confusion_matrix(testingY, cnn_test_predict)
cm_df_cnn = pd.DataFrame(cm_cnn, index=range(7), columns=range(7))

# Create confusion matrix heatmap using Plotly
fig_cnn = ff.create_annotated_heatmap(
    z=cm_df_cnn.values,
    x=[str(i) for i in range(7)],
    y=[str(i) for i in range(7)],
    annotation_text=cm_df_cnn.values,
    colorscale='Blues'
)

# Update layout and display confusion matrix
fig_cnn.update_layout(
    title="Confusion Matrix for CNN Model",
    xaxis_title="Predicted Labels",
    yaxis_title="True Labels"
)

fig_cnn.show()


Epoch 1/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step - accuracy: 0.4155 - loss: 2.5024 - val_accuracy: 0.6452 - val_loss: 1.3367
Epoch 2/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.6815 - loss: 0.9865 - val_accuracy: 0.7312 - val_loss: 0.9781
Epoch 3/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.7860 - loss: 0.6781 - val_accuracy: 0.7419 - val_loss: 0.8894
Epoch 4/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.7971 - loss: 0.6061 - val_accuracy: 0.7419 - val_loss: 0.7813
Epoch 5/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.8791 - loss: 0.4001 - val_accuracy: 0.8172 - val_loss: 0.6630
Epoch 6/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step - accuracy: 0.9433 - loss: 0.3139 - val_accuracy: 0.8172 - val_loss: 0.6353
Epoch 7/30
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━

In [20]:
import joblib
joblib.dump(log_reg, 'model_file.h5')

# Later, you can load the model from the file
loaded_model = joblib.load('model_file.h5')

In [22]:
keras_model = Sequential()
keras_model.add(Dense(units=7, input_dim=X_train.shape[1], activation='sigmoid'))

# Transfer weights from logistic regression to Keras model
keras_model.layers[0].set_weights([log_reg.coef_.T, log_reg.intercept_])

# Save the Keras model to an HDF5 file
keras_model.save('model_file.h5')

