In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file in it
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/balance-fer2013-data-image/__results__.html
/kaggle/input/balance-fer2013-data-image/balanced_emotions_dataset.csv
/kaggle/input/balance-fer2013-data-image/__notebook__.ipynb
/kaggle/input/balance-fer2013-data-image/__output__.json
/kaggle/input/balance-fer2013-data-image/custom.css
/kaggle/input/balance-fer2013-data-image/__results___files/__results___8_0.png
/kaggle/input/balance-fer2013-data-image/__results___files/__results___11_0.png
/kaggle/input/balance-fer2013-data-image/__results___files/__results___22_0.png


In [2]:
import numpy as np
import pandas as pd
import cv2  # For image processing
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [3]:
# Load the balanced emotions CSV into memory and preview the first rows
DATASET_CSV = "/kaggle/input/balance-fer2013-data-image/balanced_emotions_dataset.csv"
data = pd.read_csv(DATASET_CSV)
data.head()

Unnamed: 0,emotion,pixels,Usage,emotion_text
0,0,143 144 145 145 146 145 151 149 142 135 128 13...,Training,anger
1,0,182 167 154 146 134 84 30 10 3 17 27 10 7 11 6...,Training,anger
2,0,207 208 208 209 208 214 176 139 116 66 124 135...,Training,anger
3,0,193 193 196 203 216 229 233 243 189 114 87 95 ...,Training,anger
4,0,168 195 207 208 209 207 208 204 207 207 208 20...,Training,anger


In [4]:
# (rows, columns) of the raw dataset frame
data.shape

(28000, 4)

In [5]:
# Each row stores its image as one space-separated string; count the values in the first row
first_pixels = data.loc[0, 'pixels']
print(f"Length of pixel data: {len(first_pixels.split())}")

Length of pixel data: 2304


In [6]:
def preprocess_pixels(img_array, source_size=48, target_size=299):
    """Turn one flat grayscale pixel vector into a normalised 3-channel image.

    Args:
        img_array: Array-like holding ``source_size * source_size`` pixel
            intensities (0-255 scale). Non-floating input is cast to float32
            before resizing.
        source_size: Side length of the square source image (default 48).
        target_size: Side length of the square output image (default 299).

    Returns:
        NumPy array of shape ``(target_size, target_size, 3)`` whose three
        channels are identical copies of the resized image divided by 255.0.

    Raises:
        ValueError: If ``img_array`` does not contain exactly
            ``source_size * source_size`` values (raised by ``reshape``).
    """
    pixels = np.asarray(img_array)
    if not np.issubdtype(pixels.dtype, np.floating):
        # Lists / integer arrays are converted up front so the resize always
        # operates on floating-point data.
        pixels = pixels.astype(np.float32)
    gray = pixels.reshape(source_size, source_size)

    # Interpolate the single grayscale plane once and replicate it afterwards:
    # the three channels are identical, so resizing all of them is wasted work.
    resized = cv2.resize(gray, (target_size, target_size)) / 255.0

    # Replicate to 3 channels (Xception expects RGB-shaped input).
    return np.repeat(resized[..., np.newaxis], 3, axis=-1)


In [7]:
# Process data in chunks
def preprocess_chunk(chunk):
    """Return a copy of ``chunk`` with parsed pixels and a model-ready image column.

    The input frame is left untouched, so the function can safely be called
    again on the same chunk (e.g. when the notebook cell is re-run); the
    previous in-place version replaced the ``pixels`` strings with arrays and
    then failed on ``x.split()`` the second time.

    Args:
        chunk: DataFrame with a ``pixels`` column of space-separated pixel
            value strings.

    Returns:
        New DataFrame with the same columns, where ``pixels`` holds float32
        NumPy vectors and the added ``image`` column holds the output of
        ``preprocess_pixels`` for each row.
    """
    # Parse each space-separated string into a flat float32 vector
    pixel_arrays = chunk['pixels'].apply(lambda x: np.array(x.split(), dtype='float32'))
    # assign() builds a new frame instead of writing into the caller's chunk
    return chunk.assign(
        pixels=pixel_arrays,
        image=pixel_arrays.apply(preprocess_pixels),
    )

In [8]:
# Read the CSV in pieces of `chunk_size` rows and collect every piece in a list
chunk_size = 1000
chunks = list(
    pd.read_csv(
        "/kaggle/input/balance-fer2013-data-image/balanced_emotions_dataset.csv",
        chunksize=chunk_size,
    )
)

In [None]:
# Run preprocess_chunk over every loaded chunk
processed_chunks = [preprocess_chunk(piece) for piece in chunks]

# Stitch the processed pieces back into a single frame with a fresh 0..n-1 index
final_data = pd.concat(processed_chunks, ignore_index=True)

In [None]:
# Preview only the first rows; every row carries a full image array
final_data.head()

In [None]:
# Shape of the first preprocessed image (preprocess_pixels resizes to 299x299 and repeats to 3 channels)
final_data['image'][0].shape

In [None]:
# Smallest value in the first preprocessed image (pixels are divided by 255.0 in preprocess_pixels)
final_data['image'][0].min()

In [None]:
# Largest value in the first preprocessed image (pixels are divided by 255.0 in preprocess_pixels)
final_data['image'][0].max()

In [None]:
# Save the preprocessed data (currently disabled). NOTE: the call below writes CSV, not Parquet.
#final_data.to_csv("/kaggle/working/preprocessed_train_dataset.csv", index=False)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

In [None]:
# Take labels from the same frame as the images so both are guaranteed to be
# row-aligned (previously labels came from `data`, a separate read of the CSV).
labels = final_data['emotion'].values

# Stratified 80/20 split; random_state fixes the shuffle for reproducibility
X_train, X_val, y_train, y_val = train_test_split(
    final_data['image'].tolist(),
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

In [None]:
# Stack the per-sample images of each split into a single NumPy array
X_train, X_val = np.array(X_train), np.array(X_val)

# One-hot encode the integer labels; the class count is taken from the
# distinct label values present in the training split
num_classes = np.unique(y_train).size
y_train, y_val = (
    to_categorical(split_labels, num_classes)
    for split_labels in (y_train, y_val)
)

In [None]:
# y_train is already one-hot encoded at this point, so np.unique(y_train) would
# only see the values 0 and 1; the class count is the width of the encoding.
num_classes = y_train.shape[1]

In [None]:
!pip install --upgrade --force-reinstall tensorflow-addons==0.21.0


In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import tensorflow_addons as tfa

# Constants
TARGET_SIZE = 299  # Model input side length; preprocess_pixels emits 299x299x3 images
BATCH_SIZE = 32  # Adjust based on your TPU
EPOCHS = 50  # Upper bound; EarlyStopping below may end training sooner

# 1. Distribution strategy
# The TPU is connected and initialised before any other TensorFlow object in
# this cell is created. When no TPU can be resolved, fall back to the default
# (single-device) strategy so the cell still runs on CPU/GPU.
try:
    tpu_resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(tpu_resolver)
    tf.tpu.experimental.initialize_tpu_system(tpu_resolver)
    strategy = tf.distribute.TPUStrategy(tpu_resolver)
except ValueError:
    strategy = tf.distribute.get_strategy()

# 2. Train/validation data
# Reuse the stratified split and one-hot labels produced in the earlier cells
# (this cell previously re-split `train_images`, which is never defined here).
train_img, valid_img = X_train, X_val
train_labels, valid_labels = y_train, y_val
NUM_OUTPUTS = train_labels.shape[-1]  # width of the one-hot encoding

# One pass over each split per epoch
STEPS_PER_EPOCH = max(1, len(train_img) // BATCH_SIZE)
VALIDATION_STEPS = int(np.ceil(len(valid_img) / BATCH_SIZE))


# 3. Build Dataset Function
def build_dataset(images, labels=None, bsize=BATCH_SIZE, repeat=True, shuffle=True, augment=False, cache=False):
    """Wrap in-memory images (and optional labels) in a batched tf.data pipeline.

    Args:
        images: Array-like of images; the first axis indexes samples.
        labels: Optional array-like of targets aligned with ``images``. When
            ``None`` the dataset yields images only (e.g. for inference).
        bsize: Batch size.
        repeat: Repeat the dataset indefinitely (use with ``steps_per_epoch``).
        shuffle: Shuffle with a buffer covering the whole dataset.
        augment: Map the module-level ``augmentation_function`` over the
            samples. That function is not defined in this notebook and must
            exist before passing ``augment=True``.
        cache: Cache the raw (un-augmented) elements.

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``(images, labels)``, or of
        images alone when ``labels`` is ``None``.
    """
    # from_tensor_slices cannot slice a (images, None) tuple, so only pair the
    # labels with the images when they are actually provided.
    tensors = images if labels is None else (images, labels)
    dataset = tf.data.Dataset.from_tensor_slices(tensors)
    if cache:
        # Cache before augmenting so each epoch sees freshly augmented samples
        dataset = dataset.cache()
    if augment:
        dataset = dataset.map(augmentation_function, num_parallel_calls=tf.data.AUTOTUNE)
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(images))
    dataset = dataset.batch(bsize)
    if repeat:
        dataset = dataset.repeat()
    return dataset.prefetch(buffer_size=tf.data.AUTOTUNE)


# 4. Tensorflow datasets
# NOTE(review): the full image arrays are handed to from_tensor_slices in one
# piece; confirm they fit in memory alongside the NumPy copies.
train_df = build_dataset(train_img, tf.cast(train_labels, tf.float32), bsize=BATCH_SIZE, cache=True)
valid_df = build_dataset(valid_img, tf.cast(valid_labels, tf.float32), bsize=BATCH_SIZE,
                         repeat=False, shuffle=False, augment=False, cache=True)
# No test images are loaded anywhere in this notebook (`test_images` was never
# defined). Once they exist, build the pipeline with
# build_dataset(test_images, repeat=False, shuffle=False).
test_df = None


# 5. Model Creation Function
def create_model(num_outputs=NUM_OUTPUTS, input_size=TARGET_SIZE):
    """Build and compile an ImageNet-pretrained Xception classifier.

    Args:
        num_outputs: Number of units in the final sigmoid layer; defaults to
            the width of the one-hot training labels.
        input_size: Side length of the square RGB input.

    Returns:
        A compiled ``tf.keras`` model: Xception backbone without its top,
        global average pooling, dropout 0.3 and a sigmoid output layer.
    """
    conv_base = tf.keras.applications.Xception(include_top=False, weights='imagenet',
                                               input_shape=(input_size, input_size, 3))
    features = layers.GlobalAveragePooling2D()(conv_base.output)
    features = layers.Dropout(0.3)(features)
    # NOTE(review): sigmoid outputs + focal loss is a multi-label formulation;
    # for mutually exclusive classes softmax + categorical cross-entropy may
    # be the better fit. Kept as-is here.
    outputs = layers.Dense(num_outputs, activation="sigmoid")(features)
    model = models.Model(conv_base.input, outputs)

    # `learning_rate` is the supported keyword; `lr` is deprecated
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss=tfa.losses.SigmoidFocalCrossEntropy(alpha=0.5, gamma=2),
                  metrics=[tf.keras.metrics.AUC(multi_label=True)])
    return model


# 6. Create the model inside the strategy scope so its variables are placed
# by the strategy
with strategy.scope():
    model = create_model()

# Print model summary
model.summary()

# 7. Callbacks (all monitor the validation loss)
# The checkpoint file name is kept unchanged so existing references to it stay valid.
model_save = ModelCheckpoint('./Xcep_750_best_weights_TPU.h5',
                             save_best_only=True,
                             save_weights_only=True,
                             monitor='val_loss',
                             mode='min', verbose=1)
early_stop = EarlyStopping(monitor='val_loss', min_delta=0.001,
                           patience=5, mode='min', verbose=1,
                           restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5,
                              patience=2, min_delta=0.0001,
                              mode='min', verbose=1)

# 8. Training
history = model.fit(
    train_df,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_data=valid_df,
    validation_steps=VALIDATION_STEPS,
    callbacks=[model_save, early_stop, reduce_lr]
)


In [None]:
import tensorflow as tf
print(tf.__version__)


In [None]:
# A simple model to test environment setup
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

# Generate reproducible random data for the smoke test (seeded generator;
# float32 halves the memory of the default float64 array)
smoke_rng = np.random.default_rng(0)
X_test = smoke_rng.random((100, 299, 299, 3), dtype=np.float32)
y_test = smoke_rng.integers(0, 10, size=100)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

# Define a small test model
test_model = Sequential([
    Flatten(input_shape=(299, 299, 3)),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax')
])

test_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Run a quick test fit to confirm environment setup
test_model.fit(X_test, y_test, epochs=1, batch_size=16)


In [None]:
# Train on the in-memory arrays. The early-stopping callback defined in the
# training cell is named `early_stop`; `early_stopping` does not exist.
history = model.fit(X_train, y_train,
                    validation_data=(X_val, y_val),
                    epochs=5,  # Adjust number of epochs as needed
                    batch_size=32,  # Adjust batch size based on your hardware
                    callbacks=[early_stop])