# Making Label CSVs

In [None]:
import os
import pandas as pd

In [None]:
# Load the annotation pickle of the test split.
# NOTE(review): unpickling can execute arbitrary code; only load this file
# when it comes from a trusted source.
annotation = pd.read_pickle('../input/be-project/test/test/annotation_test.pkl')
type(annotation)

In [None]:
# Names of the cropped-frame files of the test split, in sorted order.
filenames = sorted(os.listdir('../input/be-project/test/test/cropped_frames'))
filenames[:10]

In [None]:
# Build one label row per cropped frame.  The annotation key of a frame is
# the text before the last "_" of its file name plus ".mp4".
# Rows are collected in a list and converted once: DataFrame.append was
# removed in pandas 2.0 and copied the whole frame on every iteration.
trait_keys = {
    'O': 'openness',
    'C': 'conscientiousness',
    'E': 'extraversion',
    'A': 'agreeableness',
    'N': 'neuroticism',
}

rows = []
for filename in filenames:
    video = filename[:filename.rindex('_')] + '.mp4'
    row = {'Video': filename}
    for column, key in trait_keys.items():
        row[column] = annotation[key][video]
    rows.append(row)

df = pd.DataFrame(rows, columns=['Video', 'O', 'C', 'E', 'A', 'N'])
df = df.drop_duplicates()
df = df.reset_index(drop=True)
df

In [None]:
df.to_csv('/kaggle/working/test_images.csv') 

# Debiasing Training Code

In [None]:
import numpy as np
import tensorflow as tf
import pandas as pd
from tensorflow import keras

In [None]:
class AdversarialDebiasing(keras.Model):
    """Classifier trained jointly with an adversary on its predictions.

    The classifier maps the inputs to predictions; the adversary receives
    those predictions and is trained to predict the protected attributes.
    When ``debias`` is true, every classifier gradient has its component
    along the adversary-loss gradient removed and is additionally moved
    against that gradient, scaled by ``alpha``.

    Args:
        classifier: Keras model producing the main predictions.
        adversary: Keras model taking the classifier output as its input.
        alpha: Weight of the adversary-gradient term in the classifier update.
        c_loss: Keras metric tracking the classifier.
        a_loss: Keras metric tracking the adversary.
        debias: If false, the classifier is updated with its plain gradient.
        data_type: ``'image'`` (targets arrive as ``[labels, protected]``) or
            ``'audio'`` (targets are one array whose first five columns are
            the labels and whose remaining columns are the protected
            attributes).

    NOTE(review): regularization losses registered on the classifier (for
    example via ``kernel_regularizer``) are not added to the classifier loss
    in ``train_step`` -- confirm whether that is intended.
    """

    def __init__(self, classifier, adversary, alpha, c_loss, a_loss, debias=True, data_type='image'):
        super(AdversarialDebiasing, self).__init__()
        self.classifier = classifier
        self.adversary = adversary
        self.c_loss = c_loss  # metric for the classifier
        self.a_loss = a_loss  # metric for the adversary
        self.protect_loss_weight = alpha
        self.debias = debias
        self.data_type = data_type

    @property
    def metrics(self):
        """Metrics reported by (and reset between) fit/evaluate epochs."""
        return [self.c_loss, self.a_loss]

    def compile(self, optimizer, c_loss_fn, a_loss_fn):
        """Configure training.

        Args:
            optimizer: Pair ``[classifier_optimizer, adversary_optimizer]``.
            c_loss_fn: Loss applied to ``(labels, classifier predictions)``.
            a_loss_fn: Loss applied to ``(protected, adversary predictions)``.
        """
        super(AdversarialDebiasing, self).compile()
        self.c_optimizer = optimizer[0]
        self.a_optimizer = optimizer[1]
        self.c_loss_fn = c_loss_fn
        self.a_loss_fn = a_loss_fn

    def call(self, data):
        """Return ``[classifier predictions, adversary predictions]``."""
        x = data
        y = self.classifier(x)
        z = self.adversary(y)
        return [y, z]

    def _split_targets(self, y):
        """Split raw targets into ``(labels, protected)`` per ``data_type``."""
        if self.data_type == 'image':
            return y[0], y[1]
        if self.data_type == 'audio':
            return y[:, 0:5], y[:, 5:]
        raise ValueError(
            "data_type must be 'image' or 'audio', got {!r}".format(self.data_type)
        )

    def train_step(self, data):
        """Run one optimisation step for classifier and adversary."""
        x, y = data
        y, e_g = self._split_targets(y)

        # A single forward pass under a persistent tape feeds all three
        # gradient computations.  training=True is passed explicitly: the
        # sub-models are called directly (not through this model's __call__),
        # so without it dropout layers would run in inference mode.
        with tf.GradientTape(persistent=True) as tape:
            c_predictions = self.classifier(x, training=True)
            a_predictions = self.adversary(c_predictions, training=True)
            c_loss = self.c_loss_fn(y, c_predictions)
            a_loss = self.a_loss_fn(e_g, a_predictions)

        c_weights = self.classifier.trainable_weights
        a_weights = self.adversary.trainable_weights

        c_grads = tape.gradient(c_loss, c_weights)
        # Gradient of the adversary loss w.r.t. the classifier (projection direction).
        a_grads = tape.gradient(a_loss, c_weights)
        a_grads_own = tape.gradient(a_loss, a_weights)
        del tape  # release the resources held by the persistent tape

        if self.debias:
            tiny = np.finfo(np.float32).tiny  # guards the division for zero gradients
            pred_grad = []  # (gradient, variable) pairs for the classifier update
            # Gradients are paired positionally with their variables instead
            # of through a name-keyed dict, which breaks when names collide.
            for g, protect, v in zip(c_grads, a_grads, c_weights):
                unit_protect = protect / (tf.norm(protect) + tiny)
                g -= tf.reduce_sum(g * unit_protect) * unit_protect  # g - projection
                g -= self.protect_loss_weight * protect  # g - projection - alpha * adv grad
                pred_grad.append((g, v))
            self.c_optimizer.apply_gradients(pred_grad)
        else:
            self.c_optimizer.apply_gradients(zip(c_grads, c_weights))

        self.a_optimizer.apply_gradients(zip(a_grads_own, a_weights))

        self.c_loss.update_state(y, c_predictions)
        self.a_loss.update_state(e_g, a_predictions)

        return {m.name: m.result() for m in self.metrics}

    def test_step(self, data):
        """Update both metrics on one evaluation batch (no weight updates)."""
        x, y = data
        y, e_g = self._split_targets(y)

        c_predictions = self.classifier(x, training=False)
        a_predictions = self.adversary(c_predictions, training=False)

        self.c_loss.update_state(y, c_predictions)
        self.a_loss.update_state(e_g, a_predictions)

        return {m.name: m.result() for m in self.metrics}

# Fairness Metric Function

In [None]:
!pip install aif360

In [None]:
!pip install fairlearn

In [None]:
!pip install --upgrade scipy

In [None]:
from aif360.sklearn import metrics

In [None]:
def fairness_metrics(data, trait, pos):
    """Print the disparate impact ratio of one trait's binarised predictions.

    The predictions in column ``trait + '_pred'`` are thresholded at 0.5
    (values below become 0, values at or above become 1) and the ratio is
    printed once for 'Ethnicity' and once for 'Gender'.

    Args:
        data: DataFrame containing the column ``trait + '_pred'``.  The
            protected attributes are referenced by name through ``prot_attr``;
            ``ocean_fairness`` supplies them as index levels.
        trait: Trait column name, e.g. ``'O'``.
        pos: Label (0 or 1) passed to the metric as ``pos_label``.
    """
    pred_col = trait + '_pred'

    # Only the predictions are evaluated; the ground-truth column is not
    # needed by the metric calls below.
    trait_data_pred = data[[pred_col]].copy()
    trait_data_pred.loc[trait_data_pred[pred_col] < 0.5, pred_col] = 0
    trait_data_pred.loc[trait_data_pred[pred_col] >= 0.5, pred_col] = 1

    for prot_attr in ('Ethnicity', 'Gender'):
        print(prot_attr)
        print(metrics.disparate_impact_ratio(trait_data_pred,
                                             prot_attr=prot_attr,
                                             priv_group=1,
                                             pos_label=pos))


In [None]:
def ocean_fairness(metric_check):
    """Print fairness metrics for every OCEAN trait and both outcome labels.

    Args:
        metric_check: DataFrame with the columns 'Ethnicity', 'Gender' and,
            for each trait in O/C/E/A/N, the trait column and its
            ``'<trait>_pred'`` column.
    """
    # set_index returns a new frame here: the in-place variant moved
    # 'Ethnicity'/'Gender' out of the caller's columns, so the caller's frame
    # was altered and a second call on it failed.
    indexed = metric_check.set_index([metric_check.index, 'Ethnicity', 'Gender'])

    for trait in ('O', 'C', 'E', 'A', 'N'):
        print(trait)
        trait_frame = indexed[[trait, trait + '_pred']]
        for pos in (0, 1):
            print(pos)
            fairness_metrics(trait_frame, trait, pos)

# Training on Image Data

In [None]:
from tensorflow.keras import layers
from keras.applications.vgg16 import VGG16
from keras.layers import Input, Dense, Flatten,Dropout, LeakyReLU, Rescaling, BatchNormalization

In [None]:
IMAGE_SIZE = [224,224]

In [None]:
# ImageNet-pretrained VGG16 convolutional base with every layer frozen.
vgg = VGG16(weights='imagenet', include_top=False, input_shape=IMAGE_SIZE + [3])
for vgg_layer in vgg.layers:
    vgg_layer.trainable = False

# Classifier: VGG16 features -> dense head -> five sigmoid outputs.
classifier = keras.Sequential(name="classifier")
classifier.add(vgg)
classifier.add(Flatten())
classifier.add(Dense(4096, activation='relu', name='dense'))
classifier.add(Dropout(0.5))
classifier.add(Dense(5, activation='sigmoid', name="output"))

# Adversary: takes the five classifier outputs and emits two sigmoid outputs.
adversary = keras.Sequential(name="adversary")
adversary.add(keras.Input(shape=(5,)))
adversary.add(Dense(200, activation='relu', name="dense"))
adversary.add(Dense(2, activation='sigmoid', name="output"))

In [None]:
tf.keras.utils.plot_model(
    classifier,
    to_file="model.png",
    show_shapes=True,
    show_dtype=False,
    show_layer_names=False,
    rankdir="TB",
    expand_nested=False,
    dpi=96,
    layer_range=None,
#     show_layer_activations=True,
)

In [None]:
# Training labels: drop the 'Unnamed: 0' column and order rows by 'Video'.
train_df = (
    pd.read_csv('../input/be-project/dataframes/train_images.csv')
    .drop(columns=['Unnamed: 0'])
    .sort_values(by=['Video'])
)
train_df

In [None]:
# Validation labels: drop the 'Unnamed: 0' column and order rows by 'Video'.
val_df = (
    pd.read_csv('../input/be-project/dataframes/validation_images.csv')
    .drop(columns=['Unnamed: 0'])
    .sort_values(by=['Video'])
)
val_df

In [None]:
# Test labels: drop the 'Unnamed: 0' column and order rows by 'Video'.
test_df = (
    pd.read_csv('../input/be-project/dataframes/test_images.csv')
    .drop(columns=['Unnamed: 0'])
    .sort_values(by=['Video'])
)
test_df

In [None]:
# Protected-attribute annotations (semicolon-separated), ordered by video name.
ethnicity_gender = pd.read_csv('../input/be-project/dataframes/eth_gender_annotations_dev.csv', delimiter = ';')
# sort_values returns a new frame; without the assignment the sort was discarded.
ethnicity_gender = ethnicity_gender.sort_values(by=['VideoName'])
ethnicity_gender

In [None]:
# Protected-attribute annotations of the test set (semicolon-separated), ordered by video name.
ethnicity_gender_test = pd.read_csv('../input/be-project/dataframes/eth_gender_annotations_test.csv', delimiter = ';')
# sort_values returns a new frame; without the assignment the sort was discarded.
ethnicity_gender_test = ethnicity_gender_test.sort_values(by=['VideoName'])
ethnicity_gender_test

In [None]:
def get_video_name(data):
    """Add a 'VideoName' column to ``data`` in place.

    The value is the first 15 characters of the 'Video' column followed by
    '.mp4'.  Nothing is returned.
    """
    stems = data['Video'].str[:15]
    data['VideoName'] = stems + '.mp4'

In [None]:
def merge_protected_attributes(ethnicity_gender, data):
    """Attach 'Ethnicity' and 'Gender' to the label rows.

    Args:
        ethnicity_gender: Frame with 'VideoName', 'Ethnicity' and 'Gender'.
        data: Label frame with 'Video', the five trait columns and 'VideoName'.

    Returns:
        Inner join of both frames on 'VideoName', reduced to the columns
        Video, O, C, E, A, N, Ethnicity, Gender.
    """
    wanted = ['Video', 'O', 'C', 'E', 'A', 'N', 'Ethnicity', 'Gender']
    joined = data.merge(ethnicity_gender, how='inner', on='VideoName')
    return joined[wanted]

In [None]:
get_video_name(train_df)
train_df = merge_protected_attributes(ethnicity_gender, train_df)
train_df

In [None]:
get_video_name(val_df)
val_df = merge_protected_attributes(ethnicity_gender, val_df)
val_df

In [None]:
get_video_name(test_df)
test_df = merge_protected_attributes(ethnicity_gender_test, test_df)
test_df

In [None]:
# Binarise the protected attributes: Ethnicity codes 1 and 3 become 0 and
# code 2 becomes 1; Gender code 2 becomes 0 (code 1 already equals its target).
ethnicity_codes = train_df['Ethnicity']
train_df['Ethnicity'] = (
    ethnicity_codes
    .mask(ethnicity_codes.isin([1, 3]), 0)
    .mask(ethnicity_codes == 2, 1)
)

gender_codes = train_df['Gender']
train_df['Gender'] = gender_codes.mask(gender_codes == 2, 0)

train_df

In [None]:
# Binarise the protected attributes: Ethnicity codes 1 and 3 become 0 and
# code 2 becomes 1; Gender code 2 becomes 0 (code 1 already equals its target).
ethnicity_codes = val_df['Ethnicity']
val_df['Ethnicity'] = (
    ethnicity_codes
    .mask(ethnicity_codes.isin([1, 3]), 0)
    .mask(ethnicity_codes == 2, 1)
)

gender_codes = val_df['Gender']
val_df['Gender'] = gender_codes.mask(gender_codes == 2, 0)

val_df

In [None]:
# Binarise the protected attributes: Ethnicity codes 1 and 3 become 0 and
# code 2 becomes 1; Gender code 2 becomes 0 (code 1 already equals its target).
ethnicity_codes = test_df['Ethnicity']
test_df['Ethnicity'] = (
    ethnicity_codes
    .mask(ethnicity_codes.isin([1, 3]), 0)
    .mask(ethnicity_codes == 2, 1)
)

gender_codes = test_df['Gender']
test_df['Gender'] = gender_codes.mask(gender_codes == 2, 0)

test_df

In [None]:
from keras.preprocessing.image import ImageDataGenerator

train_generator = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True
    # validation_split=0.2,
)

val_generator = ImageDataGenerator(
    rescale=1./255,
)

In [None]:
def generate_data_generator(generator, df, folder):
    """Yield shuffled ``(images, [traits, protected])`` batches forever.

    Args:
        generator: ImageDataGenerator used to load the frames.
        df: DataFrame with the file-name column 'Video', the trait columns
            'O', 'C', 'E', 'A', 'N' and the columns 'Ethnicity' and 'Gender'.
        folder: Split directory name; frames are read from
            ``../input/be-project/<folder>/<folder>/cropped_frames/``.

    Yields:
        ``(images, [traits, protected])`` where ``traits`` holds the five
        trait columns and ``protected`` the two protected-attribute columns
        of the same batch rows.
    """
    trait_cols = ['O', 'C', 'E', 'A', 'N']
    protected_cols = ['Ethnicity', 'Gender']

    # One iterator delivers all seven target columns.  The previous version
    # ran two equally-seeded iterators, which decoded every image twice and
    # kept the two label sets aligned only as long as the seeds matched.
    batches = generator.flow_from_dataframe(
        df,
        directory="../input/be-project/{}/{}/cropped_frames/".format(folder, folder),
        x_col='Video',
        y_col=trait_cols + protected_cols,
        target_size=IMAGE_SIZE,
        color_mode='rgb',
        class_mode='raw',
        shuffle=True,
        seed=6,
    )

    n_traits = len(trait_cols)
    while True:
        images, targets = next(batches)
        yield images, [targets[:, :n_traits], targets[:, n_traits:]]

In [None]:
adv_cls = AdversarialDebiasing(classifier, adversary, 1,
                               keras.metrics.MeanAbsoluteError(name="c_loss"), 
                               keras.metrics.BinaryCrossentropy(name="a_loss"),
                               debias=True,
                               data_type='image'
                              )


adv_cls.compile(
    optimizer=[keras.optimizers.SGD(nesterov=True, decay=0.0001),keras.optimizers.SGD(nesterov=True),],
    c_loss_fn = keras.losses.MeanAbsoluteError(),
    a_loss_fn = keras.losses.BinaryCrossentropy(),
)

In [None]:
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath='checkpoint/',
    save_weights_only=True,
    monitor='val_c_loss',
    mode='min',
    save_best_only=True)

In [None]:
import math

# The generators are created without an explicit batch_size, so batches use
# the Keras default of 32.  Step counts must be integers; rounding up keeps
# the final partial batch of each pass.
batch_size = 32
history = adv_cls.fit(generate_data_generator(train_generator, train_df, 'train'),
                      epochs=50,
                      steps_per_epoch=math.ceil(len(train_df) / batch_size),
                      validation_data = generate_data_generator(val_generator, val_df, 'val'),
                      validation_steps = math.ceil(len(val_df) / batch_size),
                      callbacks=[model_checkpoint_callback]
                     )

In [None]:
from matplotlib import pyplot as plt
plt.plot(history.history['c_loss'])
plt.plot(history.history['val_c_loss'])

In [None]:
from matplotlib import pyplot as plt
plt.plot(history.history['a_loss'])
plt.plot(history.history['val_a_loss'])

In [None]:
hist_df = pd.DataFrame(history.history) 
hist_df.to_csv('Debias True.csv')

In [None]:
adv_cls.save_weights('debias_true 50.h5')

In [None]:
import os
os.chdir(r'/kaggle/working')
from IPython.display import FileLink 
FileLink(r'debias_true 50.h5')

# Image predictions

In [None]:
from sklearn.metrics import mean_absolute_error

In [None]:
adv_cls = AdversarialDebiasing(classifier, adversary, 1,
                               keras.metrics.MeanAbsoluteError(name="c_loss"), 
                               keras.metrics.BinaryCrossentropy(name="a_loss"),
                               debias=True,
                               data_type='image'
                              )


adv_cls.compile(
    optimizer=[keras.optimizers.SGD(nesterov=True, decay=0.0001),keras.optimizers.SGD(nesterov=True),],
    c_loss_fn = keras.losses.MeanAbsoluteError(),
    a_loss_fn = keras.losses.BinaryCrossentropy(),
)

In [None]:
# Flag the subclassed model as built before loading the saved weights
# (presumably so load_weights does not require a prior forward pass).
# NOTE(review): the model above is created with debias=True while the active
# line loads 'debias_false 50.h5' -- confirm this is the intended checkpoint.
adv_cls.built=True
# adv_cls.load_weights('../input/models/image 4-new/debias_true 48.h5')
adv_cls.load_weights('../input/models/image 4-new/debias_false 50.h5')

In [None]:
from keras.preprocessing.image import ImageDataGenerator
test_generator = ImageDataGenerator(
    rescale=1./255,
)

In [None]:
def generate_pred_data_generator(generator, df, folder):
    """Yield ``(images, [traits, protected])`` batches in frame order, forever.

    Unlike the training generator, rows are not shuffled, so the yielded
    batches follow the row order of ``df``.

    Args:
        generator: ImageDataGenerator used to load the frames.
        df: DataFrame with the file-name column 'Video', the trait columns
            'O', 'C', 'E', 'A', 'N' and the columns 'Ethnicity' and 'Gender'.
        folder: Split directory name; frames are read from
            ``../input/be-project/<folder>/<folder>/cropped_frames/``.

    Yields:
        ``(images, [traits, protected])`` for consecutive batches of rows.
    """
    trait_cols = ['O', 'C', 'E', 'A', 'N']
    protected_cols = ['Ethnicity', 'Gender']

    # One iterator delivers all seven target columns, so every image is
    # decoded once instead of once per label set.
    batches = generator.flow_from_dataframe(
        df,
        directory="../input/be-project/{}/{}/cropped_frames/".format(folder, folder),
        x_col='Video',
        y_col=trait_cols + protected_cols,
        target_size=IMAGE_SIZE,
        color_mode='rgb',
        class_mode='raw',
        shuffle=False,
    )

    n_traits = len(trait_cols)
    while True:
        images, targets = next(batches)
        yield images, [targets[:, :n_traits], targets[:, n_traits:]]

In [None]:
import math

# The generator uses the default batch size of 32; `steps` must be an integer,
# and rounding up includes the final partial batch so every row is predicted.
img_pred = adv_cls.predict(generate_pred_data_generator(test_generator, test_df, 'test'),
                           steps=math.ceil(len(test_df) / 32))
img_pred

## Fairness Metrics

In [None]:
# metric_check = train_df[['O', 'C', 'E', 'A', 'N', 'Ethnicity', 'Gender', 'Video']].copy()
# Copy the selection: prediction columns are written to this frame next, and
# writing to a column slice of test_df triggers pandas' chained-assignment
# warning and may not behave as intended.
metric_check = test_df[['O', 'C', 'E', 'A', 'N', 'Ethnicity', 'Gender', 'Video']].copy()
metric_check

In [None]:
metric_check.loc[:, ['O_pred', 'C_pred', 'E_pred', 'A_pred', 'N_pred']] = img_pred[0]
metric_check

In [None]:
metric_check.to_csv('Image Debias False Pred.csv')

In [None]:
ocean_fairness(metric_check)

## Accuracy metrics

In [None]:
# y_true = train_df[['O', 'C', 'E', 'A', 'N']].to_numpy()
y_true = test_df[['O', 'C', 'E', 'A', 'N']].to_numpy()
y_true

In [None]:
y_pred = img_pred[0]
y_pred

In [None]:
# 1 - mean_absolute_error(y_true, y_pred, multioutput='raw_values')
1 - mean_absolute_error(y_true, y_pred)

# Audio

In [None]:
import numpy as np
import math
import os
import pandas as pd
import glob
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.layers import Input, TimeDistributed, LSTM, Dense, Dropout, Bidirectional, GRU, Normalization, SpatialDropout1D, CuDNNLSTM, Conv1D, MaxPooling2D, Flatten, Attention
from keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Adagrad, Adadelta, Adamax

In [None]:
time_steps = 15 # Frames extracted from the video
aud_ft = 68 # Number of Audio features extracted from each non overlapping frame 

In [None]:
from keras.regularizers import l1_l2, l1, l2


classifier = keras.Sequential(
    [
        Input(shape=(time_steps, aud_ft)),
        SpatialDropout1D(0.3),
        LSTM(20, return_sequences=False,dropout=0.2,recurrent_dropout=0.2, kernel_regularizer=l1()),
        Dense(5,activation='sigmoid'),
    ],
    name="classifier",
)

adversary = keras.Sequential(
    [
        keras.Input(shape=(5,)),
        Dense(200, activation='relu', name="dense"),
        Dense(2, activation='sigmoid', name="output"),
    ],
    name="adversary",
)

In [None]:
tf.keras.utils.plot_model(
    adversary,
    to_file="model.png",
    show_shapes=True,
    show_dtype=False,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=True,
    dpi=96,
    layer_range=None,
#     show_layer_activations=True,
)

In [None]:
def get_audio_features(folder):
    """Load the audio-feature CSVs of one split into a single array.

    Every ``*.wav_st.csv`` file is read without a header.  Files with fewer
    than ``time_steps`` rows are zero-padded at the end.

    Args:
        folder: Split directory name; files are read from
            ``../input/be-project/<folder>/<folder>/audio_features/``.

    Returns:
        Float array of shape ``(n_files, time_steps, aud_ft)``, ordered by
        sorted file path.

    Raises:
        ValueError: If a file does not have shape ``(time_steps, aud_ft)``
            after padding.
    """
    video_names = sorted(
        glob.glob('../input/be-project/{}/{}/audio_features//*.wav_st.csv'.format(folder, folder))
    )
    print(len(video_names))

    # Collect per-file arrays and stack once; growing the result with
    # np.vstack inside the loop copied all previous data on every file.
    per_video = []
    for video in video_names:
        aud = pd.read_csv(video, header=None).to_numpy()
        missing_rows = time_steps - aud.shape[0]
        if missing_rows > 0:
            aud = np.vstack((aud, np.zeros((missing_rows, aud_ft))))
        if aud.shape != (time_steps, aud_ft):
            raise ValueError(
                'unexpected feature shape {} in {}'.format(aud.shape, video)
            )
        per_video.append(aud)

    if per_video:
        aud_2 = np.stack(per_video).astype(np.float64, copy=False)
    else:
        aud_2 = np.empty((0, time_steps, aud_ft))
    print(aud_2.shape)
    return aud_2

In [None]:
def get_audio_labels(folder, annotation):
    """Build the target matrix for the audio model of one split.

    Args:
        folder: Split directory name ('train', 'val' or 'test').
        annotation: Suffix of the annotation pickle, i.e. the file
            ``annotation_<annotation>.pkl`` inside the split directory.

    Returns:
        Float array of shape ``(n_files, 7)`` ordered by sorted feature-file
        path.  Columns 0-4 are O, C, E, A, N; column 5 is Ethnicity (codes
        1 and 3 mapped to 0, code 2 mapped to 1); column 6 is Gender (code 2
        mapped to 0).

    Raises:
        KeyError: If a video is missing from the annotations.
    """
    feature_dir = '../input/be-project/{}/{}/audio_features/'.format(folder, folder)
    video_names = sorted(glob.glob(feature_dir + '/*.wav_st.csv'))
    videos = [
        name.replace(feature_dir, '').replace('.wav_st.csv', '')
        for name in video_names
    ]

    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    labels = pd.read_pickle(
        '../input/be-project/{}/{}/annotation_{}.pkl'.format(folder, folder, annotation)
    )
    trait_keys = ['openness', 'conscientiousness', 'extraversion',
                  'agreeableness', 'neuroticism']
    # Rows are gathered in a list comprehension; DataFrame.append was removed
    # in pandas 2.0 and copied the frame on every iteration.
    y = np.array(
        [[labels[key][video] for key in trait_keys] for video in videos],
        dtype=np.float64,
    ).reshape(-1, len(trait_keys))

    # The test split has its own protected-attribute file (the image pipeline
    # uses it as well); the dev file only serves the train/val splits.
    annotation_set = 'test' if folder == 'test' else 'dev'
    ethnicity_gender = pd.read_csv(
        '../input/be-project/dataframes/eth_gender_annotations_{}.csv'.format(annotation_set),
        delimiter=';',
    )
    # Index by video name for direct lookups; the first row wins on duplicates.
    protected = (
        ethnicity_gender
        .drop_duplicates(subset='VideoName')
        .set_index('VideoName')[['Ethnicity', 'Gender']]
    )
    eth_gen = protected.loc[videos].to_numpy(dtype=np.float64)

    ethnicity = eth_gen[:, 0]
    gender = eth_gen[:, 1]
    eth_gen[:, 0] = np.where(np.isin(ethnicity, (1, 3)), 0,
                             np.where(ethnicity == 2, 1, ethnicity))
    eth_gen[:, 1] = np.where(gender == 2, 0, gender)

    return np.hstack((y, eth_gen))

In [None]:
audio_features = get_audio_features('train')
audio_features.shape

In [None]:
audio_features_val = get_audio_features('val')
audio_features_val.shape

In [None]:
audio_features_test = get_audio_features('test')
audio_features_test.shape

In [None]:
y = get_audio_labels('train', 'training')
y.shape

In [None]:
y_val = get_audio_labels('val', 'validation')
y_val.shape

In [None]:
y_test = get_audio_labels('test', 'test')
y_test.shape

In [None]:
model = AdversarialDebiasing(classifier, adversary, 1,
                               keras.metrics.MeanAbsoluteError(name="c_loss"), 
                               keras.metrics.BinaryCrossentropy(name="a_loss"),
                               debias=True,
                               data_type='audio'
                              )


model.compile(
    optimizer=[keras.optimizers.RMSprop(learning_rate=0.001, decay = 0.0005),keras.optimizers.RMSprop(learning_rate=0.001),],
    c_loss_fn = keras.losses.MeanAbsoluteError(),
    a_loss_fn = keras.losses.BinaryCrossentropy(),
)

In [None]:
history = model.fit(audio_features, np.asarray(y).astype('float32'), batch_size=32, epochs=100,
                    validation_data = (audio_features_val, np.asarray(y_val).astype('float32')),
                    shuffle=True,
                   )

In [None]:
from matplotlib import pyplot as plt
plt.plot(history.history['c_loss'])
plt.plot(history.history['val_c_loss'])

In [None]:
from matplotlib import pyplot as plt
plt.plot(history.history['a_loss'])
plt.plot(history.history['val_a_loss'])

In [None]:
model.save_weights('audio_rmsprop_debias_true2.h5')

In [None]:
import os
os.chdir(r'/kaggle/working')
from IPython.display import FileLink 
FileLink(r'audio_rmsprop_debias_true2.h5')

In [None]:
hist_df = pd.DataFrame(history.history) 
hist_df.to_csv('Debias True2 Audio History.csv')

# Audio Predictions

In [None]:
model.built=True
model.load_weights('../input/models/audio 22_Feb/audio_rmsprop_debias_true.h5')
# model.load_weights('../input/models/audio 22_Feb/audio_rmsprop_debias_false.h5')

In [None]:
# pred = model.predict(audio_features)
pred = model.predict(audio_features_test)

In [None]:
pred = pred[0]

In [None]:
pred_df = pd.DataFrame(columns=['O','C','E','A','N', 'O_pred','C_pred','E_pred','A_pred','N_pred', 'Ethnicity', 'Gender','Video' ])
# pred_df['O'] = y[:,0]
# pred_df['C'] = y[:,1]
# pred_df['E'] = y[:,2]
# pred_df['A'] = y[:,3]
# pred_df['N'] = y[:,4]

# pred_df['O'] = y_val[:,0]
# pred_df['C'] = y_val[:,1]
# pred_df['E'] = y_val[:,2]
# pred_df['A'] = y_val[:,3]
# pred_df['N'] = y_val[:,4]

pred_df['O'] = y_test[:,0]
pred_df['C'] = y_test[:,1]
pred_df['E'] = y_test[:,2]
pred_df['A'] = y_test[:,3]
pred_df['N'] = y_test[:,4]

pred_df['O_pred'] = pred[:,0]
pred_df['C_pred'] = pred[:,1]
pred_df['E_pred'] = pred[:,2]
pred_df['A_pred'] = pred[:,3]
pred_df['N_pred'] = pred[:,4]
# pred_df['Ethnicity'] = eth_gen_val[:,0]
# pred_df['Gender'] = eth_gen_val[:,1]
# pred_df['Ethnicity'] = y[:,5]
# pred_df['Gender'] = y[:,6]

pred_df['Ethnicity'] = y_test[:,5]
pred_df['Gender'] = y_test[:,6]

# video_names = glob.glob('../input/be-project/train/train/audio_features//*.wav_st.csv')
video_names = glob.glob('../input/be-project/test/test/audio_features//*.wav_st.csv')
video_names.sort()
# video_names = [i.replace('../input/be-project/train/train/audio_features/', '') for i in video_names]
video_names = [i.replace('../input/be-project/test/test/audio_features/', '') for i in video_names]

pred_df['Video'] = video_names

metric_check = pred_df
metric_check

In [None]:
metric_check.to_csv('Audio Debias False Pred.csv')

In [None]:
from sklearn.metrics import mean_absolute_error

In [None]:
# y_true = y[:, 0:5]
y_true = y_test[:, 0:5]
y_true

In [None]:
y_pred = pred
y_pred

In [None]:
mean_absolute_error(y_true, y_pred, multioutput='raw_values')

In [None]:
1 - mean_absolute_error(y_true, y_pred, multioutput='raw_values')

In [None]:
1 - mean_absolute_error(y_true, y_pred)

In [None]:
ocean_fairness(metric_check)

# Model Ensemble

In [None]:
img_ocean = pd.read_csv('../input/predictions/Image Debias True Pred.csv')
img_ocean.drop(columns=['Unnamed: 0'], inplace=True)
img_ocean

In [None]:
get_video_name(img_ocean)

In [None]:
avg_img_ocean = img_ocean.groupby(['VideoName'], as_index=False).mean()

In [None]:
audio_ocean = pd.read_csv('../input/predictions/Audio Debias True Pred.csv')
audio_ocean.drop(columns=['Unnamed: 0'], inplace=True)
audio_ocean

In [None]:
get_video_name(audio_ocean)
audio_ocean

In [None]:
merged = pd.merge(avg_img_ocean, audio_ocean, how='inner', on='VideoName', suffixes=['', '_aud'])

In [None]:
merged

In [None]:
# Column names of the merged frame: the ground truth per trait, and per trait
# the image prediction followed by the audio prediction ('_aud' suffix).
traits = ['O', 'C', 'E', 'A', 'N']
true_cols = list(traits)
pred_cols = [
    column
    for trait in traits
    for column in (trait + '_pred', trait + '_pred_aud')
]

In [None]:
true = merged[true_cols].to_numpy()
true

In [None]:
merged[pred_cols]

In [None]:
img_aud_pred = merged[pred_cols].to_numpy()
img_aud_pred

In [None]:
# Split the prediction matrix into one two-column block per trait
# (column pairs 0-1, 2-3, 4-5, 6-7 and 8-9).
o, c, e, a, n = (
    img_aud_pred[:, start:start + 2] for start in range(0, 10, 2)
)

In [None]:
# img = img_aud_pred[:, [0,2,4,6,8]]
# aud = img_aud_pred[:, [1,3,5,7,9]]

In [None]:
# class WeightedSum(layers.Layer):
#     """A custom keras layer to learn a weighted sum of tensors"""

#     def __init__(self, **kwargs):
#         super(WeightedSum, self).__init__(**kwargs)

#     def build(self, input_shape=1):
#         self.a = self.add_weight(
#             name='alpha',
#             shape=(),
#             initializer='ones',
#             dtype='float32',
#             trainable=True,
#             constraint=tf.keras.constraints.min_max_norm(max_value=1,min_value=0),
#         )
#         super(WeightedSum, self).build(input_shape)

#     def call(self, model_outputs):
#         return self.a * model_outputs[0] + (1 - self.a) * model_outputs[1]

#     def compute_output_shape(self, input_shape):
#         return input_shape[0]

In [None]:
class WeightedAverage(keras.layers.Layer):
    """Combine a list of equally-shaped inputs with learned softmax weights.

    One scalar weight is learned per input; the weights are passed through a
    softmax so they sum to one, and the output is the weighted sum of the
    inputs.
    """

    def __init__(self, **kwargs):
        # Forward standard layer arguments (name, dtype, ...) to the base class.
        super(WeightedAverage, self).__init__(**kwargs)

    def build(self, input_shape):
        # input_shape is a list with one shape per input tensor, so its
        # length is the number of inputs being averaged.
        self.W = self.add_weight(
                    shape=(1,1,len(input_shape)),
                    initializer='uniform',
                    dtype=tf.float32,
                    trainable=True)

    def call(self, inputs):
        # inputs: list of tensors of shape (n_batch, n_feat).
        # Stack them on a new last axis -> (n_batch, n_feat, n_inputs).
        # tf.stack replaces creating a new Concatenate layer on every call.
        stacked = tf.stack(inputs, axis=-1)
        weights = tf.nn.softmax(self.W, axis=-1)  # (1, 1, n_inputs), sums to one

        return tf.reduce_sum(weights * stacked, axis=-1)  # (n_batch, n_feat)

In [None]:
# class WeightedAverage(keras.layers.Layer):
    
#     def __init__(self):
#         super(WeightedAverage, self).__init__()
        
#     def build(self, input_shape):
#         self.o_W = self.add_weight(
#                     shape=(1,1,2),
#                     initializer='uniform',
#                     dtype=tf.float32,
#                     trainable=True)
#         self.c_W = self.add_weight(
#                     shape=(1,1,2),
#                     initializer='uniform',
#                     dtype=tf.float32,
#                     trainable=True)
#         self.e_W = self.add_weight(
#                     shape=(1,1,2),
#                     initializer='uniform',
#                     dtype=tf.float32,
#                     trainable=True)
#         self.a_W = self.add_weight(
#                     shape=(1,1,2),
#                     initializer='uniform',
#                     dtype=tf.float32,
#                     trainable=True)
#         self.n_W = self.add_weight(
#                     shape=(1,1,2),
#                     initializer='uniform',
#                     dtype=tf.float32,
#                     trainable=True)
        
#     def call(self, inputs):
#         # inputs is a list of tensor of shape [(n_batch, n_feat), ..., (n_batch, n_feat)]
#         o = [inputs[0][0], inputs[1][0]]
#         c = [inputs[0][1], inputs[1][1]]
#         e = [inputs[0][2], inputs[1][2]]
#         a = [inputs[0][3], inputs[1][3]]
#         n = [inputs[0][4], inputs[1][4]]
        
#         # expand last dim of each input passed [(n_batch, n_feat, 1), ..., (n_batch, n_feat, 1)]
#         o_inputs = [tf.expand_dims(i, -1) for i in o]
#         c_inputs = [tf.expand_dims(i, -1) for i in c]
#         e_inputs = [tf.expand_dims(i, -1) for i in e]
#         a_inputs = [tf.expand_dims(i, -1) for i in a]
#         n_inputs = [tf.expand_dims(i, -1) for i in n]
        
#         o_inputs = keras.layers.Concatenate(axis=-1)(o_inputs) # (n_batch, n_feat, n_inputs)
#         c_inputs = keras.layers.Concatenate(axis=-1)(c_inputs) # (n_batch, n_feat, n_inputs)
#         e_inputs = keras.layers.Concatenate(axis=-1)(e_inputs) # (n_batch, n_feat, n_inputs)
#         a_inputs = keras.layers.Concatenate(axis=-1)(a_inputs) # (n_batch, n_feat, n_inputs)
#         n_inputs = keras.layers.Concatenate(axis=-1)(n_inputs) # (n_batch, n_feat, n_inputs)
        
#         o_weights = tf.nn.softmax(self.o_W, axis=-1) # (1,1,n_inputs)
#         c_weights = tf.nn.softmax(self.c_W, axis=-1) # (1,1,n_inputs)
#         e_weights = tf.nn.softmax(self.e_W, axis=-1) # (1,1,n_inputs)
#         a_weights = tf.nn.softmax(self.a_W, axis=-1) # (1,1,n_inputs)
#         n_weights = tf.nn.softmax(self.n_W, axis=-1) # (1,1,n_inputs)
#         # weights sum up to one on last dim

#         return tf.reduce_sum(o_weights*o_inputs, axis=-1), tf.reduce_sum(c_weights*c_inputs, axis=-1),tf.reduce_sum(e_weights*e_inputs, axis=-1), tf.reduce_sum(a_weights*a_inputs, axis=-1),tf.reduce_sum(n_weights*n_inputs, axis=-1) # (n_batch, n_feat) 

In [None]:
# inp1 = Input((5,))
# inp2 = Input((5,))
# # out = WeightedSum()([inp1, inp2])
# out = WeightedAverage()([inp1,inp2])

# # Create the merged model
# o_model = Model(inputs=[inp1,inp2], outputs=out)

In [None]:
# o_model.compile(
#     optimizer='sgd',
#     loss='mae',)

In [None]:
# o_model.summary()

In [None]:
inp1 = Input((1,))
inp2 = Input((1,))
# out = WeightedSum()([inp1, inp2])
out = WeightedAverage()([inp1,inp2])

# Create the merged model
o_model = Model(inputs=[inp1, inp2], outputs=out)

In [None]:
inp1 = Input((1,))
inp2 = Input((1,))
# out = WeightedSum()([inp1, inp2])
out = WeightedAverage()([inp1,inp2])

# Create the merged model
c_model = Model(inputs=[inp1, inp2], outputs=out)

In [None]:
inp1 = Input((1,))
inp2 = Input((1,))
# out = WeightedSum()([inp1, inp2])
out = WeightedAverage()([inp1,inp2])

# Create the merged model
e_model = Model(inputs=[inp1, inp2], outputs=out)

In [None]:
inp1 = Input((1,))
inp2 = Input((1,))
# out = WeightedSum()([inp1, inp2])
out = WeightedAverage()([inp1,inp2])

# Create the merged model
a_model = Model(inputs=[inp1, inp2], outputs=out)

In [None]:
inp1 = Input((1,))
inp2 = Input((1,))
# out = WeightedSum()([inp1, inp2])
out = WeightedAverage()([inp1,inp2])

# Create the merged model
n_model = Model(inputs=[inp1, inp2], outputs=out)

In [None]:
# Compile each per-trait ensemble model with the Adam optimizer and MAE loss.
for trait_model in (o_model, c_model, e_model, a_model, n_model):
    trait_model.compile(optimizer='adam', loss='mae')

In [None]:
o_model.summary()

In [None]:
# o_model.fit([img,aud], true.reshape(-1,5), epochs=50, batch_size=32)

In [None]:
o_model.fit([o[:,0].reshape(-1,1), o[:,1].reshape(-1,1)], true[:,0].reshape(-1,1), epochs=50, batch_size=32)

In [None]:
c_model.fit([c[:,0].reshape(-1,1), c[:,1].reshape(-1,1)], true[:,1].reshape(-1,1), epochs=50, batch_size=32)

In [None]:
# `true` follows true_cols = ['O', 'C', 'E', 'A', 'N'], so the extraversion
# target is column 2 (column 3 is agreeableness).
e_model.fit([e[:,0].reshape(-1,1), e[:,1].reshape(-1,1)], true[:,2].reshape(-1,1), epochs=50, batch_size=32)

In [None]:
# `true` follows true_cols = ['O', 'C', 'E', 'A', 'N'], so the agreeableness
# target is column 3 (column 2 is extraversion).
a_model.fit([a[:,0].reshape(-1,1), a[:,1].reshape(-1,1)], true[:,3].reshape(-1,1), epochs=50, batch_size=32)

In [None]:
n_model.fit([n[:,0].reshape(-1,1), n[:,1].reshape(-1,1)], true[:,4].reshape(-1,1), epochs=50, batch_size=32)

In [None]:
print(tf.nn.softmax(o_model.get_weights()[-1]).numpy())
print(tf.nn.softmax(c_model.get_weights()[-1]).numpy())
print(tf.nn.softmax(e_model.get_weights()[-1]).numpy())
print(tf.nn.softmax(a_model.get_weights()[-1]).numpy())
print(tf.nn.softmax(n_model.get_weights()[-1]).numpy())

In [None]:
o_pred = o_model.predict([o[:,0].reshape(-1,1), o[:,1].reshape(-1,1)])
c_pred = c_model.predict([c[:,0].reshape(-1,1), c[:,1].reshape(-1,1)])
e_pred = e_model.predict([e[:,0].reshape(-1,1), e[:,1].reshape(-1,1)])
a_pred = a_model.predict([a[:,0].reshape(-1,1), a[:,1].reshape(-1,1)])
n_pred = n_model.predict([n[:,0].reshape(-1,1), n[:,1].reshape(-1,1)])

In [None]:
from sklearn.metrics import mean_absolute_error
1- mean_absolute_error(true[:,0].reshape(-1,1), o_pred, multioutput='raw_values')

In [None]:
1- mean_absolute_error(true[:,1].reshape(-1,1), c_pred, multioutput='raw_values')

In [None]:
1- mean_absolute_error(true[:,2].reshape(-1,1), e_pred, multioutput='raw_values')

In [None]:
1- mean_absolute_error(true[:,3].reshape(-1,1), a_pred, multioutput='raw_values')

In [None]:
1- mean_absolute_error(true[:,4].reshape(-1,1), n_pred, multioutput='raw_values')

In [None]:
true_cols+['Ethnicity', 'Gender']

In [None]:
# Copy the selection: prediction columns are added to this frame afterwards,
# and writing to a column slice of `merged` triggers pandas'
# chained-assignment warning.
metric_check = merged[true_cols+['Ethnicity', 'Gender']].copy()

In [None]:
# Attach the ensemble prediction of each trait as its '<trait>_pred' column.
ensemble_predictions = (
    ('O_pred', o_pred),
    ('C_pred', c_pred),
    ('E_pred', e_pred),
    ('A_pred', a_pred),
    ('N_pred', n_pred),
)
for pred_column, pred_values in ensemble_predictions:
    metric_check[pred_column] = pred_values

In [None]:
metric_check

In [None]:
ocean_fairness(metric_check)