<a href="https://colab.research.google.com/github/stemlock/w266_final_project/blob/master/Base_Model_Colab_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Environment Setup

In [1]:
# Mount Google Drive so the project folder is reachable from the Colab VM.
from google.colab import drive
drive.mount('/content/drive')
# Project code directory on the mounted drive; reused later as the root for saved models.
CWD = '/content/drive/My Drive/W266 Final Project/Code'

# Make the project directory the working directory so relative paths (e.g. data/...) resolve.
%cd $CWD

Mounted at /content/drive
/content/drive/My Drive/W266 Final Project/Code


In [None]:
!pip install transformers==4.12.2
!pip install tensorflow==2.5.2

In [3]:
# Imports
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import tensorflow as tf
from transformers import DistilBertTokenizerFast, TFDistilBertForSequenceClassification, TFTrainer, TFTrainingArguments

In [4]:
# Report the TensorFlow build in use so the results can be tied to a version
print("Tensorflow version:", tf.version.VERSION)

Tensorflow version: 2.5.2


In [5]:
# print("Transformers version:", transformers.__version__)

In [6]:
# Set random seed
# The same seed is passed explicitly to the dev/test split and to the dataset
# shuffles further down; it is also applied to the global NumPy and TensorFlow
# generators here so that anything drawing from them is seeded as well.
seed = 42
np.random.seed(seed)
tf.random.set_seed(seed)

## Load Data

In [7]:
# Load the pre-processed train and test CSVs
# (Rows with NA in the neutral_review_text had no tokens replaced)
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ('data/model_train.csv', 'data/model_test.csv')
)
df_train.head()

Unnamed: 0,review_id,review_score,review_text,neutral_review_text,neutral_sub_count,female_review_text,female_sub_count,male_review_text,male_sub_count,label
0,6990,1,What the hell is this? Its one of the dumbest ...,what the hell is this? its one of the dumbest ...,1,what the hell is this? its one of the dumbest ...,1,what the hell is this? its one of the dumbest ...,0,0
1,12145,1,"As you may have gathered from the title, I who...","as you may have gathered from the title, i who...",8,"as you may have gathered from the title, i who...",7,"as you may have gathered from the title, i who...",1,0
2,7457,1,"This Canadian ""movie"" is the worst ever! Stunn...","this canadian ""movie"" is the worst ever! stunn...",7,"this canadian ""movie"" is the worst ever! stunn...",5,"this canadian ""movie"" is the worst ever! stunn...",2,0
3,7324,1,Being a Film studies graduate I would like to ...,being a film studies graduate i would like to ...,2,being a film studies graduate i would like to ...,1,being a film studies graduate i would like to ...,1,0
4,7089,1,A sexually obsessed chef leads a duplicitous l...,a sexually obsessed chef leads a duplicitous l...,7,a sexually obsessed chef leads a duplicitous l...,4,a sexually obsessed chef leads a duplicitous l...,3,0


## Baseline Metrics

### Majority tokens average

In [7]:
# Split the test reviews by which gender's tokens dominate (ties are dropped).
# NOTE(review): the comparisons look inverted relative to the variable names, but
# this appears intentional -- `female_sub_count` presumably counts tokens that were
# swapped to produce the female text (i.e. originally male tokens), which is the
# same convention the proportional-average cell uses. Confirm against preprocessing.
df_female_majority = df_test.loc[df_test['male_sub_count'].gt(df_test['female_sub_count'])]
df_male_majority = df_test.loc[df_test['male_sub_count'].lt(df_test['female_sub_count'])]

In [8]:
# Mean of the binary `label` column over the female-majority reviews
print("Average female review binary sentiment:", df_female_majority.loc[:, 'label'].mean())

Average female review binary sentiment: 0.4896039603960396


In [9]:
# Mean of the `review_score` column over the female-majority reviews
print("Average female review score 1-10:", df_female_majority.loc[:, 'review_score'].mean())

Average female review score 1-10: 5.436138613861386


In [10]:
# Mean of the binary `label` column over the male-majority reviews
print("Average male review binary sentiment:", df_male_majority.loc[:, 'label'].mean())

Average male review binary sentiment: 0.5035229420862691


In [11]:
# Mean of the `review_score` column over the male-majority reviews
print("Average male review score 1-10:", df_male_majority.loc[:, 'review_score'].mean())

Average male review score 1-10: 5.509709572091425


In [12]:
# Share of female-majority reviews at each review score (counts divided by group size)
df_female_majority['review_score'].value_counts().div(len(df_female_majority))

1     0.181683
10    0.171782
8     0.121287
4     0.121287
3     0.111386
7     0.106931
2     0.096040
9     0.089604
Name: review_score, dtype: float64

In [13]:
# Share of male-majority reviews at each review score (counts divided by group size)
df_male_majority['review_score'].value_counts().div(len(df_male_majority))

1     0.197457
10    0.190067
8     0.120124
4     0.105860
3     0.101564
7     0.100361
9     0.092971
2     0.091596
Name: review_score, dtype: float64

### Proportional weighted average

In [14]:
# Per-review ratio of each substitution count to the neutral substitution count.
# NOTE(review): male_proportion is built from `female_sub_count` (and vice versa);
# presumably a substitution made for the female text marks an originally male
# token -- confirm against the preprocessing code.
male_proportion = df_test['female_sub_count'].div(df_test['neutral_sub_count'])
female_proportion = df_test['male_sub_count'].div(df_test['neutral_sub_count'])

In [15]:
# Test-set mean of `label`, weighting every review by its female_proportion
df_test['label'].mul(female_proportion).sum() / female_proportion.sum()

0.4922782109404571

In [16]:
# Test-set mean of `review_score`, weighting every review by its female_proportion
df_test['review_score'].mul(female_proportion).sum() / female_proportion.sum()

5.440864712868192

In [17]:
# Test-set mean of `label`, weighting every review by its male_proportion
df_test['label'].mul(male_proportion).sum() / male_proportion.sum()

0.5028605798629667

In [18]:
# Test-set mean of `review_score`, weighting every review by its male_proportion
df_test['review_score'].mul(male_proportion).sum() / male_proportion.sum()

5.508159425039407

## Data Transformations

In [18]:
# ## TO DO: Try to fix this function. For some reason, this causes the model.fit() to fail
# def encode_datasets(X_train, y_train, X_test, y_test, tokenizer, split_size=0.5, seed=42):

#   '''
#   Takes in train and test data and encodes them into train, dev, and test
#   TF datasets using the provided tokenizer.
#   '''

#   # Split test set into dev and test
#   X_dev, X_test, y_dev, y_test = train_test_split(X_test, y_test, test_size=split_size, random_state=seed)

#   # Apply tokenizer to each dataset
#   train_encodings = tokenizer(X_train, truncation=True, padding=True)
#   dev_encodings = tokenizer(X_dev, truncation=True, padding=True)
#   test_encodings = tokenizer(X_test, truncation=True, padding=True)

#   # Turn encodings into datasets for easy batching
#   train_dataset = tf.data.Dataset.from_tensor_slices((
#       dict(train_encodings),
#       y_train
#   ))
#   dev_dataset = tf.data.Dataset.from_tensor_slices((
#       dict(dev_encodings),
#       y_dev
#   ))
#   test_dataset = tf.data.Dataset.from_tensor_slices((
#       dict(test_encodings),
#       y_test
#   ))

#   return train_dataset, dev_dataset, test_dataset

In [19]:
# # Load data
# train_texts = df_train['review_text'].values.tolist()
# n_train_texts = df_train['neutral_review_text'].values.tolist()
# f_train_texts = df_train['female_review_text'].values.tolist()
# m_train_texts = df_train['male_review_text'].values.tolist()
# train_labels = df_train['label'].values.tolist()

# test_texts = df_test['review_text'].values.tolist()
# n_test_texts = df_test['neutral_review_text'].values.tolist()
# f_test_texts = df_test['female_review_text'].values.tolist()
# m_test_texts = df_test['male_review_text'].values.tolist()
# test_labels = df_test['label'].values.tolist()

In [20]:
# # Specify tokenizer and encode each dataset
# tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

# train_dataset, dev_dataset, test_dataset = encode_datasets(train_texts, train_labels, 
#                                                            test_texts, test_labels,
#                                                            tokenizer)
# n_train_dataset, n_dev_dataset, n_test_dataset = encode_datasets(n_train_texts, train_labels, 
#                                                                  n_test_texts, test_labels,
#                                                                  tokenizer)

### Split data

In [22]:
# Pull each text variant (original / neutral / female / male) and the labels
# out of the frames as plain Python lists.

# -- training split
train_texts = df_train['review_text'].tolist()
n_train_texts = df_train['neutral_review_text'].tolist()
f_train_texts = df_train['female_review_text'].tolist()
m_train_texts = df_train['male_review_text'].tolist()
train_labels = df_train['label'].tolist()

# -- test split (halved into dev/test in the next cell)
test_texts = df_test['review_text'].tolist()
n_test_texts = df_test['neutral_review_text'].tolist()
f_test_texts = df_test['female_review_text'].tolist()
m_test_texts = df_test['male_review_text'].tolist()
test_labels = df_test['label'].tolist()

In [23]:
# Create dev set from portion of test set using split.
# All four text variants and the labels go through ONE train_test_split call so
# they are partitioned with the same indices and stay row-aligned by
# construction, rather than relying on repeating the same seed in separate calls.
# NOTE: this cell overwrites test_texts / test_labels, so re-running it without
# re-running the previous cell halves the test set again.
(dev_texts, test_texts,
 n_dev_texts, n_test_texts,
 f_dev_texts, f_test_texts,
 m_dev_texts, m_test_texts,
 dev_labels, test_labels) = train_test_split(
    test_texts, n_test_texts, f_test_texts, m_test_texts, test_labels,
    test_size=.5, random_state=seed)

#### TEST SMALL DATASETS


In [None]:
# # TEST SMALL DATASETS
# tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

# small_train_encodings = tokenizer(train_texts[:1000], max_length=50, truncation=True, padding=True, return_tensors='tf')
# small_dev_encodings = tokenizer(dev_texts[:1000], max_length=50, truncation=True, padding=True, return_tensors='tf')
# small_test_encodings = tokenizer(test_texts[:1000], max_length=50, truncation=True, padding=True, return_tensors='tf')

# small_train_labels = tf.convert_to_tensor(train_labels[:1000])
# small_dev_labels = tf.convert_to_tensor(dev_labels[:1000])
# small_test_labels = tf.convert_to_tensor(test_labels[:1000])

In [27]:
# # TEST SMALL DATASETS
# small_train_dataset = tf.data.Dataset.from_tensor_slices((
#     dict(small_train_encodings),
#     small_train_labels
# )).shuffle(1000, seed=seed).batch(8)

# small_dev_dataset = tf.data.Dataset.from_tensor_slices((
#     dict(small_dev_encodings),
#     small_dev_labels
# )).batch(8)

# small_test_dataset = tf.data.Dataset.from_tensor_slices((
#     dict(small_test_encodings),
#     small_test_labels
# )).batch(8)

In [None]:
# # Initialize the TPU devices
# if os.environ['COLAB_TPU_ADDR']:
#   cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
#   tf.config.experimental_connect_to_cluster(cluster_resolver)
#   tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
#   tpu_strategy = tf.distribute.TPUStrategy(cluster_resolver)
#   print('Using TPU')
# elif tf.config.list_physical_devices('GPU'):
#   strategy = tf.distribute.MirroredStrategy()
#   print('Using GPU')
# else:
#   raise ValueError('Running on CPU is not recommended.')

In [None]:
# with tpu_strategy.scope():
#   small_model = TFDistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)
#   small_model.compile(
#       optimizer=tf.keras.optimizers.Adam(learning_rate=5e-5),
#       loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
#       metrics=tf.metrics.SparseCategoricalAccuracy('accuracy'),
#       ) 
#   small_model.fit(small_dev_dataset, validation_data=small_test_dataset, epochs=3)

### Tokenize

In [24]:
# Load the pretrained DistilBERT tokenizer and encode the original-text splits
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

# Each split is truncated, padded, and returned as TF tensors
train_encodings, dev_encodings, test_encodings = (
    tokenizer(texts, truncation=True, padding=True, return_tensors='tf')
    for texts in (train_texts, dev_texts, test_texts)
)

In [25]:
# Batch encode the neutral datasets
# Only the neutral *training* split is encoded: the training loops pair
# n_train_dataset with the original dev_dataset, so the neutral dev/test
# encodings below are left disabled.

n_train_encodings = tokenizer(n_train_texts, truncation=True, padding=True, return_tensors='tf')
# n_dev_encodings = tokenizer(n_dev_texts, truncation=True, padding=True, return_tensors='tf')
# n_test_encodings = tokenizer(n_test_texts, truncation=True, padding=True, return_tensors='tf')

In [26]:
# Batch encode the male and female datasets
# Only the *test* splits are encoded: the male/female texts are used solely for
# the sentiment comparison after training, so train/dev encodings stay disabled.

# m_train_encodings = tokenizer(m_train_texts, truncation=True, padding=True, return_tensors='tf')
# m_dev_encodings = tokenizer(m_dev_texts, truncation=True, padding=True, return_tensors='tf')
m_test_encodings = tokenizer(m_test_texts, truncation=True, padding=True, return_tensors='tf')

# f_train_encodings = tokenizer(f_train_texts, truncation=True, padding=True, return_tensors='tf')
# f_dev_encodings = tokenizer(f_dev_texts, truncation=True, padding=True, return_tensors='tf')
f_test_encodings = tokenizer(f_test_texts, truncation=True, padding=True, return_tensors='tf')

### Create TF.Datasets

In [27]:
# Convert the three label lists to tf.Tensors (shared by every text variant of a split)
tf_train_labels, tf_dev_labels, tf_test_labels = map(
    tf.convert_to_tensor, (train_labels, dev_labels, test_labels)
)

In [28]:
# Wrap the original-text encodings and labels in batched tf.data pipelines.
# Only the training pipeline is shuffled (seeded); dev/test keep their order.

train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((dict(train_encodings), tf_train_labels))
    .shuffle(buffer_size=10000, seed=seed)
    .batch(batch_size=16)
)

dev_dataset = (
    tf.data.Dataset
    .from_tensor_slices((dict(dev_encodings), tf_dev_labels))
    .batch(batch_size=16)
)

test_dataset = (
    tf.data.Dataset
    .from_tensor_slices((dict(test_encodings), tf_test_labels))
    .batch(batch_size=16)
)

In [30]:
# Turn neutral encodings into datasets for easy batching
# Only the neutral training pipeline is built (seeded shuffle, batches of 16);
# the neutral dev/test pipelines are disabled because their encodings are not
# computed above.

n_train_dataset = tf.data.Dataset.from_tensor_slices((
    dict(n_train_encodings),
    tf_train_labels
)).shuffle(10000, seed=seed).batch(16)

# n_dev_dataset = tf.data.Dataset.from_tensor_slices((
#     dict(n_dev_encodings),
#     tf_dev_labels
# )).batch(16)

# n_test_dataset = tf.data.Dataset.from_tensor_slices((
#     dict(n_test_encodings),
#     tf_test_labels
# )).batch(16)

In [31]:
# Turn male and female encodings into datasets for easy batching
# Only the male/female *test* pipelines are built; they are unshuffled so the
# predictions keep the row order of tf_test_labels. The train/dev variants are
# disabled because their encodings are not computed above.

# m_train_dataset = tf.data.Dataset.from_tensor_slices((
#     dict(m_train_encodings),
#     tf_train_labels
# )).shuffle(10000, seed=seed).batch(16)

# m_dev_dataset = tf.data.Dataset.from_tensor_slices((
#     dict(m_dev_encodings),
#     tf_dev_labels
# )).batch(16)

m_test_dataset = tf.data.Dataset.from_tensor_slices((
    dict(m_test_encodings),
    tf_test_labels
)).batch(16)

# f_train_dataset = tf.data.Dataset.from_tensor_slices((
#     dict(f_train_encodings),
#     tf_train_labels
# )).shuffle(10000, seed=seed).batch(16)

# f_dev_dataset = tf.data.Dataset.from_tensor_slices((
#     dict(f_dev_encodings),
#     tf_dev_labels
# )).batch(16)

f_test_dataset = tf.data.Dataset.from_tensor_slices((
    dict(f_test_encodings),
    tf_test_labels
)).batch(16)

## Model Pipeline

### Initialize TF strategy (TPU preferred)

In [29]:
# Pick a tf.distribute strategy: TPU when Colab exposes one, otherwise GPU.
# os.environ.get() is used so that a missing COLAB_TPU_ADDR falls through to
# the GPU check instead of raising KeyError.
if os.environ.get('COLAB_TPU_ADDR'):
  cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
  tf.config.experimental_connect_to_cluster(cluster_resolver)
  tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
  strategy = tf.distribute.TPUStrategy(cluster_resolver)
  print('Using TPU')
elif tf.config.list_physical_devices('GPU'):
  strategy = tf.distribute.MirroredStrategy()
  print('Using GPU')
else:
  raise ValueError('Running on CPU is not recommended.')

# The training cells enter `tpu_strategy.scope()`; bind that name on every path
# so the GPU fallback does not fail later with a NameError.
tpu_strategy = strategy





INFO:tensorflow:Initializing the TPU system: grpc://10.18.253.90:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.18.253.90:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


Using TPU


### Baseline Model Architecture

In [32]:
# Starter function to create the model (we can improve on this when we start using more complex models)
def create_model(checkpoint='distilbert-base-uncased', num_labels=2):
  '''
  Builds a DistilBERT sequence-classification model from a pretrained checkpoint.

  checkpoint: name or path passed to `from_pretrained`
              (defaults to the base uncased DistilBERT used by the baselines).
  num_labels: number of output classes for the classification head
              (defaults to 2 for binary sentiment).

  Returns the TFDistilBertForSequenceClassification instance, not yet compiled.
  '''

  return TFDistilBertForSequenceClassification.from_pretrained(checkpoint, num_labels=num_labels)

### Baseline Model v1 (5 epochs)

#### Train model

In [None]:
# Fine-tune one model per training set for 5 epochs: index 0 is trained on the
# original reviews, index 1 on the neutral-text reviews. Both are validated on
# the original dev set.
models = []
histories = []
for train, dev in [(train_dataset, dev_dataset), (n_train_dataset, dev_dataset)]:
  # Create and compile the model inside the distribution strategy scope
  with tpu_strategy.scope():
    model = create_model()

    optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)

    # The model outputs raw logits, hence from_logits=True
    model.compile(optimizer=optimizer, loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy')])

  # summary() prints the table itself and returns None, so it is not wrapped
  # in print() (which would emit a stray "None" line)
  model.summary()

  history = model.fit(train, validation_data=dev, epochs=5)

  histories.append(history)
  models.append(model)

#### Test metrics

In [38]:
# Loss and accuracy of the v1 model trained on original text, on the held-out test set
models[0].evaluate(test_dataset)



[0.31414440274238586, 0.8792732954025269]

In [39]:
# Loss and accuracy of the v1 model trained on neutral (UNK) text, on the same original test set
models[1].evaluate(test_dataset)



[0.37952134013175964, 0.8716232776641846]

#### Sentiment 

In [43]:
# Run both v1 models over the male-substituted and female-substituted test sets
# and keep the raw logits (male first, then female, for each model).
orig_m_logit_preds, orig_f_logit_preds = (
    models[0].predict(gendered_dataset).logits
    for gendered_dataset in (m_test_dataset, f_test_dataset)
)
unk_m_logit_preds, unk_f_logit_preds = (
    models[1].predict(gendered_dataset).logits
    for gendered_dataset in (m_test_dataset, f_test_dataset)
)

INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None,) dtype=int32>]
INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None,) dtype=int32>]


In [44]:
# Original-text model: turn logits into class probabilities, then compare the
# mean probability of class 1 (positive) between male and female test texts.
m_pred_probs = tf.nn.softmax(orig_m_logit_preds, axis=-1)
f_pred_probs = tf.nn.softmax(orig_f_logit_preds, axis=-1)

print("Average Male Positive Sentiment:", m_pred_probs[:, 1].numpy().mean())
print("Average Female Positive Sentiment:", f_pred_probs[:, 1].numpy().mean())
print("Difference in Sentiment (Male - Female):",
      m_pred_probs[:, 1].numpy().mean() - f_pred_probs[:, 1].numpy().mean())

Average Male Positive Sentiment: 0.55594724
Average Female Positive Sentiment: 0.5429732
Difference in Sentiment (Male - Female): 0.012974024


In [45]:
# Neutral-text (UNK) model: turn logits into class probabilities, then compare
# the mean probability of class 1 (positive) between male and female test texts.
m_pred_probs = tf.nn.softmax(unk_m_logit_preds, axis=-1)
f_pred_probs = tf.nn.softmax(unk_f_logit_preds, axis=-1)

print("Average Male Positive Sentiment:", m_pred_probs[:, 1].numpy().mean())
print("Average Female Positive Sentiment:", f_pred_probs[:, 1].numpy().mean())
print("Difference in Sentiment (Male - Female):",
      m_pred_probs[:, 1].numpy().mean() - f_pred_probs[:, 1].numpy().mean())

Average Male Positive Sentiment: 0.592404
Average Female Positive Sentiment: 0.598442
Difference in Sentiment (Male - Female): -0.00603801


#### Save models

In [40]:
# Persist both fine-tuned v1 models under the project's models/ directory
models[0].save_pretrained(f"{CWD}/models/original_base_model_v1")
models[1].save_pretrained(f"{CWD}/models/UNK_base_model_v1")

### Baseline Model v2 (10 epochs)

#### Train Model

In [33]:
# Fine-tune one model per training set for 10 epochs: index 0 is trained on the
# original reviews, index 1 on the neutral-text reviews. Both are validated on
# the original dev set.
models1 = []
histories1 = []
for train, dev in [(train_dataset, dev_dataset), (n_train_dataset, dev_dataset)]:
  # Create and compile the model inside the distribution strategy scope
  with tpu_strategy.scope():
    model = create_model()

    optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5)

    # The model outputs raw logits, hence from_logits=True
    model.compile(optimizer=optimizer, loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), 
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy('accuracy')])

  # summary() prints the table itself and returns None, so it is not wrapped
  # in print() (which would emit a stray "None" line)
  model.summary()

  history = model.fit(train, validation_data=dev, epochs=10)

  histories1.append(history)
  models1.append(model)

Some layers from the model checkpoint at distilbert-base-uncased were not used when initializing TFDistilBertForSequenceClassification: ['activation_13', 'vocab_projector', 'vocab_layer_norm', 'vocab_transform']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier', 'pre_classifier', 'dropout_39']
You should probably TRAIN this model on a down-stream task to be able to use i

Model: "tf_distil_bert_for_sequence_classification_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
distilbert (TFDistilBertMain multiple                  66362880  
_________________________________________________________________
pre_classifier (Dense)       multiple                  590592    
_________________________________________________________________
classifier (Dense)           multiple                  1538      
_________________________________________________________________
dropout_39 (Dropout)         multiple                  0         
Total params: 66,955,010
Trainable params: 66,955,010
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/10


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None,) dtype=int32>]


Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.


Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.
INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None,) dtype=int32>]




INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None,) dtype=int32>]


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Some layers from the model checkpoint at distilbert-base-uncased were not used when initializing TFDistilBertForSequenceClassification: ['activation_13', 'vocab_projector', 'vocab_layer_norm', 'vocab_transform']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier', 'pre_classifier', 'dropout_59']
You should probably TRAIN this model on a down-stream task to be able to use i

Model: "tf_distil_bert_for_sequence_classification_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
distilbert (TFDistilBertMain multiple                  66362880  
_________________________________________________________________
pre_classifier (Dense)       multiple                  590592    
_________________________________________________________________
classifier (Dense)           multiple                  1538      
_________________________________________________________________
dropout_59 (Dropout)         multiple                  0         
Total params: 66,955,010
Trainable params: 66,955,010
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/10


INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None,) dtype=int32>]
INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None,) dtype=int32>]




INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None,) dtype=int32>]


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


#### Test metrics

In [34]:
# Loss and accuracy of the v2 model trained on original text, on the held-out test set
models1[0].evaluate(test_dataset)



[0.6274470686912537, 0.87425297498703]

In [35]:
# Loss and accuracy of the v2 model trained on neutral (UNK) text, on the same original test set
models1[1].evaluate(test_dataset)



[0.4842031002044678, 0.8783169984817505]

#### Sentiment 

In [40]:
# Run both v2 models over the male-substituted and female-substituted test sets
# and keep the raw logits (male first, then female, for each model).
orig_m_logit_preds, orig_f_logit_preds = (
    models1[0].predict(gendered_dataset).logits
    for gendered_dataset in (m_test_dataset, f_test_dataset)
)
unk_m_logit_preds, unk_f_logit_preds = (
    models1[1].predict(gendered_dataset).logits
    for gendered_dataset in (m_test_dataset, f_test_dataset)
)

INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None,) dtype=int32>]
INFO:absl:TPU has inputs with dynamic shapes: [<tf.Tensor 'Const:0' shape=() dtype=int32>, <tf.Tensor 'cond_8/Identity:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_1:0' shape=(None, 512) dtype=int32>, <tf.Tensor 'cond_8/Identity_2:0' shape=(None,) dtype=int32>]


In [41]:
# Original-text model (v2): turn logits into class probabilities, then compare
# the mean probability of class 1 (positive) between male and female test texts.
m_pred_probs = tf.nn.softmax(orig_m_logit_preds, axis=-1)
f_pred_probs = tf.nn.softmax(orig_f_logit_preds, axis=-1)

print("Average Male Positive Sentiment:", m_pred_probs[:, 1].numpy().mean())
print("Average Female Positive Sentiment:", f_pred_probs[:, 1].numpy().mean())
print("Difference in Sentiment (Male - Female):",
      m_pred_probs[:, 1].numpy().mean() - f_pred_probs[:, 1].numpy().mean())

Average Male Positive Sentiment: 0.54753333
Average Female Positive Sentiment: 0.53478456
Difference in Sentiment (Male - Female): 0.012748778


In [42]:
# Neutral-text (UNK) model (v2): turn logits into class probabilities, then compare
# the mean probability of class 1 (positive) between male and female test texts.
m_pred_probs = tf.nn.softmax(unk_m_logit_preds, axis=-1)
f_pred_probs = tf.nn.softmax(unk_f_logit_preds, axis=-1)

print("Average Male Positive Sentiment:", m_pred_probs[:, 1].numpy().mean())
print("Average Female Positive Sentiment:", f_pred_probs[:, 1].numpy().mean())
print("Difference in Sentiment (Male - Female):",
      m_pred_probs[:, 1].numpy().mean() - f_pred_probs[:, 1].numpy().mean())

Average Male Positive Sentiment: 0.531735
Average Female Positive Sentiment: 0.535358
Difference in Sentiment (Male - Female): -0.0036230087


#### Save models

In [43]:
# Persist both fine-tuned v2 models under the project's models/ directory
models1[0].save_pretrained(f"{CWD}/models/original_base_model_v2")
models1[1].save_pretrained(f"{CWD}/models/UNK_base_model_v2")