# Modify bilstm.py to use GN-GloVe embeddings

In [1]:
!pip install scikit-learn pandas numpy



In [2]:
!unzip data.zip

Archive:  data.zip
   creating: data/
  inflating: __MACOSX/._data         
  inflating: data/gender_corpus.tsv  
  inflating: __MACOSX/data/._gender_corpus.tsv  
  inflating: data/train.tsv          
  inflating: __MACOSX/data/._train.tsv  
  inflating: data/plot_data.csv      
  inflating: __MACOSX/data/._plot_data.csv  
  inflating: data/dev.tsv            
  inflating: __MACOSX/data/._dev.tsv  
  inflating: data/gender.tsv         
  inflating: __MACOSX/data/._gender.tsv  


In [3]:
!unzip runs.zip

Archive:  runs.zip
   creating: runs/
  inflating: __MACOSX/._runs         
   creating: runs/gender/
  inflating: __MACOSX/runs/._gender  
  inflating: runs/results.txt        
  inflating: __MACOSX/runs/._results.txt  
  inflating: runs/readme.md          
  inflating: __MACOSX/runs/._readme.md  
  inflating: runs/logreg.txt         
  inflating: __MACOSX/runs/._logreg.txt  
  inflating: runs/plot.png           
  inflating: __MACOSX/runs/._plot.png  
  inflating: runs/lstm.txt           
  inflating: __MACOSX/runs/._lstm.txt  
  inflating: runs/lstm.h5            
  inflating: __MACOSX/runs/._lstm.h5  
  inflating: runs/gender/test_results.tsv  
  inflating: __MACOSX/runs/gender/._test_results.tsv  
  inflating: runs/gender/Bert_test_result  
  inflating: __MACOSX/runs/gender/._Bert_test_result  


In [4]:
!unzip src.zip

Archive:  src.zip
   creating: src/
  inflating: __MACOSX/._src          
  inflating: src/bert_finetune.py    
  inflating: __MACOSX/src/._bert_finetune.py  
  inflating: src/config.py           
  inflating: __MACOSX/src/._config.py  
  inflating: src/bert_finetune2.py   
  inflating: __MACOSX/src/._bert_finetune2.py  
  inflating: src/analysis.py         
  inflating: __MACOSX/src/._analysis.py  
  inflating: src/baseline.py         
  inflating: __MACOSX/src/._baseline.py  
  inflating: src/readme.md           
  inflating: __MACOSX/src/._readme.md  
  inflating: src/generate_corpus.py  
  inflating: __MACOSX/src/._generate_corpus.py  
  inflating: src/utils.py            
  inflating: __MACOSX/src/._utils.py  
  inflating: src/bilstm.py           
  inflating: __MACOSX/src/._bilstm.py  


In [5]:
# Sanity-check the GN-GloVe vectors file: every non-blank line is parsed as
# "<word> <v1> <v2> ... <vN>" (whitespace-separated), and all lines are
# expected to carry the same number of vector components.

# Path to your GN-GloVe file
gn_glove_file = "/content/1b-vectors300-0.8-0.8.txt"

# Vector length of the first non-blank line; later lines are compared to it.
dimensionality = None
# One (line number, word, vector length) tuple per line that disagrees.
inconsistent_lines = []

# Read with an explicit encoding so the result does not depend on the
# platform's default locale (file assumed to be UTF-8 -- confirm).
with open(gn_glove_file, "r", encoding="utf-8") as f:
    for line_num, line in enumerate(f, start=1):
        values = line.split()
        if not values:
            # Blank or whitespace-only line (e.g. a trailing newline at the
            # end of the file): there is no word to index, so skip it rather
            # than raising IndexError. Line numbering still counts it.
            continue
        word, vector = values[0], values[1:]

        # The first vector seen fixes the expected dimensionality.
        if dimensionality is None:
            dimensionality = len(vector)
        elif len(vector) != dimensionality:
            inconsistent_lines.append((line_num, word, len(vector)))

# Results
print(f"Expected dimensionality: {dimensionality}")
if inconsistent_lines:
    print("Inconsistent lines found:")
    for line_num, word, dim in inconsistent_lines:
        print(f"Line {line_num}: Word '{word}' has {dim} dimensions")
else:
    print("All lines have consistent dimensions.")

Expected dimensionality: 300
All lines have consistent dimensions.


In [6]:
# Run the baseline script; it prints a single score (which metric is not
# shown here -- see src/baseline.py).
!python3 src/baseline.py

0.8268348623853211


In [7]:
# Run the BiLSTM script (per the notebook title, the version meant to use
# GN-GloVe embeddings -- confirm in src/bilstm.py).
!python3 src/bilstm.py

2024-11-25 01:28:44.650746: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-25 01:28:44.668401: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-25 01:28:44.689154: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-25 01:28:44.695510: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-25 01:28:44.710405: I tensorflow/core/platform/cpu_feature_guar

# Debiasing by applying adversarial training

In [14]:
# Run the second BERT fine-tuning script (presumably the adversarial-training
# variant named in the section heading -- confirm in src/bert_finetune2.py).
!python3 src/bert_finetune2.py

2024-11-25 02:55:16.690960: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-25 02:55:16.708623: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-25 02:55:16.729566: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-25 02:55:16.735896: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-25 02:55:16.751058: I tensorflow/core/platform/cpu_feature_guar

# Analysis

In [15]:
# Run the analysis script; it prints the result lists and the per-profession
# and per-noun female-male tables shown below.
!python3 src/analysis.py

Results: [[-5.906212425825358, 7.501185188242566e-09, 0.0345668387793015], [-5.3577531399903835, 1.4273495272982524e-07, 0.04603757691074861], [0.38867389986426015, 0.6977247092479395, -0.013222992]]


Male:  1182 0.5346869712351946
Female:  601 0.5990016638935108


Profession mean_sentiment female-male
doctor 0.5071565 0.030932724
tailor 0.5351617 0.14412275
baker 0.5559482 0.057710826
secretary 0.5083631 -0.00421232
professor 0.5770195 0.03572291
scientist 0.55970085 -0.035392284
writer 0.54189515 0.10923296
teacher 0.41381574 -0.13378006
truck driver 0.45750666 -0.09423432
pilot 0.39946228 -0.27853554
lawyer 0.52300674 0.037430465
flight attendant 0.55729085 -0.031856775
nurse 0.44082016 -0.07679489
chef 0.5337427 0.17204174
soldier 0.58594996 -0.14542407
dancer 0.47367015 -0.49381784
gym trainer 0.56000596 0.057891905
mechanic 0.46922523 0.13795525
clerk 0.4895978 0.253347
bartender 0.57123387 -0.0067995787
CONTROL 0.9913813 0.0


noun female-male
He -0.008233493
This boy 0.0552703