# Bias testing

This Jupyter notebook performs the bias analysis for sex prediction and for age grading in both males and females. It reproduces Table S3 of the manuscript.

In [56]:
import sys

import pandas as pd

sys.path.append('../src')
from utilities import gridsearch_bias

In [57]:
# Load the processed tsetse CSV; the later cells slice their data from it
tsetse_data = pd.read_csv(filepath_or_buffer="../data/processed/TseTse_processed.csv")

## Sex bias

In [58]:
# Split the dataset on the 'Tissue' column: thorax rows and head rows
tsetse_thorax = tsetse_data.loc[tsetse_data["Tissue"].eq("Thorax")]
tsetse_head = tsetse_data.loc[tsetse_data["Tissue"].eq("Head")]

In [59]:

# Head: one feature matrix per column window, sliced by column label
# (.loc label slices include both end labels).
# NOTE(review): the labels look like spectral wavenumbers - confirm against the CSV.
X_head_part1, X_head_part2, X_head_part3, X_head_part4 = (
    tsetse_head.loc[:, first:last]
    for first, last in (
        ("4000", "402"),
        ("1750", "600"),
        ("3500", "2750"),
        ("2750", "1750"),
    )
)

# Combined matrix: the "3500"-"2750" window followed by the "1750"-"600" window
X_head_desbiased = pd.concat([X_head_part3, X_head_part2], axis=1)

# Target: the 'Sex' column of the head rows
y_head_sex = tsetse_head["Sex"]

In [60]:
# Thorax: one feature matrix per column window, sliced by column label
# (.loc label slices include both end labels).
X_thorax_part1, X_thorax_part2, X_thorax_part3, X_thorax_part4 = (
    tsetse_thorax.loc[:, first:last]
    for first, last in (
        ("4000", "402"),
        ("1750", "600"),
        ("3500", "2750"),
        ("2750", "1750"),
    )
)

# Combined matrix: the "3500"-"2750" window followed by the "1750"-"600" window
X_thorax_desbiased = pd.concat([X_thorax_part3, X_thorax_part2], axis=1)

# Target: the 'Sex' column of the thorax rows
y_thorax_sex = tsetse_thorax["Sex"]

In [61]:
# Head, sex prediction: run gridsearch_bias on each head feature matrix
# (parts 1-4, then the combined "desbiased" matrix) against the sex labels
(
    bias_test_head_part1,
    bias_test_head_part2,
    bias_test_head_part3,
    bias_test_head_part4,
    bias_test_head_part5,
) = (
    gridsearch_bias(features, y_head_sex)
    for features in (
        X_head_part1,
        X_head_part2,
        X_head_part3,
        X_head_part4,
        X_head_desbiased,
    )
)

In [62]:
# Thorax, sex prediction: run gridsearch_bias on each thorax feature matrix
# (parts 1-4, then the combined "desbiased" matrix) against the sex labels
(
    bias_test_part1,
    bias_test_part2,
    bias_test_part3,
    bias_test_part4,
    bias_test_part5,
) = (
    gridsearch_bias(features, y_thorax_sex)
    for features in (
        X_thorax_part1,
        X_thorax_part2,
        X_thorax_part3,
        X_thorax_part4,
        X_thorax_desbiased,
    )
)

In [63]:
# Thorax: keep the full part-1 result and place the 'mean_test_score'
# column of each remaining run next to it
result = pd.concat(
    [bias_test_part1]
    + [
        run["mean_test_score"]
        for run in (bias_test_part2, bias_test_part3, bias_test_part4, bias_test_part5)
    ],
    axis=1,
)
result["Tissue"] = "Thorax"

# Head: same layout, built from the head runs
result2 = pd.concat(
    [bias_test_head_part1]
    + [
        run["mean_test_score"]
        for run in (
            bias_test_head_part2,
            bias_test_head_part3,
            bias_test_head_part4,
            bias_test_head_part5,
        )
    ],
    axis=1,
)
result2["Tissue"] = "Head"

In [64]:
# Stack the head table on top of the thorax table and tag the task
bias_sex = pd.concat([result2, result], axis=0).assign(problem="Sex")
# Rename columns by position; D1-D5 are the scores of the five feature sets
# (assumes gridsearch_bias returns model, kernel and score columns - confirm)
bias_sex.columns = [
    "Model",
    "kernel",
    "D1",
    "D2",
    "D3",
    "D4",
    "D5",
    "Tissue",
    "problem",
]
bias_sex


Unnamed: 0,Model,kernel,D1,D2,D3,D4,D5,Tissue,problem
0,SVM,rbf,0.880374,0.87757,0.882243,0.842056,0.892523,Head,Sex
1,SVM,linear,0.957009,0.971963,0.928037,0.908411,0.968224,Head,Sex
2,LR,,0.957009,0.950467,0.906542,0.894393,0.957009,Head,Sex
3,RF,,0.858879,0.875701,0.858879,0.861682,0.88785,Head,Sex
0,SVM,rbf,0.883333,0.808333,0.814815,0.764815,0.874074,Thorax,Sex
1,SVM,linear,0.95463,0.944444,0.932407,0.878704,0.963889,Thorax,Sex
2,LR,,0.950926,0.942593,0.925926,0.866667,0.957407,Thorax,Sex
3,RF,,0.860185,0.839815,0.792593,0.801852,0.875,Thorax,Sex


## Males age bias

In [65]:
# Male rows ('Sex' == 'm'), split by tissue
tsetse_males_thorax = tsetse_data.loc[
    tsetse_data["Sex"].eq("m") & tsetse_data["Tissue"].eq("Thorax")
]
tsetse_males_head = tsetse_data.loc[
    tsetse_data["Sex"].eq("m") & tsetse_data["Tissue"].eq("Head")
]

In [66]:
# Male thorax: one feature matrix per column window, sliced by column label
# (.loc label slices include both end labels).
X_thorax_part1, X_thorax_part2, X_thorax_part3, X_thorax_part4 = (
    tsetse_males_thorax.loc[:, first:last]
    for first, last in (
        ("4000", "402"),
        ("1750", "600"),
        ("3500", "2750"),
        ("2750", "1750"),
    )
)

# Combined matrix: the "3500"-"2750" window followed by the "1750"-"600" window
X_thorax_desbiased = pd.concat([X_thorax_part3, X_thorax_part2], axis=1)

# Target: the 'Age' column of the male thorax rows
y_thorax = tsetse_males_thorax["Age"]


In [67]:
# Male head: one feature matrix per column window, sliced by column label
# (.loc label slices include both end labels).
X_head_part1, X_head_part2, X_head_part3, X_head_part4 = (
    tsetse_males_head.loc[:, first:last]
    for first, last in (
        ("4000", "402"),
        ("1750", "600"),
        ("3500", "2750"),
        ("2750", "1750"),
    )
)

# Combined matrix: the "3500"-"2750" window followed by the "1750"-"600" window
X_head_desbiased = pd.concat([X_head_part3, X_head_part2], axis=1)

# Target: the 'Age' column of the male head rows
y_head = tsetse_males_head["Age"]

In [68]:
# Male thorax, age: run gridsearch_bias on each thorax feature matrix
# (parts 1-4, then the combined "desbiased" matrix) against the age labels
(
    bias_test_part1,
    bias_test_part2,
    bias_test_part3,
    bias_test_part4,
    bias_test_part5,
) = (
    gridsearch_bias(features, y_thorax)
    for features in (
        X_thorax_part1,
        X_thorax_part2,
        X_thorax_part3,
        X_thorax_part4,
        X_thorax_desbiased,
    )
)

In [69]:
# Male head, age: run gridsearch_bias on each head feature matrix
# (parts 1-4, then the combined "desbiased" matrix) against the age labels
(
    bias_test_head_part1,
    bias_test_head_part2,
    bias_test_head_part3,
    bias_test_head_part4,
    bias_test_head_part5,
) = (
    gridsearch_bias(features, y_head)
    for features in (
        X_head_part1,
        X_head_part2,
        X_head_part3,
        X_head_part4,
        X_head_desbiased,
    )
)

In [70]:
# Thorax: full part-1 result plus the 'mean_test_score' column of each other run
result = pd.concat(
    [bias_test_part1]
    + [
        run["mean_test_score"]
        for run in (bias_test_part2, bias_test_part3, bias_test_part4, bias_test_part5)
    ],
    axis=1,
)
result["Tissue"] = "Thorax"

# Head: same layout, built from the head runs
result2 = pd.concat(
    [bias_test_head_part1]
    + [
        run["mean_test_score"]
        for run in (
            bias_test_head_part2,
            bias_test_head_part3,
            bias_test_head_part4,
            bias_test_head_part5,
        )
    ],
    axis=1,
)
result2["Tissue"] = "Head"

# Stack the head table on top of the thorax table and tag the task
bias_agemales = pd.concat([result2, result], axis=0).assign(problem="Males age")
# Rename columns by position; D1-D5 are the scores of the five feature sets
bias_agemales.columns = [
    "Model",
    "kernel",
    "D1",
    "D2",
    "D3",
    "D4",
    "D5",
    "Tissue",
    "problem",
]


## Females age bias

In [71]:
# Female rows ('Sex' == 'f'), split by tissue
tsetse_females_thorax = tsetse_data.loc[
    tsetse_data["Sex"].eq("f") & tsetse_data["Tissue"].eq("Thorax")
]
tsetse_females_head = tsetse_data.loc[
    tsetse_data["Sex"].eq("f") & tsetse_data["Tissue"].eq("Head")
]

In [72]:
# Female head: one feature matrix per column window, sliced by column label
# (.loc label slices include both end labels).
X_head_part1, X_head_part2, X_head_part3, X_head_part4 = (
    tsetse_females_head.loc[:, first:last]
    for first, last in (
        ("4000", "402"),
        ("1750", "600"),
        ("3500", "2750"),
        ("2750", "1750"),
    )
)

# Combined matrix: the "3500"-"2750" window followed by the "1750"-"600" window
X_head_desbiased = pd.concat([X_head_part3, X_head_part2], axis=1)

# Target: the 'Age' column of the female head rows
y_head = tsetse_females_head["Age"]

In [73]:
# Female thorax: one feature matrix per column window, sliced by column label
# (.loc label slices include both end labels).
X_thorax_part1, X_thorax_part2, X_thorax_part3, X_thorax_part4 = (
    tsetse_females_thorax.loc[:, first:last]
    for first, last in (
        ("4000", "402"),
        ("1750", "600"),
        ("3500", "2750"),
        ("2750", "1750"),
    )
)

# Combined matrix: the "3500"-"2750" window followed by the "1750"-"600" window
X_thorax_desbiased = pd.concat([X_thorax_part3, X_thorax_part2], axis=1)

# Target: the 'Age' column of the female thorax rows
y_thorax = tsetse_females_thorax["Age"]

In [74]:
# Female head, age: run gridsearch_bias on each head feature matrix
# (parts 1-4, then the combined "desbiased" matrix) against the age labels
(
    bias_test_head_part1,
    bias_test_head_part2,
    bias_test_head_part3,
    bias_test_head_part4,
    bias_test_head_part5,
) = (
    gridsearch_bias(features, y_head)
    for features in (
        X_head_part1,
        X_head_part2,
        X_head_part3,
        X_head_part4,
        X_head_desbiased,
    )
)

In [75]:
# Female thorax, age: run gridsearch_bias on each thorax feature matrix
# (parts 1-4, then the combined "desbiased" matrix) against the age labels
(
    bias_test_part1,
    bias_test_part2,
    bias_test_part3,
    bias_test_part4,
    bias_test_part5,
) = (
    gridsearch_bias(features, y_thorax)
    for features in (
        X_thorax_part1,
        X_thorax_part2,
        X_thorax_part3,
        X_thorax_part4,
        X_thorax_desbiased,
    )
)

In [76]:
# Head: full part-1 result plus the 'mean_test_score' column of each other run
result = pd.concat(
    [bias_test_head_part1]
    + [
        run["mean_test_score"]
        for run in (
            bias_test_head_part2,
            bias_test_head_part3,
            bias_test_head_part4,
            bias_test_head_part5,
        )
    ],
    axis=1,
).assign(Tissue="Head")

In [77]:
# Thorax: full part-1 result plus the 'mean_test_score' column of each other run
result2 = pd.concat(
    [bias_test_part1]
    + [
        run["mean_test_score"]
        for run in (bias_test_part2, bias_test_part3, bias_test_part4, bias_test_part5)
    ],
    axis=1,
).assign(Tissue="Thorax")

In [78]:
# Stack head rows first, then thorax rows, matching the Head -> Thorax order
# of bias_sex and bias_agemales. In this section `result` holds the head
# scores and `result2` the thorax scores (the reverse of the earlier
# sections), so the operands are listed as [result, result2].
bias_agefemales = pd.concat([result, result2])
bias_agefemales['problem'] = 'Females age'

# Rename columns by position; D1-D5 are the scores of the five feature sets
# (assumes gridsearch_bias returns model, kernel and score columns - confirm)
bias_agefemales.columns = ["Model", "kernel", "D1", "D2", "D3", "D4", "D5", 'Tissue', 'problem']
bias_agefemales

Unnamed: 0,Model,kernel,D1,D2,D3,D4,D5,Tissue,problem
0,SVC(random_state=123),rbf,0.75493,0.770423,0.721127,0.633803,0.777465,Thorax,Females age
1,SVC(random_state=123),linear,0.897183,0.859155,0.856338,0.887324,0.85493,Thorax,Females age
2,LR,,0.912676,0.867606,0.846479,0.887324,0.878873,Thorax,Females age
3,RF,,0.771831,0.773239,0.708451,0.666197,0.784507,Thorax,Females age
0,SVC(random_state=123),rbf,0.797101,0.801449,0.744928,0.721739,0.804348,Head,Females age
1,SVC(random_state=123),linear,0.885507,0.853623,0.846377,0.847826,0.850725,Head,Females age
2,LR,,0.886957,0.857971,0.831884,0.866667,0.865217,Head,Females age
3,RF,,0.776812,0.82029,0.786957,0.708696,0.815942,Head,Females age


In [79]:
# Combine the three analyses, index the rows by (problem, Tissue),
# and write the table to the results folder
final_table = pd.concat(
    [bias_sex, bias_agemales, bias_agefemales],
).set_index(["problem", "Tissue"])
final_table.to_excel("../results/tables/wholespectra_results/bias_analysis.xlsx")


Unnamed: 0,Model,kernel,D1,D2,D3,D4,D5,Tissue,problem
0,SVM,rbf,0.880374,0.87757,0.882243,0.842056,0.892523,Head,Sex
1,SVM,linear,0.957009,0.971963,0.928037,0.908411,0.968224,Head,Sex
2,LR,,0.957009,0.950467,0.906542,0.894393,0.957009,Head,Sex
3,RF,,0.858879,0.875701,0.858879,0.861682,0.88785,Head,Sex
0,SVM,rbf,0.883333,0.808333,0.814815,0.764815,0.874074,Thorax,Sex
1,SVM,linear,0.95463,0.944444,0.932407,0.878704,0.963889,Thorax,Sex
2,LR,,0.950926,0.942593,0.925926,0.866667,0.957407,Thorax,Sex
3,RF,,0.860185,0.839815,0.792593,0.801852,0.875,Thorax,Sex
0,SVC(random_state=123),rbf,0.705263,0.663158,0.671053,0.655263,0.676316,Head,Males age
1,SVC(random_state=123),linear,0.889474,0.863158,0.813158,0.876316,0.871053,Head,Males age
