## GenBit Sample Notebook

### This notebook is intended to provide a quick start for users who want to build on top of the GenBit API.

## Install GenBit

In [None]:
!pip install genbit

### Import utilities

In [6]:
import pprint
import genbit
from genbit.genbit_metrics import GenBitMetrics

### Specify Filename Location and Language  
(Accepted language codes: EN, IT, RU, FR, DE and ES)

In [42]:
# Language code of the input text, handed to GenBitMetrics in the next cell.
language = "en"

# Path of the text file that is read line by line further down.
input_file = "sample_100.tsv"

In [43]:
# Build the GenBit metrics object for the selected language.
# NOTE(review): the meaning of the three tuning values below is defined by the
# GenBit library - confirm against its documentation before changing them.
metric = GenBitMetrics(
    language,
    context_window=30,
    distance_weight=0.95,
    percentile_cutoff=80,
)

In [44]:
# Controls how the input file is handed to GenBit:
#   True  -> every stripped line is collected and submitted in ONE add_data call
#   False -> each stripped line is submitted in its OWN add_data call
contiguous = False
data = []
with open(input_file, "r", encoding="utf-8") as input_sentence_file:
    for raw_line in input_sentence_file:
        line = raw_line.strip()
        if not contiguous:
            # Per-line mode: submit a fresh single-item list for this line only.
            data = [line]
            metric.add_data(data, tokenized=False)
            continue
        # Contiguous mode: keep accumulating; submission happens after the loop.
        data.append(line)

if contiguous:
    # Submit everything gathered from the file in a single call.
    metric.add_data(data, tokenized=False)

In [45]:
# Retrieve the metrics from the GenBitMetrics object populated above.
# NOTE(review): presumably output_statistics adds the 'additional_metrics'
# section and output_word_list=False omits per-word detail - confirm in the
# GenBit documentation.
metrics = metric.get_metrics(
    output_statistics=True,
    output_word_list=False,
)

### Print the results
The results using the test file should show a heavy non-binary bias, as many terms in the sample file are gender 'neutral' or explicitly non-binary. You will also observe that, in the case of binary bias, there is a slight male-leaning bias because male gender definition words occur more frequently throughout the samples in the data.

In [46]:
# Pretty-print the metrics object returned by get_metrics so nested entries are readable.
pprint.pprint(metrics)

{'additional_metrics': {'avg_bias_conditional': -0.03408793873500453,
                        'avg_bias_conditional_absolute': 0.21136226833284888,
                        'avg_bias_ratio': -0.013720635910570779,
                        'avg_bias_ratio_absolute': 0.20659960523309384,
                        'avg_non_binary_bias_conditional': 0.6686957743179468,
                        'avg_non_binary_bias_conditional_absolute': 0.7000381293796363,
                        'avg_non_binary_bias_ratio': 0.5229287558226876,
                        'avg_non_binary_bias_ratio_absolute': 0.5920022700905334,
                        'std_dev_bias_conditional': 0.439433185699777,
                        'std_dev_bias_ratio': 0.43943318569977696,
                        'std_dev_non_binary_bias_conditional': 0.516392943941314,
                        'std_dev_non_binary_bias_ratio': 0.5164280054168683},
 'genbit_score': 0.21136226833284888,
 'percentage_of_female_gender_definition_words': 0.266375

###  ----- END -------- 