In [None]:
# Import dependencies

import csv
import random #generates random numbers
from collections import Counter #allows you to count frequency of elements in lists

In [None]:
# Containers filled by this cell and the steps that follow
all_rows = []  # data rows only: one list of cell strings per CSV row
columns = []   # populated further down with the same data transposed into columns
loci = []      # locus names taken from the header row

# Read the CSV file. The `with` block closes the file automatically.
# newline='' is what the csv module asks for so that newlines embedded in
# quoted fields are parsed correctly regardless of platform.
with open('./idandsex.csv', 'r', newline='') as f:
    reader = csv.reader(f, delimiter=',')  # comma-separated values
    header = next(reader, None)  # first row; None if the file is completely empty
    if header is not None:
        loci = header[2:]  # skip the first two header cells (the id and sex titles)
    # all_rows becomes a list of lists, e.g. [["a", "b", "c"], ["d", "e", "f"]].
    # csv.reader yields [] for a blank line; drop those so that later
    # row[1] lookups cannot fail on an empty row.
    all_rows.extend(row for row in reader if row)

# Shuffle in place so the isolates kept below are a random subset.
# NOTE(review): no seed is set in the visible cells, so the selection differs
# between runs - seed the RNG if the analysis needs to be reproducible.
random.shuffle(all_rows)

# Creation of sex-balanced dataset: equal numbers of female and male isolates
female_rows = [row for row in all_rows if row[1] == 'female']
male_rows = [row for row in all_rows if row[1] == 'male']

# Females are expected to be the limiting sex, so normally every female is kept
# and only the males are subsampled. Capping both sexes at the size of the
# smaller group keeps the dataset balanced even if that expectation is wrong.
female_count = male_count = min(len(female_rows), len(male_rows))

# Females first, then an equal number of males (both in shuffled order)
sex_balanced_rows = female_rows[:female_count] + male_rows[:male_count]

# The balanced dataset is what every later step works on
rows = sex_balanced_rows

# Transpose the row-wise data into column lists:
# [["a", "b", "c"], ["d", "e", "f"]] -> [["a", "d"], ["b", "e"], ["c", "f"]]
for row_number, record in enumerate(rows):
    if row_number == 0:
        # The first row seeds one single-element list per column
        columns.extend([value] for value in record)
        continue
    # Every later row adds one value to each column it has a cell for
    for col_number, value in enumerate(record):
        columns[col_number].append(value)

# Peel off the two leading columns (id and sex); what remains is one list per locus
ids, sexes = columns[0], columns[1]
columns = columns[2:]

# Build male-only and female-only versions of every locus column by pairing
# each value with the sex recorded at the same row position.
columns_male = [
    [value for value, sex in zip(column, sexes) if sex == 'male']
    for column in columns
]
columns_female = [
    [value for value, sex in zip(column, sexes) if sex == 'female']
    for column in columns
]
        

In [None]:
# Number of distinct values (alleles) seen at each locus, one entry per column
unique_count = [len(set(column)) for column in columns]                 # males + females combined
unique_count_male = [len(set(column)) for column in columns_male]       # males only
unique_count_female = [len(set(column)) for column in columns_female]   # females only

In [None]:
# Print the unique-allele count among females, one locus per line.
# The loop variable is deliberately not `_`: IPython keeps the previous cell's
# output in `_`, and assigning to it from user code interferes with that.
for female_unique in unique_count_female:
    print(female_unique)

#repeat for unique_count_male
#repeat for unique_count (both m & f)


In [None]:
# Count the sex differences in allele diversity at each locus
# - Positive values indicate that the allele diversity is greater amongst males
# - Negative values indicate that the allele diversity is greater amongst females
# - Absolute delta = difference in the number of unique alleles at each locus between the sexes
# - Relative delta = absolute delta scaled according to the total number of unique alleles at each locus
#    - i.e. a difference of 5 is more significant if there are 10 unique alleles than if there are 100 unique alleles 

absolute_delta = []
relative_delta = []

# NOTE(review): average_unique_count_male is not assigned in any of the cells
# above; presumably it is the per-locus mean over permutations computed in
# another cell - confirm that cell runs before this one on a fresh kernel.
for locus_index, total_unique in enumerate(unique_count):
    # Male figure here is the permutation average. To use the single random
    # male sample instead, substitute unique_count_male[locus_index].
    male_minus_female = average_unique_count_male[locus_index] - unique_count_female[locus_index]
    absolute_delta.append(male_minus_female)
    # Scale by the total number of unique alleles at this locus
    relative_delta.append(male_minus_female / total_unique)