In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import linregress

In [None]:
def herit_data_parse(vars_file,chosen_file,gen_no):

    """
    Compute per-generation mean variability for all families and for the chosen families.

    vars_file: CSV file containing family-wise variability scores for each population across generations (generated in fam_var_herit.m).
               Columns named 'fam<g>' hold the family identifiers of generation g and columns named 'var<g>' hold the
               score found in the same row.
    chosen_file: Header-less CSV file noting the indices of selected/chosen families for that population across generations
                 (one row per generation). Blank cells are skipped.
    gen_no: Number of generations

    Returns a tuple of two numpy arrays:
        pop_means_all    - mean of every 'var<g>' column, ordered by generation, with scores of exactly 0 excluded
        pop_means_chosen - for generations 1..gen_no, the mean score of the chosen families

    Raises:
        IndexError: a chosen family does not occur in that generation's 'fam<g>' column.
        ValueError: chosen_file does not have exactly gen_no rows (raised by pandas when the columns are relabelled).
    """

    pop = pd.read_csv(vars_file)

    def _generation_columns(prefix):
        # Pick the '<prefix><generation>' columns and relabel them with the integer generation.
        # removeprefix() drops only the literal prefix, whereas str.strip(prefix) would strip
        # any leading/trailing run of the characters in it.
        block = pop.loc[:, pop.columns.str.startswith(prefix)].copy()
        block.columns = block.columns.str.removeprefix(prefix).astype(int)
        return block.sort_index(axis=1)

    pop_fams = _generation_columns('fam')
    # Scores of exactly 0 become NaN so that DataFrame.mean() below leaves them out.
    pop_vars = _generation_columns('var').replace(0, np.nan)

    pop_means_all = np.array(pop_vars.mean(axis=0))  # first return variable

    # Rows of the file are generations; after transposing, column g lists generation g's chosen families.
    pop_chosen = pd.read_csv(chosen_file, header=None).T
    pop_chosen.columns = list(range(1, gen_no + 1))

    pop_means_chosen = []

    for gen in range(1, gen_no + 1):
        fam_ids = pop_fams[gen]
        gen_vars = pop_vars[gen]
        chosen_vars = []

        # Blank cells in the chosen file are read as NaN; they name no family, so skip them.
        for fam in pop_chosen[gen].dropna():
            matches = gen_vars[fam_ids == fam]
            if matches.empty:
                # Same exception type the bare .iloc[0] used to raise, but with a usable message.
                raise IndexError(
                    f"chosen family {fam} not found in column 'fam{gen}' of the variability file"
                )
            # If a family id is listed more than once, the first matching row is used.
            chosen_vars.append(matches.iloc[0])

        # NOTE(review): np.mean propagates NaN, so a chosen family whose score was 0 makes this
        # generation's chosen mean NaN, while pop_means_all skips such scores - confirm intended.
        pop_means_chosen.append(np.mean(chosen_vars))

    pop_means_chosen = np.array(pop_means_chosen)  # second return variable

    return pop_means_all, pop_means_chosen

In [None]:
# VS1 ('Sel' input files, block 1): per-generation means over all families and over the chosen families.
VS1_means_all, VS1_means_chosen = herit_data_parse(
    vars_file='./processed/Sel_vars_1.csv',
    chosen_file='./processed/chosen_fams_VS1.csv',
    gen_no=21,
)

In [None]:
# VC1 ('Con' input files, block 1): per-generation means over all families and over the chosen families.
VC1_means_all, VC1_means_chosen = herit_data_parse(
    vars_file='./processed/Con_vars_1.csv',
    chosen_file='./processed/chosen_fams_VC1.csv',
    gen_no=21,
)

In [None]:
# VS2 ('Sel' input files, block 2): per-generation means over all families and over the chosen families.
VS2_means_all, VS2_means_chosen = herit_data_parse(
    vars_file='./processed/Sel_vars_2.csv',
    chosen_file='./processed/chosen_fams_VS2.csv',
    gen_no=21,
)

In [None]:
# VC2 ('Con' input files, block 2): per-generation means over all families and over the chosen families.
VC2_means_all, VC2_means_chosen = herit_data_parse(
    vars_file='./processed/Con_vars_2.csv',
    chosen_file='./processed/chosen_fams_VC2.csv',
    gen_no=21,
)

In [None]:
# VS3 ('Sel' input files, block 3): per-generation means over all families and over the chosen families.
VS3_means_all, VS3_means_chosen = herit_data_parse(
    vars_file='./processed/Sel_vars_3.csv',
    chosen_file='./processed/chosen_fams_VS3.csv',
    gen_no=21,
)

In [None]:
# VC3 ('Con' input files, block 3): per-generation means over all families and over the chosen families.
VC3_means_all, VC3_means_chosen = herit_data_parse(
    vars_file='./processed/Con_vars_3.csv',
    chosen_file='./processed/chosen_fams_VC3.csv',
    gen_no=21,
)

In [None]:
# Stack the six all-family mean arrays as rows (one row per population, one column per generation).
df_means = pd.DataFrame(
    np.vstack([
        VS1_means_all,
        VC1_means_all,
        VS2_means_all,
        VC2_means_all,
        VS3_means_all,
        VC3_means_all,
    ])
)

In [None]:
# Flip to one row per generation and one column per population, then label the
# columns in the same order the arrays were stacked.
df_means = df_means.transpose()
df_means.columns = [
    'VS1', 'VC1',
    'VS2', 'VC2',
    'VS3', 'VC3',
]

In [None]:
# Stack the six chosen-family mean arrays as rows (one row per population, one column per generation).
df_means_chosen = pd.DataFrame(
    np.vstack([
        VS1_means_chosen,
        VC1_means_chosen,
        VS2_means_chosen,
        VC2_means_chosen,
        VS3_means_chosen,
        VC3_means_chosen,
    ])
)

In [None]:
# Flip to one row per generation and one column per population, then label the
# columns in the same order the arrays were stacked.
df_means_chosen = df_means_chosen.transpose()
df_means_chosen.columns = [
    'VS1', 'VC1',
    'VS2', 'VC2',
    'VS3', 'VC3',
]

### Calculating cumulative selection differential and response

In [None]:
# Selection differential per generation and population:
# mean of the chosen families minus mean of all families.
df_S = df_means_chosen - df_means

In [None]:
# Remove the last generation's row so df_S ends up with as many rows as df_R
# (diff() of df_means without its leading NaN row). The label comes from df_means
# rather than a hard-coded 20, so it follows the number of generations loaded, and
# errors='ignore' turns a re-run of this cell into a no-op instead of a KeyError.
df_S = df_S.drop(index=df_means.index[-1], errors='ignore')

In [None]:
# Response per generation and population: change in the all-family mean from the
# previous row. The first row is NaN because it has no previous row.
df_R = df_means.diff()

In [None]:
# diff() leaves the first row all-NaN (nothing earlier to subtract), so remove it.
# Taking the label from df_means and passing errors='ignore' makes a re-run of this
# cell a no-op instead of a KeyError.
df_R = df_R.drop(index=df_means.index[0], errors='ignore')

In [None]:
# Cumulative response: running total of df_R down the rows, separately per population column.
df_Rc = df_R.cumsum()

In [None]:
# Cumulative selection differential: running total of df_S down the rows, separately per population column.
df_Sc = df_S.cumsum()

In [None]:
# Population labels in the column order used by df_means / df_means_chosen:
# VS then VC within each of the blocks 1-3.
pops = [f'{line}{block}' for block in (1, 2, 3) for line in ('VS', 'VC')]

In [None]:
# Split the cumulative tables into the three VS columns and the three VC columns.
df_Sc_VS = df_Sc.loc[:, ['VS1', 'VS2', 'VS3']]
df_Rc_VS = df_Rc.loc[:, ['VS1', 'VS2', 'VS3']]

df_Sc_VC = df_Sc.loc[:, ['VC1', 'VC2', 'VC3']]
df_Rc_VC = df_Rc.loc[:, ['VC1', 'VC2', 'VC3']]

In [None]:
# Reshape to long format: a 'pop' column naming the population plus one value column.
# The VS columns are melted straight from df_Sc / df_Rc instead of from
# df_Sc_VS / df_Rc_VS, so re-running this cell can never melt an already-melted
# frame; on a fresh top-to-bottom run the result is the same as before.
df_Sc_VS = df_Sc[['VS1','VS2','VS3']].melt(var_name = 'pop', value_name = 'Sc')
df_Rc_VS = df_Rc[['VS1','VS2','VS3']].melt(var_name = 'pop', value_name = 'Rc')

In [None]:
# Start the combined VS table from a copy of the long-format Sc frame ('pop' and 'Sc' columns),
# so adding columns to df_VS does not modify df_Sc_VS.
df_VS = df_Sc_VS.copy()

In [None]:
# Attach the cumulative response next to the cumulative selection differential.
# Rows are matched on index label; both frames carry the fresh 0..n-1 index that
# melt() produced, so this pairs them position by position.
df_VS = df_VS.assign(Rc=df_Rc_VS['Rc'])

In [None]:
# Reshape to long format: a 'pop' column naming the population plus one value column.
# The VC columns are melted straight from df_Sc / df_Rc instead of from
# df_Sc_VC / df_Rc_VC, so re-running this cell can never melt an already-melted
# frame; on a fresh top-to-bottom run the result is the same as before.
df_Sc_VC = df_Sc[['VC1','VC2','VC3']].melt(var_name = 'pop', value_name = 'Sc')
df_Rc_VC = df_Rc[['VC1','VC2','VC3']].melt(var_name = 'pop', value_name = 'Rc')

In [None]:
# Start the combined VC table from a copy of the long-format Sc frame ('pop' and 'Sc' columns),
# so adding columns to df_VC does not modify df_Sc_VC.
df_VC = df_Sc_VC.copy()

In [None]:
# Attach the cumulative response next to the cumulative selection differential.
# Rows are matched on index label; both frames carry the fresh 0..n-1 index that
# melt() produced, so this pairs them position by position.
df_VC = df_VC.assign(Rc=df_Rc_VC['Rc'])

In [None]:
# Display the assembled VC table ('pop', 'Sc' and 'Rc' columns) for inspection.
df_VC

#### Renaming for best match with downstream modelling in R

In [None]:
# Drop the leading 'VC' from each population label (e.g. 'VC1' -> '1').
df_VC['pop'] = df_VC['pop'].str.removeprefix('VC')

In [None]:
# The 'pop' column now holds only the numeric suffix, so it is renamed to 'block'.
df_VC = df_VC.rename(columns={'pop': 'block'})

In [None]:
# Drop the leading 'VS' from each population label (e.g. 'VS1' -> '1').
df_VS['pop'] = df_VS['pop'].str.removeprefix('VS')

In [None]:
# The 'pop' column now holds only the numeric suffix, so it is renamed to 'block'.
df_VS = df_VS.rename(columns={'pop': 'block'})

In [None]:
# Write both long-format tables to the working directory, without the row index column.
df_VC.to_csv(path_or_buf='con_herit.csv', index=False)
df_VS.to_csv(path_or_buf='sel_herit.csv', index=False)