In [1]:
import os, config, numpy as np, pandas as pd

# Extracting lxmx data for analysis

You will need the following packages installed to be able to run the notebook.

In [2]:
# Load the raw workbook with no header row and no index column, so both rows
# and columns end up with default integer labels.
LXMX = pd.read_excel(
    os.path.join(config.DATA_DIR, config.LXMX_FILENAME),
    header=None,
    index_col=None,
)

## Find the species: 
* store their names 
* slice indices in the DataFrame

In [3]:
def find_species(df, species_col=None):
    """Locate the block of rows belonging to each species.

    A block starts at every row whose cell in the species column is not
    empty. A non-empty cell directly below a row that was accepted as a
    block start is skipped, so it never opens a block of its own.

    Parameters
    ----------
    df : pandas.DataFrame
        The raw sheet.
    species_col : hashable, optional
        Label of the column holding the species names. Defaults to
        ``config.SPECIES_COL``.

    Returns
    -------
    dict
        Maps species name to a ``(start, stop)`` tuple of row positions,
        ``stop`` exclusive. The last block ends at ``len(df)``. If a name
        occurs more than once, the last occurrence wins.
    """
    if species_col is None:
        species_col = config.SPECIES_COL
    column = df[species_col]

    # Row *positions* (not index labels) of the non-empty cells.
    filled_rows = np.where(column.notna())[0]

    starts = []
    last_start = None
    for row in filled_rows:
        # Ignore a filled cell sitting right under the previous block start.
        if last_start is not None and row - 1 == last_start:
            continue
        last_start = int(row)
        starts.append(last_start)

    # Positions came from np.where, so read the names positionally as well;
    # label-based lookup would only work for a default 0..n-1 index.
    names = list(column.iloc[starts])

    # Each block runs up to the next start; the final one up to the end.
    bounds = starts + [len(df)]
    return {name: (bounds[i], bounds[i + 1]) for i, name in enumerate(names)}

In [4]:
# Species name -> (start, stop) row range inside the raw sheet.
SPECIES_ix = find_species(df=LXMX)

## Auxiliary classes

#### Data
The Data object stores the life-table matrix, the reported life-history variables, and the author/collector of the data entry as given in the xls file.

In [5]:
class Data(object):
    """One data entry (life table plus reported values) of a species.

    Parameters
    ----------
    df : pandas.DataFrame
        The rows of a single entry, with integer column labels. Columns
        0-3 of every row but the last form the life table; rows 0-1 of
        columns 5-6 hold the reported values as text; column 7 of the last
        row holds the author. The frame passed in is not modified.

    Attributes
    ----------
    matrix : pandas.DataFrame
        Columns ``l(a)``, ``f(a)``, ``p(a)`` indexed by ``age``; missing
        values are replaced with 0.
    author : object
        Content of the author cell.
    reported : dict
        ``{'T_c': [a, b], 'V': v, 'R_0': r}``; every value is a float, or
        ``None`` when the corresponding cell is empty.
    """

    # Names given to sheet columns 0-3.
    _COLUMNS = ['age', 'l(a)', 'f(a)', 'p(a)']
    # Number of leading characters stripped from a reported cell before the
    # number is parsed (presumably a textual label such as "xx=" -- confirm
    # against the spreadsheet).
    _SHORT_PREFIX = 3
    _V_PREFIX = 7

    def __init__(self, df):
        n_rows = len(df)
        # Renumber the rows from 0 on a copy; assigning df.index directly
        # would overwrite the index of the caller's frame.
        df = df.reset_index(drop=True)

        # .loc slices are label based and inclusive: rows 0..n_rows-2
        # (everything except the author row) and columns 0..3.
        table = df.loc[0:n_rows - 2, 0:3].copy()
        table.columns = list(self._COLUMNS)
        self.matrix = table.set_index('age').fillna(0.)

        # The author sits in column 7 of the last row.
        self.author = df[n_rows - 1:n_rows][7].item()

        # 2x2 block of reported values: rows 0-1, columns 5-6.
        cells = df.loc[0:1, 5:6].to_numpy()
        first_row, second_row = list(cells[0]), list(cells[1])
        parse = self._parse_reported
        self.reported = {
            'T_c': [
                parse(first_row[0], self._SHORT_PREFIX),
                parse(first_row[1], self._SHORT_PREFIX),
            ],
            'V': parse(second_row[1], self._V_PREFIX),
            'R_0': parse(second_row[0], self._SHORT_PREFIX),
        }

    @staticmethod
    def _parse_reported(cell, prefix_len):
        """Return the number in a reported cell, or None if the cell is empty.

        The first ``prefix_len`` characters are dropped and a decimal comma
        is converted to a decimal point before calling ``float``.
        """
        if pd.isna(cell):
            return None
        return float(cell[prefix_len:].replace(',', '.'))

    @property
    def ages(self):
        """The ages of the life table (the index of ``matrix``) as an array."""
        return self.matrix.index.to_numpy()

    def __repr__(self):
        # __repr__ must return a str; the author cell may hold something else
        # (e.g. NaN when it is empty).
        return str(self.author)

#### Species
Species object contains the raw DataFrame slice of all of the species data entries, as well as the name and notes (if present).

In [6]:
class Species(object):
    """All data entries of one species.

    Parameters
    ----------
    name : str
        Species name; must be a key of ``lookup``.
    df : pandas.DataFrame
        The full raw sheet.
    lookup : dict
        Maps species name to the ``(start, stop)`` row range of that
        species in ``df`` (``stop`` exclusive).

    Attributes
    ----------
    name : str
    df : pandas.DataFrame
        The species' rows, renumbered from 0.
    notes : list
        The notes cell (column 4 of the row after the first row of the
        block) split on commas, or an empty list when that cell is empty.
    data : list
        One ``Data`` object per data entry.
    """

    def __init__(self, name, df, lookup):
        start, stop = lookup[name]
        self.name = name
        # reset_index returns a new frame, so the rows are renumbered without
        # assigning to the index of a slice of the full sheet.
        self.df = df[start:stop].reset_index(drop=True)

        note_cell = df[start + 1:start + 2][4].item()
        self.notes = [] if pd.isna(note_cell) else note_cell.split(',')

        # Entry boundaries are positions inside self.df, so the end of the
        # last entry is len(self.df), not the stop row of the full sheet.
        Species.clean_data(self, len(self.df))

    def __repr__(self):
        return '{}: {} data entries, notes: {}'.format(self.name, len(self.data), self.notes)

    @staticmethod
    def clean_data(s, end):
        """Split ``s.df`` into data entries and store them in ``s.data``.

        Every second non-empty cell of column 6 marks the first row of an
        entry (assumes each entry has exactly two non-empty cells in that
        column -- TODO confirm against the spreadsheet).

        Parameters
        ----------
        s : Species
            Object whose ``df`` is split and whose ``data`` is (re)assigned.
        end : int
            Row position in ``s.df`` at which the last entry stops
            (exclusive); values beyond the frame are clipped by slicing.
        """
        entry_starts = list(np.where(s.df[6].notna())[0][::2])
        bounds = entry_starts + [end]
        s.data = [
            Data(s.df[lo:hi])
            for lo, hi in zip(bounds[:-1], bounds[1:])
        ]

In [7]:
# One Species object per name found in the sheet, keyed by that name.
SPECIES = dict()
for s in SPECIES_ix.keys():
    SPECIES[s] = Species(name=s, df=LXMX, lookup=SPECIES_ix)

Save the final SPECIES dictionary for future use.

In [8]:
# Set to False to re-run the notebook without rewriting the pickle.
save = True
if save:
    config.save_pickle(
        SPECIES,
        os.path.join(config.OUTPUT_DIR, 'species.pkl'),
    )

## Working with the Species objects

You can list the keys with the following code (the slice at the end only limits the output),

In [9]:
# Iterating a dict yields its keys; show only the first ten.
list(SPECIES)[:10]

['Spermophilus columbianus',
 'Spermophilus lateralis',
 'Theropithecus gelada',
 'Yellow-bellied marmot',
 'Spermophilus armatus',
 'Tursiops truncatus',
 'Papio cynocephalus',
 'Helogale parvula',
 'Callorhinus ursinus',
 'Phacochoerus aethiopicus']

and access the Species object as such.

In [10]:
# Look up one species by name; the cell shows it through Species.__repr__.
SPECIES['Capreolus capreolus']

Capreolus capreolus: 2 data entries, notes: []

In [11]:
# The list of Data entries; each one is shown through Data.__repr__.
SPECIES['Capreolus capreolus'].data

[Kaluzinski 1982, Johnson 1982]

In [12]:
# Life table of the first entry: l(a), f(a), p(a) indexed by age.
SPECIES['Capreolus capreolus'].data[0].matrix

Unnamed: 0_level_0,l(a),f(a),p(a)
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.0,1.0,0.0,0.51
1.0,0.502,0.0,0.95
2.0,0.476,1.0,0.93
3.0,0.442,0.885,0.93
4.0,0.395,1.0,0.88
5.0,0.349,1.27,0.87
6.0,0.297,1.0,0.83
7.0,0.246,1.0,0.8
8.0,0.17,0.89,0.56
10.0,0.063,0.865,0.371


In [13]:
# Values parsed from the entry's reported cells (None where a cell is empty).
SPECIES['Capreolus capreolus'].data[0].reported

{'T_c': [None, 4.754], 'V': 4.814, 'R_0': None}

In [14]:
# The ages of the first entry, i.e. the index of its matrix as a NumPy array.
SPECIES['Capreolus capreolus'].data[0].ages

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8., 10., 15.])

## Bugs