## Import dataset of ASV in each media

*NOTE: please unzip the file data_genomic/data_day_7.zip in the data_genomic directory before proceeding to import the following csv file and running this code.*

In [13]:
import numpy as np
import pandas as pd
import csv

# Day-7 table read from the unzipped archive. Later cells read its 'OTU',
# 'Abundance' and 'medium' columns plus one column per single resource.
# NOTE(review): presumably one row per ASV per sample -- confirm against the CSV.
DATA_PATH = 'data_genomic/dat_day7_ok.csv'
dat = pd.read_csv(DATA_PATH)

### Pulling out the list of ASVs in each media

In [14]:
# Sorted array of the distinct ASV identifiers that have non-zero abundance
# in at least one row of the table.
has_abundance = dat['Abundance'] > 0
ASVlist = np.sort(list(set(dat.loc[has_abundance, 'OTU'])))

### Mapping media to ASVs

The mapping is a matrix that contains 1 if the ASV is present in the medium, and 0 otherwise.

In [15]:
# Presence/absence matrix: row i corresponds to medium i+1 (media are numbered
# from 1 in dat['medium']), column j corresponds to ASVlist[j]. An entry is 1
# if that ASV has non-zero abundance in that medium and 0 otherwise.
# The number of columns follows ASVlist instead of a hard-coded 619, so the
# matrix stays aligned with ASVlist if the input table changes.
med2asv = np.zeros((75, len(ASVlist)))

# Column position of every ASV, built once so that each lookup below does not
# have to scan the whole of ASVlist.
asv_column = {asv: col for col, asv in enumerate(ASVlist)}

detected = dat['Abundance'] > 0
for i in np.arange(75):
    # distinct ASVs with non-zero abundance in medium i+1
    asvs = list(set(dat.loc[(dat['medium'] == i + 1) & detected, 'OTU']))
    for asv in asvs:
        med2asv[i, asv_column[asv]] = 1

Marking out the single resources

In [16]:
# Column names of the 16 single resources, in the order used to index the
# resource axis everywhere below.
name_srs = [
    'glucose_c',
    'fructose_c',
    'xylose_c',
    'mannose_c',
    'cellobiose_c',
    'maltose_c',
    'sucrose_c',
    'citric_acid_c',
    'fumaric_acid_c',
    'galacturonic_acid_c',
    'mannitol_c',
    'sorbitol_c',
    'glycerol_c',
    'proline_c',
    'cellulose_c',
    'starch_c',
]

### Mapping media to the single resources they contain

In [17]:
# Medium-by-resource incidence matrix: WRmed[i, k] is 1 when the first row of
# medium i+1 has a positive value in the column name_srs[k], and 0 otherwise.
WRmed = np.zeros((75, 16))
for i in np.arange(75):
    # Position of the first row belonging to medium i+1. It does not depend on
    # the resource, so it is looked up once per medium rather than once per
    # (medium, resource) pair.
    # NOTE(review): assumes the resource columns are constant across the rows
    # of a medium -- confirm.
    well = np.where(dat['medium'] == i + 1)[0][0]
    for k in np.arange(16):
        # `well` is a position returned by np.where, so it is read with .iloc;
        # plain [] indexing would treat it as an index label, which is only
        # equivalent while dat keeps its default integer index.
        if dat[name_srs[k]].iloc[well] > 0:
            WRmed[i, k] = 1

## Calculating specialists and generalists, based on their resource occupancy in single resource media.

First calculating how many single-resource media an ASV is found in, out of 16 single-resource media.

In [18]:
# Rows 42..57 of med2asv are taken as the 16 single-resource media; summing
# down those rows gives, for every ASV, the number of them it is present in.
soccur = med2asv[42:58].sum(axis=0)

Now calculating generalists, as those found in >= 13 resource media, and specialists as those found in <= 3.

In [19]:
# Generalists: column indices of ASVs present in more than 12 (i.e. >= 13)
# single-resource media.
gen = np.flatnonzero(soccur > 12)
# Specialists: present in 1 to 3 single-resource media; ASVs absent from all
# of them are excluded.
sp = np.flatnonzero((soccur > 0) & (soccur < 4))

Every other ASV is an intermediate ASV.

In [20]:
# Intermediate ASVs: present in 4 to 12 single-resource media.
oth = np.flatnonzero((soccur > 3) & (soccur < 13))

## Calculate specificity score

The algorithm to calculate the resource-specificity score proceeds as follows:


1. For each single resource target, find ASVs that are found in the single resource media.

2. For each of those ASVs, get probability that ASV is found when target resource is present:
   > X = multi-resource media with the target resource where ASV is found
   
   > p1 = X / (multi-resource media with the target resource)
   
3. For each of those ASVs, get probability that ASV is found when target resource is absent:
   > Y = multi-resource media without the target resource where ASV is found
   
   > p2 = Y / (multi-resource media without the target resource)
    
4. For each of those ASVs, calculate specificity score = (p1-p2)/(p1+p2)

5. Pick specialist ASVs and find average specificity score over specialist ASVs for the target media

6. Pick generalist ASVs and find average specificity score over generalist ASVs for the target media


*NOTE: due to historical contingencies, we sometimes refer to generalists as cosmopolitan taxa, and specialists as endemic taxa, respectively.*

In [21]:
# Specificity scores per resource index (0..15):
#   pssp[i]  -> scores of specialist ASVs for resource i
#   psgen[i] -> scores of generalist ASVs for resource i
pssp = {}
psgen = {}

# Parallel columns for the CSV export: resource name, score, ASV type.
a = []
b = []
c = []

# A medium is multi-resource when more than one resource is flagged in WRmed.
# This does not depend on the target resource, so it is computed once.
is_multi = np.sum(WRmed, axis=1) > 1

for i in np.arange(16):

    pssp[i] = []
    psgen[i] = []

    # ASVs present in the single-resource medium of resource i (row 42+i)
    targets = np.where(med2asv[42 + i] == 1)[0]

    # multi-resource media with / without resource i
    nest = np.where((WRmed[:, i] == 1) * is_multi)[0]
    nonest = np.where((WRmed[:, i] == 0) * is_multi)[0]

    for target in targets:

        # fraction of multi-resource media with the resource that hold the ASV
        p1 = np.sum(med2asv[nest, target]) / len(nest)

        # fraction of multi-resource media without the resource that hold the ASV
        p2 = np.sum(med2asv[nonest, target]) / len(nonest)

        # skip ASVs never seen in any multi-resource medium; the test is
        # written as a negated ">" so that a NaN sum is skipped as well
        if not (p1 + p2 > 0):
            continue

        score = (p1 - p2) / (p1 + p2)

        # specialists are checked first, then generalists; intermediate ASVs
        # are not scored
        if target in sp:
            pssp[i].append(score)
            label = 'Endemics'
        elif target in gen:
            psgen[i].append(score)
            label = 'Cosmopolitans'
        else:
            continue

        # one export row per scored ASV
        a.append(name_srs[i])
        b.append(score)
        c.append(label)

Saving and exporting results.

In [None]:
# Assemble the three parallel result lists into one table (resource name,
# specificity score, ASV type) and write it to pref.csv.
df = pd.DataFrame({'Resource': a, 'Preference': b, 'Type': c})

df.to_csv('pref.csv')