This notebook repeats the normal analysis, but with the species divided into indigenous and introduced groups.

In [1]:
import numpy as np
import pandas as pd
import sar # This is our custom function
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Load the raw pitfall records and derive the lookup arrays used by later cells:
#   indigenous_inds / introduced_inds : boolean row masks on df
#   lu, lu_nsites                     : land-use types and number of sites in each
#   ls, lu_ls                         : site codes and the land use of each site (aligned)
#   sc                                : sorted sample numbers
#   mf, mf_idg, mf_int                : MF codes overall / indigenous / introduced
df = pd.read_csv('./RawData/IndividualPitfalls.csv')

# Clean the status column: strip surrounding whitespace, then strip question marks.
# (Original note: the question-marked entries were checked against the main data.)
# NOTE(review): because whitespace is stripped first, a value such as 'N ?' would end up
# as 'N ' and stay uncategorised -- confirm no such values exist in the raw file.
df['N/E/I'] = df['N/E/I'].str.strip()
df['N/E/I'] = df['N/E/I'].str.strip('?')

# Boolean masks: 'N' and 'E' are both treated as indigenous, 'I' as introduced.
indigenous_inds = df['N/E/I'].isin(['N', 'E']).values
introduced_inds = (df['N/E/I'] == 'I').values

# Check how many rows fall in neither group
print("Not categorized: {}".format(len(df) - indigenous_inds.sum() - introduced_inds.sum()))

# Land use types and the number of distinct sites in each
lu = df['Habitat (general)'].unique()
lu_nsites = np.zeros(len(lu), dtype=int)
for i, l in enumerate(lu):
    lu_nsites[i] = df.loc[df['Habitat (general)'] == l, 'Site code'].nunique()
    print("Land use and number of sites:   {}\t{}".format(l, lu_nsites[i]))

# List of sites
ls = df['Site code'].unique()

# Land use of every site, in the same order as ls.
# A site must map to exactly one habitat. Fail loudly otherwise: silently skipping
# the site would leave lu_ls shorter than ls and misalign every later lookup.
lu_ls = []
for s in ls:
    hb = df.loc[df['Site code'] == s, 'Habitat (general)'].unique()
    if len(hb) != 1:
        raise ValueError(
            "Site {!r} has {} habitat types assigned ({}); expected exactly one".format(
                s, len(hb), list(hb)))
    lu_ls.append(hb[0])
lu_ls = np.array(lu_ls)

# Unique sample codes, sorted
sc = np.sort(df['Sample number'].unique())

# MF codes: all, indigenous only, introduced only
mf = df['MF'].unique()
mf_idg = df[indigenous_inds]['MF'].unique()
mf_int = df[introduced_inds]['MF'].unique()

Not categorized: 7
Land use and number of sites:   Exotic forest - mono	12
Land use and number of sites:   Native vegetation	44
Land use and number of sites:   Pasture - intensive	24
Land use and number of sites:   Pasture - semi-natural	10


In [3]:
# Build the site x sample abundance matrix: one row per (Site, Sample) pair, one column
# per MF code, values are the summed 'Adults' counts.
index = pd.MultiIndex.from_product([ls, sc], names=["Site", "Sample"])

# Aggregate in one vectorised pass instead of updating cell-by-cell with iterrows().
# Repeated (site, sample, MF) records are summed, matching the original accumulation.
# Missing 'Adults' values are skipped by sum(), i.e. they contribute zero.
counts = (
    df.groupby(['Site code', 'Sample number', 'MF'])['Adults']
      .sum()
      .unstack('MF', fill_value=0)
)

# Expand to the full site x sample grid (absent combinations become 0) and put the
# columns in the order of `mf`. The indigenous/introduced split is applied later by
# selecting columns with mf_idg / mf_int.
data = counts.reindex(index=index, columns=mf, fill_value=0).astype(int)
data.columns.name = None

In [4]:
# Scales at which the analysis is carried out, and how many of them there are
scales = np.asarray((1, 2, 3, 5, 6, 10, 15, 30))
ns = scales.size

In [5]:
# For each site, compute the observed (empirical) and METE-predicted results at every
# scale, separately for indigenous and introduced species, and stack the per-site
# results into four master dataframes indexed by (Site, Scale).

# Column subsets are the same for every site, so select them once.
data_idg = data[mf_idg]
data_int = data[mf_int]

# Collect per-site frames in lists and concatenate once at the end. Growing a
# DataFrame with .append() inside the loop copies everything on each iteration and
# is no longer available in pandas >= 2.0.
emp_idg_parts, mete_idg_parts = [], []
emp_int_parts, mete_int_parts = [], []

# This is still slow: sar.mete_sar is called twice per site.
for site in ls:
    # Indigenous
    sntemp, metetemp = sar.mete_sar(data_idg.loc[site], scales)
    emp_idg_parts.append(sntemp)
    mete_idg_parts.append(metetemp)
    # Introduced
    sntemp, metetemp = sar.mete_sar(data_int.loc[site], scales)
    emp_int_parts.append(sntemp)
    mete_int_parts.append(metetemp)

# Stack and relabel rows by (Site, Scale).
# Assumes sar.mete_sar returns one row per entry of `scales`, in order -- set_index
# raises if the total length does not match len(ls) * len(scales).
index1 = pd.MultiIndex.from_product([ls, scales], names=['Site', 'Scale'])
emp_idg = pd.concat(emp_idg_parts).set_index(index1)
mete_idg = pd.concat(mete_idg_parts).set_index(index1)
emp_int = pd.concat(emp_int_parts).set_index(index1)
mete_int = pd.concat(mete_int_parts).set_index(index1)

  return dist.logser.pmf(x, p) / dist.logser.cdf(self.N, p)
  mete['z'].iloc[i+1] = np.log(sn['s'].iloc[i+1]/mete['s'].iloc[i])/np.log(sn.index[i+1]/sn.index[i])
  p = dist.logser.translate_args(self.N / self.S)
  sn['z'].iloc[i+1] = np.log(sn['s'].iloc[i+1]/sn['s'].iloc[i])/np.log(sn.index[i+1]/sn.index[i])
  mete['z'].iloc[i+1] = np.log(sn['s'].iloc[i+1]/mete['s'].iloc[i])/np.log(sn.index[i+1]/sn.index[i])


# Analysis

In [6]:
# Mean least-squares error between METE and observation for every site, for both the
# species counts (s, compared in log space) and the slopes (z), split into indigenous
# and introduced species.

# A scale is only used when the observed species count is strictly GREATER than this
# threshold (an arbitrary cut-off to drop scales with very few species).
MIN_SPECIES = 4


def _site_least_squares(emp_site, mete_site, min_species=MIN_SPECIES):
    """Return (s_error, z_error) for one site.

    Parameters
    ----------
    emp_site, mete_site : DataFrame
        Empirical and METE results for a single site, indexed by scale, with
        columns 's' and 'z'.
    min_species : int
        Only scales with empirical s > min_species are used.

    Returns
    -------
    (float, float)
        Mean squared difference of log(s) and of z over the usable scales, or
        (nan, nan) when fewer than two scales are usable.
    """
    good = emp_site['s'] > min_species
    n_good = good.sum()
    if n_good <= 1:
        return np.nan, np.nan

    # z cannot be used at the smallest scale (scale 1), so when that scale is among
    # the good ones there is one fewer z value to average over.
    # NOTE(review): this relies on z at scale 1 being NaN and np.sum skipping it.
    z_denominator = (n_good - 1) if good.loc[1] else n_good
    z_sq_diff = (mete_site['z'][good] - emp_site['z'][good])**2
    z_error = np.sum(z_sq_diff / z_denominator)

    # One fewer for s as well, because the top scale is always exact.
    s_sq_diff = (np.log(mete_site['s'][good]) - np.log(emp_site['s'][good]))**2
    s_error = np.sum(s_sq_diff) / (n_good - 1)
    return s_error, z_error


# Compute one record per site, then build the frame in a single step. This avoids
# chained assignment (lsq[col].iloc[i] = ...), which is not guaranteed to write
# through to the dataframe.
index2 = pd.MultiIndex.from_arrays([lu_ls, ls], names=['Land use', 'Site'])
lsq_rows = []
for site in ls:
    s_idg, z_idg = _site_least_squares(emp_idg.loc[site], mete_idg.loc[site])
    s_int, z_int = _site_least_squares(emp_int.loc[site], mete_int.loc[site])
    lsq_rows.append((s_idg, z_idg, s_int, z_int))

lsq = pd.DataFrame(lsq_rows, index=index2,
                   columns=['s (idg)', 'z (idg)', 's (int)', 'z (int)'], dtype=float)

In [7]:
def _summarise_by_land_use(s_col, z_col):
    """Summarise the per-site least squares in `lsq` by land use.

    Returns (n_sites, mean_s, mean_z, se_s, se_z), each a Series over land use.
    n_sites counts the sites with a non-NaN value in `z_col`; both standard errors
    are the group standard deviation divided by sqrt(n_sites).
    """
    n_sites = pd.Series(
        [np.count_nonzero(~np.isnan(lsq[z_col].loc[l])) for l in lu],
        index=lu, dtype='float64')

    # groupby(level=0, sort=False) is the supported equivalent of the removed
    # Series.mean(level=0) / Series.std(level=0) and keeps the same group order.
    s_groups = lsq[s_col].groupby(level=0, sort=False)
    z_groups = lsq[z_col].groupby(level=0, sort=False)

    mean_s = s_groups.mean()
    mean_z = z_groups.mean()
    se_s = s_groups.std() / np.sqrt(n_sites)
    se_z = z_groups.std() / np.sqrt(n_sites)
    return n_sites, mean_s, mean_z, se_s, se_z


# Indigenous
lu_nsites_idg, mean_s_idg, mean_z_idg, se_s_idg, se_z_idg = _summarise_by_land_use(
    's (idg)', 'z (idg)')

# Print out results for least squares
print(mean_z_idg)
print(se_z_idg)


# Introduced
lu_nsites_int, mean_s_int, mean_z_int, se_s_int, se_z_int = _summarise_by_land_use(
    's (int)', 'z (int)')

# Print out results for least squares
print(mean_z_int)
print(se_z_int)

Land use
Exotic forest - mono      0.027253
Native vegetation         0.027506
Pasture - intensive       0.035319
Pasture - semi-natural    0.028525
Name: z (idg), dtype: float64
Land use
Exotic forest - mono      0.003524
Native vegetation         0.002928
Pasture - intensive       0.006138
Pasture - semi-natural    0.003839
dtype: float64
Land use
Exotic forest - mono      0.042844
Native vegetation         0.042810
Pasture - intensive       0.023315
Pasture - semi-natural    0.036850
Name: z (int), dtype: float64
Land use
Exotic forest - mono      0.010036
Native vegetation         0.011918
Pasture - intensive       0.004900
Pasture - semi-natural    0.007319
dtype: float64


In [8]:
# Assemble the z summary table (one row per statistic, one column per land use),
# show it, and write it to disk.
summary_rows = [
    ('Mean (idg)', mean_z_idg),
    ('Standard error (idg)', se_z_idg),
    ('N (idg)', lu_nsites_idg),
    ('Mean (int)', mean_z_int),
    ('Standard error (int)', se_z_int),
    ('N (int)', lu_nsites_int),
]
row_labels = [label for label, _ in summary_rows]
row_values = [values for _, values in summary_rows]
mlsq_data = pd.DataFrame(row_values, index=row_labels)
display(mlsq_data)
mlsq_data.to_csv('ProcessedData/sar_mlsq_indigenous.csv')

Land use,Exotic forest - mono,Native vegetation,Pasture - intensive,Pasture - semi-natural
Mean (idg),0.027253,0.027506,0.035319,0.028525
Standard error (idg),0.003524,0.002928,0.006138,0.003839
N (idg),10.0,43.0,20.0,9.0
Mean (int),0.042844,0.04281,0.023315,0.03685
Standard error (int),0.010036,0.011918,0.0049,0.007319
N (int),9.0,9.0,24.0,10.0
