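This notebook computes the mean least-squares error of the species abundance distribution (the mean squared difference between the logs of the theoretical and empirical ranked abundances at each site) for the combined adults-and-juveniles dataset, averaged by land use and subdivided by indigenous versus non-indigenous (introduced) status.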

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as st
from scipy import integrate
import matplotlib.pyplot as plt
import sad_mrdi as sm
%matplotlib inline

In [2]:
# Load the combined dataset (WITH juveniles); the CSV carries a two-row header
raw_data_path = './RawData/Azores_Combined.csv'
df = pd.read_csv(raw_data_path, header=[0, 1])

In [3]:
# Trim surrounding whitespace from the classification labels
status_key = ('Data', 'N/E/I')
df[status_key] = df[status_key].str.strip()
# Boolean row masks: labels 'N' and 'E' together make up the indigenous group,
# label 'I' the introduced group
indigenous_inds = df[status_key].isin(['N', 'E']).values
introduced_inds = df[status_key].eq('I').values

In [4]:
# Some more preamble and calculating some state variables
# Get total s0 (the frame is treated as one row per species)
s0 = len(df)
print('Number of species: {}'.format(s0))
# Land uses are the top-level column labels other than the 'Data' metadata block
lu = list(df.columns.levels[0])
lu.remove('Data')
# Get length to use to loop over etc.
lutypes = len(lu)
# Get how many sites (columns) there are for each land use. Building the Series
# from a dict avoids depending on how pandas fills an empty integer Series.
lu_sites = pd.Series({land_use: len(df[land_use].columns) for land_use in lu}, dtype=int)

# Reorder to disturbance gradient. The order is spelled out by name rather than
# by position so it does not depend on how the labels happen to sort; any label
# not listed here is kept and placed after the known ones.
disturbance_order = ['Native forest', 'Exotic forest',
                     'Semi-natural pasture', 'Intensive pasture']
disturbance_rank = {name: rank for rank, name in enumerate(disturbance_order)}
lu = sorted(lu, key=lambda name: disturbance_rank.get(name.strip(), len(disturbance_rank)))
# Get total n0
n0 = df[lu].sum().sum()
print('Number of individuals: {}'.format(n0))

# How many indigenous versus introduced species?
# NOTE: despite the n0_ prefix, these two values count species (rows), not individuals.
n0_indigenous = np.sum(indigenous_inds)
n0_introduced = np.sum(introduced_inds)
print('Number of indigenous species: {}'.format(n0_indigenous))
print('Number of introduced species: {}'.format(n0_introduced))
# Note 4 species aren't defined (they belong to neither mask), so the two counts
# above do not add up to s0

Number of species: 271
Number of individuals: 46250
Number of indigenous species: 126
Number of introduced species: 141


In [5]:
# Inspect the species that fall in neither the indigenous nor the introduced mask
unclassified = df[~(indigenous_inds | introduced_inds)]
display(unclassified)
# Total abundance of each of those species within every land use
for land_use in lu:
    print(land_use)
    print(unclassified[land_use].sum(axis=1))

Unnamed: 0_level_0,Data,Data,Data,Data,Exotic forest,Exotic forest,Exotic forest,Exotic forest,Exotic forest,Exotic forest,...,Semi-natural pasture,Semi-natural pasture,Semi-natural pasture,Semi-natural pasture,Semi-natural pasture,Semi-natural pasture,Semi-natural pasture,Semi-natural pasture,Semi-natural pasture,Semi-natural pasture
Unnamed: 0_level_1,Order (new),MF,N/E/I,Trophic,TER-AGUA-T-66,TER-MNEG-T-62,TER-MNEG-T-63,TER-TCHA-T-64,TER-ACAR-T111,TER-ACAR-T112,...,TER-MNEG-T-76,TER-MNEG-T-79,TER-NFBF-T-61,TER-NFGM-T-69,TER-NFPG-T118,TER-NFPG-T-67,TER-NFTB-T117,TER-NFTB-T-28,TER-PB-T165,TER-SBAR-T-72
170,Hemiptera,54,,H,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
192,Hemiptera,407,,H,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
206,Hemiptera,1021,,H,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
233,Lepidoptera,375,,H,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Native forest
170    1
192    0
206    0
233    1
dtype: int64
Exotic forest
170    0
192    0
206    1
233    0
dtype: int64
Semi-natural pasture
170    0
192    1
206    0
233    0
dtype: int64
Intensive pasture
170    0
192    6
206    1
233    0
dtype: int64


In [6]:
# Get list of sites: every column outside the 'Data' metadata block. Selecting by
# label instead of by position matches how the land uses are derived from the
# column levels and does not depend on how many metadata columns there are.
ls = df.columns[df.columns.get_level_values(0) != 'Data']


def _group_abundances(row_mask):
    """Return the per-site abundances of the species selected by `row_mask`.

    `row_mask` is a boolean array with one entry per row of `df`. The returned
    frame has one column per site in `ls`, and its rows are relabelled with the
    species code taken from the ('Data', 'MF') column.
    """
    abd = df[ls].iloc[row_mask].copy()
    # Rename indexes for abundances to species code
    abd = abd.rename(index=df['Data', 'MF'])
    abd.index.name = 'MF'
    return abd


def _site_state_variables(abd):
    """Tabulate s0, n0 and beta for every site (column) of `abd`.

    s0 is the number of non-zero abundances at the site, n0 is the summed
    abundance, and beta is whatever sm.get_beta(s0, n0) returns for that pair.
    """
    records = []
    for site in abd.columns:
        s_site = np.count_nonzero(abd[site])
        n_site = abd[site].sum()
        records.append({'s0': s_site, 'n0': n_site,
                        'beta': sm.get_beta(s_site, n_site)})
    # Build the table once, with an explicit column list: a set (as used before)
    # has no defined order, so the column order was arbitrary.
    table = pd.DataFrame(records, index=abd.columns, columns=['s0', 'n0', 'beta'])
    # Fix datatypes
    return table.astype({'s0': 'int64', 'n0': 'int64', 'beta': 'float64'})


# Abundances at each site, split by status
abd_ls_idg = _group_abundances(indigenous_inds)
abd_ls_int = _group_abundances(introduced_inds)
# s0, n0 and beta at each site, split by status
sn_ls_idg = _site_state_variables(abd_ls_idg)
sn_ls_int = _site_state_variables(abd_ls_int)

In [7]:
def _site_mlsq(abd, sn):
    """Mean squared log-difference between theoretical and empirical ranked abundances.

    For every site in `sn.index`, the ranked abundances returned by sm.sad_rank
    (for ranks 1..s0, with that site's s0 and beta) are compared with the site's
    non-zero abundances from `abd`, sorted in decreasing order. The result is the
    mean over species of (log(theory) - log(empirical))**2.

    Returns a float Series indexed like `sn`. A site whose stored s0 or n0 does
    not agree with its abundances gets NaN and a printed warning naming the site,
    so that a failed check cannot pass for a perfect fit of 0.
    """
    values = []
    for site in sn.index:
        s_site = sn['s0'][site]
        ranks = np.arange(s_site) + 1
        theory = sm.sad_rank(ranks, s_site, sn['beta'][site])
        site_abd = abd[site]
        emp = np.sort(site_abd[site_abd != 0])[::-1]
        # Sanity checks: the stored state variables must describe these abundances
        if s_site != len(emp):
            print("Ruh ro! 2 (s0 does not match the number of non-zero abundances) at {}".format(site))
            values.append(np.nan)
            continue
        if sn['n0'][site] != emp.sum():
            print("Ruh ro! 1 (n0 does not match the summed abundances) at {}".format(site))
            values.append(np.nan)
            continue
        # Calculate least squares of log
        lsq = (np.log(theory) - np.log(emp)) ** 2
        values.append(np.sum(lsq) / len(emp))
    return pd.Series(values, index=sn.index, dtype='float64')


# Get least squares for each site
# Indigenous
sn_ls_idg['mlsq'] = _site_mlsq(abd_ls_idg, sn_ls_idg)
# Introduced
sn_ls_int['mlsq'] = _site_mlsq(abd_ls_int, sn_ls_int)

In [8]:
def _mean_and_se_by_land_use(mlsq, n_sites):
    """Return (mean, standard error) of `mlsq` for each land use.

    `mlsq` is indexed by (land use, site); grouping is on the first index level.
    `n_sites` gives the number of sites per land use and is used as n in the
    standard error, std / sqrt(n).

    groupby(level=0, sort=False) replaces the `level=` argument of Series.mean
    and Series.std, which newer pandas no longer accepts; sort=False keeps the
    groups in order of first appearance, as the `level=` form did.
    """
    by_land_use = mlsq.groupby(level=0, sort=False)
    mean = by_land_use.mean()
    # Note that .std in pandas already has ddof=1, which is correct here since we estimate the mean
    std = by_land_use.std()
    # To get the standard error of the mean, we have to divide by sqrt(n).
    # The site counts are lined up with the groups and passed as a plain array so
    # the result keeps the index order and name of `std`.
    se = std / np.sqrt(n_sites.reindex(std.index).values)
    return mean, se


# Get means and standard errors
# Indigenous first
mean_idg, se_idg = _mean_and_se_by_land_use(sn_ls_idg['mlsq'], lu_sites)
# Introduced
mean_int, se_int = _mean_and_se_by_land_use(sn_ls_int['mlsq'], lu_sites)

In [9]:
# Show the per-land-use means for each group, followed by the plain average of the two groups
summaries = [
    ("Indigenous", mean_idg),
    ("Introduced", mean_int),
    ("Mean", (mean_idg + mean_int) / 2),
]
for heading, values in summaries:
    print(heading)
    display(values)

Indigenous


Exotic forest           0.571333
Native forest           0.177788
Intensive pasture       0.462645
Semi-natural pasture    0.293201
Name: mlsq, dtype: float64

Introduced


Exotic forest           0.312580
Native forest           0.189948
Intensive pasture       0.434088
Semi-natural pasture    1.156515
Name: mlsq, dtype: float64

Mean


Exotic forest           0.441957
Native forest           0.183868
Intensive pasture       0.448366
Semi-natural pasture    0.724858
Name: mlsq, dtype: float64

In [10]:
# Gather the summary rows (one per statistic, one column per land use) and save them for plotting
summary_rows = {
    'Mean indigenous': mean_idg,
    'Standard error indigenous': se_idg,
    'Mean introduced': mean_int,
    'Standard error introduced': se_int,
    'N': lu_sites,
}
individual_data = pd.DataFrame(list(summary_rows.values()), index=list(summary_rows))
display(individual_data)
individual_data.to_csv("ProcessedData/sad_mlsq_indigenous.csv")

Unnamed: 0,Exotic forest,Native forest,Intensive pasture,Semi-natural pasture
Mean indigenous,0.571333,0.177788,0.462645,0.293201
Standard error indigenous,0.210839,0.022262,0.090407,0.05069
Mean introduced,0.31258,0.189948,0.434088,1.156515
Standard error introduced,0.114672,0.02653,0.099004,0.203237
N,12.0,44.0,24.0,16.0
