This notebook calculates the data for the first table in the manuscript: for each land use, the number of sites, species, and individuals, and the mean number of species and individuals at each site.

In [2]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

%matplotlib inline

In [3]:
# Load the combined Azores table; the first two rows of the CSV are read as a
# two-level column header.
df = pd.read_csv(
    './RawData/Azores_Combined.csv',
    header=[0, 1],
)

In [4]:
# Preamble: land-use ordering, site list, and whole-dataset state variables

# Land use types, written out explicitly in disturbance-gradient order.
# Deriving the order by position from the (alphabetically sorted) column level
# would silently give a wrong gradient if a land use were added or renamed.
lu = ['Native forest', 'Exotic forest', 'Semi-natural pasture', 'Intensive pasture']
# Get length to use to loop over etc.
lutypes = len(lu)

# Number of sites at each land use (one column per site under each land use)
for land_use in lu:
    print("{}: {} sites".format(land_use, len(df[land_use].columns)))
print()

# Get total s0: the table has one row per species
s0 = len(df)
print('Number of species: {}'.format(s0))

# Get list of sites: every column that is not under the 'Data' header
# (the 'Data' columns hold species information rather than abundances)
ls = df.columns[df.columns.get_level_values(0) != 'Data']

# Get total n0: sum over all species and all sites
n0 = df[lu].sum().sum()
print('Number of individuals: {}'.format(n0))

Native forest: 44 sites
Exotic forest: 12 sites
Semi-natural pasture: 16 sites
Intensive pasture: 24 sites

Number of species: 271
Number of individuals: 46250


In [7]:
# Get total s0 (number of species) and n0 (number of individuals) for each land use

# Abundance of every species in each land use, summed over that land use's sites
abd_lu = pd.DataFrame({l: df[l].sum(axis=1) for l in lu})

# n0: total individuals; s0: number of species with non-zero abundance.
# The columns come from an ordered dict, not a set: a set has no defined order
# and pandas >= 2.0 raises "columns cannot be a set".
sn_lu = pd.DataFrame({
    'n0': abd_lu.sum(),
    's0': (abd_lu != 0).sum(),
}).astype({'s0': 'int64', 'n0': 'int64'})

# Relabel the abundance rows with the species code
abd_lu = abd_lu.rename(index=df['Data', 'MF']).rename_axis('MF')

sn_lu

Unnamed: 0,n0,s0
Native forest,10291,148
Exotic forest,3385,87
Semi-natural pasture,11421,127
Intensive pasture,21153,136


In [8]:
# Get s0 (number of species) and n0 (number of individuals) at every site.
# These per-site values are the input for per-land-use summary statistics.

# Abundance of every species at each site (copy so df itself is not touched)
abd_ls = df.loc[:, ls].copy()

# n0: total individuals; s0: number of species with non-zero abundance.
# The columns come from an ordered dict, not a set: a set has no defined order
# and pandas >= 2.0 raises "columns cannot be a set".
sn_ls = pd.DataFrame({
    'n0': abd_ls.sum(),
    's0': (abd_ls != 0).sum(),
}).astype({'s0': 'int64', 'n0': 'int64'})

# Relabel the abundance rows with the species code
abd_ls = abd_ls.rename(index=df['Data', 'MF']).rename_axis('MF')

sn_ls

Unnamed: 0,Unnamed: 1,n0,s0
Exotic forest,TER-AGUA-T-66,114,18
Exotic forest,TER-MNEG-T-62,240,22
Exotic forest,TER-MNEG-T-63,151,15
Exotic forest,TER-TCHA-T-64,362,22
Exotic forest,TER-ACAR-T111,80,17
...,...,...,...
Semi-natural pasture,TER-NFPG-T-67,446,32
Semi-natural pasture,TER-NFTB-T117,310,18
Semi-natural pasture,TER-NFTB-T-28,512,24
Semi-natural pasture,TER-PB-T165,816,29


In [9]:
# Get mean of n0 and s0 across the sites of each land use.
# unstack(level=0) moves the land-use level of the (land use, site) index into
# the columns, so each (n0/s0, land use) column holds only that land use's
# sites (NaN elsewhere); the column-wise mean skips those NaN entries.
sn_ls.unstack(level=0).mean()

n0  Exotic forest           282.083333
    Native forest           233.886364
    Intensive pasture       881.375000
    Semi-natural pasture    713.812500
s0  Exotic forest            20.000000
    Native forest            24.250000
    Intensive pasture        35.958333
    Semi-natural pasture     28.375000
dtype: float64

In [10]:
# Get standard deviation of n0 and s0 across the sites of each land use.
# Land use is moved into the columns first, so each column is reduced over its
# own sites only. pandas' std() uses the sample estimator (ddof=1) by default.
sn_ls.unstack(level=0).std()

n0  Exotic forest           315.715769
    Native forest           131.776805
    Intensive pasture       327.787950
    Semi-natural pasture    312.476179
s0  Exotic forest             3.861229
    Native forest             6.039117
    Intensive pasture         8.720187
    Semi-natural pasture      7.948794
dtype: float64

In [13]:
# Get median of n0 and s0 across the sites of each land use.
# Land use is moved into the columns first, so each column is reduced over its
# own sites only (NaN entries from other land uses are skipped).
sn_ls.unstack(level=0).median()

n0  Exotic forest           196.0
    Native forest           195.0
    Intensive pasture       878.0
    Semi-natural pasture    766.0
s0  Exotic forest            19.5
    Native forest            24.0
    Intensive pasture        36.0
    Semi-natural pasture     27.5
dtype: float64

In [16]:
# Interquartile range: show the lower (25%) and upper (75%) quartiles of n0 and
# s0 across the sites of each land use, one table per quartile.
for _q in (0.25, 0.75):
    display(sn_ls.unstack(level=0).quantile(_q))

n0  Exotic forest           140.25
    Native forest           148.00
    Intensive pasture       738.50
    Semi-natural pasture    429.50
s0  Exotic forest            17.75
    Native forest            19.00
    Intensive pasture        30.00
    Semi-natural pasture     23.25
Name: 0.25, dtype: float64

n0  Exotic forest           275.00
    Native forest           287.25
    Intensive pasture       984.75
    Semi-natural pasture    919.75
s0  Exotic forest            22.00
    Native forest            29.25
    Intensive pasture        43.25
    Semi-natural pasture     32.25
Name: 0.75, dtype: float64

# Indigenous and introduced species numbers


In [5]:
# Remove stray whitespace around the N/E/I status flag (written back into df)
df['Data','N/E/I'] = df['Data','N/E/I'].str.strip()
# Boolean row masks as numpy arrays: species flagged 'N' or 'E' count as
# indigenous, species flagged 'I' count as introduced
indigenous_inds = df['Data','N/E/I'].isin(['N', 'E']).values
introduced_inds = df['Data','N/E/I'].eq('I').values

In [55]:
# Species and individual totals, split into indigenous and introduced species
s0_indigenous = indigenous_inds.sum()
s0_introduced = introduced_inds.sum()

# Indigenous: number of flagged species, then their abundance over all sites
print('Number of indigenous species: {}'.format(s0_indigenous))
print('Number of indigenous individuals: {}'.format(df[lu][indigenous_inds].sum().sum()))
# Introduced: same two totals
print('Number of introduced species: {}'.format(s0_introduced))
print('Number of introduced individuals: {}'.format(df[lu][introduced_inds].sum().sum()))
# Note: 4 species carry neither flag, so the two species counts do not add up
# to the total number of species

Number of indigenous species: 126
Number of indigenous individuals: 14950
Number of introduced species: 141
Number of introduced individuals: 31289


In [59]:
# Sanity check: species that are in neither the indigenous nor the introduced
# mask should account for the remainder of the species and individuals
unidentified_inds = ~(indigenous_inds | introduced_inds)
print("Number of unidentified species", unidentified_inds.sum())
print("Number of unidentified individuals", df[lu][unidentified_inds].sum().sum())

Number of unidentified species 4
Number of unidentified individuals 11


In [43]:
# Get s0 and n0 for each different land use, one for indigenous and one for introduced


def _land_use_totals(species_mask):
    """Summarise one subset of species per land use.

    Parameters
    ----------
    species_mask : boolean array with one entry per row of ``df``
        Rows (species) to include.

    Returns
    -------
    sn : DataFrame indexed by land use with integer columns ``n0`` (total
        individuals) and ``s0`` (species with non-zero abundance).
    abd : DataFrame of per-species abundance in each land use, indexed by the
        species code ``MF``.
    """
    # Abundance of each selected species, summed over the sites of each land use
    abd = pd.DataFrame({l: df[l].iloc[species_mask].sum(axis=1) for l in lu})
    # Columns come from an ordered dict, not a set: a set has no defined order
    # and pandas >= 2.0 raises "columns cannot be a set".
    sn = pd.DataFrame({
        'n0': abd.sum(),
        's0': (abd != 0).sum(),
    }).astype({'s0': 'int64', 'n0': 'int64'})
    # Relabel the abundance rows with the species code
    abd = abd.rename(index=df['Data', 'MF']).rename_axis('MF')
    return sn, abd


sn_idg, abd_idg = _land_use_totals(indigenous_inds)
sn_int, abd_int = _land_use_totals(introduced_inds)

# Each label is printed after the table it refers to
display(sn_idg)
print("Indigenous")
display(sn_int)
print("Introduced")

Unnamed: 0,n0,s0
Native forest,7288,86
Exotic forest,1476,44
Semi-natural pasture,2110,50
Intensive pasture,4076,40


Indigenous


Unnamed: 0,n0,s0
Native forest,3001,60
Exotic forest,1908,42
Semi-natural pasture,9310,76
Intensive pasture,17070,94


Introduced


In [44]:
# For individual sites
# Get s0 and n0 at each site, separately for indigenous and introduced species


def _site_totals(species_mask):
    """Summarise one subset of species at every site.

    Parameters
    ----------
    species_mask : boolean array with one entry per row of ``df``
        Rows (species) to include.

    Returns
    -------
    sn : DataFrame indexed by site with integer columns ``n0`` (total
        individuals) and ``s0`` (species with non-zero abundance).
    abd : DataFrame of per-species abundance at each site, indexed by the
        species code ``MF``.
    """
    # Abundance of each selected species at every site (copy, df is untouched)
    abd = df.loc[species_mask, ls].copy()
    # Columns come from an ordered dict, not a set: a set has no defined order
    # and pandas >= 2.0 raises "columns cannot be a set".
    sn = pd.DataFrame({
        'n0': abd.sum(),
        's0': (abd != 0).sum(),
    }).astype({'s0': 'int64', 'n0': 'int64'})
    # Relabel the abundance rows with the species code
    abd = abd.rename(index=df['Data', 'MF']).rename_axis('MF')
    return sn, abd


sn_ls_idg, abd_ls_idg = _site_totals(indigenous_inds)
sn_ls_int, abd_ls_int = _site_totals(introduced_inds)

In [45]:
# Median n0 and s0 across the sites of each land use:
# indigenous species first, then introduced species
display(
    sn_ls_idg.unstack(level=0).median(),
    sn_ls_int.unstack(level=0).median(),
)

n0  Exotic forest           111.0
    Native forest           129.0
    Intensive pasture       160.5
    Semi-natural pasture    101.0
s0  Exotic forest            10.0
    Native forest            15.5
    Intensive pasture        10.0
    Semi-natural pasture     10.0
dtype: float64

n0  Exotic forest            51.0
    Native forest            50.0
    Intensive pasture       684.0
    Semi-natural pasture    623.0
s0  Exotic forest             9.0
    Native forest             8.0
    Intensive pasture        26.5
    Semi-natural pasture     17.0
dtype: float64