In [25]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, accuracy_score

No: Seedling unique ID number.
Plot: Number of the field plot the seedling was planted in (1-18).
Subplot: Subplot within the main plot the seedling was planted in. Broken into 5 subplots (1 per corner, plus 1 in the middle) (A-E).
Species: Includes Acer saccharum, Prunus serotina, Quercus alba, and Quercus rubra.
Light ISF: Light level quantified with HemiView software. Represents the amount of light reaching each subplot at a height of 1m.
Light Cat: Categorical light level created by splitting the range of Light_ISF values into three bins (low, med, high).
Core: Year the soil core was removed from the field.
Soil: Species from which the soil core was taken. Includes all species, plus Acer rubrum, Populus grandidentata, and a sterilized conspecific for each species.
Adult: Individual tree that soil was taken from. Up to 6 adults per species. Used as a random effect in analyses.
Sterile: Whether the soil was sterilized or not.
Conspecific: Whether the soil was conspecific, heterospecific, or sterilized conspecific.
Myco: Mycorrhizal type of the seedling species (AMF or EMF).
SoilMyco: Mycorrhizal type of the species culturing the soil (AMF or EMF).
PlantDate: The date that seedlings were planted in the field pots.
AMF: Percent arbuscular mycorrhizal fungi colonization on the fine roots of harvested seedlings.
EMF: Percent ectomycorrhizal fungi colonization on the root tips of harvested seedlings.
Phenolics: Calculated as nmol Gallic acid equivalents per mg dry extract (see manuscript for detailed methods).
NSC: Calculated as percent dry mass nonstructural carbohydrates (see manuscript for detailed methods).
Lignin: Calculated as percent dry mass lignin (see manuscript for detailed methods).
Census: The census number at which time the seedling died or was harvested.
Time: The number of days at which time the seedling died or was harvested.
Event: Used for survival analysis to indicate status of each individual seedling at a given time (above)
0 = harvested or experiment ended
1 = dead
Harvest: Indicates whether the seedling was harvested for trait measurement.
Alive: Indicates if the seedling was alive at the end of the second growing season. "X" in this field indicates alive status.

In [26]:
def unique(lists):
    unique_list = pd.Series(lists).drop_duplicates().tolist()
    for x in unique_list:
        print(x)

In [27]:
tree = pd.read_csv('Tree_Data.csv')
pd.set_option('display.max_columns', None)
tree.head()

Unnamed: 0,No,Plot,Subplot,Species,Light_ISF,Light_Cat,Core,Soil,Adult,Sterile,Conspecific,Myco,SoilMyco,PlantDate,AMF,EMF,Phenolics,Lignin,NSC,Census,Time,Event,Harvest,Alive
0,126,1,C,Acer saccharum,0.106,Med,2017,Prunus serotina,I,Non-Sterile,Heterospecific,AMF,AMF,6/11/18,22.0,,-0.56,13.86,12.15,4,14.0,1.0,0,Dead
1,11,1,C,Quercus alba,0.106,Med,2017,Quercus rubra,970,Non-Sterile,Heterospecific,EMF,EMF,5/25/18,15.82,31.07,5.19,20.52,19.29,33,115.5,0.0,0,Alive
2,12,1,C,Quercus rubra,0.106,Med,2017,Prunus serotina,J,Non-Sterile,Heterospecific,EMF,AMF,5/31/18,24.45,28.19,3.36,24.74,15.01,18,63.0,1.0,0,Dead
3,2823,7,D,Acer saccharum,0.08,Med,2016,Prunus serotina,J,Non-Sterile,Heterospecific,AMF,AMF,6/11/18,22.23,,-0.71,14.29,12.36,4,14.0,1.0,0,Dead
4,5679,14,A,Acer saccharum,0.06,Low,2017,Prunus serotina,689,Non-Sterile,Heterospecific,AMF,AMF,6/11/18,21.15,,-0.58,10.85,11.2,4,14.0,1.0,0,Dead


In [28]:
tree.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2783 entries, 0 to 2782
Data columns (total 24 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   No           2783 non-null   int64  
 1   Plot         2783 non-null   int64  
 2   Subplot      2783 non-null   object 
 3   Species      2783 non-null   object 
 4   Light_ISF    2783 non-null   float64
 5   Light_Cat    2783 non-null   object 
 6   Core         2783 non-null   int64  
 7   Soil         2783 non-null   object 
 8   Adult        2783 non-null   object 
 9   Sterile      2783 non-null   object 
 10  Conspecific  2783 non-null   object 
 11  Myco         2783 non-null   object 
 12  SoilMyco     2783 non-null   object 
 13  PlantDate    2783 non-null   object 
 14  AMF          2783 non-null   float64
 15  EMF          1283 non-null   float64
 16  Phenolics    2783 non-null   float64
 17  Lignin       2783 non-null   float64
 18  NSC          2783 non-null   float64
 19  Census

In [29]:
tree.describe()

Unnamed: 0,No,Plot,Light_ISF,Core,AMF,EMF,Phenolics,Lignin,NSC,Census,Time,Event,Harvest
count,2783.0,2783.0,2783.0,2783.0,2783.0,1283.0,2783.0,2783.0,2783.0,2783.0,2783.0,2782.0,2783.0
mean,3914.513834,9.561624,0.085707,2016.64894,20.553069,26.47675,1.933105,15.759792,14.219641,15.28207,53.487244,0.570453,0.252964
std,2253.515063,5.203659,0.025638,0.477387,12.309587,16.63689,1.969842,6.779607,4.298271,9.166555,32.082942,0.4951,0.434789
min,3.0,1.0,0.032,2016.0,0.0,0.0,-1.35,2.23,4.3,4.0,14.0,0.0,0.0
25%,1971.0,5.0,0.066,2016.0,13.4,13.78,0.17,10.355,11.605,7.0,24.5,0.0,0.0
50%,3932.0,10.0,0.082,2017.0,18.0,27.72,0.75,14.04,12.66,13.0,45.5,1.0,0.0
75%,5879.0,14.0,0.1,2017.0,24.445,35.71,3.78,21.115,17.275,18.0,63.0,1.0,1.0
max,7772.0,18.0,0.161,2017.0,100.0,87.5,6.1,32.77,29.45,33.0,115.5,1.0,1.0


In [30]:
correlation_matrix = tree.corr()

ValueError: could not convert string to float: 'C'