In [1]:
# Initial Model Fitting in PyGam
# PB 1/18/23
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pygam
from pygam import LinearGAM, s

# Awesome package for non-linear correlation analysis
# should be really useful for feature selection
# https://www.sciencedirect.com/science/article/pii/S2352711021000315#fig2
# https://github.com/ElsevierSoftwareX/SOFTX-D-20-00028
from ennemi import pairwise_mi, pairwise_corr

# TBD
# Set up model validation (cross validation)
# Do Herb H and Max/mean field height comparisons (correlation, regressions, etc.)

# Region name; it selects which output folder the input table is read from.
region = 'Mpala'

# Load that region's XYdata.csv into a DataFrame used by every later cell.
XYdf = pd.read_csv(
    f'./data/out/{region}/XYdata.csv'
)

In [None]:
# Compute pairwise correlation using the mutual information index
# (ennemi's pairwise_corr) across all numeric columns of XYdf.
# 'number' is pandas' string alias for every numeric dtype, so this cell
# selects the same columns as np.number without needing numpy in scope.
# drop_nan=True is passed because the table may contain missing values.
corr = pairwise_corr(XYdf.select_dtypes(include='number'), drop_nan=True)

# Print Top Variables in order (strongest association first).
# Swap the active line to rank predictors against a different response:
# corr.loc['Dry Weight Without Bag (g)'].sort_values(ascending=False)
# corr.loc['log(DryWeight)'].sort_values(ascending=False)
corr.loc['Wet Weight Without Bag (g)'].sort_values(ascending=False)

In [None]:
# Mpala - top pairwise corr - 1/19/23 (PAI missing)
# Looks like the best (for non-logged data) are:
# mean                          0.767734
# cover_5cm                     0.758918
# 100                           0.746070
# 50                            0.731141
# coverherb_5cm                 0.727583
# 98                            0.725110
# 75                            0.722333
# FHD                           0.713418
# 25                            0.710809

In [None]:
# Let's GAM!
# Fit a LinearGAM with one spline term per predictor and print its summary.

# Columns used by the model, named once so the selection below stays in sync.
feature_cols = ['mean', 'coverherb_5cm']
target_col = 'Dry Weight Without Bag (g)'
# NOTE(review): the correlation cell above currently ranks predictors against
# 'Wet Weight Without Bag (g)'; confirm that dry weight is the intended target.

# pyGAM refuses inputs containing NaN, so keep only the rows where every
# modelled column is present. When nothing is missing this is a no-op.
model_df = XYdf[feature_cols + [target_col]].dropna()

X = model_df[feature_cols]
# y is kept as a single-column DataFrame, as before.
y = model_df[[target_col]]

# s(0) and s(1) are spline terms on the first and second columns of X.
gam = LinearGAM(s(0) + s(1)).fit(X, y)
gam.summary()

In [None]:
# Log Model 
# X = XYdf[['mean', 'coverherb_5cm']]
# y = XYdf[['log(DryWeight)']]
# gam = LinearGAM(s(0) + s(1)).fit(X, y)
# gam.summary()

In [None]:
# Partial dependence plots: one figure per non-intercept term of the fitted GAM.
# NOTE: adapted from the example code on the pyGAM site.
for term_idx, term in enumerate(gam.terms):
    # Skip the intercept term.
    if term.isintercept:
        continue

    # Grid of inputs for this term, then its partial dependence together
    # with the interval requested via width=0.95.
    grid = gam.generate_X_grid(term=term_idx)
    effect, interval = gam.partial_dependence(term=term_idx, X=grid, width=0.95)
    feature_axis = grid[:, term.feature]

    fig, ax = plt.subplots()
    ax.plot(feature_axis, effect)
    # Interval bounds drawn as dashed red lines.
    ax.plot(feature_axis, interval, c='r', ls='--')
    ax.set_title(repr(term))
    plt.show()

In [None]:
# Grid search over the smoothing penalty (lam) of each term.
# Adapted from the pyGAM quick start:
# https://pygam.readthedocs.io/en/latest/notebooks/quick_start.html#Fit-a-Model

# Five candidate penalties, log-spaced between 1e-3 and 1e5.
lam = np.logspace(-3, 5, num=5)
# lam = np.array([0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1])

# The same candidate array is supplied twice, matching the two spline terms.
lams = [lam, lam]

# NOTE: this rebinds `gam`, replacing the model fitted in the earlier cell.
gam = LinearGAM(s(0) + s(1)).gridsearch(X, y, lam=lams)
gam.summary()
