#### In this notebook, assuming an effective size of 16 species, we apply compressed sensing (CS; BPDN solved with ADMM) to effectively 186 data points. For each species, we only look at predictions on the points where that species was present to begin with. (This is the second set of species that maximises the number of data points.)

#### import the relevant files and modules

In [1]:
import pandas as pd
import numpy as np
import cvxpy as cvx
import random
import time
from __future__ import print_function
from builtins import input


from sporco.admm import bpdn
### using the ADMM algorithm 
### we can also use the PGM algorithm 
from sporco import util
from sporco import plot
plot.config_notebook_plotting()

from scipy.linalg import hadamard

import matplotlib.pyplot as plt

from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import KFold

import time 
from scipy import stats
from matplotlib import pyplot
# Apply two matplotlib style sheets one after the other; the second call is
# layered on top of the first.
plt.style.use('ggplot')
plt.style.use('seaborn-dark-palette')
# Load two pickled objects. `iHiV` is row-sliced into the BPDN dictionary in
# the cross-validation loop; `B` is not referenced in the visible cells.
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
# NOTE(review): the paths are hard-coded under the home directory.
iHiV = pd.read_pickle("~/bge-analysis-simv3/iHiV.pkl")
B = pd.read_pickle("~/bge-analysis-simv3/B.pkl")


# Suppress every warning for the rest of the session.
import warnings
warnings.filterwarnings("ignore")

In [2]:
# `X` is the second matrix whose rows are used as a BPDN dictionary in the
# cross-validation loop (its predictions are collected in `errlin`).
X = pd.read_pickle("~/bge-analysis-simv3/X16.pkl")

In [3]:
import matplotlib as mpl
from matplotlib import pyplot
import matplotlib.pyplot as plt
mpl.rcParams['font.family'] = 'sans-serif'
def set_violin_color(vp,color,mediancolor):
    """Recolour the artists of a violin plot.

    vp          -- mapping of violin-plot artists; it must provide the keys
                   'bodies', 'cbars', 'cmins', 'cmaxes', 'cmeans' and
                   'cmedians'.
    color       -- colour applied to the violin bodies and the centre bars.
    mediancolor -- colour applied to the median markers.

    The min, max and mean markers are always drawn in black.
    """
    # Bodies take face/edge colours and transparency rather than a line colour.
    plt.setp(vp['bodies'], facecolor=color, edgecolor="white", alpha=0.7)

    # Line-like parts, listed in the order they are recoloured.
    line_colors = (
        ('cbars', color),
        ('cmins', "black"),
        ('cmaxes', "black"),
        ('cmeans', "black"),
        ('cmedians', mediancolor),
    )
    for part, part_color in line_colors:
        plt.setp(vp[part], color=part_color)

def make_violin_plot(dataframe,col0,mediancolor,legendlabel,diff):
    """Draw one coloured violin series on the current axes with a legend entry.

    dataframe   -- data handed straight to ``plt.violinplot``.
    col0        -- colour of the violins and of the legend handle.
    mediancolor -- colour of the median markers.
    legendlabel -- text of the legend entry.
    diff        -- horizontal offset added to every violin position.

    Violins are placed at ``3.0 * i + diff`` for ``i`` in
    ``0 .. len(dataframe) - 1``. The un-shifted indices ``i`` are returned as
    an array.
    """
    index = np.array(range(len(dataframe)))
    violins = plt.violinplot(
        dataframe,
        positions=index * 3.0 + diff,
        showmeans=True,
        showmedians=True,
    )
    set_violin_color(violins, col0, mediancolor)
    # Empty line artist: it exists only to carry the legend label and colour.
    plt.plot([], c=col0, label=legendlabel)
    plt.legend(loc='upper left')
    return index

import seaborn as sns
# Hex colour codes of seaborn's "bright" palette.
colorlist1 = sns.color_palette("bright").as_hex()
# NOTE(review): the value of this call is discarded — it is not the last
# expression of the cell, so no palette preview is displayed.
sns.color_palette("bright")

from sklearn.metrics import r2_score

In [4]:
# Solver options shared by every BPDN solve in this notebook: no verbose
# output, at most 500 main iterations, relative stopping tolerance 1e-8, and
# the AutoRho residual target set to 1.0.
opt = bpdn.BPDN.Options({'Verbose': False, 'MaxMainIter': 500,
                         'RelStopTol': 1e-8, 'AutoRho': {'RsdlTarget': 1.0}})

#### read the effective species presence-absence

In [5]:
# Presence/absence table: the cells below read a "well number" column and one
# column per species index (0..15), where 1.0 marks presence.
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
pa_redv1 = pd.read_pickle("~/compressed_sensingv1/realdatasets/ophelli-ryan/eff-16-list1-red-pa.pkl")

##### compile, for each species, the well numbers in which it is present

In [6]:
# species_ones[k] holds the "well number" values of the rows in which
# column k of pa_redv1 equals 1.0, for the 16 species columns 0..15.
species_ones = [
    pa_redv1.loc[pa_redv1[sp] == 1.0, "well number"].values
    for sp in range(16)
]

In [7]:
# Number of presence wells per species, in the same order as species_ones.
ones_len = [len(wells) for wells in species_ones]

#### read the steady states (relative abundances)

In [8]:
# Load the steady-state table and keep its transpose; the cross-validation
# loop selects wells as columns of `sst` (`sst[ri]`).
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
steadystate = pd.read_pickle("~/compressed_sensingv1/realdatasets/ophelli-ryan/eff-16-list1-red-sst-REL.pkl")
sst = steadystate.T

In [9]:
# Regularisation strengths (lambda) to scan: five log-spaced values from
# 10**-5 to 10**0.8, rounded to 5 decimals, plus three hand-picked values.
labdl = np.round(np.logspace(-5,0.8,5),5)
# Concatenate instead of append(): appending the list would nest it as a single
# element, which then has to be flattened again.
lam = list(labdl) + [0.5, 0.6, 1.0]
# Sort ascending and keep the 7 smallest values; this drops the largest
# log-spaced value (10**0.8).
lamf = sorted(lam)[:7]
lamf

[1e-05, 0.00028, 0.00794, 0.22387, 0.5, 0.6, 1.0]

In [10]:
# Percentage of each species' presence wells to sample (100 = use all of them).
samp = 100
# NOTE(review): `n` is not referenced in any of the visible cells.
n = 2**16
# Candidate numbers of K-fold splits.
cv_splitsl = [2,3,5,6,7]
# Seed placeholder for the K-fold splitter; None means no fixed seed.
random_state = None
# Regularisation strengths scanned by the cross-validation loop.
laml = lamf

In [None]:
def _bpdn_reconstruct(dictionary, rows, signal, lmda):
    """Fit BPDN on the selected rows of ``dictionary`` and predict all rows.

    dictionary -- 2-D matrix; the rows listed in ``rows`` form the BPDN dictionary.
    rows       -- row indices (well numbers) used for fitting.
    signal     -- column vector of observations, one entry per element of ``rows``.
    lmda       -- regularisation strength passed to BPDN.

    Returns ``dictionary @ coefficients`` as a flat array, i.e. the
    reconstruction for every row, including those held out of the fit.
    Uses the notebook-level solver options ``opt``.
    """
    solver = bpdn.BPDN(dictionary[rows, :], signal, lmda, opt)
    coef = solver.solve()
    return np.dot(dictionary, coef.ravel())


# One record is appended per (cv_splits, species, lambda, fold):
#   [lambda, species, predictions on held-out wells, held-out wells,
#    training wells, cv_splits, #held-out, #training]
# `err` uses iHiV as the dictionary, `errlin` uses X.
# NOTE(review): `random` is not seeded, so the sampled/shuffled well order (and
# therefore the fold membership) differs between runs.
err = []
errlin = []
for cv_splits in cv_splitsl:
    # Unshuffled K-fold: the well order is already randomised below, so the
    # splitter itself needs no shuffling or random_state.
    kf = KFold(n_splits=cv_splits)
    for species in range(4,6):
        # Sample `samp` percent of the wells in which this species is present.
        m = int(samp*(ones_len[species])/100)
        ri = random.sample(list(species_ones[species]),m)
        ri.sort()
        # Steady-state values of this species on the sampled wells, indexed by well.
        y2 = sst[ri].T[species]
        xs = list(y2.index)
        random.shuffle(xs)
        data_present = np.array(xs)
        for lmda in laml:
            for train_index, test_index in kf.split(data_present):
                rinew = list(data_present[train_index])
                rileft = list(data_present[test_index])
                # Training observations as an (n_train, 1) column vector.
                s3 = y2[rinew].values[:, np.newaxis]
                for dictionary, records in ((iHiV, err), (X, errlin)):
                    yrecon = _bpdn_reconstruct(dictionary, rinew, s3, lmda)
                    records.append([lmda,species,yrecon[rileft],rileft,rinew,cv_splits,len(rileft),len(rinew)])

In [None]:
# One row per record collected in the loop. Columns are positional (0..7), in
# the order the records were appended: lambda, species, held-out predictions,
# held-out wells, training wells, cv_splits, #held-out, #training.
errdf  = pd.DataFrame(err)
errdflin = pd.DataFrame(errlin)

In [None]:
# Persist the iHiV-dictionary results to the current working directory.
pd.to_pickle(errdf,"16l1-46-bge.pkl")

In [None]:
# Persist the X-dictionary results to the current working directory.
pd.to_pickle(errdflin,"16l1-46-lin.pkl")

In [None]:
# NOTE(review): `c` is not defined anywhere in the visible notebook, so running
# this cell would raise NameError. It looks like a stray keystroke — confirm and
# delete the cell.
c