In [3]:
import numpy as np
import pandas as pd
from matplotlib.pyplot import subplots
from statsmodels.api import OLS
import sklearn.model_selection as skm
import sklearn.linear_model as skl
from sklearn.preprocessing import StandardScaler
from ISLP import load_data
from ISLP.models import ModelSpec as MS
from functools import partial
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.cross_decomposition import PLSRegression
from ISLP.models import \
     (Stepwise,
      sklearn_selected,
      sklearn_selection_path)
from l0bnb import fit_path


In [7]:
# Load the sample manifest; the preview below lists the columns sample_id,
# split, instrument_type, features_path and features_md5_hash.
metadata = pd.read_csv(
    filepath_or_buffer='../../chemofmars/Data/metadata.csv',
)

In [8]:
# Display the manifest as the cell's output; the rendered table elides the
# middle rows ("...") and shows only the head and tail.
metadata

Unnamed: 0,sample_id,split,instrument_type,features_path,features_md5_hash
0,S0000,train,commercial,train_features/S0000.csv,017b9a71a702e81a828e6242aa15f049
1,S0001,train,commercial,train_features/S0001.csv,0d09840214054d254bd49436c6a6f315
2,S0002,train,commercial,train_features/S0002.csv,3f58b3c9b001bfed6ed4e4f757083e09
3,S0003,train,commercial,train_features/S0003.csv,e9a12f96114a2fda60b36f4c0f513fb1
4,S0004,train,commercial,train_features/S0004.csv,b67603d3931897bfa796ac42cc16de78
...,...,...,...,...,...
1489,S1501,test,commercial,test_features/S1501.csv,f3834e4746797ff9e28efd3778d0a0f0
1490,S1502,test,commercial,test_features/S1502.csv,59402e65b5689766fbc18943c6c0a881
1491,S1503,test,commercial,test_features/S1503.csv,dd42287ddc74a8639495fa035ecc59a0
1492,S1504,test,commercial,test_features/S1504.csv,3030319eb6c5ed1cc6c15f8210991572


In [9]:
# Load the submission template: a sample_id column followed by one column per
# class (basalt ... sulfide), as shown in the preview below.
submission = pd.read_csv(
    filepath_or_buffer='../../chemofmars/Data/submission_format.csv',
)

In [10]:
# Display the submission template; every class column holds 0.5 in the rows
# that are rendered.
submission

Unnamed: 0,sample_id,basalt,carbonate,chloride,iron_oxide,oxalate,oxychlorine,phyllosilicate,silicate,sulfate,sulfide
0,S0766,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
1,S0767,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
2,S0768,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
3,S0769,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
4,S0770,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
...,...,...,...,...,...,...,...,...,...,...,...
735,S1501,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
736,S1502,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
737,S1503,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5
738,S1504,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5


In [11]:
# Load the supplemental manifest; compared with metadata.csv its preview also
# lists carrier_gas and different_pressure columns.
supplemental = pd.read_csv(
    filepath_or_buffer='../../chemofmars/Data/supplemental_metadata.csv',
)

In [12]:
# Display the supplemental manifest; the rendered rows all carry
# split == "supplemental" and sample ids prefixed with "X".
supplemental

Unnamed: 0,sample_id,split,instrument_type,carrier_gas,different_pressure,features_path,features_md5_hash
0,X0000,supplemental,commercial,he,0,supplemental_features/X0000.csv,415d9be2aaa9151551ec2ce94aac3236
1,X0001,supplemental,commercial,he,0,supplemental_features/X0001.csv,4d0fb1f6f9b970c2a21280b64e146148
2,X0002,supplemental,commercial,he,0,supplemental_features/X0002.csv,4d8b44276eea5291de30f6764f67a5d3
3,X0003,supplemental,commercial,he,0,supplemental_features/X0003.csv,6c21bb6c254f38734ec7d9afd5146099
4,X0004,supplemental,commercial,he,0,supplemental_features/X0004.csv,b8c549be189f6cb14f6afa60d9b9b105
...,...,...,...,...,...,...,...
342,X0342,supplemental,commercial,o2,1,supplemental_features/X0342.csv,587ee18c5f861c5b12b745d9c7977af8
343,X0343,supplemental,commercial,o2,1,supplemental_features/X0343.csv,a9474fb5488a1c6b2c8741665dbe9d02
344,X0344,supplemental,commercial,o2,1,supplemental_features/X0344.csv,221ba9467dc101e8ec652b8bbd319029
345,X0345,supplemental,commercial,o2,1,supplemental_features/X0345.csv,5343295a256e8c8cd27eb8f60aed3dfc


In [13]:
# Load the training labels: sample_id plus one indicator column per class
# (values are 0/1 in the rows previewed below).
train_labels = pd.read_csv(
    filepath_or_buffer='../../chemofmars/Data/train_labels.csv',
)

In [14]:
# Display the training labels; a sample may have several classes set at once
# (e.g. S0003 and S0004 in the rendered rows) or none (S0749).
train_labels

Unnamed: 0,sample_id,basalt,carbonate,chloride,iron_oxide,oxalate,oxychlorine,phyllosilicate,silicate,sulfate,sulfide
0,S0000,0,0,0,0,0,0,0,0,1,0
1,S0001,0,1,0,0,0,0,0,0,0,0
2,S0002,0,0,0,0,0,1,0,0,0,0
3,S0003,0,1,0,1,0,0,0,0,1,0
4,S0004,0,0,0,1,0,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
749,S0749,0,0,0,0,0,0,0,0,0,0
750,S0750,0,0,0,0,0,0,1,0,0,0
751,S0751,0,0,0,0,0,0,0,1,0,0
752,S0752,0,0,0,1,0,0,0,0,0,0


In [15]:
# Load the validation labels; same column layout as train_labels.csv
# (sample_id plus one indicator column per class).
val_labels = pd.read_csv(
    filepath_or_buffer='../../chemofmars/Data/val_labels.csv',
)

In [16]:
# Display the validation labels; the rendered rows run from S0766 to S1057.
val_labels

Unnamed: 0,sample_id,basalt,carbonate,chloride,iron_oxide,oxalate,oxychlorine,phyllosilicate,silicate,sulfate,sulfide
0,S0766,0,0,0,0,0,0,0,0,0,0
1,S0767,0,0,0,0,0,0,0,0,0,0
2,S0768,1,0,0,0,0,0,1,1,0,0
3,S0769,0,0,0,0,0,1,0,0,1,0
4,S0770,0,0,0,1,0,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
288,S1054,0,0,0,0,0,0,0,0,0,0
289,S1055,0,0,0,0,0,0,0,1,0,0
290,S1056,0,0,0,0,1,0,0,0,0,0
291,S1057,0,1,0,0,0,0,0,0,0,0


In [17]:
# Load the second validation-label file; its preview has the same columns as
# val_labels.csv.
val_labels2 = pd.read_csv(
    filepath_or_buffer='../../chemofmars/Data/val_labels2.csv',
)

In [18]:
# Display the second validation-label frame.
# NOTE(review): the head and tail rows rendered here are identical to those of
# val_labels -- confirm whether the two files actually differ (e.g. with
# val_labels.equals(val_labels2)) before keeping both.
val_labels2

Unnamed: 0,sample_id,basalt,carbonate,chloride,iron_oxide,oxalate,oxychlorine,phyllosilicate,silicate,sulfate,sulfide
0,S0766,0,0,0,0,0,0,0,0,0,0
1,S0767,0,0,0,0,0,0,0,0,0,0
2,S0768,1,0,0,0,0,0,1,1,0,0
3,S0769,0,0,0,0,0,1,0,0,1,0
4,S0770,0,0,0,1,0,1,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
288,S1054,0,0,0,0,0,0,0,0,0,0
289,S1055,0,0,0,0,0,0,0,1,0,0
290,S1056,0,0,0,0,1,0,0,0,0,0
291,S1057,0,1,0,0,0,0,0,0,0,0
