In [1]:
# Put these at the top of every notebook, to get automatic reloading and inline plotting
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from fastai.structured import *
from fastai.column_data import *
from torch.nn import functional as F

In [3]:
print(torch.cuda.is_available())

x = torch.rand(5, 3)
y = torch.rand(5, 3)
x = x.cuda()
y = y.cuda()
x + y

True



 0.6442  0.0847  1.0253
 0.5889  0.8890  0.1898
 0.8223  1.7587  0.7341
 0.6780  0.8726  0.7073
 1.0423  0.7254  1.2464
[torch.cuda.FloatTensor of size 5x3 (GPU 0)]

In [4]:
import pandas as pd
pd.set_option('display.max_columns', None)

In [5]:
class MixedInputModel(nn.Module):
    def __init__(self, emb_szs, n_cont, emb_drop, out_sz, szs, drops,
                 y_range=None, use_bn=False):
        super().__init__()
        self.embs = nn.ModuleList([nn.Embedding(c, s) for c,s in emb_szs])
        for emb in self.embs: emb_init(emb)
        n_emb = sum(e.embedding_dim for e in self.embs)
        self.n_emb, self.n_cont=n_emb, n_cont
        
        szs = [n_emb+n_cont] + szs
        self.lins = nn.ModuleList([
            nn.Linear(szs[i], szs[i+1]) for i in range(len(szs)-1)])
        self.bns = nn.ModuleList([
            nn.BatchNorm1d(sz) for sz in szs[1:]])
        for o in self.lins: kaiming_normal(o.weight.data)
        self.outp = nn.Linear(szs[-1], out_sz)
        kaiming_normal(self.outp.weight.data)

        self.emb_drop = nn.Dropout(emb_drop)
        self.drops = nn.ModuleList([nn.Dropout(drop) for drop in drops])
        self.bn = nn.BatchNorm1d(n_cont)
        self.use_bn,self.y_range = use_bn,y_range

    def forward(self, x_cat, x_cont):
        if self.n_emb != 0:
            x = [e(x_cat[:,i]) for i,e in enumerate(self.embs)]
            x = torch.cat(x, 1)
            x = self.emb_drop(x)
        if self.n_cont != 0:
            x2 = self.bn(x_cont)
            x = torch.cat([x, x2], 1) if self.n_emb != 0 else x2
        for l,d,b in zip(self.lins, self.drops, self.bns):
            x = F.relu(l(x))
            if self.use_bn: x = b(x)
            x = d(x)
        x = self.outp(x)
        if self.y_range:
            x = F.log_softmax(x)
            x = x*(self.y_range[1] - self.y_range[0])
            x = x+self.y_range[0]
        return x

In [6]:
class ColumnarDataset(Dataset):
    def __init__(self, cats, conts, y):
        n = len(cats[0]) if cats else len(conts[0])
        self.cats = np.stack(cats, 1).astype(np.int64) if cats else np.zeros((n,1))
        self.conts = np.stack(conts, 1).astype(np.float32) if conts else np.zeros((n,1))
        self.y = np.zeros((n,1)) if y is None else y.values # THIS LINE IS CHANGED FROM y[:, None]
        
    def __len__(self): return len(self.y)

    def __getitem__(self, idx):
        return [self.cats[idx], self.conts[idx], self.y[idx]]

    @classmethod
    def from_data_frames(cls, df_cat, df_cont, y=None):
        cat_cols = [c.values for n,c in df_cat.items()]
        cont_cols = [c.values for n,c in df_cont.items()]
        return cls(cat_cols, cont_cols, y)

    @classmethod
    def from_data_frame(cls, df, cat_flds, y=None):
        return cls.from_data_frames(df[cat_flds], df.drop(cat_flds, axis=1), y)

In [7]:
class ColumnarModelData(ModelData):
    def __init__(self, path, trn_ds, val_ds, bs, test_ds=None, shuffle=True):
        test_dl = DataLoader(test_ds, bs, shuffle=False, num_workers=1) if test_ds is not None else None
        super().__init__(path, DataLoader(trn_ds, bs, shuffle=shuffle, num_workers=1),
            DataLoader(val_ds, bs*2, shuffle=False, num_workers=1), test_dl)

    @classmethod
    def from_arrays(cls, path, val_idxs, xs, y, bs=64, test_xs=None, shuffle=True):
        ((val_xs, trn_xs), (val_y, trn_y)) = split_by_idx(val_idxs, xs, y)
        test_ds = PassthruDataset(*(test_xs.T), [0] * len(test_xs)) if test_xs is not None else None
        return cls(path, PassthruDataset(*(trn_xs.T), trn_y), PassthruDataset(*(val_xs.T), val_y),
                   bs=bs, shuffle=shuffle, test_ds=test_ds)

    @classmethod
    def from_data_frames(cls, path, trn_df, val_df, trn_y, val_y, cat_flds, bs, test_df=None):
        test_ds = ColumnarDataset.from_data_frame(test_df, cat_flds) if test_df is not None else None
        return cls(path, ColumnarDataset.from_data_frame(trn_df, cat_flds, trn_y),
                    ColumnarDataset.from_data_frame(val_df, cat_flds, val_y), bs, test_ds=test_ds)

    @classmethod
    def from_data_frame(cls, path, val_idxs, df, y, cat_flds, bs, test_df=None):
        ((val_df, trn_df), (val_y, trn_y)) = split_by_idx(val_idxs, df, y)
        return cls.from_data_frames(path, trn_df, val_df, trn_y, val_y, cat_flds, bs, test_df=test_df)

    def get_learner(self, emb_szs, n_cont, emb_drop, out_sz, szs, drops,
                    y_range=None, use_bn=False, **kwargs):
        model = MixedInputModel(emb_szs, n_cont, emb_drop, out_sz, szs, drops, y_range, use_bn)
        return StructuredLearner(self, StructuredModel(to_gpu(model)), opt_fn=optim.Adam, **kwargs)

In [8]:
PATH = 'data/biaobin/'
data_txt = f'{PATH}/mmm_p15_m8.txt'

In [9]:
df = pd.read_table(data_txt)
df.shape

(9317, 494)

In [10]:
df.head()

Unnamed: 0,Lung,Breast,Colon,Central_Nervous_System,Pancreas,Ovary,Prostate,Uterus,Kidney,Head_and_Neck,Stomach,Bladder,Liver,Skin,Thyroid,mut_ABL1,mut_AKT1,mut_AKT2,mut_AKT3,mut_ALK,mut_ALOX12B,mut_AMER1,mut_APC,mut_AR,mut_ARAF,mut_ARID1A,mut_ARID2,mut_ASXL1,mut_ATM,mut_ATR,mut_ATRX,mut_AURKA,mut_AURKB,mut_AXL,mut_BAP1,mut_BARD1,mut_BCL2,mut_BCL6,mut_BCOR,mut_BLM,mut_BRAF,mut_BRCA1,mut_BRCA2,mut_BRIP1,mut_BTK,mut_CARD11,mut_CASP8,mut_CBFB,mut_CBL,mut_CCND1,mut_CCND2,mut_CCND3,mut_CCNE1,mut_CD79A,mut_CD79B,mut_CDC73,mut_CDH1,mut_CDK12,mut_CDK4,mut_CDK6,mut_CDK8,mut_CDKN1B,mut_CDKN2A,mut_CDKN2B,mut_CDKN2C,mut_CEBPA,mut_CHEK1,mut_CHEK2,mut_CIC,mut_CREBBP,mut_CRKL,mut_CRLF2,mut_CSF1R,mut_CTCF,mut_CTNNB1,mut_DAXX,mut_DDR2,mut_DIS3,mut_DNMT3A,mut_DOT1L,mut_EGFR,mut_EP300,mut_EPHA3,mut_EPHA5,mut_EPHB1,mut_ERBB2,mut_ERBB3,mut_ERBB4,mut_ERG,mut_ESR1,mut_EZH2,mut_FAM46C,mut_FANCA,mut_FANCC,mut_FBXW7,mut_FGF19,mut_FGF3,mut_FGF4,mut_FGFR1,mut_FGFR2,mut_FGFR3,mut_FGFR4,mut_FIP1L1,mut_FLT1,mut_FLT3,mut_FLT4,mut_FOXL2,mut_GATA1,mut_GATA2,mut_GATA3,mut_GNA11,mut_GNAQ,mut_GNAS,mut_GRIN2A,mut_GSK3B,mut_HGF,mut_HLA_A,mut_HRAS,mut_IDH1,mut_IDH2,mut_IGF1,mut_IGF1R,mut_IGF2,mut_IKBKE,mut_IKZF1,mut_IL7R,mut_INHBA,mut_INSRR,mut_IRF4,mut_IRS2,mut_JAK1,mut_JAK2,mut_JAK3,mut_JUN,mut_KDM5A,mut_KDM5C,mut_KDM6A,mut_KDR,mut_KEAP1,mut_KIT,mut_KMT2A,mut_KMT2D,mut_KRAS,mut_LMO1,mut_MAP2K1,mut_MAP2K2,mut_MAP2K4,mut_MAP3K1,mut_MAP3K13,mut_MCL1,mut_MDM2,mut_MDM4,mut_MED12,mut_MEF2B,mut_MEN1,mut_MET,mut_MITF,mut_MLH1,mut_MPL,mut_MRE11A,mut_MSH2,mut_MSH6,mut_MTOR,mut_MUTYH,mut_MYC,mut_MYCL,mut_MYCN,mut_MYD88,mut_NBN,mut_NCOR1,mut_NF1,mut_NF2,mut_NFE2L2,mut_NFKBIA,mut_NKX2_1,mut_NOTCH1,mut_NOTCH2,mut_NOTCH3,mut_NOTCH4,mut_NPM1,mut_NRAS,mut_NSD1,mut_NTRK1,mut_NTRK2,mut_NTRK3,mut_NUP93,mut_PAK7,mut_PALB2,mut_PARP1,mut_PAX5,mut_PBRM1,mut_PDGFRA,mut_PDGFRB,mut_PDPK1,mut_PIK3C2G,mut_PIK3C3,mut_PIK3CA,mut_PIK3CG,mut_PIK3R1,mut_PIK3R2,mut_PMS2,mut_PNRC1,mut_PPP2R1A,mut_PRDM1,mut_PRKAR1A,mut_PTCH1,mut_PTEN,mut_PTPN11,mut_RAD50,mut_RAD51,mut_RAD51B,mut_RAD51C,mut_RAD51D,mut_RAD52,mut_RAD54L,mut_RAF1,mut_RARA,mut_RB1,mut_REL,mut_RET,mut_RICTOR,mut_RNF43,mut_RPTOR,mut_RUNX1,mut_SETD2,mut_SF3B1,mut_SH2B3,mut_SMAD2,mut_SMAD4,mut_SMARCA4,mut_SMARCB1,mut_SMARCD1,mut_SMO,mut_SOCS1,mut_SOX2,mut_SPEN,mut_SPOP,mut_SRC,mut_STAG2,mut_STK11,mut_SUFU,mut_SYK,mut_TBX3,mut_TET2,mut_TGFBR2,mut_TNFAIP3,mut_TNFRSF14,mut_TOP1,mut_TP53,mut_TSC1,mut_TSC2,mut_TSHR,mut_VHL,mut_WT1,mut_XPO1,cna_ABL1,cna_AKT1,cna_AKT2,cna_AKT3,cna_ALK,cna_ALOX12B,cna_AMER1,cna_APC,cna_AR,cna_ARAF,cna_ARID1A,cna_ARID2,cna_ASXL1,cna_ATM,cna_ATR,cna_ATRX,cna_AURKA,cna_AURKB,cna_AXL,cna_BAP1,cna_BARD1,cna_BCL2,cna_BCL6,cna_BCOR,cna_BLM,cna_BRAF,cna_BRCA1,cna_BRCA2,cna_BRIP1,cna_BTK,cna_CARD11,cna_CASP8,cna_CBFB,cna_CBL,cna_CCND1,cna_CCND2,cna_CCND3,cna_CCNE1,cna_CD79A,cna_CD79B,cna_CDC73,cna_CDH1,cna_CDK12,cna_CDK4,cna_CDK6,cna_CDK8,cna_CDKN1B,cna_CDKN2A,cna_CDKN2B,cna_CDKN2C,cna_CEBPA,cna_CHEK1,cna_CHEK2,cna_CIC,cna_CREBBP,cna_CRKL,cna_CRLF2,cna_CSF1R,cna_CTCF,cna_CTNNB1,cna_DAXX,cna_DDR2,cna_DIS3,cna_DNMT3A,cna_DOT1L,cna_EGFR,cna_EP300,cna_EPHA3,cna_EPHA5,cna_EPHB1,cna_ERBB2,cna_ERBB3,cna_ERBB4,cna_ERG,cna_ESR1,cna_EZH2,cna_FANCA,cna_FANCC,cna_FBXW7,cna_FGF19,cna_FGF3,cna_FGF4,cna_FGFR1,cna_FGFR2,cna_FGFR3,cna_FGFR4,cna_FLT1,cna_FLT3,cna_FLT4,cna_FOXL2,cna_GATA1,cna_GATA2,cna_GATA3,cna_GNA11,cna_GNAQ,cna_GNAS,cna_GRIN2A,cna_GSK3B,cna_HGF,cna_HLA_A,cna_HRAS,cna_IDH1,cna_IDH2,cna_IGF1,cna_IGF1R,cna_IGF2,cna_IKBKE,cna_IKZF1,cna_IL7R,cna_INHA,cna_INHBA,cna_IRF4,cna_IRS2,cna_JAK1,cna_JAK2,cna_JAK3,cna_JUN,cna_KDM5A,cna_KDM5C,cna_KDM6A,cna_KDR,cna_KEAP1,cna_KIT,cna_KMT2A,cna_KMT2D,cna_KRAS,cna_LMO1,cna_MAP2K1,cna_MAP2K2,cna_MAP2K4,cna_MAP3K1,cna_MAP3K13,cna_MCL1,cna_MDM2,cna_MDM4,cna_MED12,cna_MEF2B,cna_MEN1,cna_MET,cna_MITF,cna_MLH1,cna_MPL,cna_MRE11A,cna_MSH2,cna_MSH6,cna_MTOR,cna_MUTYH,cna_MYC,cna_MYCL,cna_MYCN,cna_MYD88,cna_NBN,cna_NCOR1,cna_NF1,cna_NF2,cna_NFE2L2,cna_NFKBIA,cna_NKX2_1,cna_NOTCH1,cna_NOTCH2,cna_NOTCH3,cna_NOTCH4,cna_NPM1,cna_NRAS,cna_NSD1,cna_NTRK1,cna_NTRK2,cna_NTRK3,cna_NUP93,cna_PAK7,cna_PALB2,cna_PARP1,cna_PAX5,cna_PBRM1,cna_PDGFRA,cna_PDGFRB,cna_PDPK1,cna_PIK3C2G,cna_PIK3C3,cna_PIK3CA,cna_PIK3CG,cna_PIK3R1,cna_PIK3R2,cna_PMS2,cna_PNRC1,cna_PPP2R1A,cna_PRDM1,cna_PRKAR1A,cna_PTCH1,cna_PTEN,cna_PTPN11,cna_RAD50,cna_RAD51,cna_RAD51B,cna_RAD51C,cna_RAD51D,cna_RAD52,cna_RAD54L,cna_RAF1,cna_RARA,cna_RB1,cna_REL,cna_RET,cna_RICTOR,cna_RNF43,cna_RPTOR,cna_RUNX1,cna_SETD2,cna_SF3B1,cna_SH2B3,cna_SMAD2,cna_SMAD4,cna_SMARCA4,cna_SMARCB1,cna_SMARCD1,cna_SMO,cna_SOX2,cna_SPEN,cna_SPOP,cna_SRC,cna_STAG2,cna_STK11,cna_SUFU,cna_SYK,cna_TBX3,cna_TET2,cna_TGFBR2,cna_TNFAIP3,cna_TNFRSF14,cna_TOP1,cna_TP53,cna_TSC1,cna_TSC2,cna_TSHR,cna_VHL,cna_WT1,cna_XPO1,CIN,destination
0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,5
1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6
2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
4,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7


In [11]:
# change -1 labels
#df.replace(-1 , 10)

# each row is a patient

# 15 Cancer Types
Lung, Breast, Colon, Central_Nervous_System, Pancreas, Ovary, Prostate, Uterus, Kidney, Head_and_Neck, Stomch, Bladder, Liver, Skin, Thyroid

# mut

# cna

# CIN

# destination

In [12]:
display(DataFrameSummary(df).summary())

Unnamed: 0,Lung,Breast,Colon,Central_Nervous_System,Pancreas,Ovary,Prostate,Uterus,Kidney,Head_and_Neck,Stomach,Bladder,Liver,Skin,Thyroid,mut_ABL1,mut_AKT1,mut_AKT2,mut_AKT3,mut_ALK,mut_ALOX12B,mut_AMER1,mut_APC,mut_AR,mut_ARAF,mut_ARID1A,mut_ARID2,mut_ASXL1,mut_ATM,mut_ATR,mut_ATRX,mut_AURKA,mut_AURKB,mut_AXL,mut_BAP1,mut_BARD1,mut_BCL2,mut_BCL6,mut_BCOR,mut_BLM,mut_BRAF,mut_BRCA1,mut_BRCA2,mut_BRIP1,mut_BTK,mut_CARD11,mut_CASP8,mut_CBFB,mut_CBL,mut_CCND1,mut_CCND2,mut_CCND3,mut_CCNE1,mut_CD79A,mut_CD79B,mut_CDC73,mut_CDH1,mut_CDK12,mut_CDK4,mut_CDK6,mut_CDK8,mut_CDKN1B,mut_CDKN2A,mut_CDKN2B,mut_CDKN2C,mut_CEBPA,mut_CHEK1,mut_CHEK2,mut_CIC,mut_CREBBP,mut_CRKL,mut_CRLF2,mut_CSF1R,mut_CTCF,mut_CTNNB1,mut_DAXX,mut_DDR2,mut_DIS3,mut_DNMT3A,mut_DOT1L,mut_EGFR,mut_EP300,mut_EPHA3,mut_EPHA5,mut_EPHB1,mut_ERBB2,mut_ERBB3,mut_ERBB4,mut_ERG,mut_ESR1,mut_EZH2,mut_FAM46C,mut_FANCA,mut_FANCC,mut_FBXW7,mut_FGF19,mut_FGF3,mut_FGF4,mut_FGFR1,mut_FGFR2,mut_FGFR3,mut_FGFR4,mut_FIP1L1,mut_FLT1,mut_FLT3,mut_FLT4,mut_FOXL2,mut_GATA1,mut_GATA2,mut_GATA3,mut_GNA11,mut_GNAQ,mut_GNAS,mut_GRIN2A,mut_GSK3B,mut_HGF,mut_HLA_A,mut_HRAS,mut_IDH1,mut_IDH2,mut_IGF1,mut_IGF1R,mut_IGF2,mut_IKBKE,mut_IKZF1,mut_IL7R,mut_INHBA,mut_INSRR,mut_IRF4,mut_IRS2,mut_JAK1,mut_JAK2,mut_JAK3,mut_JUN,mut_KDM5A,mut_KDM5C,mut_KDM6A,mut_KDR,mut_KEAP1,mut_KIT,mut_KMT2A,mut_KMT2D,mut_KRAS,mut_LMO1,mut_MAP2K1,mut_MAP2K2,mut_MAP2K4,mut_MAP3K1,mut_MAP3K13,mut_MCL1,mut_MDM2,mut_MDM4,mut_MED12,mut_MEF2B,mut_MEN1,mut_MET,mut_MITF,mut_MLH1,mut_MPL,mut_MRE11A,mut_MSH2,mut_MSH6,mut_MTOR,mut_MUTYH,mut_MYC,mut_MYCL,mut_MYCN,mut_MYD88,mut_NBN,mut_NCOR1,mut_NF1,mut_NF2,mut_NFE2L2,mut_NFKBIA,mut_NKX2_1,mut_NOTCH1,mut_NOTCH2,mut_NOTCH3,mut_NOTCH4,mut_NPM1,mut_NRAS,mut_NSD1,mut_NTRK1,mut_NTRK2,mut_NTRK3,mut_NUP93,mut_PAK7,mut_PALB2,mut_PARP1,mut_PAX5,mut_PBRM1,mut_PDGFRA,mut_PDGFRB,mut_PDPK1,mut_PIK3C2G,mut_PIK3C3,mut_PIK3CA,mut_PIK3CG,mut_PIK3R1,mut_PIK3R2,mut_PMS2,mut_PNRC1,mut_PPP2R1A,mut_PRDM1,mut_PRKAR1A,mut_PTCH1,mut_PTEN,mut_PTPN11,mut_RAD50,mut_RAD51,mut_RAD51B,mut_RAD51C,mut_RAD51D,mut_RAD52,mut_RAD54L,mut_RAF1,mut_RARA,mut_RB1,mut_REL,mut_RET,mut_RICTOR,mut_RNF43,mut_RPTOR,mut_RUNX1,mut_SETD2,mut_SF3B1,mut_SH2B3,mut_SMAD2,mut_SMAD4,mut_SMARCA4,mut_SMARCB1,mut_SMARCD1,mut_SMO,mut_SOCS1,mut_SOX2,mut_SPEN,mut_SPOP,mut_SRC,mut_STAG2,mut_STK11,mut_SUFU,mut_SYK,mut_TBX3,mut_TET2,mut_TGFBR2,mut_TNFAIP3,mut_TNFRSF14,mut_TOP1,mut_TP53,mut_TSC1,mut_TSC2,mut_TSHR,mut_VHL,mut_WT1,mut_XPO1,cna_ABL1,cna_AKT1,cna_AKT2,cna_AKT3,cna_ALK,cna_ALOX12B,cna_AMER1,cna_APC,cna_AR,cna_ARAF,cna_ARID1A,cna_ARID2,cna_ASXL1,cna_ATM,cna_ATR,cna_ATRX,cna_AURKA,cna_AURKB,cna_AXL,cna_BAP1,cna_BARD1,cna_BCL2,cna_BCL6,cna_BCOR,cna_BLM,cna_BRAF,cna_BRCA1,cna_BRCA2,cna_BRIP1,cna_BTK,cna_CARD11,cna_CASP8,cna_CBFB,cna_CBL,cna_CCND1,cna_CCND2,cna_CCND3,cna_CCNE1,cna_CD79A,cna_CD79B,cna_CDC73,cna_CDH1,cna_CDK12,cna_CDK4,cna_CDK6,cna_CDK8,cna_CDKN1B,cna_CDKN2A,cna_CDKN2B,cna_CDKN2C,cna_CEBPA,cna_CHEK1,cna_CHEK2,cna_CIC,cna_CREBBP,cna_CRKL,cna_CRLF2,cna_CSF1R,cna_CTCF,cna_CTNNB1,cna_DAXX,cna_DDR2,cna_DIS3,cna_DNMT3A,cna_DOT1L,cna_EGFR,cna_EP300,cna_EPHA3,cna_EPHA5,cna_EPHB1,cna_ERBB2,cna_ERBB3,cna_ERBB4,cna_ERG,cna_ESR1,cna_EZH2,cna_FANCA,cna_FANCC,cna_FBXW7,cna_FGF19,cna_FGF3,cna_FGF4,cna_FGFR1,cna_FGFR2,cna_FGFR3,cna_FGFR4,cna_FLT1,cna_FLT3,cna_FLT4,cna_FOXL2,cna_GATA1,cna_GATA2,cna_GATA3,cna_GNA11,cna_GNAQ,cna_GNAS,cna_GRIN2A,cna_GSK3B,cna_HGF,cna_HLA_A,cna_HRAS,cna_IDH1,cna_IDH2,cna_IGF1,cna_IGF1R,cna_IGF2,cna_IKBKE,cna_IKZF1,cna_IL7R,cna_INHA,cna_INHBA,cna_IRF4,cna_IRS2,cna_JAK1,cna_JAK2,cna_JAK3,cna_JUN,cna_KDM5A,cna_KDM5C,cna_KDM6A,cna_KDR,cna_KEAP1,cna_KIT,cna_KMT2A,cna_KMT2D,cna_KRAS,cna_LMO1,cna_MAP2K1,cna_MAP2K2,cna_MAP2K4,cna_MAP3K1,cna_MAP3K13,cna_MCL1,cna_MDM2,cna_MDM4,cna_MED12,cna_MEF2B,cna_MEN1,cna_MET,cna_MITF,cna_MLH1,cna_MPL,cna_MRE11A,cna_MSH2,cna_MSH6,cna_MTOR,cna_MUTYH,cna_MYC,cna_MYCL,cna_MYCN,cna_MYD88,cna_NBN,cna_NCOR1,cna_NF1,cna_NF2,cna_NFE2L2,cna_NFKBIA,cna_NKX2_1,cna_NOTCH1,cna_NOTCH2,cna_NOTCH3,cna_NOTCH4,cna_NPM1,cna_NRAS,cna_NSD1,cna_NTRK1,cna_NTRK2,cna_NTRK3,cna_NUP93,cna_PAK7,cna_PALB2,cna_PARP1,cna_PAX5,cna_PBRM1,cna_PDGFRA,cna_PDGFRB,cna_PDPK1,cna_PIK3C2G,cna_PIK3C3,cna_PIK3CA,cna_PIK3CG,cna_PIK3R1,cna_PIK3R2,cna_PMS2,cna_PNRC1,cna_PPP2R1A,cna_PRDM1,cna_PRKAR1A,cna_PTCH1,cna_PTEN,cna_PTPN11,cna_RAD50,cna_RAD51,cna_RAD51B,cna_RAD51C,cna_RAD51D,cna_RAD52,cna_RAD54L,cna_RAF1,cna_RARA,cna_RB1,cna_REL,cna_RET,cna_RICTOR,cna_RNF43,cna_RPTOR,cna_RUNX1,cna_SETD2,cna_SF3B1,cna_SH2B3,cna_SMAD2,cna_SMAD4,cna_SMARCA4,cna_SMARCB1,cna_SMARCD1,cna_SMO,cna_SOX2,cna_SPEN,cna_SPOP,cna_SRC,cna_STAG2,cna_STK11,cna_SUFU,cna_SYK,cna_TBX3,cna_TET2,cna_TGFBR2,cna_TNFAIP3,cna_TNFRSF14,cna_TOP1,cna_TP53,cna_TSC1,cna_TSC2,cna_TSHR,cna_VHL,cna_WT1,cna_XPO1,CIN,destination
count,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317
mean,0.235162,0.218418,0.148331,0.00311259,0.0642911,0.0645057,0.0505528,0.0383171,0.0297306,0.027906,0.0333798,0.0207148,0.0125577,0.0332725,0.0197488,0.0151336,0.0199635,0.00729849,0.0112697,0.0321992,0.00869379,0.0290866,0.154556,0.0324139,0.0105184,0.0977783,0.042503,0.02855,0.0633251,0.0368144,0.0382097,0.0066545,0.00332725,0.0165289,0.0245787,0.0324139,0.00236128,0.0112697,0.0291939,0.0165289,0.0536653,0.0295159,0.0508747,0.0199635,0.0115917,0.025974,0.0151336,0.00826446,0.0165289,0.00708383,0.00622518,0.00300526,0.00579586,0.00246861,0.00246861,0.0080498,0.0341312,0.0208222,0.00461522,0.00364924,0.0080498,0.00901578,0.0519481,0.00364924,0.00289793,0.0108404,0.00729849,0.0143823,0.0236128,0.0463669,0.00268327,0.00289793,0.0159923,0.0151336,0.0350971,0.0143823,0.0151336,0.0122357,0.0314479,0.0240421,0.0593539,0.0357411,0.0354191,0.038639,0.0257594,0.0361704,0.02533,0.0382097,0.011699,0.0441129,0.00890845,0.00547386,0.0214661,0.00847912,0.0358484,0.00397124,0.00633251,0.00504454,0.0143823,0.0162069,0.0148116,0.0138457,0.00354191,0.0347751,0.0173876,0.0312332,0.0106257,0.00783514,0.00826446,0.0397124,0.00654717,0.00772781,0.0430396,0.0429323,0.00686916,0.0244714,0.0038639,0.00783514,0.0124504,0.00697649,0.00440056,0.0164216,0.00579586,0.00890845,0.0206075,0.0135237,0.0156703,0.00246861,0.00880112,0.0367071,0.0214661,0.0159923,0.0193195,0.00697649,0.0203928,0.0267253,0.0303746,0.0281206,0.0465815,0.0185682,0.039283,0.0991736,0.191693,0.00225394,0.0094451,0.00697649,0.0170656,0.0340238,0.013953,0.00729849,0.00622518,0.00440056,0.0582806,0.00622518,0.0183535,0.0235054,0.00912311,0.00987442,0.00965976,0.00890845,0.012343,0.0198562,0.0328432,0.0107331,0.00794247,0.0041859,0.0107331,0.00482988,0.0109477,0.0327359,0.0645057,0.0156703,0.0167436,0.00590319,0.011699,0.0450789,0.0313406,0.0369218,0.0391757,0.00472255,0.0237201,0.0302673,0.0188902,0.0140603,0.0297306,0.0104111,0.025974,0.0173876,0.0150263,0.00976709,0.0321992,0.0246861,0.0159923,0.00472255,0.0298379,0.0128797,0.150263,0.0342385,0.0277986,0.0128797,0.0106257,0.0041859,0.014275,0.0114844,0.00450789,0.0242567,0.0579586,0.00933777,0.0156703,0.00450789,0.00429323,0.00482988,0.00504454,0.00461522,0.00837179,0.00751315,0.0100891,0.0526994,0.00697649,0.0210368,0.0186755,0.0237201,0.0180316,0.0174949,0.0465815,0.0222175,0.0110551,0.0109477,0.0520554,0.0506601,0.00815713,0.00590319,0.015241,0.00289793,0.00697649,0.0465815,0.0122357,0.00611785,0.025008,0.0393904,0.00611785,0.00890845,0.0302673,0.0359558,0.0173876,0.00998175,0.00332725,0.00880112,0.542342,0.0198562,0.0245787,0.013631,0.0178169,0.014919,0.0109477,0.00547386,0.0109477,0.0222175,0.0261887,0.00762048,-0.00257594,0.00697649,0.000965976,0.0184609,0.0080498,0.00107331,0.00923044,0.0367071,0.00171729,0.015241,0.0027906,0.0479768,-0.00182462,0.0100891,-0.000214661,0.00601052,0.0013953,0.0227541,0.00300526,0.00837179,0.0148116,0.00697649,0.00955243,0.025974,0.00257594,0.0210368,0.00643984,0.000858645,0.00332725,0.0713749,0.0201782,0.0197488,0.042503,0.0105184,0.0242567,0.0260814,-0.00118064,0.0338092,0.0239347,0.0237201,0.0168509,0.0182462,-0.0857572,-0.0762048,0.00536653,0.0294086,0.00171729,0.00375657,0.00751315,0.00998175,0.0109477,-0.00128797,0.00643984,0.000965976,0.00440056,0.0109477,0.0367071,0.0114844,0.00729849,0.00236128,0.0406783,0.00429323,0.00847912,0.00375657,0.012665,0.0526994,0.00869379,0.00622518,0.00923044,0.00923044,0.0157776,-0.00407857,0.00601052,0.00375657,0.067511,0.0639691,0.0656864,0.0481915,0.00783514,0.00343458,0.00955243,0.0177096,0.0187829,0.00729849,0.012021,0.00815713,0.0118064,0.0156703,0.0041859,0.0052592,0.0564559,0.0140603,0.0105184,0.0181389,0.0112697,0.00321992,0.00708383,0.0109477,0.00772781,0.0118064,0.0041859,0.0289793,0.0206075,0.0373511,0.00504454,0.0200708,0.00579586,0.0206075,0.0052592,0.00740582,0.011699,0.00601052,0.0235054,0.00547386,0.00482988,0.00933777,0.00901578,0.00987442,0.00375657,0.00708383,0.0347751,0.00364924,0.00397124,0.0052592,-0.00965976,-0.000107331,0.0227541,0.0739508,0.0410003,0.0328432,0.00461522,0.0119137,0.00643984,0.0238274,0.00311259,0.00429323,0.0113771,0.00676183,0.00794247,0.00880112,0.00343458,0.00654717,0.12386,0.0157776,0.0108404,0.0038639,0.066545,-0.00107331,0.00128797,0.00493721,0.00987442,0.0317699,0.0340238,0.00601052,0.0174949,0.0138457,0.00923044,0.00933777,0.00697649,0.00998175,0.0301599,0.00547386,0.00729849,0.0013953,0.0170656,0.0108404,0.025008,0.00869379,0.000107331,0.0103037,0.00729849,0.0107331,0.0192122,0.0038639,0.0297306,0.0156703,0.000107331,0.012987,0.0226468,0.00193195,0.0124504,0.00858645,0.025974,0.00622518,-0.0337018,0.00729849,0.00429323,0.00321992,0.00568853,0.0213588,0.00890845,0.02533,0.00601052,0.00772781,0.0168509,-0.0143823,0.00912311,0.00504454,0.038961,0.0193195,0.0173876,0.00869379,0.000858645,0.00955243,0.00783514,0.000858645,-0.0110551,0.00751315,0.00450789,0.00729849,0.0128797,0.0284426,0.00440056,0.0199635,0.0331652,0.00375657,-0.00289793,-0.00107331,0.00601052,0.00923044,0.00160996,0.000751315,0.00912311,0.00343458,0.0333798,-0.0104111,0.00536653,0.00976709,0.00601052,0.00729849,0.00987442,0.00880112,0.0345605,4.8873
std,0.424122,0.413195,0.355447,0.0557067,0.245284,0.245665,0.219094,0.191971,0.169852,0.164712,0.179636,0.142436,0.111361,0.179357,0.139143,0.122091,0.139882,0.0851234,0.105565,0.176538,0.0928393,0.168058,0.3615,0.177106,0.102024,0.297031,0.201744,0.166547,0.24356,0.188316,0.191713,0.0813076,0.0575894,0.127505,0.154846,0.177106,0.0485382,0.105565,0.168359,0.127505,0.225368,0.169257,0.219754,0.139882,0.107045,0.159066,0.122091,0.0905375,0.127505,0.0838713,0.0786581,0.0547407,0.0759137,0.0496264,0.0496264,0.0893636,0.181576,0.142796,0.0677821,0.0603019,0.0893636,0.0945275,0.221934,0.0603019,0.0537572,0.103557,0.0851234,0.119067,0.151848,0.21029,0.0517335,0.0537572,0.125452,0.122091,0.184035,0.119067,0.122091,0.109942,0.174534,0.153188,0.236298,0.185654,0.184847,0.192744,0.158425,0.186724,0.157134,0.191713,0.107533,0.205357,0.0939683,0.0737868,0.14494,0.0916959,0.185922,0.0628959,0.079329,0.0708494,0.119067,0.126277,0.120805,0.116856,0.0594117,0.18322,0.130718,0.173957,0.102538,0.0881736,0.0905375,0.195293,0.0806536,0.0875723,0.202957,0.202715,0.0825997,0.154516,0.0620434,0.0881736,0.11089,0.083238,0.0661941,0.127097,0.0759137,0.0939683,0.142074,0.115508,0.124203,0.0496264,0.0934055,0.188052,0.14494,0.125452,0.137653,0.083238,0.141347,0.161288,0.171625,0.165326,0.210752,0.135001,0.194278,0.298911,0.393654,0.0474247,0.096731,0.083238,0.129523,0.1813,0.117302,0.0851234,0.0786581,0.0661941,0.234286,0.0786581,0.134233,0.15151,0.0950834,0.0988836,0.0978135,0.0939683,0.110417,0.139513,0.178236,0.103049,0.0887707,0.0645664,0.103049,0.069333,0.104063,0.177954,0.245665,0.124203,0.128316,0.0766092,0.107533,0.207488,0.174246,0.18858,0.194023,0.068562,0.152184,0.171331,0.136145,0.117746,0.169852,0.101508,0.159066,0.130718,0.121664,0.0983501,0.176538,0.155175,0.125452,0.068562,0.170149,0.112762,0.357348,0.181851,0.164404,0.112762,0.102538,0.0645664,0.118628,0.106554,0.0669929,0.153853,0.233678,0.0961851,0.124203,0.0669929,0.0653854,0.069333,0.0708494,0.0677821,0.0911186,0.0863568,0.0999418,0.223445,0.083238,0.143515,0.135384,0.152184,0.133073,0.131113,0.210752,0.147398,0.104566,0.104063,0.222151,0.219314,0.0899526,0.0766092,0.122516,0.0537572,0.083238,0.210752,0.109942,0.0779812,0.156158,0.194532,0.0779812,0.0939683,0.171331,0.18619,0.130718,0.0994142,0.0575894,0.0934055,0.498231,0.139513,0.154846,0.11596,0.132293,0.121235,0.104063,0.090749,0.116704,0.153813,0.16629,0.0985397,0.0960458,0.118377,0.110131,0.157408,0.119213,0.0949505,0.111205,0.18919,0.112532,0.126822,0.102527,0.214731,0.0966201,0.114929,0.0993752,0.0991935,0.105139,0.151979,0.114397,0.104302,0.130213,0.106944,0.152322,0.161743,0.102532,0.150095,0.0947379,0.0993718,0.113918,0.258712,0.158557,0.143698,0.203334,0.116744,0.155932,0.161394,0.105142,0.190012,0.158373,0.157726,0.147381,0.172403,0.313651,0.301687,0.0992304,0.173348,0.108649,0.106098,0.121477,0.125218,0.132228,0.0788935,0.093598,0.0955152,0.108078,0.109098,0.188622,0.158409,0.0969167,0.0971624,0.201322,0.104549,0.113651,0.0943148,0.119262,0.228198,0.115508,0.104452,0.115931,0.120471,0.133761,0.108586,0.0991935,0.0908363,0.252198,0.246894,0.249043,0.241507,0.119227,0.110568,0.111659,0.15088,0.15427,0.108418,0.110938,0.118754,0.110961,0.146416,0.0976521,0.0919368,0.231741,0.124826,0.104107,0.140513,0.111499,0.093764,0.0958188,0.11485,0.0968834,0.126341,0.0931515,0.170927,0.152285,0.192997,0.0907739,0.145509,0.101347,0.167074,0.0953752,0.134484,0.115243,0.0970051,0.168293,0.111933,0.122054,0.109738,0.118692,0.11401,0.114843,0.1002,0.192922,0.0840908,0.0808214,0.094243,0.13789,0.113021,0.152683,0.262115,0.200455,0.180033,0.109058,0.115221,0.103406,0.164048,0.101993,0.100358,0.11574,0.111863,0.106373,0.103232,0.0914379,0.0952954,0.33009,0.131331,0.111542,0.0960026,0.249246,0.124323,0.134283,0.113387,0.108213,0.179629,0.185976,0.0947661,0.144362,0.123987,0.102149,0.106763,0.106944,0.112579,0.172287,0.090749,0.10125,0.0999043,0.142919,0.1125,0.161563,0.114575,0.0966373,0.113026,0.098018,0.112032,0.160359,0.109578,0.172983,0.134971,0.0977417,0.121457,0.15514,0.101494,0.115629,0.137961,0.163722,0.100257,0.202872,0.0923802,0.0914016,0.0815159,0.109007,0.150407,0.109774,0.170878,0.0913048,0.108388,0.138365,0.196052,0.104752,0.109039,0.195719,0.148168,0.136345,0.111729,0.0926641,0.102643,0.0951982,0.115368,0.147201,0.122358,0.115283,0.0958027,0.124523,0.168806,0.094287,0.146626,0.18205,0.101971,0.122115,0.0960743,0.0958921,0.106269,0.0782043,0.11858,0.11732,0.0993161,0.183186,0.128568,0.0948048,0.116349,0.0991935,0.105405,0.109201,0.102186,0.182674,2.06386
min,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,0,1
25%,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4
50%,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5
75%,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7
max,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,8
counts,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317,9317
uniques,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,8


In [13]:
df.columns

Index(['Lung', 'Breast', 'Colon', 'Central_Nervous_System', 'Pancreas',
       'Ovary', 'Prostate', 'Uterus', 'Kidney', 'Head_and_Neck',
       ...
       'cna_TOP1', 'cna_TP53', 'cna_TSC1', 'cna_TSC2', 'cna_TSHR', 'cna_VHL',
       'cna_WT1', 'cna_XPO1', 'CIN', 'destination'],
      dtype='object', length=494)

In [None]:
#df.drop(df.iloc[:, 30:450], inplace=True, axis=1)

In [14]:
#df.columns

In [15]:
df.head().T.head(500)

Unnamed: 0,0,1,2,3,4
Lung,0,0,0,0,1
Breast,1,0,0,0,0
Colon,0,0,0,0,0
Central_Nervous_System,0,0,0,0,0
Pancreas,0,0,0,0,0
Ovary,0,0,0,0,0
Prostate,0,0,0,0,0
Uterus,0,1,1,1,0
Kidney,0,0,0,0,0
Head_and_Neck,0,0,0,0,0


In [16]:
cat_vars = [col for col in df.columns if col not in ['destination']]
cat_vars

['Lung',
 'Breast',
 'Colon',
 'Central_Nervous_System',
 'Pancreas',
 'Ovary',
 'Prostate',
 'Uterus',
 'Kidney',
 'Head_and_Neck',
 'Stomach',
 'Bladder',
 'Liver',
 'Skin',
 'Thyroid',
 'mut_ABL1',
 'mut_AKT1',
 'mut_AKT2',
 'mut_AKT3',
 'mut_ALK',
 'mut_ALOX12B',
 'mut_AMER1',
 'mut_APC',
 'mut_AR',
 'mut_ARAF',
 'mut_ARID1A',
 'mut_ARID2',
 'mut_ASXL1',
 'mut_ATM',
 'mut_ATR',
 'mut_ATRX',
 'mut_AURKA',
 'mut_AURKB',
 'mut_AXL',
 'mut_BAP1',
 'mut_BARD1',
 'mut_BCL2',
 'mut_BCL6',
 'mut_BCOR',
 'mut_BLM',
 'mut_BRAF',
 'mut_BRCA1',
 'mut_BRCA2',
 'mut_BRIP1',
 'mut_BTK',
 'mut_CARD11',
 'mut_CASP8',
 'mut_CBFB',
 'mut_CBL',
 'mut_CCND1',
 'mut_CCND2',
 'mut_CCND3',
 'mut_CCNE1',
 'mut_CD79A',
 'mut_CD79B',
 'mut_CDC73',
 'mut_CDH1',
 'mut_CDK12',
 'mut_CDK4',
 'mut_CDK6',
 'mut_CDK8',
 'mut_CDKN1B',
 'mut_CDKN2A',
 'mut_CDKN2B',
 'mut_CDKN2C',
 'mut_CEBPA',
 'mut_CHEK1',
 'mut_CHEK2',
 'mut_CIC',
 'mut_CREBBP',
 'mut_CRKL',
 'mut_CRLF2',
 'mut_CSF1R',
 'mut_CTCF',
 'mut_CTNNB1',
 

In [17]:
dep = 'destination'
data_df = df[cat_vars + [dep]].copy()

In [18]:
for v in cat_vars: data_df[v] = data_df[v].astype('category').cat.as_ordered()

In [19]:
n = len(data_df) ; n

9317

In [20]:
idxs = get_cv_idxs(n, val_pct=1000/n)
samp = df.iloc[idxs]
samp_size = len(samp)
samp.head()

Unnamed: 0,Lung,Breast,Colon,Central_Nervous_System,Pancreas,Ovary,Prostate,Uterus,Kidney,Head_and_Neck,Stomach,Bladder,Liver,Skin,Thyroid,mut_ABL1,mut_AKT1,mut_AKT2,mut_AKT3,mut_ALK,mut_ALOX12B,mut_AMER1,mut_APC,mut_AR,mut_ARAF,mut_ARID1A,mut_ARID2,mut_ASXL1,mut_ATM,mut_ATR,mut_ATRX,mut_AURKA,mut_AURKB,mut_AXL,mut_BAP1,mut_BARD1,mut_BCL2,mut_BCL6,mut_BCOR,mut_BLM,mut_BRAF,mut_BRCA1,mut_BRCA2,mut_BRIP1,mut_BTK,mut_CARD11,mut_CASP8,mut_CBFB,mut_CBL,mut_CCND1,mut_CCND2,mut_CCND3,mut_CCNE1,mut_CD79A,mut_CD79B,mut_CDC73,mut_CDH1,mut_CDK12,mut_CDK4,mut_CDK6,mut_CDK8,mut_CDKN1B,mut_CDKN2A,mut_CDKN2B,mut_CDKN2C,mut_CEBPA,mut_CHEK1,mut_CHEK2,mut_CIC,mut_CREBBP,mut_CRKL,mut_CRLF2,mut_CSF1R,mut_CTCF,mut_CTNNB1,mut_DAXX,mut_DDR2,mut_DIS3,mut_DNMT3A,mut_DOT1L,mut_EGFR,mut_EP300,mut_EPHA3,mut_EPHA5,mut_EPHB1,mut_ERBB2,mut_ERBB3,mut_ERBB4,mut_ERG,mut_ESR1,mut_EZH2,mut_FAM46C,mut_FANCA,mut_FANCC,mut_FBXW7,mut_FGF19,mut_FGF3,mut_FGF4,mut_FGFR1,mut_FGFR2,mut_FGFR3,mut_FGFR4,mut_FIP1L1,mut_FLT1,mut_FLT3,mut_FLT4,mut_FOXL2,mut_GATA1,mut_GATA2,mut_GATA3,mut_GNA11,mut_GNAQ,mut_GNAS,mut_GRIN2A,mut_GSK3B,mut_HGF,mut_HLA_A,mut_HRAS,mut_IDH1,mut_IDH2,mut_IGF1,mut_IGF1R,mut_IGF2,mut_IKBKE,mut_IKZF1,mut_IL7R,mut_INHBA,mut_INSRR,mut_IRF4,mut_IRS2,mut_JAK1,mut_JAK2,mut_JAK3,mut_JUN,mut_KDM5A,mut_KDM5C,mut_KDM6A,mut_KDR,mut_KEAP1,mut_KIT,mut_KMT2A,mut_KMT2D,mut_KRAS,mut_LMO1,mut_MAP2K1,mut_MAP2K2,mut_MAP2K4,mut_MAP3K1,mut_MAP3K13,mut_MCL1,mut_MDM2,mut_MDM4,mut_MED12,mut_MEF2B,mut_MEN1,mut_MET,mut_MITF,mut_MLH1,mut_MPL,mut_MRE11A,mut_MSH2,mut_MSH6,mut_MTOR,mut_MUTYH,mut_MYC,mut_MYCL,mut_MYCN,mut_MYD88,mut_NBN,mut_NCOR1,mut_NF1,mut_NF2,mut_NFE2L2,mut_NFKBIA,mut_NKX2_1,mut_NOTCH1,mut_NOTCH2,mut_NOTCH3,mut_NOTCH4,mut_NPM1,mut_NRAS,mut_NSD1,mut_NTRK1,mut_NTRK2,mut_NTRK3,mut_NUP93,mut_PAK7,mut_PALB2,mut_PARP1,mut_PAX5,mut_PBRM1,mut_PDGFRA,mut_PDGFRB,mut_PDPK1,mut_PIK3C2G,mut_PIK3C3,mut_PIK3CA,mut_PIK3CG,mut_PIK3R1,mut_PIK3R2,mut_PMS2,mut_PNRC1,mut_PPP2R1A,mut_PRDM1,mut_PRKAR1A,mut_PTCH1,mut_PTEN,mut_PTPN11,mut_RAD50,mut_RAD51,mut_RAD51B,mut_RAD51C,mut_RAD51D,mut_RAD52,mut_RAD54L,mut_RAF1,mut_RARA,mut_RB1,mut_REL,mut_RET,mut_RICTOR,mut_RNF43,mut_RPTOR,mut_RUNX1,mut_SETD2,mut_SF3B1,mut_SH2B3,mut_SMAD2,mut_SMAD4,mut_SMARCA4,mut_SMARCB1,mut_SMARCD1,mut_SMO,mut_SOCS1,mut_SOX2,mut_SPEN,mut_SPOP,mut_SRC,mut_STAG2,mut_STK11,mut_SUFU,mut_SYK,mut_TBX3,mut_TET2,mut_TGFBR2,mut_TNFAIP3,mut_TNFRSF14,mut_TOP1,mut_TP53,mut_TSC1,mut_TSC2,mut_TSHR,mut_VHL,mut_WT1,mut_XPO1,cna_ABL1,cna_AKT1,cna_AKT2,cna_AKT3,cna_ALK,cna_ALOX12B,cna_AMER1,cna_APC,cna_AR,cna_ARAF,cna_ARID1A,cna_ARID2,cna_ASXL1,cna_ATM,cna_ATR,cna_ATRX,cna_AURKA,cna_AURKB,cna_AXL,cna_BAP1,cna_BARD1,cna_BCL2,cna_BCL6,cna_BCOR,cna_BLM,cna_BRAF,cna_BRCA1,cna_BRCA2,cna_BRIP1,cna_BTK,cna_CARD11,cna_CASP8,cna_CBFB,cna_CBL,cna_CCND1,cna_CCND2,cna_CCND3,cna_CCNE1,cna_CD79A,cna_CD79B,cna_CDC73,cna_CDH1,cna_CDK12,cna_CDK4,cna_CDK6,cna_CDK8,cna_CDKN1B,cna_CDKN2A,cna_CDKN2B,cna_CDKN2C,cna_CEBPA,cna_CHEK1,cna_CHEK2,cna_CIC,cna_CREBBP,cna_CRKL,cna_CRLF2,cna_CSF1R,cna_CTCF,cna_CTNNB1,cna_DAXX,cna_DDR2,cna_DIS3,cna_DNMT3A,cna_DOT1L,cna_EGFR,cna_EP300,cna_EPHA3,cna_EPHA5,cna_EPHB1,cna_ERBB2,cna_ERBB3,cna_ERBB4,cna_ERG,cna_ESR1,cna_EZH2,cna_FANCA,cna_FANCC,cna_FBXW7,cna_FGF19,cna_FGF3,cna_FGF4,cna_FGFR1,cna_FGFR2,cna_FGFR3,cna_FGFR4,cna_FLT1,cna_FLT3,cna_FLT4,cna_FOXL2,cna_GATA1,cna_GATA2,cna_GATA3,cna_GNA11,cna_GNAQ,cna_GNAS,cna_GRIN2A,cna_GSK3B,cna_HGF,cna_HLA_A,cna_HRAS,cna_IDH1,cna_IDH2,cna_IGF1,cna_IGF1R,cna_IGF2,cna_IKBKE,cna_IKZF1,cna_IL7R,cna_INHA,cna_INHBA,cna_IRF4,cna_IRS2,cna_JAK1,cna_JAK2,cna_JAK3,cna_JUN,cna_KDM5A,cna_KDM5C,cna_KDM6A,cna_KDR,cna_KEAP1,cna_KIT,cna_KMT2A,cna_KMT2D,cna_KRAS,cna_LMO1,cna_MAP2K1,cna_MAP2K2,cna_MAP2K4,cna_MAP3K1,cna_MAP3K13,cna_MCL1,cna_MDM2,cna_MDM4,cna_MED12,cna_MEF2B,cna_MEN1,cna_MET,cna_MITF,cna_MLH1,cna_MPL,cna_MRE11A,cna_MSH2,cna_MSH6,cna_MTOR,cna_MUTYH,cna_MYC,cna_MYCL,cna_MYCN,cna_MYD88,cna_NBN,cna_NCOR1,cna_NF1,cna_NF2,cna_NFE2L2,cna_NFKBIA,cna_NKX2_1,cna_NOTCH1,cna_NOTCH2,cna_NOTCH3,cna_NOTCH4,cna_NPM1,cna_NRAS,cna_NSD1,cna_NTRK1,cna_NTRK2,cna_NTRK3,cna_NUP93,cna_PAK7,cna_PALB2,cna_PARP1,cna_PAX5,cna_PBRM1,cna_PDGFRA,cna_PDGFRB,cna_PDPK1,cna_PIK3C2G,cna_PIK3C3,cna_PIK3CA,cna_PIK3CG,cna_PIK3R1,cna_PIK3R2,cna_PMS2,cna_PNRC1,cna_PPP2R1A,cna_PRDM1,cna_PRKAR1A,cna_PTCH1,cna_PTEN,cna_PTPN11,cna_RAD50,cna_RAD51,cna_RAD51B,cna_RAD51C,cna_RAD51D,cna_RAD52,cna_RAD54L,cna_RAF1,cna_RARA,cna_RB1,cna_REL,cna_RET,cna_RICTOR,cna_RNF43,cna_RPTOR,cna_RUNX1,cna_SETD2,cna_SF3B1,cna_SH2B3,cna_SMAD2,cna_SMAD4,cna_SMARCA4,cna_SMARCB1,cna_SMARCD1,cna_SMO,cna_SOX2,cna_SPEN,cna_SPOP,cna_SRC,cna_STAG2,cna_STK11,cna_SUFU,cna_SYK,cna_TBX3,cna_TET2,cna_TGFBR2,cna_TNFAIP3,cna_TNFRSF14,cna_TOP1,cna_TP53,cna_TSC1,cna_TSC2,cna_TSHR,cna_VHL,cna_WT1,cna_XPO1,CIN,destination
1512,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
7612,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6
7103,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2
960,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7
2770,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2


In [21]:
samp.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1000 entries, 1512 to 7295
Columns: 494 entries, Lung to destination
dtypes: int64(494)
memory usage: 3.8 MB


In [23]:
cat_sz = [(c, len(data_df[c].cat.categories) + 1) for c in cat_vars]

In [24]:
cat_sz

[('Lung', 3),
 ('Breast', 3),
 ('Colon', 3),
 ('Central_Nervous_System', 3),
 ('Pancreas', 3),
 ('Ovary', 3),
 ('Prostate', 3),
 ('Uterus', 3),
 ('Kidney', 3),
 ('Head_and_Neck', 3),
 ('Stomach', 3),
 ('Bladder', 3),
 ('Liver', 3),
 ('Skin', 3),
 ('Thyroid', 3),
 ('mut_ABL1', 3),
 ('mut_AKT1', 3),
 ('mut_AKT2', 3),
 ('mut_AKT3', 3),
 ('mut_ALK', 3),
 ('mut_ALOX12B', 3),
 ('mut_AMER1', 3),
 ('mut_APC', 3),
 ('mut_AR', 3),
 ('mut_ARAF', 3),
 ('mut_ARID1A', 3),
 ('mut_ARID2', 3),
 ('mut_ASXL1', 3),
 ('mut_ATM', 3),
 ('mut_ATR', 3),
 ('mut_ATRX', 3),
 ('mut_AURKA', 3),
 ('mut_AURKB', 3),
 ('mut_AXL', 3),
 ('mut_BAP1', 3),
 ('mut_BARD1', 3),
 ('mut_BCL2', 3),
 ('mut_BCL6', 3),
 ('mut_BCOR', 3),
 ('mut_BLM', 3),
 ('mut_BRAF', 3),
 ('mut_BRCA1', 3),
 ('mut_BRCA2', 3),
 ('mut_BRIP1', 3),
 ('mut_BTK', 3),
 ('mut_CARD11', 3),
 ('mut_CASP8', 3),
 ('mut_CBFB', 3),
 ('mut_CBL', 3),
 ('mut_CCND1', 3),
 ('mut_CCND2', 3),
 ('mut_CCND3', 3),
 ('mut_CCNE1', 3),
 ('mut_CD79A', 3),
 ('mut_CD79B', 3),
 ('m

In [25]:
emb_szs = [(c, min(50, (c+1)//2)) for _,c in cat_sz]
emb_szs

[(3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 (3, 2),
 

In [26]:
model = MixedInputModel(emb_szs, n_cont=0, emb_drop=0, out_sz=2, szs=[500], drops=[0.5]).cuda()

In [27]:
bm = BasicModel(model, 'multiclass_classifier')

In [28]:
train_ratio = 0.75
train_size = int(samp_size * train_ratio); train_size
val_idx = list(range(train_size, len(df)))
val_idx[0]

750

In [None]:
md = ColumnarModelData.from_data_frames(PATH, )

In [None]:
df, y, nas, mapper = proc_df(samp, 'destination', do_scale=True)

In [None]:
df.head()

In [None]:
y

In [None]:
nas

In [None]:
yl = np.log(y)

In [None]:
yl

In [None]:
#df_test, _, nas, mapper = proc_df()

In [None]:
train_ratio = 0.75
train_size = int(samp_size * train_ratio); train_size

In [None]:
val_idx = list(range(train_size, len(df)))
val_idx[0]

In [None]:
def inv_y(a): return np.exp(a)

def exp_rmspe(y_pred, targ):
    targ = inv_y(targ)
    pct_var = (targ - inv_y(y_pred))/targ
    return math.sqrt((pct_var**2).mean())

max_log_y = np.max(yl)
y_range = (0, max_log_y*1.2)

In [None]:
max_log_y

In [None]:
yl

In [None]:
y_range

In [None]:
#??ColumnarModelData

In [None]:
md = ColumnarModelData.from_data_frame(PATH, val_idx, df, yl.astype(np.float32), cat_flds=cat_vars, bs=128)

In [None]:
cat_sz = [(c, len(data_df[c].cat.categories) + 1) for c in cat_vars]

In [None]:
#cat_sz

In [None]:
emb_szs = [(c, min(50, (c+1)//2)) for _, c in cat_sz]

In [None]:
#emb_szs

In [None]:
m = md.get_learner(emb_szs, len(df.columns)-len(cat_vars),
                   0.04, 1, [1000,500], [0.001,0.01], y_range=y_range)
lr = 1e-3

In [None]:
df.head()

In [None]:
print(m)

In [None]:
m.fit(lr, 1, metrics=[exp_rmspe], cycle_len=1)