# Solution

This notebook shows the procedure we used to generate the final submission for the challenge. We first used the median of 146 models (selected by leaving 1 drug out for NK cells) as the base prediction. This model was trained only on the top 64 most variable genes (sorted by variance), and was used to predict the ~18K signed log-pvalues.

We then trained the same NN on a subset of the data. This produced again 146 models that we used to generate a median prediction only on that subset of the data. 

Finally, we replaced the base predictions with the for the final submission.

In [1]:
import scape
import pandas as pd
import numpy as np
import tensorflow as tf

scape.__version__, pd.__version__, np.__version__, tf.__version__




('0.1.0', '2.1.3', '1.26.2', '2.15.0')

In [2]:
df_de = scape.io.load_slogpvals("_data/de_train.parquet")
df_lfc = scape.io.load_lfc("_data/lfc_train.parquet")

# Make sure rows/columns are in the same order
df_lfc = df_lfc.loc[df_de.index, df_de.columns]
df_de.shape, df_lfc.shape

((614, 18211), (614, 18211))

In [3]:
# We select only a subset of the genes for the model (top most variant genes)
n_genes = 64
top_genes = scape.util.select_top_variable([df_de], k=n_genes)

## Base predictions

In [4]:
cell = "NK cells"
drugs = df_de.loc[df_de.index.get_level_values("cell_type") == cell].index.get_level_values("sm_name").unique().tolist()
len(drugs)

146

In [5]:
df_id_map = pd.read_csv("_data/id_map.zip")
df_sub_example = pd.read_csv("_data/sample_submission.zip", index_col = 0)
df_sub_ix = df_id_map.set_index(["cell_type", "sm_name"])
df_sub_ix

Unnamed: 0_level_0,Unnamed: 1_level_0,id
cell_type,sm_name,Unnamed: 2_level_1
B cells,5-(9-Isopropyl-8-methyl-2-morpholino-9H-purin-6-yl)pyrimidin-2-amine,0
B cells,ABT-199 (GDC-0199),1
B cells,ABT737,2
B cells,AMD-070 (hydrochloride),3
B cells,AT 7867,4
...,...,...
Myeloid cells,Vandetanib,250
Myeloid cells,Vanoxerine,251
Myeloid cells,Vardenafil,252
Myeloid cells,Vorinostat,253


In [6]:
base_predictions = []
for i, d in enumerate(drugs):
    print(i, d)
    scm = scape.model.create_default_model(n_genes, df_de, df_lfc)
    result = scm.train(
        val_cells=[cell], 
        val_drugs=[d],
        input_columns=top_genes,
        epochs=300,
        output_folder="_models",
        config_file_name="config.pkl",
        model_file_name=f"drug{i}.keras",
        baselines=["zero", "slogpval_drug"],
    )
    # Collect prediction in the OOF data
    df_pred = scm.predict(df_sub_ix)
    df_pred = df_pred.loc[:, df_sub.columns]
    base_predictions.append(df_pred)

df_sub = pd.DataFrame(np.median(base_predictions, axis=0), index=df_sub_ix.index, columns=df_sub.columns)
df_sub.to_csv("base_predictions.csv")
df_sub


0 Clotrimazole

Baseline zero MRRMSE: 0.6085
Baseline slogpval_drug MRRMSE: 0.6606
Model size (MB): 36.76
Model inputs: ['in_slogpval_drug', 'in_lfc_drug', 'in_slogpval_cell_encoder', 'in_lfc_cell_encoder', 'in_slogpval_cell_decoder', 'in_lfc_cell_decoder']
Model will be saved to _models\drug0.keras

Epoch    1: val_loss None -> 0.6024, train loss: 1.3084 (lr=0.00500)
	 > Improvement factor/percentage zero                 (err=0.6085): 1.0101, 1.00%
	 > Improvement factor/percentage slogpval_drug        (err=0.6606): 1.0966, 8.81%
Epoch    2: val_loss 0.6024 -> 0.5820, train loss: 1.2810 (lr=0.00500)
	 > Improvement factor/percentage zero                 (err=0.6085): 1.0455, 4.35%
	 > Improvement factor/percentage slogpval_drug        (err=0.6606): 1.1351, 11.90%
Epoch   17: val_loss 0.5820 -> 0.5656, train loss: 1.1059 (lr=0.00500)
	 > Improvement factor/percentage zero                 (err=0.6085): 1.0759, 7.05%
	 > Improvement factor/percentage slogpval_drug        (err=0.6606): 1.

Unnamed: 0_level_0,Unnamed: 1_level_0,A1BG,A1BG-AS1,A2M,A2M-AS1,A2MP1,A4GALT,AAAS,AACS,AAGAB,AAK1,...,ZUP1,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1
cell_type,sm_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
B cells,5-(9-Isopropyl-8-methyl-2-morpholino-9H-purin-6-yl)pyrimidin-2-amine,-0.000424,0.167305,0.395157,0.812879,0.740743,0.243306,-0.086658,0.236474,-0.080557,0.524134,...,-0.872206,-0.376905,-0.276706,0.092664,0.048062,0.329522,0.456628,0.539261,-0.697327,0.076841
B cells,ABT-199 (GDC-0199),-0.022968,0.036839,-0.023025,-0.043956,0.290724,0.219106,-0.086978,0.087635,0.148139,-0.197169,...,-0.034074,-0.040380,-0.183993,0.225442,0.053655,0.088075,0.168757,0.059710,-0.321941,-0.194725
B cells,ABT737,0.317689,0.075677,0.113208,0.318973,1.534299,1.270523,-0.087928,0.198073,0.274882,0.097781,...,-0.216399,-0.045569,-0.117039,0.327684,0.196373,0.225913,0.391139,0.195075,-0.540607,-0.198220
B cells,AMD-070 (hydrochloride),-0.104568,0.127176,0.018801,0.052044,0.060773,-0.038973,-0.133540,0.123870,0.117394,-0.134618,...,-0.226756,-0.036166,-0.220077,0.235155,0.042118,0.099197,0.185507,0.173660,-0.262918,-0.085755
B cells,AT 7867,0.083571,0.070740,0.156862,0.344585,0.722629,0.363112,-0.087192,0.194202,0.138275,0.220319,...,-0.506477,-0.107950,-0.253322,0.141190,0.103137,0.157693,0.368213,0.324066,-0.452249,0.012677
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Myeloid cells,Vandetanib,0.035938,-0.084461,-0.653102,-0.640561,0.359990,-0.344047,0.325969,0.174660,0.393311,0.175434,...,-0.255471,-0.244713,-0.095647,-0.282693,-0.014337,-0.163890,-0.211021,-0.098819,-0.169934,0.107705
Myeloid cells,Vanoxerine,0.266238,-0.101833,-0.441960,-0.379708,1.801045,0.658361,0.321461,0.045544,0.319278,0.260782,...,-0.490423,-0.410556,-0.127375,-0.466314,0.316248,-0.088692,-0.188443,-0.093596,-0.215230,-0.052415
Myeloid cells,Vardenafil,0.061215,0.014784,-0.075429,-0.325613,0.455803,-0.021895,0.176454,0.179469,0.270306,0.233661,...,-0.356186,-0.183518,0.004371,0.028076,0.099418,-0.006284,-0.058281,0.037225,-0.063435,-0.017980
Myeloid cells,Vorinostat,0.469729,0.171966,-2.308502,-0.382731,3.319891,0.674685,0.635902,0.084603,0.970868,0.938001,...,-0.372372,-0.522752,-1.362944,-1.399575,0.246517,-0.252140,-0.715950,0.087835,-0.282777,0.034445


## Enhanced predictions

We selected a subset of the dataset consisting of the top 256 genes and top 60 drugs (sorted by variance). We trained the same model as before on this subset of the data, and used the 146 models to generate a median prediction on this subset of the data. We finally merged the results with the base predictions using a weighted mean (0.80 for the enhaced predictions in the subset of 256 genes and 60 drugs, 0.20 for the base prediction)

In [7]:
sub_drugs = df_sub_ix.index.get_level_values("sm_name").unique().tolist()
len(sub_drugs)

129

In [8]:
min_n_top_drugs = 50
n_genes = 256

# This time, exclude control drugs for the calculation of the top genes, in order to
# introduce more variability in the model
top_genes = scape.util.select_top_variable([df_de], k=n_genes, exclude_controls=True)

In [9]:
df_drug_effects = pd.DataFrame(df_de.T.pow(2).mean().pow(0.5).groupby("sm_name").mean().sort_values(ascending=False), columns=["effect"])
df_drug_effects["effect_norm"] = (df_drug_effects["effect"] / df_drug_effects["effect"].sum())*100
df_drug_effects

Unnamed: 0_level_0,effect,effect_norm
sm_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Belinostat,11.670403,6.522607
Alvocidib,8.718604,4.872841
MLN 2238,7.945529,4.440769
CEP-18770 (Delanzomib),7.204718,4.026728
Oprozomib (ONX 0912),6.818472,3.810855
...,...,...
RN-486,0.534020,0.298465
OSI-930,0.533040,0.297917
K-02288,0.525925,0.293940
Protriptyline,0.522145,0.291828


In [10]:
top_sub_drugs = df_drug_effects.loc[sub_drugs].sort_values("effect", ascending=False).head(min_n_top_drugs).index.tolist()
len(top_sub_drugs)

50

In [11]:
top_all_drugs = df_drug_effects.head(min_n_top_drugs).index.tolist()
top_drugs = set(top_all_drugs) | set(top_sub_drugs)
len(top_drugs)

60

In [12]:
df_de_c = df_de[df_de.index.get_level_values("sm_name").isin(top_drugs)]
df_de_c = df_de_c.loc[:, top_genes]
df_de_c

Unnamed: 0_level_0,Unnamed: 1_level_0,PMF1,CARD8,TXN,GPSM3,MPC2,LIMD2,CIRBP,CACYBP,BTN3A3,LINC01754,...,TMSB4X,PHPT1,HLA-C,CCL28,DNAJB1,TIMP1,TSTD1,FGD3,AHSA1,SSSCA1
cell_type,sm_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
NK cells,Mometasone Furoate,-1.601811,1.088172,-0.854849,-0.783131,0.461875,-4.446680,1.063768,-0.177853,-1.306615,3.215363,...,-0.709744,-2.434268,-2.291774,-0.049022,0.167938,-1.490780,-4.473388,0.878731,0.176429,-0.647815
T cells CD4+,Mometasone Furoate,-0.958129,-0.688085,0.020508,-1.151579,0.127972,-13.257320,0.439596,-0.848501,-2.350567,0.373264,...,-18.455302,-2.652384,-2.967686,-0.115619,-0.485290,-6.028989,-0.349467,-0.937968,-0.019402,-3.810257
T cells CD8+,Mometasone Furoate,-0.286406,-0.559175,0.422942,-0.479665,0.749796,-4.208248,-0.120351,0.081561,-0.072891,0.306493,...,-5.701234,-0.266434,-1.121729,-0.762128,-0.433669,0.214633,-1.002865,-0.983160,-0.115586,0.476012
T regulatory cells,Mometasone Furoate,-0.319948,-1.002340,-1.018861,0.080694,-0.218847,-1.651331,-0.555472,-0.556263,-0.226116,1.298248,...,-4.127160,-0.550117,-1.111563,-0.205221,-0.070733,-0.530995,-0.524443,0.278298,0.553118,0.360728
NK cells,Bosutinib,-0.621481,-1.151305,-0.454688,-0.284330,0.294029,-1.325008,-0.874206,-0.445198,0.242901,1.632421,...,-0.862431,-0.440624,-0.738548,-0.438567,-0.080423,0.168554,-1.350785,0.518682,0.635585,0.269281
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
T regulatory cells,5-(9-Isopropyl-8-methyl-2-morpholino-9H-purin-6-yl)pyrimidin-2-amine,-0.803600,1.350889,-1.952861,-0.760297,-0.997029,-0.609455,0.965889,-1.663079,0.127370,2.604641,...,-0.908227,-0.961550,-1.530040,0.294195,1.526852,-1.107965,-0.903197,-0.218520,-2.152501,-0.311559
NK cells,CGM-097,-0.604230,-2.680048,-0.830130,-2.286034,-0.900450,-1.386578,0.774331,-0.315077,0.530918,5.944322,...,-0.037932,17.956269,-0.222570,-1.267664,-0.029386,0.066338,-1.137688,0.326771,-0.405877,-2.897375
T cells CD4+,CGM-097,-0.415829,-0.241372,-0.106837,-2.345269,-1.450760,-2.659973,0.230902,-1.331745,-0.576835,6.142039,...,-1.664860,25.068613,-0.270440,0.244052,0.427935,-0.531699,-1.349872,0.356935,-0.425045,-0.192022
T cells CD8+,CGM-097,3.356807,1.085161,1.900394,-0.011922,0.045909,-5.330285,6.199376,0.106071,1.308236,-1.754835,...,51.526862,25.035836,56.694895,-1.196250,-0.567636,0.006408,-0.970049,-0.750767,3.906063,1.519298


In [13]:
df_lfc_c = df_lfc.loc[df_de_c.index, df_de_c.columns]
df_lfc_c.shape

(256, 256)

In [14]:
enhanced_predictions = []
for i, d in enumerate(top_drugs):
    print(i, d)
    scm = scape.model.create_default_model(n_genes, df_de_c, df_lfc_c)
    result = scm.train(
        val_cells=[cell], 
        val_drugs=[d],
        input_columns=top_genes,
        epochs=800,
        output_folder="_models",
        config_file_name="enhanced_config.pkl",
        model_file_name=f"enhanced_drug{i}.keras",
        baselines=["zero", "slogpval_drug"],
    )
    # Collect prediction in the OOF data
    df_pred = scm.predict(df_sub_ix)
    enhanced_predictions.append(df_pred)

df_sub_enhanced = pd.DataFrame(np.median(enhanced_predictions, axis=0), index=df_sub_ix.index, columns=df_de_c.columns)
df_sub_enhanced.to_csv("enhanced_predictions.csv")
df_sub_enhanced


0 Resminostat
Baseline zero MRRMSE: 8.5360
Baseline slogpval_drug MRRMSE: 4.9104
Model size (MB): 2.11
Model inputs: ['in_slogpval_drug', 'in_lfc_drug', 'in_slogpval_cell_encoder', 'in_lfc_cell_encoder', 'in_slogpval_cell_decoder', 'in_lfc_cell_decoder']
Model will be saved to _models\enhanced_drug0.keras
Epoch    1: val_loss None -> 8.5110, train loss: 5.1100 (lr=0.00500)
	 > Improvement factor/percentage zero                 (err=8.5360): 1.0029, 0.29%
	 > Improvement factor/percentage slogpval_drug        (err=4.9104): 0.5769, -73.33%
Epoch   58: val_loss 8.5110 -> 5.2186, train loss: 2.8138 (lr=0.00500)
	 > Improvement factor/percentage zero                 (err=8.5360): 1.6357, 38.86%
	 > Improvement factor/percentage slogpval_drug        (err=4.9104): 0.9409, -6.28%
Epoch   66: val_loss 5.2186 -> 4.8500, train loss: 2.6064 (lr=0.00500)
	 > Improvement factor/percentage zero                 (err=8.5360): 1.7600, 43.18%
	 > Improvement factor/percentage slogpval_drug        (err=4.

Unnamed: 0_level_0,Unnamed: 1_level_0,PMF1,CARD8,TXN,GPSM3,MPC2,LIMD2,CIRBP,CACYBP,BTN3A3,LINC01754,...,TMSB4X,PHPT1,HLA-C,CCL28,DNAJB1,TIMP1,TSTD1,FGD3,AHSA1,SSSCA1
cell_type,sm_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
B cells,5-(9-Isopropyl-8-methyl-2-morpholino-9H-purin-6-yl)pyrimidin-2-amine,-0.410959,1.372257,-2.172117,0.401408,-0.741207,1.779929,1.464998,-1.452526,0.615474,-0.245276,...,-6.084822,-4.142362,-0.024596,-0.248044,-0.896683,-0.782329,0.201657,0.697434,-1.108682,-1.789396
B cells,ABT-199 (GDC-0199),,,,,,,,,,,...,,,,,,,,,,
B cells,ABT737,-0.045752,0.430725,-0.053870,-0.312108,-0.185061,0.044129,-1.693828,0.542060,-0.259803,1.513517,...,-2.820948,-1.038401,-0.898505,-0.911630,0.023655,-0.362219,-0.610743,0.029548,0.134583,-0.487404
B cells,AMD-070 (hydrochloride),,,,,,,,,,,...,,,,,,,,,,
B cells,AT 7867,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Myeloid cells,Vandetanib,,,,,,,,,,,...,,,,,,,,,,
Myeloid cells,Vanoxerine,,,,,,,,,,,...,,,,,,,,,,
Myeloid cells,Vardenafil,,,,,,,,,,,...,,,,,,,,,,
Myeloid cells,Vorinostat,-8.543391,-5.231460,1.738729,-8.794431,11.979540,-9.772974,-5.885924,0.354582,-0.265087,3.670224,...,-4.247910,-9.099482,-1.167702,0.733647,-0.614300,1.273509,5.008329,-0.607152,0.407371,3.194501


In [37]:
df_focus = df_sub.copy()
df_focus.update(df_sub_enhanced)
df_focus.loc[("Myeloid cells", "Vorinostat"), "PMF1"]

-8.543391

In [38]:
df_focus

Unnamed: 0_level_0,Unnamed: 1_level_0,A1BG,A1BG-AS1,A2M,A2M-AS1,A2MP1,A4GALT,AAAS,AACS,AAGAB,AAK1,...,ZUP1,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1
cell_type,sm_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
B cells,5-(9-Isopropyl-8-methyl-2-morpholino-9H-purin-6-yl)pyrimidin-2-amine,-0.000424,0.167305,0.395157,0.812879,0.740743,0.243306,-0.086658,0.236474,-0.080557,0.524134,...,-0.872206,-0.376905,-0.276706,0.092664,0.048062,0.329522,0.456628,0.539261,-0.697327,0.076841
B cells,ABT-199 (GDC-0199),-0.022968,0.036839,-0.023025,-0.043956,0.290724,0.219106,-0.086978,0.087635,0.148139,-0.197169,...,-0.034074,-0.040380,-0.183993,0.225442,0.053655,0.088075,0.168757,0.059710,-0.321941,-0.194725
B cells,ABT737,0.317689,0.075677,0.113208,0.318973,1.534299,1.270523,-0.087928,0.198073,0.274882,0.097781,...,-0.216399,-0.045569,-0.117039,0.327684,0.196373,0.225913,0.391139,0.195075,-0.540607,-0.198220
B cells,AMD-070 (hydrochloride),-0.104568,0.127176,0.018801,0.052044,0.060773,-0.038973,-0.133540,0.123870,0.117394,-0.134618,...,-0.226756,-0.036166,-0.220077,0.235155,0.042118,0.099197,0.185507,0.173660,-0.262918,-0.085755
B cells,AT 7867,0.083571,0.070740,0.156862,0.344585,0.722629,0.363112,-0.087192,0.194202,0.138275,0.220319,...,-0.506477,-0.107950,-0.253322,0.141190,0.103137,0.157693,0.368213,0.324066,-0.452249,0.012677
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Myeloid cells,Vandetanib,0.035938,-0.084461,-0.653102,-0.640561,0.359990,-0.344047,0.325969,0.174660,0.393311,0.175434,...,-0.255471,-0.244713,-0.095647,-0.282693,-0.014337,-0.163890,-0.211021,-0.098819,-0.169934,0.107705
Myeloid cells,Vanoxerine,0.266238,-0.101833,-0.441960,-0.379708,1.801045,0.658361,0.321461,0.045544,0.319278,0.260782,...,-0.490423,-0.410556,-0.127375,-0.466314,0.316248,-0.088692,-0.188443,-0.093596,-0.215230,-0.052415
Myeloid cells,Vardenafil,0.061215,0.014784,-0.075429,-0.325613,0.455803,-0.021895,0.176454,0.179469,0.270306,0.233661,...,-0.356186,-0.183518,0.004371,0.028076,0.099418,-0.006284,-0.058281,0.037225,-0.063435,-0.017980
Myeloid cells,Vorinostat,0.469729,0.171966,-2.308502,-0.382731,3.319891,0.674685,0.635902,0.084603,0.970868,0.938001,...,-0.372372,-0.522752,-1.362944,-1.399575,0.246517,-0.252140,-0.715950,0.087835,-0.282777,0.034445


In [39]:
df_submission = 0.80 * df_focus + 0.20 * df_sub
df_submission

Unnamed: 0_level_0,Unnamed: 1_level_0,A1BG,A1BG-AS1,A2M,A2M-AS1,A2MP1,A4GALT,AAAS,AACS,AAGAB,AAK1,...,ZUP1,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1
cell_type,sm_name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
B cells,5-(9-Isopropyl-8-methyl-2-morpholino-9H-purin-6-yl)pyrimidin-2-amine,-0.000424,0.167305,0.395157,0.812879,0.740743,0.243306,-0.086658,0.236474,-0.080557,0.524134,...,-0.872206,-0.376905,-0.276706,0.092664,0.048062,0.329522,0.456628,0.539261,-0.697327,0.076841
B cells,ABT-199 (GDC-0199),-0.022968,0.036839,-0.023025,-0.043956,0.290724,0.219106,-0.086978,0.087635,0.148139,-0.197169,...,-0.034074,-0.040380,-0.183993,0.225442,0.053655,0.088075,0.168757,0.059710,-0.321941,-0.194725
B cells,ABT737,0.317689,0.075677,0.113208,0.318973,1.534299,1.270523,-0.087928,0.198073,0.274882,0.097781,...,-0.216399,-0.045569,-0.117039,0.327684,0.196373,0.225913,0.391139,0.195075,-0.540607,-0.198220
B cells,AMD-070 (hydrochloride),-0.104568,0.127176,0.018801,0.052044,0.060773,-0.038973,-0.133540,0.123870,0.117394,-0.134618,...,-0.226756,-0.036166,-0.220077,0.235155,0.042118,0.099197,0.185507,0.173660,-0.262918,-0.085755
B cells,AT 7867,0.083571,0.070740,0.156862,0.344585,0.722629,0.363112,-0.087192,0.194202,0.138275,0.220319,...,-0.506477,-0.107950,-0.253322,0.141190,0.103137,0.157693,0.368213,0.324066,-0.452249,0.012677
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Myeloid cells,Vandetanib,0.035938,-0.084461,-0.653102,-0.640562,0.359990,-0.344047,0.325969,0.174660,0.393311,0.175434,...,-0.255471,-0.244713,-0.095647,-0.282693,-0.014337,-0.163890,-0.211021,-0.098819,-0.169934,0.107706
Myeloid cells,Vanoxerine,0.266238,-0.101833,-0.441960,-0.379708,1.801045,0.658361,0.321461,0.045544,0.319278,0.260782,...,-0.490423,-0.410556,-0.127375,-0.466314,0.316248,-0.088692,-0.188443,-0.093596,-0.215230,-0.052415
Myeloid cells,Vardenafil,0.061215,0.014784,-0.075429,-0.325613,0.455803,-0.021895,0.176454,0.179469,0.270306,0.233661,...,-0.356186,-0.183518,0.004371,0.028076,0.099418,-0.006284,-0.058281,0.037225,-0.063435,-0.017980
Myeloid cells,Vorinostat,0.469729,0.171966,-2.308502,-0.382731,3.319891,0.674685,0.635902,0.084603,0.970868,0.938001,...,-0.372372,-0.522752,-1.362944,-1.399575,0.246517,-0.252140,-0.715950,0.087835,-0.282777,0.034445


In [43]:
df_submission_data = df_sub_ix.join(df_submission).reset_index(drop=True).set_index("id")
df_submission_data

Unnamed: 0_level_0,A1BG,A1BG-AS1,A2M,A2M-AS1,A2MP1,A4GALT,AAAS,AACS,AAGAB,AAK1,...,ZUP1,ZW10,ZWILCH,ZWINT,ZXDA,ZXDB,ZXDC,ZYG11B,ZYX,ZZEF1
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,-0.000424,0.167305,0.395157,0.812879,0.740743,0.243306,-0.086658,0.236474,-0.080557,0.524134,...,-0.872206,-0.376905,-0.276706,0.092664,0.048062,0.329522,0.456628,0.539261,-0.697327,0.076841
1,-0.022968,0.036839,-0.023025,-0.043956,0.290724,0.219106,-0.086978,0.087635,0.148139,-0.197169,...,-0.034074,-0.040380,-0.183993,0.225442,0.053655,0.088075,0.168757,0.059710,-0.321941,-0.194725
2,0.317689,0.075677,0.113208,0.318973,1.534299,1.270523,-0.087928,0.198073,0.274882,0.097781,...,-0.216399,-0.045569,-0.117039,0.327684,0.196373,0.225913,0.391139,0.195075,-0.540607,-0.198220
3,-0.104568,0.127176,0.018801,0.052044,0.060773,-0.038973,-0.133540,0.123870,0.117394,-0.134618,...,-0.226756,-0.036166,-0.220077,0.235155,0.042118,0.099197,0.185507,0.173660,-0.262918,-0.085755
4,0.083571,0.070740,0.156862,0.344585,0.722629,0.363112,-0.087192,0.194202,0.138275,0.220319,...,-0.506477,-0.107950,-0.253322,0.141190,0.103137,0.157693,0.368213,0.324066,-0.452249,0.012677
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250,0.035938,-0.084461,-0.653102,-0.640562,0.359990,-0.344047,0.325969,0.174660,0.393311,0.175434,...,-0.255471,-0.244713,-0.095647,-0.282693,-0.014337,-0.163890,-0.211021,-0.098819,-0.169934,0.107706
251,0.266238,-0.101833,-0.441960,-0.379708,1.801045,0.658361,0.321461,0.045544,0.319278,0.260782,...,-0.490423,-0.410556,-0.127375,-0.466314,0.316248,-0.088692,-0.188443,-0.093596,-0.215230,-0.052415
252,0.061215,0.014784,-0.075429,-0.325613,0.455803,-0.021895,0.176454,0.179469,0.270306,0.233661,...,-0.356186,-0.183518,0.004371,0.028076,0.099418,-0.006284,-0.058281,0.037225,-0.063435,-0.017980
253,0.469729,0.171966,-2.308502,-0.382731,3.319891,0.674685,0.635902,0.084603,0.970868,0.938001,...,-0.372372,-0.522752,-1.362944,-1.399575,0.246517,-0.252140,-0.715950,0.087835,-0.282777,0.034445


In [45]:
df_submission_data.to_csv("submission.zip", compression="zip")