In [1]:
import os, sys
import numpy as np
import pandas as pd

In [2]:
from rdkit import Chem
from rdkit.Chem import *
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem import PandasTools
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem.rdMolDescriptors import *
from rdkit import DataStructs
from rdkit.Chem import AllChem

# 

# Read DrugCentral

In [7]:
drugid_df = pd.read_csv('../data/DeepCoverageMOA/drug.structure.id.csv')
drugid_df.head()

Unnamed: 0,DRUG_NAME,STRUCT_ID
0,levobupivacaine,4
1,(S)-nicardipine,5
2,(S)-nitrendipine,6
3,levdobutamine,13
4,aminopterin,21


# 

# Read SMILES IDs

In [11]:
smiles_df = pd.read_csv('../data/DrugCentral/raw_data/structures.smiles.tsv', sep = '\t')
smiles_df.head()

Unnamed: 0,SMILES,InChI,InChIKey,ID,INN,CAS_RN
0,CNC(=O)C1=C(C=C(C=C1)C2=NN3C(=CN=C3N=C2)CC4=CC...,InChI=1S/C23H17FN6O/c1-25-22(31)18-6-5-16(11-1...,LIOLIMKSCNQPLV-UHFFFAOYSA-N,5392,capmatinib,1029712-80-8
1,CC(C)(COC1=CN2C(=C(C=N2)C#N)C(=C1)C3=CN=C(C=C3...,"InChI=1S/C29H31N7O3/c1-29(2,37)18-39-24-9-25(2...",XIIOFHFUYBLOLW-UHFFFAOYSA-N,5393,selpercatinib,2152628-33-4
2,CCN1C2=CC(=NC=C2C=C(C1=O)C3=CC(=C(C=C3Br)F)NC(...,InChI=1S/C24H21BrFN5O2/c1-3-31-21-12-22(27-2)2...,CEFJVGZHQAGLHS-UHFFFAOYSA-N,5394,ripretinib,1442472-39-0
3,C[C@]12CC[C@H]3[C@H]([C@@H]1C[C@H]([C@@H]2O)[1...,InChI=1S/C18H23FO2/c1-18-7-6-13-12-5-3-11(20)8...,KDLLNMRYZGUVMA-ZYMZXAKXSA-N,5395,fluoroestradiol F 18,94153-53-4
4,C1=CC2=C(C=C1C3=CN=C(C=C3)[18F])NC4=C2C=NC=C4,InChI=1S/C16H10FN3/c17-16-4-2-11(8-19-16)10-1-...,GETAAWDSFUCLBS-SJPDSGJFSA-N,5396,flortaucipir F 18,1522051-90-6


In [13]:
subset_smiles_df = smiles_df[['SMILES', 'ID', 'INN']].drop_duplicates()

In [15]:
def standardize_smiles(df, smiles_col):
    smiles_list = df['SMILES']
    mol_list = [Chem.MolFromSmiles(smiles) for smiles in smiles_list]

    df['Molecule'] = mol_list
    
    new_smiles_list = [Chem.MolToSmiles(mol) for mol in mol_list]
    df['SMILES_String'] = new_smiles_list

    return df

In [17]:
subset_smiles_df = standardize_smiles(subset_smiles_df, 'SMILES')



In [18]:
subset_smiles_df

Unnamed: 0,SMILES,ID,INN,Molecule,SMILES_String
0,CNC(=O)C1=C(C=C(C=C1)C2=NN3C(=CN=C3N=C2)CC4=CC...,5392,capmatinib,<rdkit.Chem.rdchem.Mol object at 0x144a88eb0>,CNC(=O)c1ccc(-c2cnc3ncc(Cc4ccc5ncccc5c4)n3n2)cc1F
1,CC(C)(COC1=CN2C(=C(C=N2)C#N)C(=C1)C3=CN=C(C=C3...,5393,selpercatinib,<rdkit.Chem.rdchem.Mol object at 0x144a893f0>,COc1ccc(CN2C3CC2CN(c2ccc(-c4cc(OCC(C)(C)O)cn5n...
2,CCN1C2=CC(=NC=C2C=C(C1=O)C3=CC(=C(C=C3Br)F)NC(...,5394,ripretinib,<rdkit.Chem.rdchem.Mol object at 0x144a89380>,CCn1c(=O)c(-c2cc(NC(=O)Nc3ccccc3)c(F)cc2Br)cc2...
3,C[C@]12CC[C@H]3[C@H]([C@@H]1C[C@H]([C@@H]2O)[1...,5395,fluoroestradiol F 18,<rdkit.Chem.rdchem.Mol object at 0x144a89310>,C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1C[C@...
4,C1=CC2=C(C=C1C3=CN=C(C=C3)[18F])NC4=C2C=NC=C4,5396,flortaucipir F 18,<rdkit.Chem.rdchem.Mol object at 0x144a892a0>,[18F]c1ccc(-c2ccc3c(c2)[nH]c2ccncc23)cn1
...,...,...,...,...,...
4094,COC(=O)[C@@H]([C@H]1CCCCN1C(=O)OC[N+]2=CC=CC(=...,5448,serdexmethylphenidate,<rdkit.Chem.rdchem.Mol object at 0x144f25000>,COC(=O)[C@H](c1ccccc1)[C@H]1CCCCN1C(=O)OC[n+]1...
4095,C[C@]12CC[C@H]3[C@H]([C@@H]1[C@H]([C@H]([C@@H]...,5450,estetrol,<rdkit.Chem.rdchem.Mol object at 0x144f25070>,C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1[C@@...
4096,OC(=O)CC[C@H](NC(=O)N[C@@H](CCCCNC(=O)C1=C...,5458,piflufolastat F-18,<rdkit.Chem.rdchem.Mol object at 0x144f250e0>,O=C(O)CC[C@H](NC(=O)N[C@@H](CCCCNC(=O)c1ccc([1...
4097,CCN1CCN(CC1)C2=CC=C(C=C2)NC3=CC(=NC=N3)N(C)C(=...,5459,infigratinib,<rdkit.Chem.rdchem.Mol object at 0x144f25150>,CCN1CCN(c2ccc(Nc3cc(N(C)C(=O)Nc4c(Cl)c(OC)cc(O...


In [19]:
structure_drugid_df = pd.merge(subset_smiles_df, drugid_df, left_on = 'INN', right_on = 'DRUG_NAME', how = 'left')

In [23]:
structure_drugid_df

Unnamed: 0,SMILES,ID,INN,Molecule,SMILES_String,DRUG_NAME,STRUCT_ID
0,CNC(=O)C1=C(C=C(C=C1)C2=NN3C(=CN=C3N=C2)CC4=CC...,5392,capmatinib,<rdkit.Chem.rdchem.Mol object at 0x144a88eb0>,CNC(=O)c1ccc(-c2cnc3ncc(Cc4ccc5ncccc5c4)n3n2)cc1F,capmatinib,5392.0
1,CC(C)(COC1=CN2C(=C(C=N2)C#N)C(=C1)C3=CN=C(C=C3...,5393,selpercatinib,<rdkit.Chem.rdchem.Mol object at 0x144a893f0>,COc1ccc(CN2C3CC2CN(c2ccc(-c4cc(OCC(C)(C)O)cn5n...,selpercatinib,5393.0
2,CCN1C2=CC(=NC=C2C=C(C1=O)C3=CC(=C(C=C3Br)F)NC(...,5394,ripretinib,<rdkit.Chem.rdchem.Mol object at 0x144a89380>,CCn1c(=O)c(-c2cc(NC(=O)Nc3ccccc3)c(F)cc2Br)cc2...,ripretinib,5394.0
3,C[C@]12CC[C@H]3[C@H]([C@@H]1C[C@H]([C@@H]2O)[1...,5395,fluoroestradiol F 18,<rdkit.Chem.rdchem.Mol object at 0x144a89310>,C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1C[C@...,fluoroestradiol F 18,5395.0
4,C1=CC2=C(C=C1C3=CN=C(C=C3)[18F])NC4=C2C=NC=C4,5396,flortaucipir F 18,<rdkit.Chem.rdchem.Mol object at 0x144a892a0>,[18F]c1ccc(-c2ccc3c(c2)[nH]c2ccncc23)cn1,flortaucipir F 18,5396.0
...,...,...,...,...,...,...,...
4094,COC(=O)[C@@H]([C@H]1CCCCN1C(=O)OC[N+]2=CC=CC(=...,5448,serdexmethylphenidate,<rdkit.Chem.rdchem.Mol object at 0x144f25000>,COC(=O)[C@H](c1ccccc1)[C@H]1CCCCN1C(=O)OC[n+]1...,,
4095,C[C@]12CC[C@H]3[C@H]([C@@H]1[C@H]([C@H]([C@@H]...,5450,estetrol,<rdkit.Chem.rdchem.Mol object at 0x144f25070>,C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1[C@@...,estetrol,5450.0
4096,OC(=O)CC[C@H](NC(=O)N[C@@H](CCCCNC(=O)C1=C...,5458,piflufolastat F-18,<rdkit.Chem.rdchem.Mol object at 0x144f250e0>,O=C(O)CC[C@H](NC(=O)N[C@@H](CCCCNC(=O)c1ccc([1...,piflufolastat F-18,5458.0
4097,CCN1CCN(CC1)C2=CC=C(C=C2)NC3=CC(=NC=N3)N(C)C(=...,5459,infigratinib,<rdkit.Chem.rdchem.Mol object at 0x144f25150>,CCN1CCN(c2ccc(Nc3cc(N(C)C(=O)Nc4c(Cl)c(OC)cc(O...,infigratinib,5459.0


# 

# Read DeepCoverageMOA IDs

In [27]:
dcmoa_id_df = pd.read_excel('../data/DeepCoverageMOA/raw_data/41587_2022_1539_MOESM4_ESM.xlsx')

In [28]:
dcmoa_id_df.head()

Unnamed: 0,Compound Name,Primary Target,Secondary Target,SMILES,MW,Formula,Screening Concentration
0,SR 142948,NTSR1,NTSR2,COc1cccc(c1c1cc(nn1c1ccc(cc1C(C)C)C(=O)N(CCCN(...,685.38,C39H51N5O6,10uM
1,UK 356618,MMP3,MMP13;MMP9,ONC(=O)C[C@H](C(=O)N[C@@H](C(C)(C)C)C(=O)N[C@@...,557.32,C34H43N3O4,10uM
2,JW 480,NCEH1,,O=C(Oc1ccccc1C(C)C)NCCc1ccc2c(c1)cccc2,333.17,C22H23NO2,10uM
3,ML-265,PKM,,Nc1cccc(c1)Cn1ncc2c(c1=O)n(C)c1c2sc(c1)S(=O)C,372.07,C17H16N4O2S2,10uM
4,PF-04418948,PTGER2,,COc1ccc2c(c1)ccc(c2)OCC1(CN(C1)C(=O)c1ccc(cc1)...,409.13,C23H20FNO5,10uM


In [31]:
dcmoa_id_df = standardize_smiles(dcmoa_id_df, 'SMILES')

In [33]:
dcmoa_id_df

Unnamed: 0,Compound Name,Primary Target,Secondary Target,SMILES,MW,Formula,Screening Concentration,Molecule,SMILES_String
0,SR 142948,NTSR1,NTSR2,COc1cccc(c1c1cc(nn1c1ccc(cc1C(C)C)C(=O)N(CCCN(...,685.38,C39H51N5O6,10uM,<rdkit.Chem.rdchem.Mol object at 0x1603f4b30>,COc1cccc(OC)c1-c1cc(C(=O)NC2(C(=O)O)C3CC4CC(C3...
1,UK 356618,MMP3,MMP13;MMP9,ONC(=O)C[C@H](C(=O)N[C@@H](C(C)(C)C)C(=O)N[C@@...,557.32,C34H43N3O4,10uM,<rdkit.Chem.rdchem.Mol object at 0x1603f4e40>,Cc1cc(CCC[C@H](CC(=O)NO)C(=O)N[C@H](C(=O)N[C@H...
2,JW 480,NCEH1,,O=C(Oc1ccccc1C(C)C)NCCc1ccc2c(c1)cccc2,333.17,C22H23NO2,10uM,<rdkit.Chem.rdchem.Mol object at 0x1603f4dd0>,CC(C)c1ccccc1OC(=O)NCCc1ccc2ccccc2c1
3,ML-265,PKM,,Nc1cccc(c1)Cn1ncc2c(c1=O)n(C)c1c2sc(c1)S(=O)C,372.07,C17H16N4O2S2,10uM,<rdkit.Chem.rdchem.Mol object at 0x1603f4c80>,Cn1c2cc(S(C)=O)sc2c2cnn(Cc3cccc(N)c3)c(=O)c21
4,PF-04418948,PTGER2,,COc1ccc2c(c1)ccc(c2)OCC1(CN(C1)C(=O)c1ccc(cc1)...,409.13,C23H20FNO5,10uM,<rdkit.Chem.rdchem.Mol object at 0x1603f4c10>,COc1ccc2cc(OCC3(C(=O)O)CN(C(=O)c4ccc(F)cc4)C3)...
...,...,...,...,...,...,...,...,...,...
870,Celastrol,IL1B,,O=C1C=C2C(=CC=C3[C@@]2(C)CC[C@@]2([C@]3(C)CC[C...,450.28,C29H38O4,1uM,<rdkit.Chem.rdchem.Mol object at 0x160400970>,CC1=C(O)C(=O)C=C2C1=CC=C1[C@@]2(C)CC[C@@]2(C)[...
871,UNC 0631,EHMT2,,COC1=CC2=C(NC3CCN(CC4CCCCC4)CC3)N=C(N5CCN(C(C)...,635.49,C37H61N7O2,1uM,<rdkit.Chem.rdchem.Mol object at 0x1604009e0>,COc1cc2c(NC3CCN(CC4CCCCC4)CC3)nc(N3CCCN(C(C)C)...
872,Pyrimethamine,DHFR,GSTP1,CCC1=C(C(=NC(=N1)N)N)C2=CC=C(Cl)C=C2,248.08,C12H13ClN4,1uM,<rdkit.Chem.rdchem.Mol object at 0x160400a50>,CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1
873,Darapladib,PLA2G7,,CCN(CCN(C(=O)Cn1c(SCc2ccc(cc2)F)nc(=O)c2c1CCC2...,666.26,C36H38F4N4O2S,1uM,<rdkit.Chem.rdchem.Mol object at 0x160400ac0>,CCN(CC)CCN(Cc1ccc(-c2ccc(C(F)(F)F)cc2)cc1)C(=O...


In [35]:
subset_dcmoa_id_df = dcmoa_id_df[['SMILES_String', 'Compound Name']]

In [37]:
subset_dcmoa_id_df

Unnamed: 0,SMILES_String,Compound Name
0,COc1cccc(OC)c1-c1cc(C(=O)NC2(C(=O)O)C3CC4CC(C3...,SR 142948
1,Cc1cc(CCC[C@H](CC(=O)NO)C(=O)N[C@H](C(=O)N[C@H...,UK 356618
2,CC(C)c1ccccc1OC(=O)NCCc1ccc2ccccc2c1,JW 480
3,Cn1c2cc(S(C)=O)sc2c2cnn(Cc3cccc(N)c3)c(=O)c21,ML-265
4,COc1ccc2cc(OCC3(C(=O)O)CN(C(=O)c4ccc(F)cc4)C3)...,PF-04418948
...,...,...
870,CC1=C(O)C(=O)C=C2C1=CC=C1[C@@]2(C)CC[C@@]2(C)[...,Celastrol
871,COc1cc2c(NC3CCN(CC4CCCCC4)CC3)nc(N3CCCN(C(C)C)...,UNC 0631
872,CCc1nc(N)nc(N)c1-c1ccc(Cl)cc1,Pyrimethamine
873,CCN(CC)CCN(Cc1ccc(-c2ccc(C(F)(F)F)cc2)cc1)C(=O...,Darapladib


# 

# How many drugs in DrugCentral were profiled in DeepCoverageAPI?

In [41]:
drugcentral_smiles_list = subset_smiles_df['SMILES_String'].unique()
dcmoa_smiles_list = dcmoa_id_df['SMILES_String'].unique()

In [43]:
subset_smiles_df['DeepCoverageMOA'] = np.where(subset_smiles_df['SMILES_String'].isin(dcmoa_smiles_list), 'Yes', 'No')

In [45]:
subset_smiles_df['DeepCoverageMOA'].value_counts()

DeepCoverageMOA
No     3892
Yes     207
Name: count, dtype: int64

In [47]:
subset_smiles_df

Unnamed: 0,SMILES,ID,INN,Molecule,SMILES_String,DeepCoverageMOA
0,CNC(=O)C1=C(C=C(C=C1)C2=NN3C(=CN=C3N=C2)CC4=CC...,5392,capmatinib,<rdkit.Chem.rdchem.Mol object at 0x144a88eb0>,CNC(=O)c1ccc(-c2cnc3ncc(Cc4ccc5ncccc5c4)n3n2)cc1F,No
1,CC(C)(COC1=CN2C(=C(C=N2)C#N)C(=C1)C3=CN=C(C=C3...,5393,selpercatinib,<rdkit.Chem.rdchem.Mol object at 0x144a893f0>,COc1ccc(CN2C3CC2CN(c2ccc(-c4cc(OCC(C)(C)O)cn5n...,No
2,CCN1C2=CC(=NC=C2C=C(C1=O)C3=CC(=C(C=C3Br)F)NC(...,5394,ripretinib,<rdkit.Chem.rdchem.Mol object at 0x144a89380>,CCn1c(=O)c(-c2cc(NC(=O)Nc3ccccc3)c(F)cc2Br)cc2...,No
3,C[C@]12CC[C@H]3[C@H]([C@@H]1C[C@H]([C@@H]2O)[1...,5395,fluoroestradiol F 18,<rdkit.Chem.rdchem.Mol object at 0x144a89310>,C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1C[C@...,No
4,C1=CC2=C(C=C1C3=CN=C(C=C3)[18F])NC4=C2C=NC=C4,5396,flortaucipir F 18,<rdkit.Chem.rdchem.Mol object at 0x144a892a0>,[18F]c1ccc(-c2ccc3c(c2)[nH]c2ccncc23)cn1,No
...,...,...,...,...,...,...
4094,COC(=O)[C@@H]([C@H]1CCCCN1C(=O)OC[N+]2=CC=CC(=...,5448,serdexmethylphenidate,<rdkit.Chem.rdchem.Mol object at 0x144f25000>,COC(=O)[C@H](c1ccccc1)[C@H]1CCCCN1C(=O)OC[n+]1...,No
4095,C[C@]12CC[C@H]3[C@H]([C@@H]1[C@H]([C@H]([C@@H]...,5450,estetrol,<rdkit.Chem.rdchem.Mol object at 0x144f25070>,C[C@]12CC[C@@H]3c4ccc(O)cc4CC[C@H]3[C@@H]1[C@@...,No
4096,OC(=O)CC[C@H](NC(=O)N[C@@H](CCCCNC(=O)C1=C...,5458,piflufolastat F-18,<rdkit.Chem.rdchem.Mol object at 0x144f250e0>,O=C(O)CC[C@H](NC(=O)N[C@@H](CCCCNC(=O)c1ccc([1...,No
4097,CCN1CCN(CC1)C2=CC=C(C=C2)NC3=CC(=NC=N3)N(C)C(=...,5459,infigratinib,<rdkit.Chem.rdchem.Mol object at 0x144f25150>,CCN1CCN(c2ccc(Nc3cc(N(C)C(=O)Nc4c(Cl)c(OC)cc(O...,No


# 

# Read DeepCoverageMOA Data

In [51]:
proteomics_df = pd.read_excel('../data/DeepCoverageMOA/raw_data/41587_2022_1539_MOESM5_ESM.xlsx')

In [53]:
proteomics_df

Unnamed: 0,Row,Column,Pearson (r),n,p-value,q-value
0,Nutlin3a_MDM2,Idasanutlin_MDM2,0.905866,8492,2.220446e-16,4.080696e-13
1,Vorinostat_HDAC1;HDAC3;HDAC6,Quisinostat_HDAC1,0.904564,8400,2.220446e-16,4.080696e-13
2,AZ 628_RAF1,MEK162_MAP2K1,0.899967,8227,2.220446e-16,4.080696e-13
3,TCS ERK 11e_MAPK1,AZ 628_RAF1,0.889503,8267,2.220446e-16,4.080696e-13
4,ONX 0914_PSMB8,MG-132_PSMB5,0.884305,8258,2.220446e-16,4.080696e-13
...,...,...,...,...,...,...
2538,L-779450_RAF1,Taranabant_CNR1,0.407294,8503,6.662036e-06,9.908778e-04
2539,CGP77675_SRC,Aztreonam_LACTB,-0.407277,8443,6.668409e-06,9.916993e-04
2540,Pimavanserin_HTR2A,Saracatinib_SRC,0.407239,8480,6.682735e-06,9.935452e-04
2541,STK410283_MAP3K9;TNIK,Saracatinib_SRC,0.407234,8327,6.684710e-06,9.937997e-04


# 

## Create Drug1 and Drug2 Names

In [57]:
proteomics_df['Drug1'] = proteomics_df['Row'].map(lambda x: x.split('_')[0])

In [59]:
proteomics_df['Gene1'] = proteomics_df['Row'].map(lambda x: x.split('_')[1])

In [61]:
proteomics_df['Drug2'] = proteomics_df['Column'].map(lambda x: x.split('_')[0])

In [63]:
proteomics_df['Gene2'] = proteomics_df['Column'].map(lambda x: x.split('_')[1])

# 

# Add STRING per Name

In [67]:
drug1_proteomics_df = pd.merge(proteomics_df, subset_dcmoa_id_df, left_on = 'Drug1', right_on = 'Compound Name', how = 'left')

In [69]:
drug1_proteomics_df = drug1_proteomics_df.rename(columns = {'SMILES_String': 'Drug1_SMILES'})

In [71]:
drug1_proteomics_df =  drug1_proteomics_df.drop(columns = ['Compound Name'])

In [73]:
drug2_proteomics_df = pd.merge(drug1_proteomics_df, subset_dcmoa_id_df, left_on = 'Drug2', right_on = 'Compound Name', how = 'left')

In [75]:
drug2_proteomics_df = drug2_proteomics_df.rename(columns = {'SMILES_String': 'Drug2_SMILES'})

In [77]:
drug2_proteomics_df =  drug2_proteomics_df.drop(columns = ['Compound Name'])

# 

# Map DCMOA IDs to DrugCentral IDs

In [108]:
drug1_proteomics_name_df = pd.merge(drug2_proteomics_df, structure_drugid_df[['SMILES_String', 'DRUG_NAME', 'STRUCT_ID']], left_on = 'Drug1_SMILES', right_on = 'SMILES_String', how = 'left')

In [110]:
drug1_proteomics_name_df = drug1_proteomics_name_df.rename(columns = {'DRUG_NAME': 'DRUG_NAME_1', 'STRUCT_ID': 'STRUCT_ID_1'})

In [112]:
drug1_proteomics_name_df = drug1_proteomics_name_df.drop(columns = ['SMILES_String'])

In [114]:
drug2_proteomics_name_df = pd.merge(drug1_proteomics_name_df, structure_drugid_df[['SMILES_String', 'DRUG_NAME', 'STRUCT_ID']], left_on = 'Drug2_SMILES', right_on = 'SMILES_String', how = 'left')

In [116]:
drug2_proteomics_name_df = drug2_proteomics_name_df.rename(columns = {'DRUG_NAME': 'DRUG_NAME_2', 'STRUCT_ID': 'STRUCT_ID_2'})

In [118]:
drug2_proteomics_name_df = drug2_proteomics_name_df.drop(columns = ['SMILES_String',])

# 

# Convert DrugCentral Strucutre IDs for LINCS Mapping

In [122]:
drugcentral_dcmoa_df = drug2_proteomics_name_df[(drug2_proteomics_name_df['DRUG_NAME_1'].isna() == False) & (drug2_proteomics_name_df['DRUG_NAME_2'].isna() == False)]

In [124]:
drugcentral_dcmoa_df['STRUCT_ID_1'] = drugcentral_dcmoa_df['STRUCT_ID_1'].map(lambda x: int(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  drugcentral_dcmoa_df['STRUCT_ID_1'] = drugcentral_dcmoa_df['STRUCT_ID_1'].map(lambda x: int(x))


In [126]:
drugcentral_dcmoa_df['STRUCT_ID_2'] = drugcentral_dcmoa_df['STRUCT_ID_2'].map(lambda x: int(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  drugcentral_dcmoa_df['STRUCT_ID_2'] = drugcentral_dcmoa_df['STRUCT_ID_2'].map(lambda x: int(x))


In [128]:
drugcentral_dcmoa_df = drugcentral_dcmoa_df.drop(columns = ['Row', 'Column'])

# 

# Write DeepCoverageMOA to LINCS Mapping File

In [132]:
subset_drugcentral_dcmoa_df = drugcentral_dcmoa_df[[
 'DRUG_NAME_1',
 'STRUCT_ID_1',
 'DRUG_NAME_2',
 'STRUCT_ID_2',
'Drug1_SMILES',
 'Drug2_SMILES',
'Pearson (r)',
 'n',
 'p-value',
 'q-value'
]]

In [134]:
subset_drugcentral_dcmoa_df = subset_drugcentral_dcmoa_df.rename(columns = {'Pearson (r)': 'DCMOA Pearson (r)',
                                                                             'n': 'DCMOA n',
                                                                             'p-value': 'DCMOA  p-value',
                                                                             'q-value': 'DCMOA q-value'
                                                                           })

In [136]:
subset_drugcentral_dcmoa_df

Unnamed: 0,DRUG_NAME_1,STRUCT_ID_1,DRUG_NAME_2,STRUCT_ID_2,Drug1_SMILES,Drug2_SMILES,DCMOA Pearson (r),DCMOA n,DCMOA p-value,DCMOA q-value
19,palbociclib,4941,ribociclib,5218,CC(=O)c1c(C)c2cnc(Nc3ccc(N4CCNCC4)cn3)nc2n(C2C...,CN(C)C(=O)c1cc2cnc(Nc3ccc(N4CCNCC4)cn3)nc2n1C1...,0.815885,8577,2.220446e-16,4.080696e-13
33,sunitinib,2544,bosutinib,4359,CCN(CC)CCNC(=O)c1c(C)[nH]c(/C=C2\C(=O)Nc3ccc(F...,COc1cc(Nc2c(C#N)cnc3cc(OCCCN4CCN(C)CC4)c(OC)cc...,0.776625,7746,2.220446e-16,4.080696e-13
63,formestane,1238,betamethasone,348,C[C@]12CCC(=O)C(O)=C1CC[C@@H]1[C@@H]2CC[C@]2(C...,C[C@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@...,0.741963,8869,2.220446e-16,4.080696e-13
113,encorafenib,5289,binimetinib,5290,COC(=O)N[C@@H](C)CNc1nccc(-c2cn(C(C)C)nc2-c2cc...,Cn1cnc2c(F)c(Nc3ccc(Br)cc3F)c(C(=O)NOCCO)cc21,0.708916,7985,2.220446e-16,4.080696e-13
192,sunitinib,2544,encorafenib,5289,CCN(CC)CCNC(=O)c1c(C)[nH]c(/C=C2\C(=O)Nc3ccc(F...,COC(=O)N[C@@H](C)CNc1nccc(-c2cn(C(C)C)nc2-c2cc...,0.676738,7582,2.220446e-16,4.080696e-13
...,...,...,...,...,...,...,...,...,...,...
2353,fingolimod,4167,simvastatin,2445,CCCCCCCCc1ccc(CCC(N)(CO)CO)cc1,CCC(C)(C)C(=O)O[C@H]1C[C@@H](C)C=C2C=C[C@H](C)...,0.416534,8110,3.899823e-06,6.271537e-04
2424,fingolimod,4167,fedratinib,5347,CCCCCCCCc1ccc(CCC(N)(CO)CO)cc1,Cc1cnc(Nc2ccc(OCCN3CCCC3)cc2)nc1Nc1cccc(S(=O)(...,0.412612,8085,4.904502e-06,7.641203e-04
2440,dasatinib,785,pazopanib,4118,Cc1nc(Nc2ncc(C(=O)Nc3c(C)cccc3Cl)s2)cc(N2CCN(C...,Cc1ccc(Nc2nccc(N(C)c3ccc4c(C)n(C)nc4c3)n2)cc1S...,0.411989,8545,5.085039e-06,7.878903e-04
2474,bosutinib,4359,betamethasone,348,COc1cc(Nc2c(C#N)cnc3cc(OCCCN4CCN(C)CC4)c(OC)cc...,C[C@H]1C[C@H]2[C@@H]3CCC4=CC(=O)C=C[C@]4(C)[C@...,-0.410643,8277,5.496717e-06,8.411267e-04


In [187]:
subset_drugcentral_dcmoa_df.to_csv('../data/DeepCoverageMOA/analyzed_data/deepcoveragemoa.lincs.csv', index = False)