# Protenix: virtual screening

## Install

```bash
git clone https://github.com/bytedance/Protenix.git
cd Protenix
pip install .
```

## Setup

In [None]:
from kdock.core import Data
from kdock.px import *

## Protein sequence

In [None]:
# Load the KRAS sequence table; it has one row per entry with the wild-type
# sequence plus the G12D and G12C variant sequences as separate columns.
kras = Data.get_kras_seq()
# Bare last expression so the notebook renders the table.
kras

Unnamed: 0,ID,WT_sequence,g12d_seq,g12c_seq
0,kras_human,MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVI...,MTEYKLVVVGADGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVI...,MTEYKLVVVGACGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVI...
1,kras_human_isoform2b,MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVI...,MTEYKLVVVGADGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVI...,MTEYKLVVVGACGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVI...


In [None]:
# Take the G12D variant sequence from the first row (`kras_human`):
# select the column first, then pick the entry at position 0.
g12d = kras['g12d_seq'].iloc[0]

In [None]:
# Echo the full sequence string; the table view above truncates it with "...".
g12d

'MTEYKLVVVGADGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHHYREQIKRVKDSEDVPMVLVGNKCDLPSRTVDTKQAQDLARSYGIPFIETSAKTRQRVEDAFYTLVREIRQYRLKKISKEEKTPGCVKIKKCIIM'

## Get MSA on server

Submit the protein sequence to the protenix-server to get the MSA as two a3m files: one pairing and one unpairing.

Upload the two files into a single folder and use that folder as `msa_dir`.

## SMILES

In [None]:
# Load the Mirati G12D compound table used as the ligand set for screening.
# NOTE(review): units of the Kd / IC50 / erk_IC50 columns are not shown here — confirm.
df = Data.get_mirati_g12d()

In [None]:
# Sanity check: count rows whose ID appears more than once.
# `keep=False` flags every member of a duplicated group, so 0 means all IDs are unique.
df['ID'].duplicated(keep=False).sum()

0

In [None]:
# Preview the first rows only rather than dumping the whole compound table.
df.head()

Unnamed: 0,ID,SMILES,Kd,IC50,erk_IC50
0,US_1,CN1CCC[C@H]1COc1nc(N2CC3CCC(C2)N3)c2cnc(cc2n1)...,97.7,124.7,3159.1
1,US_4,Oc1cc(-c2ncc3c(nc(OCCc4ccccn4)nc3c2F)N2CC3CCC(...,155.7,496.2,8530.0
2,US_5,Cn1nccc1COc1nc(N2CC3CCC(C2)N3)c2cnc(c(F)c2n1)-...,294.8,722.9,8193.8
3,US_6,Cc1cccnc1CCOc1nc(N2CC3CCC(C2)N3)c2cnc(c(F)c2n1...,442.2,434.1,11518.2
4,US_7,Oc1cc(-c2ncc3c(nc(OCCc4ncccn4)nc3c2F)N2CC3CCC(...,463.5,1867.3,


## Test a positive control
> MRTX

In [None]:
# IPython help (`?`): show the helper's signature and docstring before calling it.
get_single_protein_ligand_json?

[0;31mSignature:[0m
[0mget_single_protein_ligand_json[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mjob_name[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mprotein_seq[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmsa_dir[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mSMILES[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mCCD[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mjson_path[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m Generate and optionally save a JSON config for one protein-ligand job.
[0;31mFile:[0m      /tmp/ipykernel_1724/1197518213.py
[0;31mType:[0m      function

In [None]:
# Positive control: build a single protein-ligand job for KRAS G12D with the MRTX ligand.
# The return value is discarded (`_`); the file written to `json_path` is what
# `protenix predict --input` reads.
_ = get_single_protein_ligand_json(
    job_name='kras_g12d_mrtx',
    protein_seq=g12d,
    msa_dir='kras_g12d_msa',
    SMILES="C#CC1=C(C=CC2=CC(=CC(=C21)C3=NC=C4C(=C3F)N=C(N=C4N5CC6CCC(C5)N6)OC[C@@]78CCCN7C[C@@H](C8)F)O)F",
    json_path='g12d_mrtx.json',
)

JSON saved to g12d_mrtx.json


## Protenix command

```bash
protenix predict --input g12d_mrtx.json --out_dir  ./output --seeds 101
```

## Run with other SMILES

In [None]:
# IPython help (`?`): show the helper's signature and docstring before calling it.
get_virtual_screening_json?

[0;31mSignature:[0m
[0mget_virtual_screening_json[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mdf[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mprotein_seq[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmsa_dir[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mid_col[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msmi_col[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mccd_col[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msave_json[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m Get json file of single protein against multiple SMILES in a dataframe.
[0;31mFile:[0m      /tmp/ipykernel_1724/3782683879.py
[0;31mType:[0m      function

In [None]:
# Build one input JSON that pairs the G12D sequence with every SMILES in `df`.
# `id_col` / `smi_col` name the DataFrame columns holding compound IDs and SMILES strings.
# NOTE(review): presumably each row becomes one job named by its ID — confirm in kdock.px.
_ = get_virtual_screening_json(
    df=df,
    protein_seq=g12d,
    msa_dir='kras_g12d_msa',
    id_col='ID',
    smi_col='SMILES',
    save_json='kras_g12d_input.json',
)

JSON saved to kras_g12d_input.json


```bash
protenix predict --input kras_g12d_input.json --out_dir  ./output --seeds 101
```