-
Notifications
You must be signed in to change notification settings - Fork 0
/
ColabGCNdesign_beta_v0_2_1.ipynb
277 lines (277 loc) · 13.3 KB
/
ColabGCNdesign_beta_v0_2_1.ipynb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "ColabGCNdesign_beta_v0_2_1.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# ColabGCNdesign_beta\n",
"This notebook is for running Dr. Shintaro Minami's [GCNdesign](https://github.com/ShintaroMinami/GCNdesign) on Google Colaboratory."
],
"metadata": {
"id": "hfp_AC3zCkNG"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "RzXSjeTIbL4U",
"cellView": "form"
},
"outputs": [],
"source": [
"#@title 0. Install software\n",
"#@markdown Please run this cell by pressing the ▶ _Play_ button on the left.\n",
"\n",
"#@markdown PyRosetta and GCNdesign will be installed.\n",
"\n",
"#@markdown PyRosetta is ~1.5 GB. It takes several tens of minutes to install PyRosetta.\n",
"\n",
"#@markdown Enter your user ID and password to download PyRosetta.\n",
"#@markdown You can obtain a license for PyRosetta at the following link https://els2.comotion.uw.edu/product/pyrosetta\n",
"\n",
"import os, sys, time\n",
"USER = \"\" #@param {type:\"string\"}\n",
"PASSWORD = \"\" #@param {type:\"string\"}\n",
"\n",
"# Values typed into the form are saved with the notebook. Leaving a field empty\n",
"# prompts for it at run time instead, so the password need not be stored.\n",
"if not USER:\n",
"    USER = input(\"PyRosetta user ID: \")\n",
"if not PASSWORD:\n",
"    from getpass import getpass\n",
"    PASSWORD = getpass(\"PyRosetta password: \")\n",
"\n",
"pyrosetta_distr_path = '/content' + '/PyRosetta/_PyRosetta_.tar.bz2'\n",
"\n",
"# -p keeps the cell re-runnable when the directory already exists.\n",
"!mkdir -p /content/PyRosetta\n",
"# IPython substitutes $USER / $PASSWORD before the shell sees the line; the single\n",
"# quotes stop the shell from interpreting characters such as $, & or spaces.\n",
"!cd /content/PyRosetta && wget -O _PyRosetta_.tar.bz2 --no-check-certificate --user '$USER' --password '$PASSWORD' http://graylab.jhu.edu/download/PyRosetta4/archive/release/PyRosetta4.MinSizeRel.python37.linux/PyRosetta4.MinSizeRel.python37.linux.release-316.tar.bz2\n",
"\n",
"# wget -O leaves an empty file behind when the download fails (e.g. wrong credentials);\n",
"# stop here with a clear message instead of letting tar fail on it.\n",
"if not os.path.exists(pyrosetta_distr_path) or os.path.getsize(pyrosetta_distr_path) == 0:\n",
"    raise RuntimeError(\"PyRosetta download failed. Check your user ID and password and run this cell again.\")\n",
"\n",
"!tar xjf /content/PyRosetta/_PyRosetta_.tar.bz2 -C /content/PyRosetta --strip-components=1\n",
"!cd /content/PyRosetta/setup && python setup.py install\n",
"\n",
"# Make the unpacked PyRosetta tree importable; skip entries that are already on\n",
"# sys.path so that re-running the cell does not add duplicates.\n",
"for e in os.listdir('/content/PyRosetta/'):\n",
"    entry_path = '/content/PyRosetta/' + e\n",
"    if entry_path not in sys.path:\n",
"        sys.path.append(entry_path)\n",
"\n",
"# %pip installs into the environment of the running kernel.\n",
"%pip install gcndesign\n",
"%pip install py3Dmol"
]
},
{
"cell_type": "code",
"source": [
"#@title 1. Autodesign\n",
"#! /usr/bin/env python\n",
"\n",
"import os\n",
"from os import path\n",
"import sys\n",
"import argparse\n",
"from google.colab import files\n",
"\n",
"# argument parser\n",
"pdbcode = \"6WMK\" #@param {type:\"string\"}\n",
"#@markdown - `pdbcode` Enter the PDB ID of the protein structure you want to design. ( e.g. 1qys ) \n",
"#@markdown - **<font color=\"red\">To input your own PDB file, run the cell without writing anything in this field and then select the file.</font>**\n",
"def get_pdb(pdb_code=\"\"):\n",
"    \"\"\"Return the path of the PDB file that the design step should read.\n",
"\n",
"    An empty or None `pdb_code` opens the Colab upload dialog and saves the first\n",
"    uploaded file as tmp.pdb. Any other value is treated as an RCSB PDB ID: the\n",
"    entry is downloaded (unless <pdb_code>.pdb already exists), cleaned with\n",
"    pyrosetta.toolbox.cleanATOM, and the residues whose chain letter is listed in\n",
"    `chains` are written to <pdb_code>_<chains>.clean.pdb.\n",
"\n",
"    Raises ValueError when nothing is uploaded or when no residue belongs to the\n",
"    requested chains. A failed download raises the urllib error.\n",
"    \"\"\"\n",
"    if pdb_code is None or pdb_code == \"\":\n",
"        upload_dict = files.upload()\n",
"        if not upload_dict:\n",
"            raise ValueError(\"No PDB file was uploaded.\")\n",
"        pdb_string = upload_dict[list(upload_dict.keys())[0]]\n",
"        with open(\"tmp.pdb\",\"wb\") as out:\n",
"            out.write(pdb_string)\n",
"        return \"tmp.pdb\"\n",
"    else:\n",
"        # Download without going through a shell; an unknown ID raises an HTTP\n",
"        # error here instead of failing later inside cleanATOM.\n",
"        raw_pdb = f\"{pdb_code}.pdb\"\n",
"        if not os.path.exists(raw_pdb):\n",
"            import urllib.request\n",
"            urllib.request.urlretrieve(f\"https://files.rcsb.org/view/{pdb_code}.pdb\", raw_pdb)\n",
"        import pyrosetta\n",
"        pyrosetta.init()\n",
"        pyrosetta.toolbox.cleanATOM(raw_pdb)\n",
"        clean_pdb=f\"{pdb_code}.clean.pdb\"\n",
"        pose_in=pyrosetta.pose_from_pdb(clean_pdb)\n",
"        chains = [\"A\",\"B\"] #@param {type:\"raw\"}\n",
"        extract_chain=\"\".join(chains)\n",
"        #@markdown - `chains` If you filled in the \"pdbcode\" field above, please enter the name of the Chain you wish to design. ( e.g. [\"A\"] or [\"A\",\"B\",\"C\"])\n",
"        selected=[]\n",
"        available=set()\n",
"        for res in pose_in.residues:\n",
"            num=res.seqpos()\n",
"            chainletter=pose_in.pdb_info().chain(num)\n",
"            available.add(chainletter)\n",
"            if chainletter in chains:\n",
"                selected.append(num)\n",
"        if not selected:\n",
"            raise ValueError(f\"No residues found for chains {chains}; chains present in {clean_pdb}: {sorted(available)}\")\n",
"        if selected == list(range(selected[0], selected[-1] + 1)):\n",
"            # Contiguous selection: copy the residue range directly.\n",
"            pose=pyrosetta.Pose(pose_in, selected[0], selected[-1])\n",
"        else:\n",
"            # Non-contiguous selection (e.g. chains A and C): a first-to-last range\n",
"            # would also copy the chains in between, so slice by residue list.\n",
"            slice_res=pyrosetta.rosetta.utility.vector1_unsigned_long()\n",
"            for num in selected:\n",
"                slice_res.append(num)\n",
"            pose=pyrosetta.Pose()\n",
"            pyrosetta.rosetta.core.pose.pdbslice(pose, pose_in, slice_res)\n",
"        print(f\"Selected {len(selected)} residues ({selected[0]}-{selected[-1]}) from chains {extract_chain}\")\n",
"        pose.dump_pdb(f\"{pdb_code}_{extract_chain}.clean.pdb\")\n",
"        return f\"{pdb_code}_{extract_chain}.clean.pdb\"\n",
"\n",
"#@title Select a PDB file, enter options, and then press _Play_ ▶ button to design.\n",
"\n",
"pdb = get_pdb(pdbcode)\n",
"\n",
"opts=[\"-ignore_unrecognized_res 1\", \"-ex1\", \"-ex2aro\"]\n",
"# ligand input\n",
"ligand = \"\" #@param {type:\"string\"}\n",
"#@markdown - `ligand` ligand/substrate parameter file. ( e.g. default: None )\n",
"#@markdown - If your PDB file includes a ligand, you need a params file for the ligand. Also, the input PDB file must also contain the coordinates of the substrate.\n",
"#@markdown - The parameter file should be created using Rosetta's \"/Rosetta/main/source/scripts/python/public/molfile_to_params.py\".\n",
"#@markdown - See the following link for more information https://www.rosettacommons.org/demos/latest/tutorials/prepare_ligand/prepare_ligand_tutorial\n",
"if ligand:\n",
"    opts.append(\"-extra_res_fa \"+ligand)\n",
"\n",
"symmetry_definition_file = \"\" #@param {type:\"string\"}\n",
"#@markdown - `symmetry_definition_file` If you want to do symmetry design, specify the symmetry definition file for Rosetta Design. ( e.g. default: None )\n",
"#@markdown - Symmetry definition file should be created using Rosetta's \"/Rosetta/main/source/src/apps/public/symmetry/make_symmdef_file.pl\".\n",
"#@markdown - See the following link for more information https://www.rosettacommons.org/docs/latest/application_documentation/utilities/make-symmdef-file\n",
"\n",
"nstruct = 1 #@param {type:\"integer\"}\n",
"#@markdown - `nstruct` Number of structures output ( e.g. 10 )\n",
"prefix = \"autodes\" #@param {type:\"string\"}\n",
"#@markdown - `prefix` Prefix for output PDB files. ( e.g. autodes ) \n",
"prob_cut = 0.8 #@param {type:\"number\"}\n",
"#@markdown - `prob_cut` Probability cutoff. ( e.g. 0.8 )\n",
"scorefxn = \"ref2015\" #@param {type:\"string\"}\n",
"#@markdown - `scorefxn` Rosetta score function. ( e.g. ref2015 )\n",
"keep = [] #@param {type:\"raw\"}\n",
"#@markdown - `keep` Residue numbers for keeping the initial amino-acid type. ( e.g. [] or [\"1A-10A\",\"15B\",\"30B\",\"32C-40C\",\"@D\"] @ represents all residues in the chain )\n",
"unused = [\"C\", \"H\", \"W\"] #@param {type:\"raw\"}\n",
"#@markdown - `unused` Residue types not to be used in design sequences. ( e.g. None or [\"C\", \"H\", \"W\"] )\n",
"include_init_restype = True #@param {type:\"boolean\"}\n",
"#@markdown - `include_init_restype` Include the initial residue type.\n",
"fastdesign_iterations = 1 #@param {type:\"integer\"}\n",
"#@markdown - `fastdesign_iterations` \"standard_repeats\" for Rosetta FastDesign. ( e.g. 3 )\n",
"paramin = None #@param {type:\"raw\"}\n",
"#@markdown - `paramin` NN parameter file. ( e.g. default: None )\n",
"\n",
"# pyrosetta\n",
"# pyrosetta.init takes its options as one command-line style string, so the\n",
"# collected flags are joined with spaces before being passed on.\n",
"import pyrosetta\n",
"pyrosetta.init(\" \".join(opts))\n",
"# Keep the form value `scorefxn` (a name) separate from the ScoreFunction object.\n",
"score_function = pyrosetta.create_score_function(scorefxn)\n",
"\n",
"# gcndesign predictor\n",
"from gcndesign.predictor import Predictor\n",
"predictor = Predictor(param=paramin)\n",
"\n",
"# pdb input\n",
"pose_in = pyrosetta.pose_from_pdb(pdb)\n",
"if symmetry_definition_file:\n",
"    from pyrosetta.rosetta.protocols.symmetry import SetupForSymmetryMover\n",
"    SetupForSymmetryMover(symmetry_definition_file).apply(pose_in)\n",
"\n",
"## Setup TaskFactory\n",
"taskf = pyrosetta.rosetta.core.pack.task.TaskFactory()\n",
"taskf.push_back(pyrosetta.rosetta.core.pack.task.operation.InitializeFromCommandline())\n",
"if include_init_restype:\n",
"    taskf.push_back(pyrosetta.rosetta.core.pack.task.operation.IncludeCurrent())\n",
"\n",
"# resfile task-operation\n",
"# The resfile is built from the GCNdesign prediction for `pdb`; residues listed\n",
"# in `keep` are then passed to fix_native_resfile.\n",
"from gcndesign.resfile import fix_native_resfile, expand_nums\n",
"resfile = predictor.make_resfile(pdb=pdb, prob_cut=prob_cut, unused=unused)\n",
"resfile = fix_native_resfile(resfile, resnums=expand_nums(keep))\n",
"readresfile = pyrosetta.rosetta.core.pack.task.operation.ReadResfile()\n",
"readresfile.set_cached_resfile(resfile)\n",
"\n",
"# add readresfile to taskfactory\n",
"taskf.push_back(readresfile)\n",
"\n",
"## Check TaskFactory Setting\n",
"# Building the task once up front applies the task operations to the input pose\n",
"# before the design loop starts.\n",
"packer_task = taskf.create_task_and_apply_taskoperations(pose_in)\n",
"\n",
"## Setup MoveMapFactory\n",
"movemapf = pyrosetta.rosetta.core.select.movemap.MoveMapFactory()\n",
"movemapf.all_bb(setting=True)\n",
"movemapf.all_chi(setting=True)\n",
"movemapf.all_jumps(setting=True)\n",
"\n",
"## Mover Setting\n",
"fastdesign = pyrosetta.rosetta.protocols.denovo_design.movers.FastDesign(scorefxn_in=score_function, standard_repeats=fastdesign_iterations)\n",
"fastdesign.set_task_factory(taskf)\n",
"fastdesign.set_movemap_factory(movemapf)\n",
"\n",
"## Apply\n",
"# Each design starts from a fresh copy of the input pose and is written to\n",
"# <prefix>-NNN.pdb (numbered from 001).\n",
"for i in range(nstruct):\n",
"    pose = pose_in.clone()\n",
"    fastdesign.apply(pose)\n",
"    file_out = '{:s}-{:03d}.pdb'.format(prefix, i+1)\n",
"    pose.dump_pdb(file_out)\n",
"    print('Wrote ' + file_out)"
],
"metadata": {
"id": "L1UowAt0onSW",
"cellView": "form"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title 2. View 3D Structure {run: \"auto\"}\n",
"import py3Dmol\n",
"import glob\n",
"import matplotlib.pyplot as plt\n",
"\n",
"color = \"rainbow\" #@param [\"rainbow\", \"chain\"]\n",
"show_sidechains = True #@param {type:\"boolean\"}\n",
"show_mainchains = False #@param {type:\"boolean\"}\n",
"\n",
"#pdb_file = glob.glob(pdb_filename)\n",
"pdb_file = \"autodes-001.pdb\" #@param {type:\"string\"}\n",
"def show_pdb(color=\"rainbow\", show_sidechains=False, show_mainchains=False, pdb_path=None):\n",
"    \"\"\"Build a py3Dmol view of a PDB file.\n",
"\n",
"    color: \"rainbow\" colours the cartoon with the 'spectrum' scheme; \"chain\" gives\n",
"        each chain found in the file its own colour.\n",
"    show_sidechains: add stick/sphere styles for side-chain atoms.\n",
"    show_mainchains: add sticks for the C, O, N and CA atoms.\n",
"    pdb_path: file to display; defaults to the `pdb_file` form value.\n",
"\n",
"    Returns the py3Dmol view; call .show() on it to render.\n",
"    \"\"\"\n",
"    if pdb_path is None:\n",
"        pdb_path = pdb_file\n",
"    # Read through a context manager so the file handle is closed.\n",
"    with open(pdb_path,'r') as handle:\n",
"        pdb_text = handle.read()\n",
"    view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js',)\n",
"    view.addModel(pdb_text, 'pdb')\n",
"    if color == \"rainbow\":\n",
"        view.setStyle({'cartoon': {'color':'spectrum'}})\n",
"    elif color == \"chain\":\n",
"        # Take the chain IDs from the coordinate records (column 22) in order of\n",
"        # first appearance, so chains that are not named A, B, C... are coloured too.\n",
"        chain_ids = []\n",
"        for line in pdb_text.splitlines():\n",
"            if line.startswith((\"ATOM\", \"HETATM\")) and len(line) > 21 and line[21] not in chain_ids:\n",
"                chain_ids.append(line[21])\n",
"        palette = [\"lime\",\"cyan\",\"magenta\",\"yellow\",\"salmon\",\"white\",\"blue\",\"orange\"]\n",
"        for index, chain_id in enumerate(chain_ids):\n",
"            # Reuse the palette from the start when there are more chains than colours.\n",
"            view.setStyle({'chain':chain_id},{'cartoon': {'color':palette[index % len(palette)]}})\n",
"    if show_sidechains:\n",
"        BB = ['C','O','N']\n",
"        view.addStyle({'and':[{'resn':[\"GLY\",\"PRO\"],'invert':True},{'atom':BB,'invert':True}]}, {'stick':{'colorscheme':\"WhiteCarbon\",'radius':0.3}})\n",
"        view.addStyle({'and':[{'resn':\"GLY\"},{'atom':'CA'}]}, {'sphere':{'colorscheme':\"WhiteCarbon\",'radius':0.3}})\n",
"        view.addStyle({'and':[{'resn':\"PRO\"},{'atom':['C','O'],'invert':True}]}, {'stick':{'colorscheme':\"WhiteCarbon\",'radius':0.3}})\n",
"    if show_mainchains:\n",
"        BB = ['C','O','N','CA']\n",
"        view.addStyle({'atom':BB},{'stick':{'colorscheme':\"WhiteCarbon\",'radius':0.3}})\n",
"    view.zoomTo()\n",
"    return view\n",
"\n",
"\n",
"show_pdb(color, show_sidechains, show_mainchains).show()"
],
"metadata": {
"id": "qmylOQg1qxfg",
"cellView": "form"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# License\n",
"The source code of ColabGCNdesign is licensed under [MIT](https://github.com/naokob/ColabGCNdesign/blob/main/LICENSE).\n",
"\n",
"This notebook uses the GCNdesign source code and its parameters licensed under [MIT](https://github.com/ShintaroMinami/GCNdesign/blob/master/LICENSE)."
],
"metadata": {
"id": "IA7DnSxTFfqZ"
}
}
]
}