In [None]:
from foldingdiff.plotting import *
from foldingdiff.tokenizer import *
from foldingdiff.bpe import *
import pickle
import os
# Work from the parent of the directory the kernel started in.
# The starting directory is remembered on first execution so that re-running
# this cell does not climb one more level each time (a bare
# os.chdir(Path.cwd().parents[0]) is not idempotent).
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = Path.cwd()
os.chdir(NOTEBOOK_START_DIR.parents[0])

In [None]:
# Run directory used by the cells below: pickled BPE checkpoints are read from
# it and the rendered PNGs are written into it.
base_dir = "ckpts/1744875790.3072364"

In [None]:
!pip install --upgrade google-api-python-client google-auth-httplib2 google-auth-oauthlib


In [None]:
def visualize_bonds(self, i1, length, output_path, xlim=None, ylim=None, zlim=None):
    """Plot the backbone window covering bonds ``i1`` .. ``i1 + length - 1``.

    Written as a free function that takes the tokenizer-like object as
    ``self``; it reads ``self.compute_coords``, ``self.bond_to_token`` and
    ``self.fname``.

    Args:
        self: object providing the attributes listed above.
        i1: index of the first bond in the window.
        length: number of bonds in the window.
        output_path: forwarded to ``plot_backbone`` as the output location.
        xlim, ylim, zlim: optional axis limits forwarded to ``plot_backbone``.

    Returns:
        Whatever ``plot_backbone`` returns.
    """
    backbone_coords = self.compute_coords(i1, length)

    # One atom label per point of the window: `length` bonds join
    # `length + 1` atoms, and the label cycles with period 3 starting from
    # the phase of the first bond.
    phase = i1 % 3
    atom_labels = []
    for step_idx in range(length + 1):
        atom_labels.append(Tokenizer.ATOM_TYPES[(phase + step_idx) % 3])

    # Collect the token entries whose key lies inside the window, in
    # ascending key order, re-basing the first field so it is relative to i1.
    window_end = i1 + length
    shifted_tokens = []
    for bond_idx in sorted(self.bond_to_token):
        if bond_idx < i1 or bond_idx >= window_end:
            continue
        entry = self.bond_to_token[bond_idx]
        shifted_tokens.append((entry[0] - i1, entry[1], entry[2]))

    plot_title = f"{Path(self.fname).stem} bonds {i1}-{i1+length-1}"
    return plot_backbone(
        backbone_coords,
        output_path,
        atom_labels,
        title=plot_title,
        vis_dihedral=False,
        zoom_factor=0.5,
        tokens=shifted_tokens,
        xlim=xlim,
        ylim=ylim,
        zlim=zlim,
    )

In [None]:
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

# Service-account credentials, restricted to the Drive "drive.file" scope.
# The key file is looked up relative to the current working directory.
SCOPES = ['https://www.googleapis.com/auth/drive.file']
creds = service_account.Credentials.from_service_account_file(
    'service-account.json',
    scopes=SCOPES,
)

# Drive v3 API client authenticated with those credentials.
service = build(
    'drive',
    'v3',
    credentials=creds,
)


In [None]:
from foldingdiff.datasets import FullCathCanonicalCoordsDataset

# Build the CATH dataset with caching disabled; `toy=10` and `debug=True`
# are passed through as-is (presumably a small debugging subset — confirm
# against FullCathCanonicalCoordsDataset).
dataset = FullCathCanonicalCoordsDataset(
    pdbs="cath",
    use_cache=False,
    toy=10,
    debug=True,
    zero_center=False,
    pad=512,
    secondary=False,
)

In [None]:
def str2dict(v):
    """Parse a bin specification such as ``"1-100:2-20:5-10"`` into a dict.

    The specification is a ``:``-separated list of ``<int>-<int>`` pairs.
    Each pair contributes one ``{first: second}`` entry; if the same first
    number appears more than once, the last pair wins.

    Args:
        v: the specification string.

    Returns:
        dict mapping ``int`` to ``int``, in order of first appearance.

    Raises:
        ValueError: if ``v`` is not of the form ``a-b[:c-d...]``.
    """
    # Raw string: '\d' in a normal literal is an invalid escape sequence.
    if not re.match(r'\d+-\d+(?::\d+-\d+)*$', v):
        # A bare `raise` here has no active exception to re-raise and would
        # surface as an unrelated RuntimeError; report the real problem.
        raise ValueError(
            f"invalid bin specification {v!r}; expected 'a-b[:c-d...]' with integer a, b"
        )
    return {int(a): int(b) for a, b in re.findall(r'(\d+)-(\d+)', v)}


# Build and initialize one BPE per binning strategy from the same bin spec.
# After the loop, `bpe` refers to the last strategy ('histogram').
bin_str = "1-100:2-20:5-10"
for strat in ['uniform','histogram']:
    bpe = BPE(dataset.structures, str2dict(bin_str), bin_strategy=strat)
    # The strategy is part of the file name: with a path that depends only on
    # `bin_str`, both iterations would target the same file and the second
    # would presumably overwrite the first (assumes `initialize` writes to
    # `path` — confirm against BPE.initialize).
    bpe.initialize(path=f'plots/hist_{strat}_{bin_str}.png')

In [None]:
# Render bond-window plots for tokenizer `i` of each pickled BPE checkpoint.
# The value returned by the first render of each window is cached in `lims`
# and passed back as xlim/ylim/zlim for later checkpoints, so the same window
# is drawn with the same axis limits across iterations.
i = 3
lims = []
step = 100  # number of positions covered by one window
# for _iter in [0] + list(range(10,100,10)) + list(range(1000,10000,1000)):
for _iter in [0]:
    if _iter not in [0, 6000]: continue # delete
    # SECURITY: pickle.load can execute arbitrary code; only load checkpoints
    # produced by a trusted run. The context manager closes the file handle,
    # which a bare pickle.load(open(...)) never does.
    with open(f'{base_dir}/bpe_iter={_iter}.pkl', 'rb') as ckpt_file:
        bpe = pickle.load(ckpt_file)
    t = bpe.tokenizers[i]
    if len(lims) == 0:
        # One slot per window, filled in lazily on first render.
        lims = [None for _ in range(0, 3*t.n-1, step)]
    for idx, start in enumerate(range(0, 3*t.n-1, step)):
        if idx > 0: continue # delete
        end = start+step
        # Map both window edges through t.token_pos; the final window is
        # clamped to 3*t.n-1 instead of being looked up.
        start = t.token_pos[start]
        end = t.token_pos[end] if end < 3*t.n else 3*t.n-1
        print(start, end, 3*t.n-1)
        l = end-start
        path = os.path.join(base_dir, f'{i}_iter={_iter}_{start}-{end}.png')
        if lims[idx] is None:
            res = visualize_bonds(t, start, l, path, )
            lims[idx] = tuple(res)
        else:
            visualize_bonds(t, start, l, path, xlim=lims[idx][0], ylim=lims[idx][1], zlim=lims[idx][2])
        print(path)
        # # 3) Prepare file metadata & media
        # file_metadata = {
        #     'name': Path(path).name,
        #     'parents': ['1NOxavUomer-WMYlaUG9olQBjEX9r7cx8']  # optional: specify folder
        # }
        # media = MediaFileUpload(path)
        # # 4) Upload
        # file = service.files().create(
        #     body=file_metadata,
        #     media_body=media,
        #     fields='id'
        # ).execute()
        # print('Uploaded file ID:', file.get('id'))

In [None]:
# Draw the token tree of every tokenizer in the currently loaded checkpoint.
# NOTE(review): relies on `bpe` and `_iter` left over from the checkpoint loop
# in the previous cell.
# The range is bounded by the number of tokenizers: a plain range(100000)
# indexes past the end and always terminates with an exception. The 100000
# cap is kept as an upper limit. Assumes bpe.tokenizers supports len() and
# 0-based integer indexing — confirm.
for i in range(min(100000, len(bpe.tokenizers))):
    bpe.tokenizers[i].bond_to_token.tree.visualize(os.path.join(base_dir, f'tokens_{i}_iter={_iter}.png'), horizontal_gap=0.5, font_size=6)