In [14]:
#importing necessary libraries 
from tqdm import tqdm
import numpy as np
import re
%matplotlib inline
import math

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

# trivial parallelization
from joblib import Parallel, delayed
import matplotlib.pyplot as plt

# SOME FUNCTIONS TO GET THE 3D FROM XDS FILES

In [15]:
### Create frame/spot dictionary

# Just round up and down the time, we can extend this to a larger time window
def round_time(raw_spots,dt=0):
    """
    Expand every spot over the integer frames around its rounded time.

    Parameters
    ----------
    raw_spots : 2-D array; the first three columns are read as (x, y, time).
    dt : half-width of the frame window; ``dt=0`` keeps only the rounded frame.

    Returns
    -------
    Array with one ``(x, y, frame)`` row for every frame in
    ``[round(time) - dt, round(time) + dt]``, clipped to ``[0, int(max time)]``.
    """
    # The upper clip is the largest time in the data, truncated to an integer.
    # NOTE(review): because this truncates while each spot is rounded, a spot
    # whose time rounds above this value yields no row when dt=0 - confirm
    # that this is intended.
    last_frame = int(np.max(raw_spots[:, 2]))

    rows = []
    for x, y, t in raw_spots[:, :3]:
        centre = np.round(t)
        first = max(0, centre - dt)
        stop = min(centre + dt + 1, last_frame + 1)
        rows.extend((x, y, frame) for frame in np.arange(first, stop))

    return np.asarray(rows)

def match_spots_frame_expanded(predicted,n_frame):
    '''
    Given the list of spots found by XDS and a specific frame number,
    return the distinct spot centers that belong to that frame.

    Parameters
    ----------
    predicted : 2-D array; the first three columns are read as (x, y, frame).
    n_frame : frame number to select (built-in int or NumPy integer).

    Returns
    -------
    Array of shape (k, 2) holding the distinct (x, y) centers whose frame,
    truncated to an integer, equals ``n_frame``. When nothing matches, the
    result is an empty 1-D array.
    '''
    # Compare as a NumPy array. The previous ``list(...) == n_frame`` only
    # produced an element-wise mask when n_frame happened to be a NumPy scalar;
    # with a built-in int it evaluated to a single False and matched nothing.
    in_frame = predicted[:, 2].astype(int) == n_frame
    centers = predicted[in_frame][:, :2]

    # a set removes possible duplicates
    return np.asarray(list(set(map(tuple, centers))))

def match_spots_frame_expanded_Parallel(predicted,n_frame,dict_spots):
    '''
    Store in ``dict_spots[n_frame]`` the distinct spot centers of one frame.

    Parameters
    ----------
    predicted : 2-D array; the first three columns are read as (x, y, frame).
    n_frame : frame number to select (built-in int or NumPy integer).
    dict_spots : mapping updated in place under the key ``n_frame``.

    Returns
    -------
    Always 0; the result is delivered through ``dict_spots``.

    NOTE(review): the write happens on the object this function receives. If
    the call runs in another process, the caller's dictionary is presumably
    not updated - confirm against the joblib backend in use.
    '''
    # Compare as a NumPy array. The previous ``list(...) == n_frame`` only
    # produced an element-wise mask when n_frame happened to be a NumPy scalar;
    # with a built-in int it matched nothing.
    in_frame = predicted[:, 2].astype(int) == n_frame
    centers = predicted[in_frame][:, :2]

    # a set removes possible duplicates
    dict_spots[n_frame] = np.asarray(list(set(map(tuple, centers))))

    return 0

# Just round up and down the time, we can extend this to a larger time window
def round_time_SPOTXDS(raw_spots,dt=0):
    """
    Expand every spot over the integer frames around its rounded time,
    carrying the last three columns of each input row along.

    Parameters
    ----------
    raw_spots : 2-D array; columns 0-2 are read as (x, y, time) and the last
        three columns are copied through (presumably h, k, l of a SPOT.XDS
        file - confirm against the file layout).
    dt : half-width of the frame window; ``dt=0`` keeps only the rounded frame.

    Returns
    -------
    Object array of shape (n, 4). Each row is ``(x, y, frame, tail)`` where
    ``tail`` is the length-3 slice ``spot[-3:]`` of the originating row.
    """
    max_frame = int(np.max(raw_spots[:, 2]))
    expanded_list = []
    for spot in raw_spots:
        # round the time to the nearest integer
        rounded = np.round(spot[2])
        # lower and upper bound of the frame window, clipped to [0, max_frame]
        min_round = max(0, rounded - dt)
        max_round = min(rounded + dt + 1, max_frame + 1)

        for new_time in np.arange(min_round, max_round, 1):
            expanded_list.append((spot[0], spot[1], new_time, spot[-3:]))

    # Each row mixes three scalars with a length-3 array. Letting NumPy infer
    # the shape of such ragged input raises ValueError on NumPy >= 1.24, so
    # the object array is filled element by element instead.
    expanded = np.empty((len(expanded_list), 4), dtype=object)
    for i, row in enumerate(expanded_list):
        for j, value in enumerate(row):
            expanded[i, j] = value

    return expanded

def get_dict_from_spotlists(spots_reflections_raw):
    """
    Group a list of spots by frame.

    Parameters
    ----------
    spots_reflections_raw : 2-D array; the first three columns are read as
        (x, y, frame).

    Returns
    -------
    defaultdict mapping ``frame - smallest frame`` to the array returned by
    ``match_spots_frame_expanded`` for that frame; missing keys yield ``[]``.
    An input without rows gives an empty dictionary.
    """
    from collections import defaultdict
    dict_spots = defaultdict(lambda: [])

    # An empty list of spots converts to a 1-D array, for which the column
    # indexing below would raise IndexError; there is nothing to group anyway.
    spots_reflections_raw = np.asarray(spots_reflections_raw)
    if spots_reflections_raw.size == 0:
        return dict_spots

    frames = set(spots_reflections_raw[:, 2].astype(int))

    # keys are shifted so that the first frame present becomes 0
    min_framenum = min(frames)

    for fr in tqdm(frames):
        dict_spots[fr-min_framenum] = match_spots_frame_expanded(spots_reflections_raw, fr)

    return dict_spots

def get_dict_from_spotlists_Parallel(spots_reflections_raw, njobs=-1):
    """
    Group a list of spots by frame, processing the frames with joblib.

    Parameters
    ----------
    spots_reflections_raw : 2-D array; the first three columns are read as
        (x, y, frame).
    njobs : number of joblib workers, passed to ``Parallel``.

    Returns
    -------
    defaultdict mapping each frame number to the array returned by
    ``match_spots_frame_expanded`` for that frame; missing keys yield ``[]``.
    An input without rows gives an empty dictionary.

    NOTE(review): keys are the frame numbers themselves, as in the previous
    implementation; ``get_dict_from_spotlists`` shifts them by the smallest
    frame instead - confirm which convention is wanted.
    """
    from collections import defaultdict
    dict_spots = defaultdict(lambda: [])

    spots_reflections_raw = np.asarray(spots_reflections_raw)
    if spots_reflections_raw.size == 0:
        return dict_spots

    # a sorted list keeps frames and results aligned for the zip below
    frames = sorted(set(spots_reflections_raw[:, 2].astype(int)))

    # Collect the per-frame results from the workers' return values. The
    # previous version let every worker write into `dict_spots`; with joblib's
    # process-based backends each worker only modifies its own copy, so the
    # dictionary returned to the caller stayed empty.
    per_frame = Parallel(njobs)(
        delayed(match_spots_frame_expanded)(spots_reflections_raw, fr)
        for fr in tqdm(frames)
    )

    for fr, centers in zip(frames, per_frame):
        dict_spots[fr] = centers

    return dict_spots


def get_list_spots_fromfileXDS_Parallel(filename,timerounded=True,shift_min=True,dt=3, njobs=-1):
    '''
    Load spots from a file and return a dictionary of the indexable ones,
    keyed by frame number.

    Parameters
    ----------
    filename : text file readable by ``np.loadtxt``; columns 0-2 are read as
        (x, y, frame) and the last three columns as the Miller indices.
    timerounded : when True, every spot is expanded over a window of frames
        with ``round_time_SPOTXDS``.
    shift_min : accepted for signature compatibility; not used by this function.
    dt : half-width of the frame window. For ``dt < 1`` the frame column is
        multiplied by 10 first.
    njobs : number of joblib workers forwarded to
        ``get_dict_from_spotlists_Parallel``.

    Returns
    -------
    The dictionary built by ``get_dict_from_spotlists_Parallel`` from the
    spots whose Miller indices are not (0, 0, 0).
    '''

    spots_reflections_raw=np.loadtxt(filename)

    # NOTE(review): get_list_spots_fromfileXDS also scales the window (dt*10)
    # in this case; here only the frame column is scaled - confirm which
    # behaviour is intended.
    if dt<1 :
        spots_reflections_raw[:,2]*=10

    # round only time up and down
    if timerounded:
        spots_reflections_raw=round_time_SPOTXDS(spots_reflections_raw,dt)

    # keep only the indexable spots
    indexable = []
    for sp in spots_reflections_raw:
        # Expanded rows carry the indices as one array in the last slot; raw
        # rows keep them as the last three columns. Reading sp[-1] from a raw
        # row used to fail with "object is not iterable".
        hkl = sp[-1] if timerounded else sp[-3:]
        if tuple(np.asarray(hkl).astype(int)) != (0,0,0):
            indexable.append(sp[:3])

    # forward the caller's njobs (it was previously hard-coded to -1)
    dict_spots_indexable=get_dict_from_spotlists_Parallel(np.asarray(indexable), njobs=njobs)

    return dict_spots_indexable


def get_list_spots_fromfileXDS(filename,timerounded=True,shift_min=True,dt=3):
    '''
    Load spots from a file and return two dictionaries keyed by frame number:
    one for the indexable spots and one for the not-indexable ones.

    Parameters
    ----------
    filename : text file readable by ``np.loadtxt``; columns 0-2 are read as
        (x, y, frame) and the last three columns as the Miller indices.
    timerounded : for ``dt >= 1``, whether every spot is expanded over a
        window of frames with ``round_time_SPOTXDS``. Ignored for ``dt < 1``,
        where the expansion is always applied.
    shift_min : accepted for signature compatibility; not used by this function.
    dt : half-width of the frame window. For ``dt < 1`` both the frame column
        and the window are multiplied by 10.

    Returns
    -------
    (dict_spots_indexable, dict_spots_notindexable), each built by
    ``get_dict_from_spotlists``. A spot is not indexable when its Miller
    indices are (0, 0, 0).
    '''

    spots_reflections_raw=np.loadtxt(filename)

    # tracks whether rows went through round_time_SPOTXDS, which changes their layout
    expanded = True
    if dt<1 :
        spots_reflections_raw[:,2]*=10
        spots_reflections_raw=round_time_SPOTXDS(spots_reflections_raw,dt*10)
    elif timerounded:
        # round only time up and down
        spots_reflections_raw=round_time_SPOTXDS(spots_reflections_raw,dt)
    else:
        expanded = False

    # split indexable and not-indexable spots
    indexable = []
    notindexable = []
    for sp in spots_reflections_raw:
        # Expanded rows carry the indices as one array in the last slot; raw
        # rows keep them as the last three columns. Reading sp[-1] from a raw
        # row used to fail with "object is not iterable".
        hkl = sp[-1] if expanded else sp[-3:]
        if tuple(np.asarray(hkl).astype(int))==(0,0,0):
            notindexable.append(sp[:3])
        else:
            indexable.append(sp[:3])

    dict_spots_indexable=get_dict_from_spotlists(np.asarray(indexable))
    dict_spots_notindexable=get_dict_from_spotlists(np.asarray(notindexable))

    return dict_spots_indexable, dict_spots_notindexable

In [44]:
# REMOVE THE ROTATION!!!
# Copy XPARM.XDS to XPARM_norotation.XDS, rewriting line 2 as fields 1, 2, the
# literal 0.0000, then fields 4, 5, 6 (presumably field 3 is the rotation
# increment - confirm against the XPARM.XDS format). All other lines are
# copied unchanged.
# `os` is imported here because no earlier cell imports it; without this the
# cell raises NameError on a fresh top-to-bottom run.
import os

os.system("awk '{if(NR==2){print $1,$2,"+'"'+"0.0000"+'"'+",$4,$5,$6}else{print $0}}' XPARM.XDS > XPARM_norotation.XDS")

0

# READ INDEXABLE AND NOT INDEXABLE SPOTS from SPOT.XDS

Each row of a SPOT file is expected to contain:

x, y, FRAME, photon counts, h, k, l

If h, k, l == 0, 0, 0, the spot is not a Bragg reflection.

In [45]:
## ddi  = dict: frame key -> (x, y) centers of the indexable spots
## ddni = dict: frame key -> (x, y) centers of the not-indexable spots
(ddi,ddni)=get_list_spots_fromfileXDS("SPOT-Lyso40.XDS",timerounded=True,shift_min=True,dt=1)

100%|█████████████████████████████████████████████████████████████████████| 900/900 [00:19<00:00, 45.44it/s]
100%|█████████████████████████████████████████████████████████████████████| 900/900 [00:13<00:00, 65.06it/s]


## Get those indexable

In [46]:
import os

# Turn the per-frame spots in `ddi` into 3-D coordinates: run the external
# `spot2pdb` tool once per frame and parse the HETATM records it writes.
pdbs=[]
for FR in tqdm(ddi.keys()):
    # spot2pdb input rows: the (x, y) center followed by two columns holding the frame key
    ii=np.full(len(ddi[FR]),FR)
    oo=np.vstack([ii,ii]).T
    rr=np.hstack([ddi[FR],oo])

    np.savetxt("./SPOTtmp.XDS",rr)
    os.system("spot2pdb -r 0.1 -s ./SPOTtmp.XDS -x XPARM_norotation.XDS > /dev/null 2>&1")
    # keep every line of SPOT-notindexed.pdb that does not contain "ATOM"
    os.system("awk '!/ATOM/{{print $0}}' ./SPOT-notindexed.pdb > ./{:06d}.pdb".format(FR))
    os.system("rm ./SPOT-notindexed.pdb ./SPOT-indexed.pdb")

    pdbtmp=[]
    with open("./{:06d}.pdb".format(FR), "r") as file:
        pdb_lines = file.read()
    for line in pdb_lines.split('\n'):
        if len(line)<10:
            continue
        fields = line.split()
        # `fields` is empty for a whitespace-only line, which used to raise IndexError
        if fields and fields[0]=='HETATM':
            # Coordinates are read from fixed character columns. The built-in
            # float replaces np.float, which was removed in NumPy 1.24.
            x=float(line[30:38])
            y=float(line[38:46])
            z=float(line[46:54])
            pdbtmp.append([x,y,z])

    pdbs.append(pdbtmp)

# clean up the temporary per-frame files
os.system("rm 0*.pdb")
os.system("rm ./SPOTtmp.XDS")

100%|█████████████████████████████████████████████████████████████████████| 900/900 [00:28<00:00, 31.57it/s]


0

In [18]:
# Show the kernel's working directory; the shell commands in this notebook use paths relative to it.
!pwd

/home/piero/work/3D-reciprocal/Lyso_40


### VISUALIZE THE EVOLUTION OF THE SPOTS

In [19]:
import ase
# One ase.Atoms object per frame; every spot is labelled as a "C" atom at its 3-D position.
traj=[]
for fr in pdbs:
    traj.append(ase.Atoms(np.full(len(fr),"C"),positions=fr))

In [20]:
from ase.visualize import view

# Open the list of per-frame Atoms objects in the ASE viewer.
view(traj)

<subprocess.Popen at 0x7f23c163ac50>

### SAVE THE SPOTS

In [47]:
# np.save converts its argument to an array first. The per-frame lists may have
# different lengths, and NumPy >= 1.24 refuses to build an array from such
# ragged input unless it is an explicit object array. Store one list per frame
# in a 1-D object array (load it back with allow_pickle=True).
pdbs_by_frame = np.empty(len(pdbs), dtype=object)
for frame_idx, frame_xyz in enumerate(pdbs):
    pdbs_by_frame[frame_idx] = frame_xyz
np.save("indexable-COLSPOT.npy", pdbs_by_frame)

## Get those not indexable

In [48]:
import os

# Turn the per-frame spots in `ddni` into 3-D coordinates: run the external
# `spot2pdb` tool once per frame and parse the HETATM records it writes.
pdbs=[]
for FR in tqdm(ddni.keys()):
    # spot2pdb input rows: the (x, y) center followed by two columns holding the frame key
    ii=np.full(len(ddni[FR]),FR)
    oo=np.vstack([ii,ii]).T
    rr=np.hstack([ddni[FR],oo])

    np.savetxt("./SPOTtmp.XDS",rr)
    os.system("spot2pdb -r 0.1 -s ./SPOTtmp.XDS -x XPARM_norotation.XDS > /dev/null 2>&1")
    # keep every line of SPOT-notindexed.pdb that does not contain "ATOM"
    os.system("awk '!/ATOM/{{print $0}}' ./SPOT-notindexed.pdb > ./{:06d}.pdb".format(FR))
    os.system("rm ./SPOT-notindexed.pdb ./SPOT-indexed.pdb")

    pdbtmp=[]
    with open("./{:06d}.pdb".format(FR), "r") as file:
        pdb_lines = file.read()
    for line in pdb_lines.split('\n'):
        if len(line)<10:
            continue
        fields = line.split()
        # `fields` is empty for a whitespace-only line, which used to raise IndexError
        if fields and fields[0]=='HETATM':
            # Coordinates are read from fixed character columns. The built-in
            # float replaces np.float, which was removed in NumPy 1.24.
            x=float(line[30:38])
            y=float(line[38:46])
            z=float(line[46:54])
            pdbtmp.append([x,y,z])

    pdbs.append(pdbtmp)

# clean up the temporary per-frame files
os.system("rm 0*.pdb")


100%|█████████████████████████████████████████████████████████████████████| 900/900 [00:27<00:00, 32.99it/s]


0

### VISUALIZE THE EVOLUTION OF THE SPOTS

In [23]:
import ase
# One ase.Atoms object per frame; every spot is labelled as an "S" atom at its 3-D position.
traj=[]
for fr in pdbs:
    traj.append(ase.Atoms(np.full(len(fr),"S"),positions=fr))

In [24]:
from ase.visualize import view

# Open the list of per-frame Atoms objects in the ASE viewer.
view(traj)

<subprocess.Popen at 0x7f23c51ba550>

### SAVE THE SPOTS

In [49]:
# np.save converts its argument to an array first. The per-frame lists may have
# different lengths, and NumPy >= 1.24 refuses to build an array from such
# ragged input unless it is an explicit object array. Store one list per frame
# in a 1-D object array (load it back with allow_pickle=True).
pdbs_by_frame = np.empty(len(pdbs), dtype=object)
for frame_idx, frame_xyz in enumerate(pdbs):
    pdbs_by_frame[frame_idx] = frame_xyz
np.save("not-indexable-COLSPOT.npy", pdbs_by_frame)

# GET IDEAL SPOTS FROM XDS-ASCII

In [50]:
def get_list_spots_fromfile(filename,timerounded=False,shift_min=True,dt=3):
    '''
    Read spot centers from a text file and group them by frame.

    Parameters
    ----------
    filename : text file readable by ``np.loadtxt``; the first three columns
        are read as (x, y, frame).
    timerounded : for ``dt >= 1``, whether each spot is expanded over a window
        of frames with ``round_time``. For ``dt < 1`` the expansion is always
        applied.
    shift_min : when True, keys are shifted so the smallest frame becomes 0.
    dt : half-width of the frame window. For ``dt < 1`` both the frame column
        and the window are multiplied by 10.

    Returns
    -------
    defaultdict mapping a frame key to the array returned by
    ``match_spots_frame_expanded``; missing keys yield ``[]``.
    '''
    from collections import defaultdict

    spots = np.loadtxt(filename)

    if dt < 1:
        spots[:, 2] *= 10
        spots = round_time(spots, dt * 10)
    elif timerounded:
        # round only time up and down
        spots = round_time(spots, dt)

    # unique integer frames present in the data
    frame_ids = set(spots[:, 2].astype(int))
    offset = min(frame_ids) if shift_min else 0

    spots_by_frame = defaultdict(lambda: [])
    for frame in tqdm(frame_ids):
        spots_by_frame[frame - offset] = match_spots_frame_expanded(spots, frame)

    return spots_by_frame

In [52]:
# Write fields 6, 7 and 8 of every XDS_ASCII.HKL line that contains no "!" to ./preds_XDS
# (presumably the x, y, frame columns read by get_list_spots_fromfile - confirm against the HKL header).
os.system("awk '!/!/{print $6,$7,$8}' XDS_ASCII.HKL > ./preds_XDS")

# choose here a proper time window
# NOTE: this rebinds `ddi`, replacing the dictionary built earlier from SPOT-Lyso40.XDS.
ddi=get_list_spots_fromfile("preds_XDS",timerounded=True,shift_min=True,dt=1)

100%|█████████████████████████████████████████████████████████████████████| 900/900 [01:15<00:00, 11.90it/s]


In [53]:
# Delete the temporary ./preds_XDS file written by the awk command above.
os.system("rm ./preds_XDS")

0

## TURN THEM INTO 3D

In [54]:
import os

# Turn the per-frame spots in `ddi` into 3-D coordinates: run the external
# `spot2pdb` tool once per frame and parse the HETATM records it writes.
pdbs=[]
for FR in tqdm(ddi.keys()):
    # spot2pdb input rows: the (x, y) center followed by two columns holding the frame key
    ii=np.full(len(ddi[FR]),FR)
    oo=np.vstack([ii,ii]).T
    rr=np.hstack([ddi[FR],oo])

    np.savetxt("./SPOTtmp.XDS",rr)
    os.system("spot2pdb -r 0.1 -s ./SPOTtmp.XDS -x XPARM_norotation.XDS > /dev/null 2>&1")
    # keep every line of SPOT-notindexed.pdb that does not contain "ATOM"
    os.system("awk '!/ATOM/{{print $0}}' ./SPOT-notindexed.pdb > ./{:06d}.pdb".format(FR))
    os.system("rm ./SPOT-notindexed.pdb ./SPOT-indexed.pdb")

    pdbtmp=[]
    with open("./{:06d}.pdb".format(FR), "r") as file:
        pdb_lines = file.read()
    for line in pdb_lines.split('\n'):
        if len(line)<10:
            continue
        fields = line.split()
        # `fields` is empty for a whitespace-only line, which used to raise IndexError
        if fields and fields[0]=='HETATM':
            # Coordinates are read from fixed character columns. The built-in
            # float replaces np.float, which was removed in NumPy 1.24.
            x=float(line[30:38])
            y=float(line[38:46])
            z=float(line[46:54])
            pdbtmp.append([x,y,z])

    pdbs.append(pdbtmp)

# clean up the temporary per-frame files
os.system("rm 0*.pdb")


100%|█████████████████████████████████████████████████████████████████████| 900/900 [00:33<00:00, 27.10it/s]


0

### VISUALIZE THE EVOLUTION OF THE SPOTS

In [56]:
import ase
# One ase.Atoms object per frame; every spot is labelled as an "N" atom at its 3-D position.
traj=[]
for fr in pdbs:
    traj.append(ase.Atoms(np.full(len(fr),"N"),positions=fr))

In [57]:
from ase.visualize import view

# Open the list of per-frame Atoms objects in the ASE viewer.
view(traj)

<subprocess.Popen at 0x7f23c5587350>

## Save the points

In [55]:
# np.save converts its argument to an array first. The per-frame lists may have
# different lengths, and NumPy >= 1.24 refuses to build an array from such
# ragged input unless it is an explicit object array. Store one list per frame
# in a 1-D object array (load it back with allow_pickle=True).
pdbs_by_frame = np.empty(len(pdbs), dtype=object)
for frame_idx, frame_xyz in enumerate(pdbs):
    pdbs_by_frame[frame_idx] = frame_xyz
np.save("idealBraggs-XDS.npy", pdbs_by_frame)

In [2]:
import numpy as np
from matplotlib.animation import FuncAnimation
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
%matplotlib qt

In [34]:
# 3-D scatter plot of the spots of one frame of `pdbs`.
# NOTE(review): 450 is a hard-coded frame index - confirm it exists for the loaded data.
axes = plt.axes(projection = '3d')
xyz=np.asarray(pdbs[450])
axes.scatter3D(xyz[:,0], xyz[:,1], xyz[:,2])
plt.show()

libGL error: MESA-LOADER: failed to open radeonsi: /home/piero/.pyenv/versions/miniconda3-latest/envs/spots2reciprocal/lib/python3.7/site-packages/zmq/backend/cython/../../../../.././libstdc++.so.6: version `GLIBCXX_3.4.30' not found (required by /usr/lib/dri/radeonsi_dri.so) (search paths /usr/lib/dri, suffix _dri)
libGL error: failed to load driver: radeonsi
libGL error: MESA-LOADER: failed to open radeonsi: /home/piero/.pyenv/versions/miniconda3-latest/envs/spots2reciprocal/lib/python3.7/site-packages/zmq/backend/cython/../../../../.././libstdc++.so.6: version `GLIBCXX_3.4.30' not found (required by /usr/lib/dri/radeonsi_dri.so) (search paths /usr/lib/dri, suffix _dri)
libGL error: failed to load driver: radeonsi
libGL error: MESA-LOADER: failed to open swrast: /home/piero/.pyenv/versions/miniconda3-latest/envs/spots2reciprocal/lib/python3.7/site-packages/zmq/backend/cython/../../../../.././libstdc++.so.6: version `GLIBCXX_3.4.30' not found (required by /usr/lib/dri/swrast_dri.so) (

In [35]:
# Reload the saved spots and plot the same frame from disk.
# allow_pickle=True unpickles Python objects stored in the file, so only load files you trust.
ideals = np.load("idealBraggs-XDS.npy", allow_pickle=True)
axes = plt.axes(projection = '3d')
xyz=np.asarray(ideals[450])
axes.scatter3D(xyz[:,0], xyz[:,1], xyz[:,2])
plt.show()

In [31]:
import ase
# One ase.Atoms object per frame of the reloaded data; every spot is labelled as an "N" atom.
traj=[]
for fr in ideals: 
    traj.append(ase.Atoms(np.full(len(fr),"N"),positions=fr))

In [32]:
# Open the trajectory in the ASE viewer (`view` comes from ase.visualize, imported in an earlier cell).
view(traj)

<subprocess.Popen at 0x7fc00f9deb10>

In [30]:
# Sanity check: frame 450 reloaded from idealBraggs-XDS.npy equals the in-memory frame.
ideals[450] == pdbs[450]

True