In [None]:
import os
import numpy as np
import pandas as pd
import plotly.express as px
from tqdm import tqdm

In [None]:
# Dataset directory; its name encodes the problem size as DATA_N<nnodes>_S<nsamples>.
wdir = 'DATA_N50_S1000'

# Parse the size tags from the directory's base name so the parsing keeps
# working if wdir is later given with leading path components.
name_tags = os.path.basename(os.path.normpath(wdir)).split('_')
nnodes = int(name_tags[1].replace('N',''))
nsamples = int(name_tags[2].replace('S',''))
print('nnodes:%d, nsamples:%d' %(nnodes, nsamples))

# X collects the arrays read from *.csv files, S those read from *.seq files.
# os.listdir returns entries in arbitrary order, so iterate over the sorted
# names to make the order of X and S reproducible across runs and platforms.
# NOTE(review): sorting only pairs X[k] with S[k] if the .csv and .seq files
# share matching base names -- confirm before relying on index alignment.
X, S = [], []
for fname in sorted(os.listdir(wdir)):
    if fname.endswith('.csv'):
        fcsv = os.path.join(wdir, fname)
        X.append(np.loadtxt(fcsv, delimiter=','))
    elif fname.endswith('.seq'):
        fseq = os.path.join(wdir, fname)
        S.append(np.loadtxt(fseq, delimiter=','))
print('len(X):%d, len(S):%d' % (len(X), len(S)))

In [None]:
# Reference:
# 1. Neumaier, Arnold. "Molecular modeling of proteins and mathematical prediction
# of protein structure." SIAM review 39.3 (1997): 407-460.

def _torsion_angle(a, b, c, d):
    """Return the dihedral angle (radians) defined by four consecutive points.

    The angle is obtained from its sine and cosine via arctan2, so it lies in
    [-pi, pi]; values within 1e-8 of +/-pi are reported as exactly +pi.
    """
    # Difference vectors between consecutive points.
    bond_p = b - a
    bond_r = c - b
    bond_q = d - c
    # Cross products of adjacent difference vectors.
    normal_pr = np.cross(bond_p, bond_r)
    normal_rq = np.cross(bond_r, bond_q)
    # Shared denominator of the sine and cosine expressions.
    denom = np.linalg.norm(normal_pr) * np.linalg.norm(normal_rq)
    cos_w = np.dot(normal_pr, normal_rq) / denom
    sin_w = np.dot(np.cross(bond_q, bond_p), bond_r) * np.linalg.norm(bond_r) / denom
    angle = np.arctan2(sin_w, cos_w)
    # Collapse -pi and +pi (same angle) onto a single representative, +pi.
    return np.pi if np.abs(np.abs(angle) - np.pi) < 1E-8 else angle


# Dihedral angles of every run of four consecutive rows, over all arrays in X.
W = []
for coords in X:
    for start in range(len(coords) - 3):
        pt_a, pt_b, pt_c, pt_d = coords[start:start + 4]
        W.append(_torsion_angle(pt_a, pt_b, pt_c, pt_d))
W = np.array(W)

In [None]:
# Distribution of the dihedral angles in W, converted from radians to degrees.
angle_label = "Torsion Angles (degrees)"
df = {angle_label: np.degrees(W)}
fig = px.histogram(df, x=angle_label)
fig.show()

In [None]:
# B holds, for each array in S, sum(s) / len(s): the fraction of ones,
# assuming each s is a 1-D array of 0/1 values -- TODO confirm.
B = [np.sum(s) / len(s) for s in S]

# histogram of B: the values are scaled by 100 at plot time so they match the
# "Percentage" axis label; B itself is left as a fraction in [0, 1] so any
# later use of B is unaffected.
df = {"Percentage of 1's": 100 * np.asarray(B)}
fig = px.histogram(df, x="Percentage of 1's")
fig.show()