# Load packages

In [None]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import plotly.express as px
import matplotlib.pylab as plt
from scipy.sparse import coo_matrix
from codes.dmdgpOPT import read_nmr

# View NMR

In [None]:
# Read a single instance file and plot the sparsity pattern of the matrix
# assembled from its entries.
numNodes, numEdges, D = read_nmr('DATA_D4_N50_S10/pid_0000.nmr')
print('numNodes:', numNodes, '\nnumEdges:', numEdges)

# D is unpacked as (row, col, value) triples; transpose it into three
# parallel sequences, which is the layout coo_matrix expects.
ai, aj, aa = zip(*D)
A = coo_matrix((aa, (ai, aj)), shape=(numNodes, numNodes))

# Trailing semicolon suppresses the notebook's echo of the returned artist.
plt.spy(A, markersize=1);

In [None]:
wdir = 'DATA_D4_N50_S10'

# The directory name encodes the data set parameters as ..._N<nodes>_S<samples>.
numNodes = int(wdir.split('_N')[1].split('_')[0])
numSamples = int(wdir.split('_S')[1].split('_')[0])
print('numNodes:%d, numSamples:%d' %(numNodes, numSamples))

# X collects the arrays loaded from the .csv files, S those from the .seq files.
X, S = [], []
# os.listdir() returns entries in arbitrary order; sort them so the load order
# is reproducible and matches the sorted listing used for the .nmr files.
for fname in sorted(os.listdir(wdir)):
    if fname.endswith('.csv'):
        fcsv = os.path.join(wdir, fname)
        X.append(np.loadtxt(fcsv, delimiter=','))
    elif fname.endswith('.seq'):
        fseq = os.path.join(wdir, fname)
        S.append(np.loadtxt(fseq, delimiter=','))
print('len(X):%d, len(S):%d' % (len(X), len(S)))

In [None]:
# Reference:
# 1. Neumaier, Arnold. "Molecular modeling of proteins and mathematical prediction
# of protein structure." SIAM review 39.3 (1997): 407-460.


def _torsion_angle(x0, x1, x2, x3):
    """Return the signed torsion angle (radians) of four consecutive points.

    With p = x1 - x0, r = x2 - x1 and q = x3 - x2, the angle is recovered via
    arctan2 from
        cos(w) = (p x r).(r x q) / (|p x r| |r x q|)
        sin(w) = (q x p).r |r|   / (|p x r| |r x q|)
    A result within 1E-8 of +/-pi is returned as exactly +pi, so that both
    ends of the range map to a single value.

    Assumes each argument is a 3-component coordinate vector -- TODO confirm.
    """
    p = x1 - x0
    r = x2 - x1
    q = x3 - x2
    pxr = np.cross(p, r)
    rxq = np.cross(r, q)
    scale = np.linalg.norm(pxr) * np.linalg.norm(rxq)
    cos_w = np.dot(pxr, rxq) / scale
    sin_w = np.dot(np.cross(q, p), r) * np.linalg.norm(r) / scale
    w = np.arctan2(sin_w, cos_w)
    return np.pi if np.abs(np.abs(w) - np.pi) < 1E-8 else w


# dihedral angles: one per window of four consecutive rows, over every x in X
W = np.array([
    _torsion_angle(x[i - 3], x[i - 2], x[i - 1], x[i])
    for x in X
    for i in range(3, len(x))
])

In [None]:
# Histogram of the torsion angles W, converted from radians to degrees.
angle_label = 'Torsion Angles (degrees)'
df = {angle_label: np.degrees(W)}
fig = px.histogram(df, x=angle_label)
fig.show()

In [None]:
# Mean of each sequence in S, i.e. the share of ones if the entries are 0/1.
# NOTE(review): the values are fractions (sum / length), not scaled by 100,
# even though the axis label below says "Percentage".
B = [np.sum(s) / len(s) for s in S]

# histogram of B
ones_label = "Percentage of 1's"
df = {ones_label: B}
fig = px.histogram(df, x=ones_label)
fig.show()

# Estimate instance difficulty

In [None]:
import os
import pandas as pd


def count_preds(numNodes: int, D: list):
    """Count, for every node, the edges that link it to a lower-indexed node.

    Args:
        numNodes: length of the returned counter array.
        D: iterable of (i, j, dij) triples; i and j are node indices and the
            third entry is not used here.

    Returns:
        Integer array K of length numNodes, where K[v] is the number of
        triples whose larger endpoint index is v.
    """
    counts = np.zeros((numNodes,), dtype=int)
    for u, v, _ in D:
        # Each edge is credited to its higher-indexed endpoint.
        counts[max(u, v)] += 1
    return counts
    

# For every .nmr instance, record how many nodes have fewer than 4
# lower-indexed neighbours (column 'num_K_LT_4'); 4 is subtracted from the raw
# count before it is stored.
file_names, low_pred_counts = [], []
wdir = 'DATA_D4_N50_S10'
for fn in sorted(os.listdir(wdir)):
    if fn.endswith('.nmr'):
        numNodes, numEdges, D = read_nmr(os.path.join(wdir, fn))
        K = count_preds(numNodes, D)
        file_names.append(fn)
        low_pred_counts.append(np.sum(K < 4) - 4)

df = pd.DataFrame({'fn': file_names, 'num_K_LT_4': low_pred_counts})
# Reports the node count of the last instance that was read.
print('numNodes:', numNodes)
df

In [None]:
# Build a table with one row per 'tElapsed' line found in the run logs.
columns = ('fn', 'sample', 'tElapsed', 'max_d')
df = {name: [] for name in columns}
for fn in ('run1.log', 'run2.log', 'run3.log'):
    with open(fn, 'r') as log:
        for line in log:
            # In every matched line the value is the last whitespace-separated token.
            if 'sample' in line:
                sample = int(line.split()[-1])
            if 'max_abs(d)' in line:
                max_d = float(line.split()[-1])
            if 'tElapsed' in line:
                # A 'tElapsed' line closes the record. `sample` and `max_d`
                # are whatever was parsed most recently; they are not reset
                # between records or between files.
                tElapsed = float(line.split()[-1])
                for name, value in zip(columns, (fn, sample, tElapsed, max_d)):
                    df[name].append(value)

df = pd.DataFrame(df)
# Last expression of the cell: the notebook displays the sorted copy.
df.sort_values(by='sample')

In [None]:
# Parse runA.log into one row per 'tElapsed' line, then report how many rows
# have max_d below 1E-3.
columns = ('fn', 'sample', 'tElapsed', 'max_d')
df = {name: [] for name in columns}
for fn in ('runA.log',):
    with open(fn, 'r') as log:
        for line in log:
            # In every matched line the value is the last whitespace-separated token.
            if 'sample' in line:
                sample = int(line.split()[-1])
            if 'max_abs(d)' in line:
                max_d = float(line.split()[-1])
            if 'tElapsed' in line:
                # A 'tElapsed' line closes the record. `sample` and `max_d`
                # are whatever was parsed most recently; they are not reset
                # between records.
                tElapsed = float(line.split()[-1])
                for name, value in zip(columns, (fn, sample, tElapsed, max_d)):
                    df[name].append(value)

df = pd.DataFrame(df)
num_solved = np.sum(df['max_d'] < 1E-3)
print('numSolved: ', num_solved, 'out of', len(df))