Skip to content

Commit

Permalink
Add bioplib speed comparison
Browse files Browse the repository at this point in the history
Co-authored-by: AndrewCRMartin <andrew@bioinf.org.uk>
  • Loading branch information
samirelanduk and AndrewCRMartin committed Aug 9, 2019
1 parent 73ce4b7 commit 68d2ca8
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 12 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ profiles
*.png
*.json
*.svg
*.eps
readapdb
32 changes: 32 additions & 0 deletions scripts/readapdb.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
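This code was written by Professor Andrew Martin on 2019-08-08.
To build (the alias assumes bioplib's headers and libraries live under
$HOME/include and $HOME/lib):
alias cc='/usr/bin/cc -I$HOME/include -L$HOME/lib -ansi -pedantic -Wall'
cc -o readapdb readapdb.c -lbiop -lgen -lm -lxml2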
*/

#include <stdio.h>
#include "bioplib/pdb.h"

/*
   Benchmark driver: opens the PDB file named by argv[1], reads it with
   blReadPDB() and then calls blAllocPDBStructure() on the result. The
   program produces no output on success; it exists so that its total
   run time can be measured from outside.

   Returns 0 on success. Returns 1 (with a message on stderr) if no file
   name was given, the file could not be opened, or either bioplib call
   returned NULL, so that a calling script can tell a failed run from a
   successful one instead of recording a meaningless timing.
*/
int main(int argc, char **argv)
{
   FILE      *fp;
   PDB       *pdb;
   PDBSTRUCT *pdbs;
   int       natoms;

   /* Without this check argv[1] is NULL and fopen(NULL, ...) is undefined */
   if(argc < 2)
   {
      fprintf(stderr, "Usage: readapdb file.pdb\n");
      return(1);
   }

   if((fp=fopen(argv[1], "r"))==NULL)
   {
      fprintf(stderr, "readapdb: unable to open %s\n", argv[1]);
      return(1);
   }

   pdb = blReadPDB(fp, &natoms);

   /* The file handle is no longer needed once the read call has returned */
   fclose(fp);

   if(pdb==NULL)
   {
      fprintf(stderr, "readapdb: no atoms read from %s\n", argv[1]);
      return(1);
   }

   pdbs = blAllocPDBStructure(pdb);
   if(pdbs==NULL)
   {
      fprintf(stderr, "readapdb: unable to build structure for %s\n",
              argv[1]);
      return(1);
   }

   /* pdb and pdbs are deliberately not freed: the process exits
      immediately and freeing would only add time to the benchmark. */
   return(0);
}

33 changes: 21 additions & 12 deletions scripts/speed.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import requests
import random
import subprocess
import sys
import os
import json
Expand Down Expand Up @@ -32,7 +33,7 @@ def get_string(code):
codes = response.text.split()

data = []
while len (data) != 1000:
while len (data) != 10:
code = random.choice(codes)
d = {"code": code}
print(len(data) + 1, code)
Expand Down Expand Up @@ -61,13 +62,26 @@ def get_string(code):
d["biopython"] = delta.total_seconds()
except:
d["biopython"] = None

try:
with open("temp.pdb", "w") as f: f.write(string)
start = datetime.now()
subprocess.check_output("scripts/readapdb temp.pdb", shell=True)
end = datetime.now()
delta = end - start
d["bioplib"] = delta.total_seconds()
except:
d["bioplib"] = None

else:
d["biopython"] = None
d["bioplib"] = None

data.append(d)
codes.remove(code)

if os.path.exists("temp.pdb"):
os.remove("temp.pdb")
with open("scripts/speed.json", "w") as f:
json.dump(data, f)

Expand All @@ -81,21 +95,17 @@ def get_string(code):
# Build parallel (atom count, timing) series, one pair of tuples per parser.
# A record from `data` is kept only when its timing for that parser is truthy
# (failed library runs are stored as None), it has exactly one model, and the
# timing value is below 20 (presumably seconds; the library timings are
# recorded via total_seconds()).
# NOTE(review): if no record passes a filter, zip(*[]) yields nothing and the
# two-name unpacking raises ValueError; confirm the sample size is large
# enough for every series to be non-empty.
cifs_x, cifs_y = zip(*[[d["atoms"], d[".cif"]] for d in data if d[".cif"] and d["models"] == 1 and d[".cif"] < 20])
mmtfs_x, mmtfs_y = zip(*[[d["atoms"], d[".mmtf"]] for d in data if d[".mmtf"] and d["models"] == 1 and d[".mmtf"] < 20])
bios_x, bios_y = zip(*[[d["atoms"], d["biopython"]] for d in data if d["biopython"] and d["models"] == 1 and d["biopython"] < 20])
bioplib_x, bioplib_y = zip(*[[d["atoms"], d["bioplib"]] for d in data if d["bioplib"] and d["models"] == 1 and d["bioplib"] < 20])

def best_fit(X, Y, label):
    """Fit the line ``y = a + b*x`` to the points by ordinary least squares.

    The fitted equation is printed once, prefixed with *label*, and the
    coefficients are returned.

    Args:
        X: Sequence of x values.
        Y: Sequence of y values, the same length as X.
        label: Name shown in the printed summary line.

    Returns:
        A tuple ``(a, b)`` holding the intercept and the slope.

    Raises:
        ValueError: If X is empty, X and Y differ in length, or all x
            values are identical (the slope is then undefined).
    """
    n = len(X)
    if n == 0 or n != len(Y):
        raise ValueError(
            "best_fit needs equally sized, non-empty X and Y "
            "(got {} and {} values for {})".format(n, len(Y), label)
        )

    xbar = sum(X) / n
    ybar = sum(Y) / n

    # Closed-form least-squares sums: covariance-like numerator over the
    # variance-like denominator of X.
    numer = sum(xi * yi for xi, yi in zip(X, Y)) - n * xbar * ybar
    denum = sum(xi ** 2 for xi in X) - n * xbar ** 2

    # A zero denominator means X has no spread, so no unique line exists.
    if denum == 0:
        raise ValueError(
            "best_fit cannot fit {}: all x values are identical".format(label)
        )

    b = numer / denum
    a = ybar - b * xbar

    print("{} best fit line:\ny = {:.6f} + {:.6f}x".format(label, a, b))
    return a, b

plt.xscale("log")
Expand All @@ -104,6 +114,7 @@ def best_fit(X, Y, label):
best_fit(pdbs_x, pdbs_y, "pdb")
best_fit(mmtfs_x, mmtfs_y, "mmtf")
best_fit(bios_x, bios_y, "biopython")
best_fit(bioplib_x, bioplib_y, "bioplib")
plt.scatter(cifs_x, cifs_y, s=6, c="#FD7272", label=".cif", alpha=0.3, linewidths=0)
plt.scatter(pdbs_x, pdbs_y, s=6, c="#58B19F", label=".pdb", alpha=0.3, linewidths=0)
plt.scatter(mmtfs_x, mmtfs_y, s=6, c="#182C61", label=".mmtf", alpha=0.3, linewidths=0)
Expand All @@ -112,22 +123,20 @@ def best_fit(X, Y, label):
plt.xlim([100, 1000000])
plt.ylim([0.001, 100])
plt.legend(loc=2)
plt.savefig("scripts/format-speed.svg", dpi=1000)
plt.savefig("scripts/format-speed.png", dpi=1000)
plt.clf()


# Second figure: .pdb parse time against atom count for each library
# (atomium, biopython, bioplib), drawn as scatter series on log-log axes.
# `plt` is presumably matplotlib.pyplot; its import is not visible here.
plt.xscale("log")
plt.yscale("log")
plt.scatter(pdbs_x, pdbs_y, s=6, c="#58B19F", label=".pdb (atomium)", alpha=0.5, linewidths=0)
plt.scatter(bios_x, bios_y, s=6, c="#D6A2E8", label=".pdb (biopython)", alpha=0.5, linewidths=0)
plt.scatter(bioplib_x, bioplib_y, s=6, c="#333333", label=".pdb (bioplib)", alpha=0.5, linewidths=0)
plt.xlabel("Atom Count")
plt.ylabel("Parse time (s)")
# Fixed axis ranges; points outside them are not shown.
plt.xlim([100, 100000])
plt.ylim([0.001, 10])
plt.legend(loc=2)
# NOTE(review): both the .svg and .png savefig calls appear in this view;
# presumably one is the replaced line and the other its replacement, so
# confirm against the real file which output format is intended.
plt.savefig("scripts/library-speed.svg", dpi=1000)
plt.savefig("scripts/library-speed.png", dpi=1000)
# Clear the current figure.
plt.clf()


#plt.scatter(bios_x, bios_y, s=4, c="#F97F51")

0 comments on commit 68d2ca8

Please sign in to comment.