Skip to content

Commit

Permalink
Update speed script
Browse files Browse the repository at this point in the history
  • Loading branch information
samirelanduk committed Jul 11, 2019
1 parent 801ea6b commit 73ce4b7
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 11 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ notes
profiles
*.png
*.json
*.svg
49 changes: 38 additions & 11 deletions scripts/speed.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ def get_string(code):
text = response.content if code.endswith(".mmtf") else response.text
return text

if len(sys.argv) > 1 and sys.argv[1] == "-rebuild":

if len(sys.argv) > 1 and sys.argv[1] == "--rebuild":
query = "<orgPdbQuery>"\
"<queryType>org.pdb.query.simple.ChemCompFormulaQuery</queryType>"\
"<formula>ZN</formula></orgPdbQuery>"
Expand Down Expand Up @@ -70,8 +71,9 @@ def get_string(code):
with open("scripts/speed.json", "w") as f:
json.dump(data, f)


with open("scripts/speed.json") as f:
data = json.load(f)[:500]
data = json.load(f)

print("There are {} data points".format(len(data)))

Expand All @@ -80,27 +82,52 @@ def get_string(code):
mmtfs_x, mmtfs_y = zip(*[[d["atoms"], d[".mmtf"]] for d in data if d[".mmtf"] and d["models"] == 1 and d[".mmtf"] < 20])
bios_x, bios_y = zip(*[[d["atoms"], d["biopython"]] for d in data if d["biopython"] and d["models"] == 1 and d["biopython"] < 20])

def best_fit(X, Y, label):
    """Fit a least-squares line ``y = a + b*x`` to the points and print it.

    Args:
        X: Sized sequence of x values.
        Y: Sized sequence of y values, the same length as ``X``.
        label: Name shown in the printed summary line.

    Returns:
        A tuple ``(a, b)`` holding the intercept and slope of the fitted line.

    Raises:
        ValueError: If the inputs are empty, differ in length, or every x
            value is identical (the slope is undefined in that case).
    """
    n = len(X)
    if n == 0 or n != len(Y):
        raise ValueError(
            "best_fit needs two non-empty sequences of equal length "
            "(got {} and {})".format(n, len(Y))
        )

    xbar = sum(X) / n
    ybar = sum(Y) / n

    # Sum the centred products rather than using the algebraically equal
    # shortcut sum(x*y) - n*xbar*ybar: the shortcut subtracts two huge,
    # nearly equal numbers and loses precision when values are large
    # relative to their spread.
    numer = sum((xi - xbar) * (yi - ybar) for xi, yi in zip(X, Y))
    denum = sum((xi - xbar) ** 2 for xi in X)
    if denum == 0:
        raise ValueError(
            "best_fit cannot fit a line when all x values are identical"
        )

    b = numer / denum
    a = ybar - b * xbar

    print('{} best fit line:\ny = {:.6f} + {:.6f}x'.format(label, a, b))

    return a, b

plt.xscale("log")
plt.yscale("log")
plt.scatter(pdbs_x, pdbs_y, s=6, c="#58B19F", label=".pdb", alpha=0.8, linewidths=0)
plt.scatter(cifs_x, cifs_y, s=6, c="#FD7272", label=".cif", alpha=0.8, linewidths=0)
plt.scatter(mmtfs_x, mmtfs_y, s=6, c="#182C61", label=".mmtf", alpha=0.8, linewidths=0)
best_fit(cifs_x, cifs_y, "cif")
best_fit(pdbs_x, pdbs_y, "pdb")
best_fit(mmtfs_x, mmtfs_y, "mmtf")
best_fit(bios_x, bios_y, "biopython")
plt.scatter(cifs_x, cifs_y, s=6, c="#FD7272", label=".cif", alpha=0.3, linewidths=0)
plt.scatter(pdbs_x, pdbs_y, s=6, c="#58B19F", label=".pdb", alpha=0.3, linewidths=0)
plt.scatter(mmtfs_x, mmtfs_y, s=6, c="#182C61", label=".mmtf", alpha=0.3, linewidths=0)
plt.xlabel("Atom Count")
plt.ylabel("Parse time (s)")
plt.legend()
plt.savefig("scripts/compare.png", dpi=1000)
plt.xlim([100, 1000000])
plt.ylim([0.001, 100])
plt.legend(loc=2)
plt.savefig("scripts/format-speed.svg", dpi=1000)
plt.clf()


plt.xscale("log")
plt.yscale("log")
plt.scatter(pdbs_x, pdbs_y, s=6, c="#58B19F", label=".pdb (atomium)", alpha=0.8, linewidths=0)
plt.scatter(bios_x, bios_y, s=6, c="#D6A2E8", label=".pdb (biopython)", alpha=0.8, linewidths=0)
plt.scatter(pdbs_x, pdbs_y, s=6, c="#58B19F", label=".pdb (atomium)", alpha=0.5, linewidths=0)
plt.scatter(bios_x, bios_y, s=6, c="#D6A2E8", label=".pdb (biopython)", alpha=0.5, linewidths=0)
plt.xlabel("Atom Count")
plt.ylabel("Parse time (s)")
plt.legend()
plt.savefig("scripts/compare2.png", dpi=1000)
plt.xlim([100, 100000])
plt.ylim([0.001, 10])
plt.legend(loc=2)
plt.savefig("scripts/library-speed.svg", dpi=1000)
plt.clf()


#plt.scatter(bios_x, bios_y, s=4, c="#F97F51")

0 comments on commit 73ce4b7

Please sign in to comment.