In [None]:
import plyvel
import pandas as pd
import subprocess
import re
import ast
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Default matplotlib font size for every figure drawn below.
plt.rcParams["font.size"] = 22


# Checking Progress of FAH Project

This notebook analyzes the database of a Folding@Home work server so that you can check the progress of a project.
The database is a LevelDB database, so LevelDB must be installed on the computer where you run this notebook for the plyvel library to work.

## Download the database and project.xml

Set the path to the database on the server in the form you would pass to rsync (e.g. sergio@my_workserver:/my/path/to/worklevel.db), and set the project number.

In [None]:
# Remote location of the work server database, in "host:path" form.
rsync_path = "banhof:/home/server/server2/data/SVR2997798026/work.leveldb/"

# Project to analyze; its project.xml path on the server is derived from the number.
project_number = 16815
project_path = "banhof:/home/server/server2/projects/p{}/project.xml".format(
    project_number
)

In [None]:
# Print (but do not run) an rsync command for copying the remote database
# into work.level.db/.
rsync_command = f"rsync -uiha --del {rsync_path} work.level.db/"
print(rsync_command)

## Process Database

In [None]:
# Replace the local copy of the database with a fresh one from the server and
# fetch the project's project.xml next to it.
#
# Commands are passed as argument lists (no local shell), so the configured
# paths are handed to rm/scp verbatim instead of being re-parsed by a shell.
# stderr is captured, so it is printed explicitly when a copy fails; otherwise
# a failed scp would only show up as a non-zero return code.
output = subprocess.run(["rm", "-rf", "work.level.db/"])
output = subprocess.run(
    ["scp", "-r", rsync_path, "./work.level.db/"], capture_output=True
)
print("###############")
print("Return code scp work.level.db:", output.returncode)
if output.returncode != 0:
    print("Error:", output.stderr.decode("utf-8", errors="replace"))
print("###############")
output = subprocess.run(
    ["scp", "-r", project_path, "work.level.db/project.xml"], capture_output=True
)
print("Return code scp project.xml:", output.returncode)
if output.returncode != 0:
    print("Error:", output.stderr.decode("utf-8", errors="replace"))
print("###############")
print("Output:", output.stdout.decode("utf-8"))

In [None]:
# Open the local copy of the work server database.
# create_if_missing=False: do not create a new database if the copy is absent.
db = plyvel.DB("./work.level.db", create_if_missing=False)

In [None]:
# Collect every database entry whose key mentions this project and load the
# entries into a DataFrame (one row per entry).
#
# The negative lookahead keeps e.g. project 1681 from also matching keys of
# project 16815. Keys are bytes, so the pattern is compiled as bytes too.
# NOTE(review): assumes the project number in a key is never directly followed
# by another digit -- confirm against the key format of the work server.
project_key = re.compile(rf"P{project_number}(?!\d)".encode("utf-8"))

# Each value is expected to be the text of a Python literal (parsed with
# ast.literal_eval, which does not execute code).
records = [
    ast.literal_eval(value.decode("UTF-8"))
    for key, value in db
    if project_key.search(key)
]

# Build the frame in one step; DataFrame.append was removed in pandas 2.0.
df = pd.DataFrame(records)
df.head()

In [None]:
# Read the number of runs, clones and gens from project.xml.
#
# For every line that contains one of the keywords, the first integer on that
# line is taken as the value (a later matching line overrides an earlier one).
# Lines that mention a keyword but hold no integer are skipped.
project_settings = ("runs", "clones", "gens")
integer_pattern = re.compile(r"[0-9]+")
counts = {}
with open("work.level.db/project.xml", "r") as file:
    for line in file:
        for setting in project_settings:
            if setting in line:
                numbers = integer_pattern.findall(line)
                if numbers:
                    counts[setting] = int(numbers[0])

# Fail with a clear message instead of a NameError further down.
missing = [setting for setting in project_settings if setting not in counts]
if missing:
    raise ValueError(f"project.xml does not define: {', '.join(missing)}")

n_runs = counts["runs"]
n_clones = counts["clones"]
n_gens = counts["gens"]
print(f"P{project_number} has {n_runs} runs with {n_clones} clones and {n_gens} gens.")

## Progress of project

In [None]:
# Summarize project progress, both per clone and per work unit (WU).
total_clones = n_runs * n_clones
total_WU = total_clones * n_gens

# A clone counts as finished when its state is FINISHED and gen equals n_gens.
finished_clones = df[(df.gen == n_gens) & (df.state == "FINISHED")].shape[0]
print(
    f"Finished {finished_clones} clones which is {100 * finished_clones / total_clones:3.1f} % of clones."
)

# The sum of gen over all entries is used as the number of finished WUs;
# this percentage is relative to all WUs of the project, not to clones.
finished_WU = np.sum(df.gen)
print(
    f"Finished {finished_WU} WU which is {100 * finished_WU / total_WU:3.1f} % of WUs."
)

failed_clones = df[df.state == "FAILED"].shape[0]
print(
    f"Failed {failed_clones} clones which is {100 * failed_clones / total_clones:3.1f} % of clones."
)
assigned_clones = df[df.state == "ASSIGNED"].shape[0]
print(
    f"Assigned {assigned_clones} clones which is {100 * assigned_clones / total_clones:3.1f} % of clones."
)

In [None]:
# Normalized (density) histogram of the gen column.
fig, ax = plt.subplots(nrows=1, ncols=1)
n, bins, patches = ax.hist(df["gen"], density=True, cumulative=False)
ax.set_ylabel("p(Gens finished of clone)")
ax.set_xlabel("Gens finished of clone")
ax.set_title("Probability distribution of clones")

In [None]:
# Cumulative, normalized histogram of the gen column (empirical CDF).
fig, ax = plt.subplots(nrows=1, ncols=1)
n, bins, patches = ax.hist(df["gen"], density=True, cumulative=True)
ax.set_ylabel("Cumulative Distribution Function")
ax.set_xlabel("Gens finished of clone")
ax.set_title("CDF")

## Distribution of traj lengths

In [None]:
# Length of a single work unit, in ns.
wu_length = 1
# Trajectory length in ns for each entry: its gen value times the WU length.
traj_lengths_ns = wu_length * df.gen.values

In [None]:
# Histogram of trajectory lengths; the title reports the summed length
# converted from ns to microseconds.
plt.figure(figsize=(10, 10))
plt.hist(traj_lengths_ns, range=(0, traj_lengths_ns.max()), bins=25)
plt.xlabel("Traj length (ns)")
plt.ylabel("Number of CLONEs")
# Raw string: "\m" is not a valid Python escape sequence, and mathtext needs
# the backslash to reach matplotlib unchanged.
plt.title(f"p{project_number}: " + str(traj_lengths_ns.sum() / 1000) + r" $\mu$s")
# Name the output after the project being analyzed rather than a fixed project.
plt.savefig(f"p{project_number}-traj-distribution.png", dpi=300)