In [1]:
import glob
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

# README

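For each halogen (chlorine, bromine, iodine), this notebook aggregates the *per-molecule* `overview.csv` files into a single summary CSV file, `nbonds.csv`, with one row per molecule.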


In [2]:
# Build one summary file, ../<halogen>/nbonds.csv, per halogen.
#
# Each row describes one molecule directory (../<halogen>/ZINC*), read from
# its overview.csv: the atom count, the largest `distance`, the atom type at
# nb_distance == 1, and for every nb_distance in `distances` the count, mean
# and standard deviation of the charge difference q_mod - q_std.

# nb_distance values that get their own num/mean/std column triplet
# (0 is the halogen itself). Shared by the header and the row loop so the
# two cannot drift apart.
distances = [0, 1, 2, 3, 4]

for halogen in "chlorine bromine iodine".split():

    # sorted so that the row order is reproducible between runs
    mols = sorted(glob.glob(f"../{halogen}/ZINC*"))

    with open(f"../{halogen}/nbonds.csv", "w") as out:

        # csv header
        out.write("mol,n_atoms,max_dist,type1")
        for dist in distances:
            out.write(f",d{dist}num,d{dist}mean,d{dist}std")
        out.write("\n")

        # browse molecules
        for mol in mols:

            df = pd.read_csv(mol + "/overview.csv")

            # the directory name is the molecule identifier
            zinc = mol.split("/")[-1]

            # atoms at nb_distance == 1
            first = df[df["nb_distance"] == 1]

            n_atoms = len(df)
            max_dist = df["distance"].max()
            # Leave the field empty rather than abort the whole run (and
            # leave a half-written CSV) if a molecule has no such atom.
            type1 = first["type"].iloc[0] if len(first) else ""

            out.write(f"{zinc},{n_atoms:.0f},{max_dist:.0f},{type1}")

            for distance in distances:
                atoms = df[df["nb_distance"] == distance]
                diffs = atoms["q_mod"] - atoms["q_std"]
                mean = diffs.mean()
                # pandas computes the sample standard deviation, so this is
                # NaN (written as "nan") when fewer than two atoms match.
                std = diffs.std()
                num = len(diffs)
                out.write(f",{num:.0f},{mean:.5f},{std:.5f}")

            out.write("\n")

"done"

'done'

In [3]:
# Inspect the per-atom table of the last molecule read by the loop above
# (`df` is the leftover loop variable, so this cell depends on that cell
# having been run first).
df

Unnamed: 0,name,type,nb_distance,distance,q_std,q_mod
0,C1,c3,7,4.76869,-0.343831,-0.36478
1,C2,c3,6,5.728391,0.217026,0.232752
2,C3,c3,7,6.446549,-0.343831,-0.36478
3,C4,c3,5,5.419522,-0.001134,-0.007434
4,C5,c3,6,6.792804,0.217026,0.232752
5,C6,c3,7,6.958298,-0.343831,-0.36478
6,C7,c3,7,7.944025,-0.343831,-0.36478
7,N1,n,4,4.455953,-0.354431,-0.398851
8,C8,c,3,3.442947,0.508724,0.560119
9,O1,o,4,3.419659,-0.552535,-0.540102


In [4]:
# Load the bromine summary file written by the loop above to check the result.
df_all = pd.read_csv("../bromine/nbonds.csv")

In [5]:
# Preview the first rows of the bromine summary (one row per molecule).
df_all.head()

Unnamed: 0,mol,n_atoms,max_dist,type1,d0num,d0mean,d0std,d1num,d1mean,d1std,d2num,d2mean,d2std,d3num,d3mean,d3std,d4num,d4mean,d4std
0,ZINC00000116,28,6,c3,1,-0.28809,,1,0.48032,,3,-0.13413,0.05835,5,0.01161,0.0492,6,0.01716,0.01722
1,ZINC00000900,19,7,cf,1,-0.17562,,1,0.19697,,2,-0.089,0.0158,4,0.01771,0.01303,2,0.00646,0.0
2,ZINC00001049,31,13,ca,1,-0.24035,,1,0.23464,,2,-0.07587,0.0,4,-0.00288,0.03987,3,0.01791,0.00878
3,ZINC00001059,26,7,ca,1,-0.19918,,1,0.1818,,2,-0.06022,0.01832,4,-0.00523,0.03213,6,0.00946,0.00542
4,ZINC00001093,26,6,c3,1,-0.26959,,1,0.22205,,3,-0.01573,0.01025,8,-0.01545,0.02782,8,0.01325,0.00703
