In [1]:
import csv
from typing import Iterable

import numpy as np
import pandas as pd
from rdkit import Chem

import rcm

In [3]:
# rcm.B(0,1,2,3,4,0,1,2,4,5,2)

(0.3578001235789995, 0.0, 0.0)

In [48]:
def connectivity_reader(filepath: str) -> pd.DataFrame:
  """Load a header-less connectivity CSV into a DataFrame.

  The file is read without a header row and the column names
  ``current_weight``, ``start`` and ``end`` are assigned to it.

  Args:
    filepath: Path of the CSV file to read.

  Returns:
    The parsed table with the three named columns.
  """
  column_names = ['current_weight', 'start','end']
  connectivity = pd.read_csv(filepath, names=column_names, header=None)
  return connectivity

def get_xyz(filepath: str) -> pd.DataFrame:
  """Read an XYZ geometry file into a table of atoms and coordinates.

  Args:
    filepath: Path of the XYZ file, parsed with RDKit's
      ``MolFromXYZFile``.

  Returns:
    A DataFrame with one row per atom (in RDKit atom order, indexed
    0..n-1) and columns ``atom`` (element symbol), ``x``, ``y`` and
    ``z`` (position of the atom in the molecule's conformer).

  Raises:
    ValueError: If RDKit could not parse the file and returned ``None``.
  """
  mol = Chem.rdmolfiles.MolFromXYZFile(filepath)
  if mol is None:
    raise ValueError(f"Could not read an XYZ geometry from {filepath!r}.")

  # Fetch the conformer once instead of once per atom.
  conformer = mol.GetConformer()

  # Collect plain records and build the frame in one go. Pre-filling a
  # float (NaN) frame and then writing element symbols into it is an
  # incompatible-dtype assignment that newer pandas warns about/rejects.
  records = []
  for i, atom in enumerate(mol.GetAtoms()):
    position = conformer.GetAtomPosition(i)
    records.append((atom.GetSymbol(), position.x, position.y, position.z))

  return pd.DataFrame(records, columns=['atom', 'x', 'y', 'z'])
  

In [18]:
# Input files for this run; both paths are relative to the working directory.
file_path_connectivity_table = 'cp6t6_0p_blyp35_geom_connectivity_global.csv'
# Connectivity table with columns current_weight, start, end.
conn = connectivity_reader(file_path_connectivity_table)
xyz_file_path = 'cp6t6_0p_blyp35_geom.xyz'
# Per-atom table with columns atom, x, y, z.
xyz = get_xyz(xyz_file_path)

In [None]:
# Plan: write a single function that takes xyz, conn, and the (x, y, z) point at which to evaluate.

In [95]:
def bar(xyz: pd.DataFrame, conn: pd.DataFrame) -> tuple[pd.Series, ...]:
  """Build the segment parameters for every row of ``conn`` at once.

  Args:
    xyz: Atom table with columns ``x``, ``y``, ``z``; its index holds
      the atom labels referenced by ``conn``.
    conn: Connectivity table with columns ``start``, ``end`` and
      ``current_weight``.

  Returns:
    ``(a1, a2, b1, b2, c1, c2, J)`` where ``a2, b2, c2`` are the x, y, z
    coordinates of each row's start atom, ``a1, b1, c1`` are the end
    atom's coordinates minus the start atom's, and ``J`` is the
    ``current_weight`` column. All seven Series share ``conn``'s index.

  Raises:
    KeyError: If a ``start``/``end`` label is missing from ``xyz``.
  """
  axes = ['x', 'y', 'z']

  # Look atoms up by label, then drop the labels with to_numpy(). If the
  # label-indexed Series were subtracted directly, pandas would align
  # them on (repeated) atom labels instead of pairing them row by row.
  start_xyz = xyz.loc[conn['start'].to_numpy(), axes].to_numpy()
  end_xyz = xyz.loc[conn['end'].to_numpy(), axes].to_numpy()
  delta_xyz = end_xyz - start_xyz

  def as_series(values, name):
    # Re-attach conn's index so every output lines up with J.
    return pd.Series(values, index=conn.index, name=name)

  a2, b2, c2 = (as_series(start_xyz[:, k], axis) for k, axis in enumerate(axes))
  a1, b1, c1 = (as_series(delta_xyz[:, k], axis) for k, axis in enumerate(axes))
  J = conn['current_weight']

  return a1,a2,b1,b2,c1,c2,J

# Unpack the parameters for all connectivity rows into notebook-level variables.
a1,a2,b1,b2,c1,c2,J = bar(xyz,conn)

In [96]:
# Inspect J as returned by bar: the current_weight column, one value per connectivity row.
J

0      0.50
1      0.25
2      0.25
3      0.25
4      0.25
       ... 
193    1.00
194    1.00
195    1.00
196    1.00
197    1.00
Name: current_weight, Length: 198, dtype: float64

In [92]:
# Note: the result is indexed by the start-atom labels (which repeat), not by connectivity row.
xyz.x[conn.start]

79   -12.834856
81   -10.277212
85    -9.299362
93   -11.560973
81   -10.277212
        ...    
80   -10.247040
78   -10.664779
76   -11.483450
75   -12.476036
77   -12.740943
Name: x, Length: 198, dtype: float64

In [93]:
# Number of rows in the connectivity table.
len(conn)

198

In [94]:
# Number of rows (atoms) in the geometry table.
len(xyz)


480

In [83]:
def foo(xyz: pd.DataFrame, conn: pd.DataFrame, index: int) -> tuple[float, ...]:
  """Build the segment parameters for a single row of ``conn``.

  Args:
    xyz: Atom table with columns ``x``, ``y``, ``z``; its index holds
      the atom labels referenced by ``conn``.
    conn: Connectivity table with columns ``start``, ``end`` and
      ``current_weight``.
    index: Index label of the ``conn`` row to use.

  Returns:
    ``(a1, a2, b1, b2, c1, c2, J)`` where ``a2, b2, c2`` are the x, y, z
    coordinates of the row's start atom, ``a1, b1, c1`` are the end
    atom's coordinates minus the start atom's, and ``J`` is the row's
    ``current_weight``.

  Raises:
    KeyError: If ``index`` or one of the atom labels does not exist.
  """
  axes = ('x', 'y', 'z')

  # .at is an explicit label-based scalar lookup, avoiding the
  # label-vs-position ambiguity of integer keys passed to Series[...].
  start_atom = conn.at[index, 'start']
  end_atom = conn.at[index, 'end']

  a2, b2, c2 = (xyz.at[start_atom, axis] for axis in axes)
  a1, b1, c1 = (
    xyz.at[end_atom, axis] - origin
    for axis, origin in zip(axes, (a2, b2, c2))
  )
  J = conn.at[index, 'current_weight']

  return a1,a2,b1,b2,c1,c2,J
  

In [84]:
# Overwrite the notebook-level parameters with the scalars for connectivity row 0.
a1,a2,b1,b2,c1,c2,J = foo(xyz,conn,0)

In [87]:
def get_ind_B(x,y,z):
  """Evaluate ``rcm.B`` at the point ``(x, y, z)``.

  The arguments ``a1, a2, b1, b2, c1, c2`` and ``J`` are not parameters:
  they are read from the notebook's global variables at call time, so
  the result depends on whichever cell assigned them last.

  Returns:
    Whatever ``rcm.B`` returns for those arguments.
  """
  # NOTE(review): the meaning of the trailing 1 is not visible in this
  # notebook -- confirm against rcm.B.
  result = rcm.B(a1, a2, b1, b2, c1, c2, x, y, z, J, 1)
  return result


In [88]:
# Evaluate at the point (0, 0, 0) using the current notebook-level parameters.
get_ind_B(0,0,0)

(2.5744011916732584e-05, 0.00031938658455955355, -0.0017721921634852168)

In [86]:
# Direct call with the same arguments that get_ind_B(0,0,0) passes; the two outputs match.
rcm.B(a1,a2,b1,b2,c1,c2,0,0,0,J,1)

(2.5744011916732584e-05, 0.00031938658455955355, -0.0017721921634852168)

In [53]:
# Display the connectivity table.
conn

Unnamed: 0,current_weight,start,end
0,0.50,79,81
1,0.25,81,85
2,0.25,85,93
3,0.25,93,95
4,0.25,81,86
...,...,...,...
193,1.00,80,78
194,1.00,78,76
195,1.00,76,75
196,1.00,75,77


In [52]:
# df_M = pd.DataFrame(
#   np.nan, 
#   columns=[
#     'a1', 'a2', 
#     'b1', 'b2', 
#     'c1', 'c2', 
#     'J'], 
#   index=np.arange(len(conn))
# )

# df_M


In [34]:
# Manual computation of the parameters for connectivity row 0
# (the same expressions as foo(xyz, conn, 0)).

# Coordinates of the row's start atom.
a2 = xyz.x[conn.start[0]]
b2 = xyz.y[conn.start[0]]
c2 = xyz.z[conn.start[0]]

# End-atom coordinates minus start-atom coordinates.
a1 = xyz.x[conn.end[0]] - a2
b1 = xyz.y[conn.end[0]] - b2
c1 = xyz.z[conn.end[0]] - c2

# Current weight of the row.
J = conn.current_weight[0]

In [36]:
# Evaluate rcm.B at (0, 0, 0) with the manually computed row-0 parameters.
rcm.B(a1,a2,b1,b2,c1,c2,0,0,0,J,1)

(2.5744011916732584e-05, 0.00031938658455955355, -0.0017721921634852168)

In [None]:
# Path (relative to the working directory) of the CSV previewed in the next cell;
# presumably lists the spectator atoms, going by its name.
file_path_list_of_spectator_atoms = 'cp6t6_0p_blyp35_geom_atoms.csv'

In [None]:
# Preview the first row of the CSV: print the row as a list, then each
# of its fields on its own line. Nothing is printed for an empty file.
# newline='' leaves line-ending handling to the csv module, as its
# documentation recommends for files passed to csv.reader.
with open(file_path_list_of_spectator_atoms, mode='r', newline='') as file:
  csvFile = csv.reader(file)
  first_row = next(csvFile, None)  # None when the file has no rows

if first_row is not None:
  print(first_row)
  for field in first_row:
    print(field)

In [None]:
def check_if_current_flow_conserved(df: pd.DataFrame, atol: float = 1e-9) -> bool:
  """Check that the current entering each node equals the current leaving it.

  Args:
    df: Connectivity table with exactly the three columns ``start``,
      ``end`` and ``current_weight``.
    atol: Absolute tolerance for the per-node balance. The weights are
      floats, so an exact comparison with zero would reject balanced
      nodes whose sums differ only by rounding (e.g. 0.1 + 0.2 vs 0.3).

  Returns:
    ``True`` when the table is well-formed and every node is balanced.

  Raises:
    AssertionError: If the table does not have exactly 3 columns, lacks
      one of the expected columns, or a node's incoming and outgoing
      ``current_weight`` sums differ by more than ``atol``.
  """
  # Raise explicitly rather than with `assert`, so the checks still run
  # when Python is started with -O.

  # first basic check if the dataframe has 3 columns.
  if df.shape[1] != 3:
    raise AssertionError(
      f"The connectivity dataframe has {df.shape[1]}"
      f" columns, exactly 3 are required."
    )

  # check if all columns are named as they should
  for column in ['start','end','current_weight']:
    if column not in df.columns:
      raise AssertionError(
        f"The connectivity dataframe does not contain"
        f" expected column of name {column}."
      )

  weights = df['current_weight']

  # every node that appears as a start or an end, in order of appearance
  nodes = pd.concat([df['start'], df['end']], axis=0).unique()

  for node in nodes:
    # numpy sums propagate NaN, so a missing weight fails the check
    inflow = weights[df['end'] == node].to_numpy().sum()
    outflow = weights[df['start'] == node].to_numpy().sum()
    if not np.isclose(inflow - outflow, 0.0, rtol=0.0, atol=atol):
      raise AssertionError(
        f'Current flowing in and out '
        f'of node with index {node} '
        f'is not conserved. '
      )

  return True


In [None]:
# Validate the connectivity table loaded earlier in this notebook (`conn`);
# no variable named `df` is defined by any preceding cell.
check_if_current_flow_conserved(conn)

In [None]:
# import numpy as np
# import tkinter as tk
# from tkinter import filedialog
# import pandas as pd

# root = tk.Tk()
# root.withdraw()

# # file_path = filedialog.askopenfilename(
# #   title="Select geometry xyz file.",
# #   initialdir='.',
# #   filetypes=(
# #       # ("jpeg files", "*.jpg"),
# #       # ("csv files", "*.csv"),
# #       ("xyz files", "*.xyz"),
# #       ("gif files", "*.gif*")
# #       # ("png files", "*.png")
# #     )
# #   )
