In [1]:
'''
Create YAML data file (in ./molec_data/) for one molecule
Read these files in ./refdata/:
  ATcT (tab-separated)
  WebBook (YAML file)
  Additional thermochemical reference data (yaml)
  Spin-orbit corrections (Excel)
  Naming codes (tab-separated)
  Electronic energy levels for atoms (tab-separated)
  Electronic energy levels for molecules (tab-separated)
  File of overriding data (special cases) (yaml)
Read this file in ./geomfreq/:
  Geometry, vibrations (Gaussian output)
Read this file in ./energysp/:
  CCSD(T)-F12 energy (Molpro output)
  
To do:
  - Read these files in ./rotors/:
      Torsional transition-states (Gaussian output)
  - Consider multiple conformations
'''
#  This version for migration of function to library
None

In [2]:
import yaml, re, sys, os, glob
import pandas as pd
import numpy as np

sys.path.insert(0, '../atomic_SOC')
import molpro_subs as mpr
import chem_subs as chem
sys.path.insert(0, '../karlib')
import gaussian_subs as gau

import multirx_subs as mrx

In [3]:
# Notebook-level aliases for names defined in multirx_subs, kept so the
# remaining cells need no edits while functions migrate into the library.
GDIR, EDIR = mrx.GDIR, mrx.EDIR        # Gaussian files (geom opt, harmonic freqs); Molpro files (single-point CCSD(T)-F12)
REFDAT, MDAT = mrx.REFDAT, mrx.MDAT    # Reference data; processed molecular YAML files
read_yaml, find_CASRN_in_WB = mrx.read_yaml, mrx.find_CASRN_in_WB

In [4]:
# Create the reference data structures
# The eight names unpacked here are passed, in this same order, to the
# generate_molec_yaml(molec, ...) call that writes the molecule's YAML file.
# NOTE(review): this calls generate_molec_yaml() with NO arguments, yet the
# same function is called further down with a molecule label plus these eight
# objects. Confirm that the argument-less call really returns the reference
# data; if not, this should call the library's dedicated reference-data loader.
atct, webbook, soc, dflabel, reflocal, dfeleca, dfelecm, override = mrx.generate_molec_yaml()

In [5]:
# Short-name label of the molecule to process.
# For an interactive run, prompt for it instead:
#   molec = input('Molecular label (short name): ')
molec = "cyrene"

In [6]:
# Build the data file for the selected molecule from the reference data
# loaded above; verbose=True requests the progress report shown below.
mdoc = mrx.generate_molec_yaml(
    molec,
    atct, webbook, soc, dflabel,
    reflocal, dfeleca, dfelecm, override,
    verbose=True,
)

--- Preparing data file for cyrene ---
CASRN = 53716-82-8
Local name = dihydrolevoglucosenone
Molpro and Gaussian nuclear repulsion energies are consistent
Molpro and Gaussian (RO)HF energies are consistent
Hill:    C6H8O3
no CASRN matches in ATcT
no CASRN matches in WebBook
WebBook has 0 values for EoF298
Functional groups detected:
    carbonyl        [(0, 7)]
    ketone          [(1, 0, 7, 6)]
    ester           [(4, 5, 0, 7), (3, 8, 0, 7)]
    C-C             [(0, 1), (0, 6), (1, 2), (2, 3), (3, 4)]
YAML file created:  molec_data\cyrene.yml


In [7]:
# Round-trip check: read the molecule's YAML data back and display it, so
# that any dtype that did not survive being written out is visible here.
ydat = mrx.read_molec_yaml(molec)
ydat_encoded = mrx.encode_yaml(ydat)
mrx.display_yaml(ydat_encoded)

Charge: 0
Electronic: {}
Energy:
  CCSD(T)-F12b: -458.571166040378
  CCSD-F12b: -458.490264317891
  HF: -456.627764271881
  Nuclear_repulsion: 488.32673828
  basis: CC-PVTZ-F12
  basis_functions: 621
  nirreps: 1
  point_group: C1
  software: Molpro Version 2021.2
  state: 1A
Frequencies:
- 100.3598
- 173.7831
- 234.637
- 344.3887
- 417.4262
- 430.433
- 489.3895
- 547.8204
- 657.9298
- 753.8387
- 767.6748
- 811.196
- 822.0354
- 893.6788
- 914.0186
- 952.244
- 968.9015
- 1003.9848
- 1028.7942
- 1057.7867
- 1093.8302
- 1129.3436
- 1178.8086
- 1199.0397
- 1223.7213
- 1258.8231
- 1295.2579
- 1307.6008
- 1316.8414
- 1350.3452
- 1361.9093
- 1370.8376
- 1387.8146
- 1465.9911
- 1490.6892
- 1527.4438
- 1804.7334
- 3031.1798
- 3040.4168
- 3045.5436
- 3079.5389
- 3086.4802
- 3105.7479
- 3107.3954
- 3119.2953
Functional_groups:
  C-C: '[(0, 1), (0, 6), (1, 2), (2, 3), (3, 4)]'
  carbonyl: '[(0, 7)]'
  ester: '[(4, 5, 0, 7), (3, 8, 0, 7)]'
  ketone: '[(1, 0, 7, 6)]'
Geometry:
  E_scf: -459.28152871

In [8]:
# Show the 'Refdata' entry of the data read back above (the bare expression
# makes it the cell's displayed output).
ydat['Refdata']

{'WebBook': {}}