# Management - Substituted Phenyl Set 1

In [9]:
import qcportal as ptl
import pandas as pd
import datetime
import time

from management import get_incomplete_torsiondrive_optimizations

In [2]:
# Connect with authentication, which gives this client write access.
# Don't run this cell unless you plan to submit or modify things on the server.
client = ptl.FractalClient.from_file()

In [3]:
client

## Dataset tracking

### 2020.06.10 16:30

In [4]:
dataset = "OpenFF Substituted Phenyl Set 1"

In [5]:
ds = client.get_collection("TorsionDriveDataset", dataset)

In [6]:
ds.list_specifications()

Unnamed: 0_level_0,Description
Name,Unnamed: 1_level_1
default,Standard OpenFF torsiondrive specification.


In [10]:
# Per the output below, this maps each incomplete torsiondrive id to the list of
# its OptimizationRecords under the 'default' specification.
res = get_incomplete_torsiondrive_optimizations(ds, 'default', client, merged=False)
res

{'19953582': [OptimizationRecord(id='20174725', status='COMPLETE'),
  OptimizationRecord(id='20175028', status='COMPLETE'),
  OptimizationRecord(id='20175029', status='COMPLETE'),
  OptimizationRecord(id='20294323', status='COMPLETE'),
  OptimizationRecord(id='20294324', status='COMPLETE'),
  OptimizationRecord(id='20294325', status='COMPLETE'),
  OptimizationRecord(id='20294326', status='COMPLETE'),
  OptimizationRecord(id='20330114', status='COMPLETE'),
  OptimizationRecord(id='20330115', status='COMPLETE'),
  OptimizationRecord(id='20330116', status='COMPLETE'),
  OptimizationRecord(id='20330117', status='COMPLETE'),
  OptimizationRecord(id='20344139', status='COMPLETE'),
  OptimizationRecord(id='20344140', status='COMPLETE'),
  OptimizationRecord(id='20344141', status='COMPLETE'),
  OptimizationRecord(id='20344142', status='COMPLETE'),
  OptimizationRecord(id='20519420', status='COMPLETE'),
  OptimizationRecord(id='20519421', status='COMPLETE'),
  OptimizationRecord(id='20519422', 

In [11]:
ds.status("default")

Unnamed: 0,default
COMPLETE,155
ERROR,4


In [23]:
# Show every torsiondrive record under 'default' whose status is not COMPLETE.
[
    record
    for record in ds.df.default
    if record.status != 'COMPLETE'
]

[TorsionDriveRecord(id='3745388', status='ERROR'),
 TorsionDriveRecord(id='3745481', status='ERROR'),
 TorsionDriveRecord(id='3745569', status='ERROR')]

In [15]:
# Restart the service for torsiondrive '19953582', the record reported with
# tracked optimizations in the incomplete-optimizations output above.
client.modify_services('restart', procedure_id='19953582')

Data(n_updated=1)

In [24]:
# Restart the service for errored torsiondrive '3745388' (listed among the ERROR records above).
client.modify_services('restart', procedure_id='3745388')

Data(n_updated=1)

In [25]:
# Restart the service for errored torsiondrive '3745481' (listed among the ERROR records above).
client.modify_services('restart', procedure_id='3745481')

Data(n_updated=1)

In [26]:
# Restart the service for errored torsiondrive '3745569' (listed among the ERROR records above).
client.modify_services('restart', procedure_id='3745569')

Data(n_updated=1)

In [27]:
# Re-fetch the collection after the restarts.
# NOTE(review): presumably needed so the status check below reflects fresh server state — confirm.
ds = client.get_collection("TorsionDriveDataset", dataset)

In [30]:
ds.status("default")

Unnamed: 0,default
COMPLETE,156
RUNNING,3


In [32]:
# Re-run the incomplete-optimization check after the restarts; the result below is an empty dict.
res = get_incomplete_torsiondrive_optimizations(ds, 'default', client, merged=False)
res

{}

## Archived

In [12]:
ds.df

Unnamed: 0,default
CCC(=O)Nc1[cH:1][c:2](ccn1)[NH:3][CH2:4]C,"TorsionDriveRecord(id='3745357', status='COMPL..."
CCCNc1cc[c:2]([cH:1]n1)[N:3](C)[CH3:4],"TorsionDriveRecord(id='3745358', status='COMPL..."
CCCNc1cc[c:2]([cH:1]n1)[O:3][CH2:4]C,"TorsionDriveRecord(id='3745359', status='COMPL..."
CCCNc1ccc[c:2]([cH:1]1)[N:3]([CH3:4])C,"TorsionDriveRecord(id='3745360', status='COMPL..."
CCCNc1ccc[c:2]([cH:1]1)[NH:3][CH3:4],"TorsionDriveRecord(id='3745361', status='COMPL..."
...,...
c1[cH:1][c:2](c(cc1O)Cl)[c:4]2[cH:3]c[nH+]cc2,"TorsionDriveRecord(id='19953579', status='COMP..."
c1[cH:1][c:2](c(cc1O)Cl)[c:4]2[cH:3]cncc2,"TorsionDriveRecord(id='19953580', status='COMP..."
c1[cH:1][c:2](c(cc1[O-])Cl)[c:4]2[cH:3]c[nH+]cc2,"TorsionDriveRecord(id='19953581', status='COMP..."
c1[cH:1][c:2](c(cc1[O-])Cl)[c:4]2[cH:3]cncc2,"TorsionDriveRecord(id='19953582', status='ERROR')"


In [54]:
# Show every torsiondrive record under 'default' whose status is ERROR.
[
    record
    for record in ds.df.default
    if record.status == 'ERROR'
]

[TorsionDriveRecord(id='3745388', status='ERROR'),
 TorsionDriveRecord(id='3745481', status='ERROR'),
 TorsionDriveRecord(id='3745569', status='ERROR'),
 TorsionDriveRecord(id='19953582', status='ERROR')]

In [55]:
# Unique ids of all torsiondrive records under the 'default' specification.
ids = {record.id for record in ds.df.default}

In [56]:
res = client.query_procedures(ids)

In [57]:
from collections import defaultdict

In [58]:
# For each torsiondrive that is not COMPLETE, gather the ids of every optimization
# recorded in its optimization_history (across all grid angles).
angle_optimizations = defaultdict(set)
for tdr in ds.df.default:
    if tdr.status != 'COMPLETE':
        # A record with an empty history never indexes the defaultdict,
        # so it ends up with no key at all.
        for opt_ids in tdr.optimization_history.values():
            angle_optimizations[tdr.id] |= set(opt_ids)

In [59]:
angle_optimizations

defaultdict(set,
            {'19953582': {'20174725',
              '20175028',
              '20175029',
              '20294323',
              '20294324',
              '20294325',
              '20294326',
              '20330114',
              '20330115',
              '20330116',
              '20330117',
              '20344139',
              '20344140',
              '20344141',
              '20344142',
              '20519420',
              '20519421',
              '20519422',
              '20519423',
              '20528122',
              '20528148',
              '20528149',
              '20528150',
              '20541376',
              '20541377',
              '20541378',
              '20541379',
              '20549645',
              '20549646',
              '20549647',
              '20549648',
              '20561993',
              '20561994',
              '20561995',
              '20561996',
              '20577519',
              '20577520',
         

In [56]:
# Attempt to restart the task whose base result is '20576181'.
# The output below reports n_updated=0, i.e. no task was updated by this call.
client.modify_tasks(operation='restart', base_result='20576181')

Data(n_updated=0)

In [60]:
# Flatten the per-torsiondrive id sets into one set of optimization ids.
angle_optimizations_i = set().union(*angle_optimizations.values())

angle_optimizations_i

{'20174725',
 '20175028',
 '20175029',
 '20294323',
 '20294324',
 '20294325',
 '20294326',
 '20330114',
 '20330115',
 '20330116',
 '20330117',
 '20344139',
 '20344140',
 '20344141',
 '20344142',
 '20519420',
 '20519421',
 '20519422',
 '20519423',
 '20528122',
 '20528148',
 '20528149',
 '20528150',
 '20541376',
 '20541377',
 '20541378',
 '20541379',
 '20549645',
 '20549646',
 '20549647',
 '20549648',
 '20561993',
 '20561994',
 '20561995',
 '20561996',
 '20577519',
 '20577520',
 '20577521',
 '20577522',
 '20580368',
 '20580369',
 '20580370',
 '20580371',
 '20614210',
 '20614230',
 '20614231',
 '20614232',
 '20634721',
 '20634722'}

In [61]:
len(angle_optimizations_i)

49

In [62]:
# Query the server for the procedure records behind the collected optimization ids.
res_angle_opt = client.query_procedures(angle_optimizations_i)

In [63]:
res_angle_opt

[OptimizationRecord(id='20174725', status='COMPLETE'),
 OptimizationRecord(id='20175028', status='COMPLETE'),
 OptimizationRecord(id='20175029', status='COMPLETE'),
 OptimizationRecord(id='20294323', status='COMPLETE'),
 OptimizationRecord(id='20294324', status='COMPLETE'),
 OptimizationRecord(id='20294325', status='COMPLETE'),
 OptimizationRecord(id='20294326', status='COMPLETE'),
 OptimizationRecord(id='20330114', status='COMPLETE'),
 OptimizationRecord(id='20330115', status='COMPLETE'),
 OptimizationRecord(id='20330116', status='COMPLETE'),
 OptimizationRecord(id='20330117', status='COMPLETE'),
 OptimizationRecord(id='20344139', status='COMPLETE'),
 OptimizationRecord(id='20344140', status='COMPLETE'),
 OptimizationRecord(id='20344141', status='COMPLETE'),
 OptimizationRecord(id='20344142', status='COMPLETE'),
 OptimizationRecord(id='20519420', status='COMPLETE'),
 OptimizationRecord(id='20519421', status='COMPLETE'),
 OptimizationRecord(id='20519422', status='COMPLETE'),
 Optimizat

In [64]:
# Lazy generator of (record, error message) pairs for the optimizations in ERROR;
# nothing is fetched until the generator is advanced.
review = (
    (record, record.get_error().error_message)
    for record in res_angle_opt
    if record.status == 'ERROR'
)

In [41]:
# Advance `review` by one and print the (record, error message) pair.
# Re-running this cell steps through the errors; it raises StopIteration once they are exhausted.
print(next(review))

(OptimizationRecord(id='20634722', status='ERROR'), 'Caught Executor Error:\nTraceback (most recent call last):\n  File "/opt/conda/envs/qcfractal/lib/python3.6/site-packages/qcfractal/queue/executor_adapter.py", line 15, in _get_future\n    return future.result()\n  File "/opt/conda/envs/qcfractal/lib/python3.6/concurrent/futures/_base.py", line 425, in result\n    return self.__get_result()\n  File "/opt/conda/envs/qcfractal/lib/python3.6/concurrent/futures/_base.py", line 384, in __get_result\n    raise self._exception\nconcurrent.futures.process.BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.\n')


In [40]:
# Restart the task for optimization '20634721', one of the two optimizations in ERROR.
client.modify_tasks(operation='restart', base_result='20634721')

Data(n_updated=1)

In [42]:
# Restart the task for optimization '20634722', whose BrokenProcessPool error is printed above.
client.modify_tasks(operation='restart', base_result='20634722')

Data(n_updated=1)

Why are there only 2 failed optimizations, when we see 4 errored torsiondrives at the top level? This is not what I would expect, but I could be thinking about this wrong.

Only one of the torsiondrive ids (`19953582`) shows up in our iteration with `optimization_history` values. Do the others have none?

In [27]:
# Print the optimization history of each torsiondrive that is in ERROR.
for record in ds.df.default:
    if record.status != 'ERROR':
        continue
    print(record.optimization_history)

{}
{}
{}
{'[-90]': ['20174725', '20294324', '20294325'], '[-105]': ['20175028', '20330115'], '[-75]': ['20175029', '20330116'], '[-120]': ['20294323', '20344140'], '[-60]': ['20294326', '20344141'], '[-135]': ['20330114', '20519421'], '[-45]': ['20330117', '20519422'], '[-150]': ['20344139', '20528122'], '[-30]': ['20344142', '20528148'], '[-165]': ['20519420', '20541376'], '[-15]': ['20519423', '20541377'], '[0]': ['20528149', '20549645'], '[180]': ['20528150', '20549648'], '[15]': ['20541378', '20561993'], '[165]': ['20541379', '20561996'], '[30]': ['20549646', '20577519'], '[150]': ['20549647', '20577522'], '[45]': ['20561994', '20580368'], '[135]': ['20561995', '20580371'], '[60]': ['20577520', '20614210'], '[120]': ['20577521', '20614232'], '[75]': ['20580369', '20634721'], '[105]': ['20580370', '20634722'], '[90]': ['20614230', '20614231']}


It looks like three of the errored torsiondrives have an empty `optimization_history`, i.e. no optimizations were assigned to them. TODO: check whether this was noted in the original PR.

In [62]:
# Ids of the optimizations that ended in ERROR, kept as a tuple.
failed = tuple(
    record.id
    for record in res_angle_opt
    if record.status == 'ERROR'
)

In [63]:
failed

('20634721', '20634722')

In [64]:
# For each errored torsiondrive, print the grid-angle key of every history entry
# that contains one of the failed optimization ids.
for tdr_record in ds.df.default:
    if tdr_record.status != 'ERROR':
        continue
    for angle, opt_ids in tdr_record.optimization_history.items():
        for failed_id in failed:
            if failed_id in opt_ids:
                print(angle)

[75]
[105]


It appears that two angles failed: `[75]` and `[105]`.

Restart again?

In [66]:
angle_optimizations.keys()

dict_keys(['19953582'])

In [68]:
# Restart the service behind every torsiondrive id tracked in angle_optimizations.
for torsiondrive_id in angle_optimizations:
    client.modify_services(operation='restart', procedure_id=torsiondrive_id)

In [76]:
ds.status('default')

Unnamed: 0,default
COMPLETE,155
ERROR,4


## Problem cases (2020.05.27)

From the 'OpenFF Substituted Phenyl Set 1' dataset: job index `c1[cH:1][c:2](c(cc1[O-])Cl)[c:4]2[cH:3]cncc2`, job id:

```
TorsionDriveRecord(id='19953582', status='ERROR') . Two optimizations failed with the following Error messages: ComputeError(error_type='BrokenProcessPool', error_message='Caught Executor Error:\nTraceback (most recent call last):\n  File "/opt/conda/envs/qcfractal/lib/python3.6/site-packages/qcfractal/queue/executor_adapter.py", line 15, in _get_future\n    return future.result()\n  File "/opt/conda/envs/qcfractal/lib/python3.6/concurrent/futures/_base.py", line 425, in result\n    return self.__get_result()\n  File "/opt/conda/envs/qcfractal/lib/python3.6/concurrent/futures/_base.py", line 384, in __get_result\n    raise self._exception\nconcurrent.futures.process.BrokenProcessPool: A process in the process pool was terminated abruptly while the future was running or pending.\n')
```

In [10]:
# Collect every torsiondrive record under 'default' whose status is ERROR.
erred = [
    record
    for record in ds.df.default.values
    if record.status == 'ERROR'
]
erred

[TorsionDriveRecord(id='3745388', status='ERROR'),
 TorsionDriveRecord(id='3745481', status='ERROR'),
 TorsionDriveRecord(id='3745569', status='ERROR'),
 TorsionDriveRecord(id='19953582', status='ERROR')]

Start with the last one:

In [11]:
# Take the last errored record from `erred` and display it.
tdr = erred[-1]
tdr

TorsionDriveRecord(id='19953582', status='ERROR')

In [31]:
tdr.fields

  tdr.fields


{'client': ModelField(name='client', type=Optional[Any], required=False, default=None),
 'cache': ModelField(name='cache', type=Mapping[str, Any], required=False, default={}),
 'id': ModelField(name='id', type=Optional[ObjectId], required=False, default=None),
 'hash_index': ModelField(name='hash_index', type=Optional[str], required=False, default=None),
 'procedure': ModelField(name='procedure', type=ConstrainedStrValue, required=False, default='torsiondrive'),
 'program': ModelField(name='program', type=ConstrainedStrValue, required=False, default='torsiondrive'),
 'version': ModelField(name='version', type=int, required=False, default=1),
 'protocols': ModelField(name='protocols', type=Optional[Mapping[str, Any]], required=False, default=None),
 'extras': ModelField(name='extras', type=Mapping[str, Any], required=False, default={}),
 'stdout': ModelField(name='stdout', type=Optional[ObjectId], required=False, default=None),
 'stderr': ModelField(name='stderr', type=Optional[ObjectId

In [16]:
tdr.get_stdout()

In [12]:
tdr.id

'19953582'

In [33]:
# Restart the service for the torsiondrive record currently held in `tdr`.
client.modify_services(operation='restart', procedure_id=tdr.id)

Data(n_updated=1)

In [36]:
ds = client.get_collection("TorsionDriveDataset", dataset)

In [37]:
ds.list_specifications()

Unnamed: 0_level_0,Description
Name,Unnamed: 1_level_1
default,Standard OpenFF torsiondrive specification.


'default' always means 'b3lyp-d3bj' for us.

In [39]:
ds.status("default")

Unnamed: 0,default
COMPLETE,155
ERROR,3
RUNNING,1


In [40]:
ds.df

Unnamed: 0,default
CCC(=O)Nc1[cH:1][c:2](ccn1)[NH:3][CH2:4]C,"TorsionDriveRecord(id='3745357', status='COMPL..."
CCCNc1cc[c:2]([cH:1]n1)[N:3](C)[CH3:4],"TorsionDriveRecord(id='3745358', status='COMPL..."
CCCNc1cc[c:2]([cH:1]n1)[O:3][CH2:4]C,"TorsionDriveRecord(id='3745359', status='COMPL..."
CCCNc1ccc[c:2]([cH:1]1)[N:3]([CH3:4])C,"TorsionDriveRecord(id='3745360', status='COMPL..."
CCCNc1ccc[c:2]([cH:1]1)[NH:3][CH3:4],"TorsionDriveRecord(id='3745361', status='COMPL..."
...,...
c1[cH:1][c:2](c(cc1O)Cl)[c:4]2[cH:3]c[nH+]cc2,"TorsionDriveRecord(id='19953579', status='COMP..."
c1[cH:1][c:2](c(cc1O)Cl)[c:4]2[cH:3]cncc2,"TorsionDriveRecord(id='19953580', status='COMP..."
c1[cH:1][c:2](c(cc1[O-])Cl)[c:4]2[cH:3]c[nH+]cc2,"TorsionDriveRecord(id='19953581', status='COMP..."
c1[cH:1][c:2](c(cc1[O-])Cl)[c:4]2[cH:3]cncc2,"TorsionDriveRecord(id='19953582', status='RUNN..."
