-
Notifications
You must be signed in to change notification settings - Fork 62
/
material.py
172 lines (146 loc) · 5.99 KB
/
material.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
""" Core definition of a Materials Document """
from datetime import datetime
from functools import partial
from typing import ClassVar, List, Mapping, Optional, Sequence, Tuple, TypeVar, Union
from pydantic import BaseModel, Field, create_model
from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher
from emmet.core import SETTINGS
from emmet.core.material import MaterialsDoc as CoreMaterialsDoc
from emmet.core.material import PropertyOrigin as PropertyOrigin
from emmet.core.structure import StructureMetadata
from emmet.core.vasp.calc_types import CalcType, RunType, TaskType
from emmet.core.vasp.task import TaskDocument
from emmet.stubs import ComputedStructureEntry, Structure
class MaterialsDoc(CoreMaterialsDoc, StructureMetadata):
    # Materials document built from a group of VASP task documents.
    # Adds per-task calculation/task/run type maps, property origins and the
    # best structure entry per run type on top of the core materials document.
    # (Kept as comments rather than a class docstring so the generated schema
    # description is not altered.)

    calc_types: Mapping[str, CalcType] = Field(  # type: ignore
        None,
        description="Calculation types for all the calculations that make up this material",
    )
    task_types: Mapping[str, TaskType] = Field(
        None,
        description="Task types for all the calculations that make up this material",
    )
    run_types: Mapping[str, RunType] = Field(
        None,
        description="Run types for all the calculations that make up this material",
    )
    origins: Sequence[PropertyOrigin] = Field(
        None, description="Dictionary for tracking the provenance of properties"
    )
    entries: Mapping[RunType, ComputedStructureEntry] = Field(
        None, description="Dictionary for tracking entries for VASP calculations"
    )

    @classmethod
    def from_tasks(
        cls,
        task_group: List[TaskDocument],
        quality_scores=SETTINGS.VASP_QUALITY_SCORES,
    ) -> "MaterialsDoc":
        """
        Converts a group of tasks into one material

        Args:
            task_group: task documents that all describe the same material
            quality_scores: mapping from run type value to a score; a higher
                score makes a calculation preferred when picking the best one

        Returns:
            A MaterialsDoc built via ``cls.from_structure`` from the best
            structure calculation in the group.

        Raises:
            Exception: if the group contains no structure optimization task,
                since the material ID is taken from those tasks.
        """
        # Metadata
        last_updated = max(task.last_updated for task in task_group)
        created_at = min(task.completed_at for task in task_group)
        task_ids = list({task.task_id for task in task_group})

        # Keep a set for membership tests and a list for the document field
        deprecated_task_ids = {
            task.task_id for task in task_group if not task.is_valid
        }
        deprecated_tasks = list(deprecated_task_ids)

        run_types = {task.task_id: task.run_type for task in task_group}
        task_types = {task.task_id: task.task_type for task in task_group}
        calc_types = {task.task_id: task.calc_type for task in task_group}

        # TODO: Fix the type checking by hardcoding the Enums?
        structure_optimizations = [
            task
            for task in task_group
            if task.task_type == TaskType.Structure_Optimization  # type: ignore
        ]
        statics = [task for task in task_group if task.task_type == TaskType.Static]  # type: ignore

        # Material ID: the lowest-sorting structure optimization task ID
        possible_mat_ids = sorted(
            (task.task_id for task in structure_optimizations), key=ID_to_int
        )
        if not possible_mat_ids:
            raise Exception(f"Could not find a material ID for {task_ids}")
        material_id = possible_mat_ids[0]

        def _structure_eval(task: TaskDocument):
            """
            Helper function to order structure optimization and static calcs by
            - Validity (non-deprecated tasks first)
            - Functional Type
            - Spin polarization
            - Special Tags
            - Energy

            Every "higher is better" criterion is negated so that the best
            calculation has the smallest key.
            """
            task_run_type = task.run_type
            # A task is valid when it has NOT been deprecated. The previous
            # membership test was inverted and ranked deprecated tasks first.
            is_valid = task.task_id not in deprecated_task_ids

            return (
                -1 * is_valid,
                -1 * quality_scores.get(task_run_type.value, 0),
                -1 * task.input.parameters.get("ISPIN", 1),
                -1 * task.input.parameters.get("LASPH", False),
                task.output.energy_per_atom,
            )

        # Non-empty here: at least one structure optimization exists, otherwise
        # the material ID lookup above would have raised.
        structure_calcs = structure_optimizations + statics
        # min() returns the first minimal element, matching sorted(...)[0]
        best_structure_calc = min(structure_calcs, key=_structure_eval)
        structure = best_structure_calc.output.structure

        # Initial Structures: keep one representative per group of matching
        # input structures
        initial_structures = [task.input.structure for task in task_group]
        sm = StructureMatcher(
            ltol=0.1, stol=0.1, angle_tol=0.1, scale=False, attempt_supercell=False
        )
        initial_structures = [
            group[0] for group in sm.group_structures(initial_structures)
        ]

        # Deprecated: only when every structure optimization is deprecated
        deprecated = all(
            task.task_id in deprecated_task_ids for task in structure_optimizations
        )

        # Origins
        origins = [
            PropertyOrigin(
                name="structure",
                task_id=best_structure_calc.task_id,
                last_updated=best_structure_calc.last_updated,
            )
        ]

        # entries: best structure calculation for each run type in the group
        entries = {}
        all_run_types = set(run_types.values())
        for rt in all_run_types:
            relevant_calcs = [doc for doc in structure_calcs if doc.run_type == rt]
            if not relevant_calcs:
                # Run type only appears on tasks that are neither structure
                # optimizations nor statics
                continue

            best_task_doc = min(relevant_calcs, key=_structure_eval)
            entry = best_task_doc.structure_entry
            # The entry's original ID is stored under the "material_id" data
            # key before entry_id is overwritten with the material ID.
            # NOTE(review): the original ID is presumably the task ID, which
            # makes the key name surprising - confirm before renaming.
            entry.data["material_id"] = entry.entry_id
            entry.entry_id = material_id
            entries[rt] = entry

        return cls.from_structure(
            structure=structure,
            material_id=material_id,
            last_updated=last_updated,
            created_at=created_at,
            task_ids=task_ids,
            calc_types=calc_types,
            run_types=run_types,
            task_types=task_types,
            initial_structures=initial_structures,
            deprecated=deprecated,
            deprecated_tasks=deprecated_tasks,
            origins=origins,
            entries=entries,
        )
def ID_to_int(s_id: str) -> Tuple[str, int]:
    """
    Turn an identifier into a (prefix, number) tuple usable as a sort key

    String IDs are expected to look like "[chars]-[int]", e.g. "mp-234":
    the text before the first dash becomes the prefix and the text after
    the last dash is parsed as an integer.
    Plain numbers (int or float) are returned with an empty prefix.
    Any other type yields None.
    """
    if not isinstance(s_id, str):
        # Numeric IDs carry no prefix; everything else is unsupported
        return ("", s_id) if isinstance(s_id, (int, float)) else None

    pieces = s_id.split("-")
    prefix, number = pieces[0], int(pieces[-1])
    return (prefix, number)