In [72]:
import json
import chemparse
# Read the raw model predictions and the reference database from JSON files
# located in the current working directory.
with open('run15000chars.json') as model_file:
    model_output = json.load(model_file)
with open('database.json') as database_file:
    database = json.load(database_file)

# Keep only the first two items of every model entry; later cells read
# index 0 as the material string and index 1 as the temperature string.
model_output = {paper_id: fields[:2] for paper_id, fields in model_output.items()}

In [73]:
# Display the loaded model output (each entry was truncated to its first two items).
model_output

{'physrevb.10.4572': ['Ga1', ' 6.07 K'],
 'physrevb.100.014503': ['Eu1Fe2As2', ' 27 K'],
 'physrevb.100.014507': ['Pd1S2', ' 8.0 K'],
 'physrevb.100.041109': ['Rb1', ' 2 K'],
 'physrevb.100.060103': ['Au2Pb1', ' 4 K'],
 'physrevb.100.094511': ['Rb1Cr3As3', ' 7.17 K'],
 'physrevb.100.094522': ['W3Al2C1', ' 7.6 K'],
 'physrevb.100.134503': ['Ta1Os1Si1', ' 5.8 K'],
 'physrevb.23.2219': ['K0.18W1O3', ' 1.2 K'],
 'physrevb.26.6315': ['K1Hg1C4', ' 1.7 K'],
 'physrevb.28.1389': ['La1Ir2Si2', ' 1.58 K'],
 'physrevb.30.1182': ['Ce1Cu2Si2', ' 2 K'],
 'physrevb.30.1583': ['Pt3U1', ' 0.54 K'],
 'physrevb.30.2986': ['Pt3U1', ' 0.49 K'],
 'physrevb.30.444': ['Ce1Ru3Si2', ' 1 K'],
 'physrevb.30.5135': ['Co35.38Zr64.62', ' 2.67 K'],
 'physrevb.31.1654': ['Pt3U1', ' 17.6 K'],
 'physrevb.32.135': ['Pd2Yb1Sn1', ' 2.46 K'],
 'physrevb.34.4590': ['Lu5Ir4Si10', ' 79 K'],
 'physrevb.34.4920': ['Ga1', ' 0.7 K'],
 'physrevb.36.3906': ['La1.8Sr0.2Cu1O4', ' 39 K'],
 'physrevb.36.3910': ['Y1.2Ba0.8Cu1O4', ' 34 K'

In [74]:
def format_material_string(string, splitchar):
    """Split a raw material string into a list of formula strings.

    Every occurrence of the literal label 'MATERIAL:' is removed, all
    whitespace is stripped out, and the remainder is split on ``splitchar``.

    Args:
        string: Raw text holding one or more formulas.
        splitchar: Separator placed between formulas in ``string``.

    Returns:
        list[str]: The pieces found between separators. An input without
        any formula yields [''].
    """
    without_label = string.replace('MATERIAL:', '')
    compact = "".join(without_label.split())
    return compact.split(splitchar)
    
def format_temperature_string(temp_list):
    """Parse a comma-separated temperature string into a list of floats.

    Every 'K' character and all whitespace are removed, the remainder is
    split on ',', and one trailing '.' is dropped from each piece before it
    is converted with ``float``.

    Args:
        temp_list: Raw text such as ' 6.07 K' or '39 K, 34 K'.

    Returns:
        list[float]: One value per comma-separated piece. A piece that
        cannot be converted (including an empty piece) is printed and
        recorded as the sentinel -100.0.
    """
    compact = "".join(temp_list.replace('K', '').split())
    temp_list_float = []
    for temp_string in compact.split(','):
        # endswith() is safe on an empty piece; indexing [-1] raised
        # IndexError for '' (empty input or a trailing comma).
        if temp_string.endswith('.'):
            temp_string = temp_string[:-1]
        try:
            temp_list_float.append(float(temp_string))
        except ValueError:
            # Echo the offending piece so bad values are visible in the
            # cell output, then store the sentinel.
            print(temp_string)
            temp_list_float.append(-100.0)
    return temp_list_float

def standard_chem_formula(dictionary):
    """Build a canonical string from the keys of a parsed-formula mapping.

    The keys are sorted, concatenated, and filtered down to alphabetic or
    numeric characters. The mapped values are not used, so two formulas
    with the same keys but different amounts give the same string.

    Args:
        dictionary: Mapping whose keys are strings (the notebook passes the
            result of ``chemparse.parse_formula``).

    Returns:
        str: The sorted keys joined together, e.g. 'AsEuFe'.
    """
    joined_keys = ''.join(sorted(dictionary.keys()))
    kept_chars = [ch for ch in joined_keys if ch.isalpha() or ch.isnumeric()]
    return ''.join(kept_chars)

In [75]:
# Turn every raw model entry into a (materials, temperatures) pair of
# equal-length lists, where temperatures[i] belongs to materials[i].
processed_model_output = {}
for key, entry in model_output.items():
    # The 'questions' key is not a paper entry; skip it.
    if key == 'questions':
        continue
    materials = format_material_string(entry[0], '&')
    temperatures = format_temperature_string(entry[1])
    # Without a one-to-one correspondence the values cannot be paired.
    if len(materials) != len(temperatures):
        continue
    # Deduplicate materials while keeping first-seen order and the first
    # temperature reported for each. Going through set() scrambled the
    # order, so temperatures no longer lined up with their materials.
    first_temperature = {}
    for material, temperature in zip(materials, temperatures):
        first_temperature.setdefault(material, temperature)
    processed_model_output[key] = (
        list(first_temperature.keys()),
        list(first_temperature.values()),
    )

563+5m
8-10
8.5-9
31-36
15.2T


In [76]:
# Turn every database record into a (materials, temperatures) pair.
processed_database_output = {}
for paper_id, record in database.items():
    db_materials = format_material_string(record[0], ',')
    raw_temperatures = record[1].replace('CRITICAL TEMPERATURE:', '')
    db_temperatures = format_temperature_string(raw_temperatures)
    # Pad with the -100.0 sentinel when fewer temperatures than materials
    # were listed; surplus temperatures are kept as they are.
    missing = len(db_materials) - len(db_temperatures)
    if missing > 0:
        db_temperatures = db_temperatures + [-100.0] * missing
    processed_database_output[paper_id] = (db_materials, db_temperatures)

In [77]:
# Restrict both result dicts to the keys they have in common, in sorted
# order, so they can be compared entry by entry.
common_keys = processed_model_output.keys() & processed_database_output.keys()
shared_keys = sorted(common_keys)
processed_model_output = {k: processed_model_output[k] for k in shared_keys}
processed_database_output = {k: processed_database_output[k] for k in shared_keys}

In [78]:
# Display the processed model results, restricted to keys shared with the database.
processed_model_output

{'physrevb.10.4572': (['Ga1'], [6.07]),
 'physrevb.100.014503': (['Eu1Fe2As2'], [27.0]),
 'physrevb.100.014507': (['Pd1S2'], [8.0]),
 'physrevb.100.041109': (['Rb1'], [2.0]),
 'physrevb.100.060103': (['Au2Pb1'], [4.0]),
 'physrevb.100.094511': (['Rb1Cr3As3'], [7.17]),
 'physrevb.100.094522': (['W3Al2C1'], [7.6]),
 'physrevb.100.134503': (['Ta1Os1Si1'], [5.8]),
 'physrevb.23.2219': (['K0.18W1O3'], [1.2]),
 'physrevb.26.6315': (['K1Hg1C4'], [1.7]),
 'physrevb.28.1389': (['La1Ir2Si2'], [1.58]),
 'physrevb.30.1182': (['Ce1Cu2Si2'], [2.0]),
 'physrevb.30.1583': (['Pt3U1'], [0.54]),
 'physrevb.30.2986': (['Pt3U1'], [0.49]),
 'physrevb.30.444': (['Ce1Ru3Si2'], [1.0]),
 'physrevb.30.5135': (['Co35.38Zr64.62'], [2.67]),
 'physrevb.31.1654': (['Pt3U1'], [17.6]),
 'physrevb.32.135': (['Pd2Yb1Sn1'], [2.46]),
 'physrevb.34.4590': (['Lu5Ir4Si10'], [79.0]),
 'physrevb.34.4920': (['Ga1'], [0.7]),
 'physrevb.36.3906': (['La1.8Sr0.2Cu1O4'], [39.0]),
 'physrevb.36.3910': (['Y1.2Ba0.8Cu1O4'], [34.0]),
 'p

In [79]:
# Display the processed database records, restricted to keys shared with the model output.
processed_database_output

{'physrevb.10.4572': (['Ga1'], [6.07]),
 'physrevb.100.014503': (['Eu1Fe2As2'], [27.0]),
 'physrevb.100.014507': (['Pd1S2'], [2.0]),
 'physrevb.100.041109': (['Rb1'], [2.1]),
 'physrevb.100.060103': (['Au2Pb1'], [3.61]),
 'physrevb.100.094511': (['Rb1Cr3As3'], [7.17]),
 'physrevb.100.094522': (['W3Al2C1'], [7.6]),
 'physrevb.100.134503': (['Ta1Os1Si1'], [5.51]),
 'physrevb.23.2219': (['K0.18W1O3'], [3.9]),
 'physrevb.26.6315': (['K1Hg1C4'], [1.42]),
 'physrevb.28.1389': (['La1Ir2Si2'], [1.58]),
 'physrevb.30.1182': (['Ce1Cu2Si2'], [0.7]),
 'physrevb.30.1583': (['Pt3U1'], [0.52]),
 'physrevb.30.2986': (['Pt3U1'], [0.49]),
 'physrevb.30.444': (['Ce1Ru3Si2'], [1.25]),
 'physrevb.30.5135': (['Co35.38Zr64.62'], [2.67]),
 'physrevb.31.1654': (['Pt3U1'], [0.48]),
 'physrevb.32.135': (['Pd2Yb1Sn1'], [2.36]),
 'physrevb.34.4590': (['Lu5Ir4Si10'], [3.77]),
 'physrevb.34.4920': (['Ga1'], [8.2]),
 'physrevb.36.3906': (['La1.8Sr0.2Cu1O4'], [35.7]),
 'physrevb.36.3910': (['Y1.2Ba0.8Cu1O4'], [75.0]),

In [80]:
# Number of database entries left after restricting to the shared keys.
len(processed_database_output)

511

In [81]:
# Number of model entries left after restricting to the shared keys; this
# equals the database count because both dicts were rebuilt from shared_keys.
len(processed_model_output)

511

In [82]:
# Count the papers for which at least one material from the model output
# matches a database material. Formulas are compared through
# standard_chem_formula, which keeps only the keys of the parsed formula,
# so amounts are ignored (e.g. 'Pt3U1' is compared as 'PtU').
correct_num_materials = 0
for key in processed_database_output.keys():
    # Local names are deliberately different from the notebook-level
    # `model_output` dict so that re-running earlier cells keeps working.
    database_formulas = set(processed_database_output[key][0])
    print(database_formulas)
    chemform_dbase = [standard_chem_formula(chemparse.parse_formula(entry)) for entry in database_formulas]
    model_formulas = set(processed_model_output[key][0])
    out = []
    for entry in model_formulas:
        try:
            out.append(standard_chem_formula(chemparse.parse_formula(entry)))
        except Exception:
            # A model formula that cannot be processed gets a placeholder.
            # Exception (rather than a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            out.append('blah')
    matches = set(chemform_dbase).intersection(out)
    print(key)
    print(chemform_dbase)
    print(out)
    print(matches)
    print("-----------")
    if matches:
        correct_num_materials += 1

{'Ga1'}
physrevb.10.4572
['Ga']
['Ga']
{'Ga'}
-----------
{'Eu1Fe2As2'}
physrevb.100.014503
['AsEuFe']
['AsEuFe']
{'AsEuFe'}
-----------
{'Pd1S2'}
physrevb.100.014507
['PdS']
['PdS']
{'PdS'}
-----------
{'Rb1'}
physrevb.100.041109
['Rb']
['Rb']
{'Rb'}
-----------
{'Au2Pb1'}
physrevb.100.060103
['AuPb']
['AuPb']
{'AuPb'}
-----------
{'Rb1Cr3As3'}
physrevb.100.094511
['AsCrRb']
['AsCrRb']
{'AsCrRb'}
-----------
{'W3Al2C1'}
physrevb.100.094522
['AlCW']
['AlCW']
{'AlCW'}
-----------
{'Ta1Os1Si1'}
physrevb.100.134503
['OsSiTa']
['OsSiTa']
{'OsSiTa'}
-----------
{'K0.18W1O3'}
physrevb.23.2219
['KOW']
['KOW']
{'KOW'}
-----------
{'K1Hg1C4'}
physrevb.26.6315
['CHgK']
['CHgK']
{'CHgK'}
-----------
{'La1Ir2Si2'}
physrevb.28.1389
['IrLaSi']
['IrLaSi']
{'IrLaSi'}
-----------
{'Ce1Cu2Si2'}
physrevb.30.1182
['CeCuSi']
['CeCuSi']
{'CeCuSi'}
-----------
{'Pt3U1'}
physrevb.30.1583
['PtU']
['PtU']
{'PtU'}
-----------
{'Pt3U1'}
physrevb.30.2986
['PtU']
['PtU']
{'PtU'}
-----------
{'Ce1Ru3Si2'}
physrevb.3

In [83]:
# Fraction of shared keys with at least one formula match between the
# database and the model output.
correct_num_materials / len(processed_database_output)

1.0

In [88]:
# Count the papers whose first model temperature agrees with the first
# database temperature to within 0.01; every other paper is printed as
# key, database value, model value (tab-separated).
# The counter keeps the name `correct_num_materials` because the following
# cell divides it by the number of entries; here it counts temperatures.
correct_num_materials = 0
for key in processed_database_output.keys():
    # Local names are deliberately different from the notebook-level
    # `model_output` dict so that re-running earlier cells keeps working.
    database_temperature = processed_database_output[key][1][0]
    model_temperature = processed_model_output[key][1][0]

    # -100.0 is the sentinel stored for missing or unparseable values.
    # Two sentinels would otherwise differ by 0 and be counted as a match.
    both_parsed = database_temperature != -100.0 and model_temperature != -100.0
    if both_parsed and abs(database_temperature - model_temperature) < 0.01:
        correct_num_materials += 1
    else:
        print(key, database_temperature, model_temperature, sep='\t')

physrevb.100.014507	2.0	8.0
physrevb.100.041109	2.1	2.0
physrevb.100.060103	3.61	4.0
physrevb.100.134503	5.51	5.8
physrevb.23.2219	3.9	1.2
physrevb.26.6315	1.42	1.7
physrevb.30.1182	0.7	2.0
physrevb.30.1583	0.52	0.54
physrevb.30.444	1.25	1.0
physrevb.31.1654	0.48	17.6
physrevb.32.135	2.36	2.46
physrevb.34.4590	3.77	79.0
physrevb.34.4920	8.2	0.7
physrevb.36.3906	35.7	39.0
physrevb.36.3910	75.0	34.0
physrevb.36.4018	42.0	31.0
physrevb.36.8791	86.0	0.87
physrevb.39.11445	35.0	37.0
physrevb.39.7339	26.0	30.0
physrevb.41.9551	83.0	30.0
physrevb.48.16869	74.0	120.0
physrevb.49.12322	138.0	133.0
physrevb.49.13184	87.0	110.0
physrevb.49.1495	109.0	110.3
physrevb.49.15959	82.4	3.0
physrevb.49.3502	28.5	32.0
physrevb.49.6392	17.2	16.0
physrevb.49.9073	95.0	38.0
physrevb.50.15875	36.7	36.0
physrevb.50.16125	91.0	91.5
physrevb.50.351	16.5	14.6
physrevb.50.426	92.8	15.1
physrevb.50.496	5.3	7.5
physrevb.50.6523	1.26	1.23
physrevb.51.1286	35.0	222.0
physrevb.51.3316	92.0	92.5
physrevb.52.10569	93.5	9

In [89]:
# Fraction of shared keys counted as a temperature match by the preceding
# loop. Despite its name, correct_num_materials holds the temperature count here.
correct_num_materials / len(processed_database_output)

0.5185909980430529

In [43]:
# NOTE(review): this cell carries execution count 43 and shows 510, while the
# same expression earlier in the notebook shows 511. It looks like a stale
# leftover from an earlier run; re-run or remove it.
len(processed_database_output)

510