## Getting a list of all material_ids in the Materials Project database 

In [59]:
from pymatgen import MPRester
import pprint

api_key=None

mpr = MPRester(api_key)
all_mpids = mpr.query({}, ["material_id", "elements", "e_above_hull"])

print 'Number of materials in database:', len(all_mpids)
print all_mpids[8464]

Number of materials in database: 68335
{u'elements': [u'C', u'Nb'], u'material_id': u'mp-15660', u'e_above_hull': 0.17086415607142946}


In [60]:
# NOTE: this binds a second name to the same list object (no copy is made), so
# the in-place removals performed on all_mpids in later cells are visible
# through temp_set as well.
temp_set = all_mpids

## Filtering out materials

### Deleting noble gas materials and materials with energy above hull > 0.2 eV

In [63]:
# delete all with e_above_hull > 0.2 and if contains any Noble gases
noble_gases = ['He', 'Ne', 'Ar', 'Kr', 'Xe', 'Rn']

num_mats_removed = None
while num_mats_removed != 0: # not sure why, but it takes multiple iterations to clear out
    old_num_mats = len(all_mpids)
    
    for mat in all_mpids:
        # print 'cleaning nobles gases'
        if bool(set(mat['elements']) & set(noble_gases)):
            all_mpids.remove(mat)

    for mat in all_mpids:
        # print 'cleaning high energy above hull materials'
        if float(mat['e_above_hull']) > 0.2:
            all_mpids.remove(mat)
            
    num_mats_removed = old_num_mats - len(all_mpids)
        
        
        
print 'Number of materials in database e_bove_hull < 0.2 eV and does not contain noble gases:', len(all_mpids)

Number of materials in database e_bove_hull < 0.2 eV and does not contain noble gases: 62149


### Deleting specific materials that are liquid/gas at standard conditions

In [64]:
mats_to_delete = ['mp-632250', 'mp-754417', 'mp-570752', 'mp-634659', 'mp-23907',   # H
                  'mp-999498', 'mp-12103','mp-754514', 'mp-672234',                 # N
                  'mp-607540', 'mp-560602',                                         # O
                  'mp-21848',                                                       # F
                                                                                    # Cl is None
                  'mp-998864', 'mp-1010048',                                        # Br
                  'mp-611219'                                                       # Hg
                 ]

for mat in all_mpids:
    if mat['material_id'] in mats_to_delete:
        all_mpids.remove(mat)
        print mat['material_id'], 'deleted'

mp-570752 deleted
mp-999498 deleted
mp-12103 deleted
mp-998864 deleted
mp-607540 deleted
mp-1010048 deleted
mp-634659 deleted
mp-632250 deleted
mp-754417 deleted
mp-611219 deleted
mp-754514 deleted
mp-21848 deleted
mp-672234 deleted
mp-23907 deleted


In [65]:
# Number of materials left in all_mpids at this point in the notebook.
print 'final count:', len(all_mpids)

final count: 62135


## Retrieve full structure data and import into MongoDB

In [55]:
from pymongo import MongoClient
client = MongoClient()

db = client.structure_sets
print db.collection_names()

[u'mp_test', u'mp_final', u'materials_project', u'materials_project_test']


In [54]:
# Clean-up of a scratch collection: delete every document in it, then drop
# the collection itself.
# NOTE(review): drop() presumably discards the documents as well, which would
# make the delete_many({}) call redundant -- confirm before simplifying.
# NOTE(review): the name 'materials_project_test_set' does not appear in the
# collection listing printed by the cell above -- verify the intended target.
db.materials_project_test_set.delete_many({})
db.materials_project_test_set.drop()

In [29]:
# Here is the main section, depending on internet speeds should take a few hours

count = 0
for mat in all_mpids:
    s = mpr.get_structure_by_material_id(mat['material_id']).as_dict()
    s['material_id'] = mat['material_id']
    s['elements'] = mat['elements']
    result = db.materials_project.insert_one(s)
    
    count +=1
    if count%100 == 0:
        print count, 'structures imported.'
    

100 structures imported.
200 structures imported.
300 structures imported.
400 structures imported.
500 structures imported.
600 structures imported.
700 structures imported.
800 structures imported.
900 structures imported.
1000 structures imported.
1100 structures imported.
1200 structures imported.
1300 structures imported.
1400 structures imported.
1500 structures imported.
1600 structures imported.
1700 structures imported.
1800 structures imported.
1900 structures imported.
2000 structures imported.
2100 structures imported.
2200 structures imported.
2300 structures imported.
2400 structures imported.
2500 structures imported.
2600 structures imported.
2700 structures imported.
2800 structures imported.
2900 structures imported.
3000 structures imported.
3100 structures imported.
3200 structures imported.
3300 structures imported.
3400 structures imported.
3500 structures imported.
3600 structures imported.
3700 structures imported.
3800 structures imported.
3900 structures impor

In [22]:
# Count the documents currently stored in the materials_project collection.
cursor = db.materials_project.find()
cursor.count()
# Confirm this number equals the final count printed earlier, to make sure
# every structure has been imported.
# NOTE(review): the recorded output (62595) differs from the recorded final
# count (62135) -- possibly duplicate documents from a repeated import run or
# a stale cell output; verify.

62595