## ofet-db CRUD executions

This notebook provides some of the basic create, read, update, and delete (CRUD) operations to interact with the ofet-db device repository

### Import Libraries and Connect to ofet-db
This block will import some of the main libraries required to interact with ofet-db (through MongoDB), plus some of the basic functions to connect to the database

In [4]:
from crud_functions import *
import bson
import bson.json_util

uri = "mongodb+srv://aliu319:LO9UfXxBfDEPPfcQ@aliu319-gt.t7rt0.mongodb.net/test?retryWrites=true&w=majority"
db = "ofet-db"
collection = "P3HT" 

### Example with inserting P3HT dataset

Note that _id is not filled in an original dataset. This is kept updated as new rows are added

In [9]:
mydb = connect_mongo(uri=uri, db=db)
col = mydb["P3HT"]
df = pd.read_csv('data/P3HT_literature_dataset_feed.csv')
df.columns

Index(['Unnamed: 0', '_id', 'Author', 'Year', 'DOI', 'Mn_kDa', 'Mw_kDa', 'PDI',
       'RR', 'solution_concentration_mg_ml', 'solvent_name',
       'poor_solvent_name', 'solvent1_VF', 'solvent_bp_C', 'hansen_radius',
       'age_time_hr', 'age_temp_c', 'substrate_surface_treatment',
       'process_environment', 'spin_rate_rpm', 'spin_time_s',
       'dip_rate_mm_min', 'dip_time_min', 'film_thickness_nm', 'anneal_temp_c',
       'anneal_time_hr', 'mobility_environment', 'mobility_regime',
       'electrode_config', 'channel_length_um', 'channel_width_mm', 'Vds_V',
       'electrode_material', 'deposition_method', 'mobility_cm2_Vs'],
      dtype='object')

In [10]:
for i in [217]:

    row_i = get_persson_dataset_dict(df.iloc[i])  

    #note: bson.json_util is required if bson types are used, such as converting an _id to ObjectId(str) type
    json_i = bson.json_util.dumps(row_i, cls=NpEncoder) #this is to encode numpy data types into regular
    # json_i = json.dumps(row_i)

    col.insert_one(bson.json_util.loads(json_i))

In [3]:


docs = read_mongo_docs(uri, db, collection, proj={"literature": 1})
# normalize(docs)
temp = pd.json_normalize(docs)

ModuleNotFoundError: No module named 'pandas.io.json.normalize'

In [17]:
query = {'solution.solvent.boiling_point_C': 61}
proj = {'_id': 0, 'solution.polymer': 1}

df = read_mongo(uri=uri, db=db, collection=collection, query=query, proj=proj)
df

Unnamed: 0,solution.polymer.semiconductor.Mw_kDa,solution.polymer.semiconductor.Mn_kDa,solution.polymer.semiconductor.PDI,solution.polymer.semiconductor.RR,solution.polymer.semiconductor.name,solution.polymer.semiconductor.x_wt
0,47.700,24.0,1.9875,93.0,P3HT,1.0
1,47.700,24.0,1.9875,93.0,P3HT,1.0
2,47.700,24.0,1.9875,93.0,P3HT,1.0
3,47.700,24.0,1.9875,93.0,P3HT,1.0
4,47.700,24.0,1.9875,93.0,P3HT,1.0
...,...,...,...,...,...,...
82,6.608,5.6,1.1800,91.0,P3HT,1.0
83,20.424,13.8,1.4800,91.0,P3HT,1.0
84,20.424,13.8,1.4800,91.0,P3HT,1.0
85,25.650,19.0,1.3500,91.0,P3HT,1.0


In [10]:
mydb = _connect_mongo(uri=uri, db=db)
col = mydb["devices"]
cursor = col.find({})
type(list(cursor)[0])

dict

In [21]:
import json
from bson.objectid import ObjectId

class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.nan):
            return None
        return super(NpEncoder, self).default(obj)

def get_sample_dict(sample_row):
     
    sample_dict = {
        {
            "name": "P3HT",
            "Mn_kDa": sample_row.Mn_kDa,
            "Mw_kDa": sample_row.Mw_kDa,
            "PDI": sample_row.PDI,
            "RR": sample_row.RR,
            "x_wt": 1.0
        }
    }
    
    return sample_dict

In [31]:
df = pd.read_csv('data/P3HT_literature_dataset_feed.csv')
df

Unnamed: 0.1,Unnamed: 0,_id,Author,Year,DOI,Mn_kDa,Mw_kDa,PDI,RR,solution_concentration_mg_ml,...,anneal_time_hr,mobility_environment,mobility_regime,electrode_config,channel_length_um,channel_width_mm,Vds_V,electrode_material,deposition_method,mobility_cm2_Vs
0,0,627d547b973eca8e8a04e96f,Aiyar,2011,10.1002/adfm.201002729,24.0,47.7,1.9875,93.0,4.0,...,,Air,Linear,BGBC,50.0,2.0,-3.0,Au,SPUN,0.000202
1,1,627d547b973eca8e8a04e970,Aiyar,2011,10.1002/adfm.201002729,24.0,47.7,1.9875,93.0,4.0,...,,Air,Linear,BGBC,50.0,2.0,-3.0,Au,SPUN,0.007190
2,2,627d547b973eca8e8a04e971,Aiyar,2011,10.1002/adfm.201002729,24.0,47.7,1.9875,93.0,4.0,...,,Air,Linear,BGBC,50.0,2.0,-3.0,Au,SPUN,0.029400
3,3,627d547b973eca8e8a04e972,Aiyar,2011,10.1002/adfm.201002729,24.0,47.7,1.9875,93.0,4.0,...,,Air,Linear,BGBC,50.0,2.0,-3.0,Au,SPUN,0.050600
4,4,627d547c973eca8e8a04e973,Aiyar,2011,10.1002/adfm.201002729,24.0,47.7,1.9875,93.0,4.0,...,,Air,Linear,BGBC,50.0,2.0,-3.0,Au,SPUN,0.030600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,123,627d5481973eca8e8a04e9ea,Zhao,2013,10.1002/adfm.201301007,68.0,163.2,2.4000,90.0,3.5,...,,Air,Saturation,BGTC,30.0,0.5,,Au,SPUN,0.001020
124,124,627d5481973eca8e8a04e9eb,Zhao,2013,10.1002/adfm.201301007,68.0,163.2,2.4000,90.0,3.5,...,,Air,Saturation,BGTC,30.0,0.5,,Au,SPUN,0.003050
125,125,627d5481973eca8e8a04e9ec,Zhao,2013,10.1002/adfm.201301007,68.0,163.2,2.4000,90.0,3.5,...,,Air,Saturation,BGTC,30.0,0.5,,Au,SPUN,0.008030
126,126,627d5481973eca8e8a04e9ed,Zhao,2013,10.1002/adfm.201301007,68.0,163.2,2.4000,90.0,3.5,...,,Air,Saturation,BGTC,30.0,0.5,,Au,SPUN,0.009530


In [11]:
mydb = _connect_mongo(uri=uri, db=db)
col = mydb["devices"]

NameError: name '_connect_mongo' is not defined

In [33]:

for i in df.index:

    row_i = df.iloc[i]
    _id_i = row_i['_id']
    col.find_one_and_update(
        {
            "_id": ObjectId(_id_i)
        }, 
        {
            "$set": {
                'solution.polymer.semiconductor.name': "P3HT",
                'solution.polymer.semiconductor.Mn_kDa': row_i.Mn_kDa,
                'solution.polymer.semiconductor.Mw_kDa': row_i.Mw_kDa,
                'solution.polymer.semiconductor.PDI': row_i.PDI,
                'solution.polymer.semiconductor.RR': row_i.RR,
                'solution.polymer.semiconductor.x_wt': 1.0
            }
        }
    )
