In [3]:
import os

from sqlalchemy import and_, or_
import pandas as pd
pd.set_option("display.max_rows", None)

from grdb.config import Config
from grdb.database.v1_1_0 import dal as db1     # prod dal
from grdb.database import dal as db2            # test dal
from grdb.database.v1_1_0 import Base as Base1  # prod base
from grdb.database import Base as Base2         # test base
from grdb.database.v1_1_0.models import Sample,Recipe,Author

In [4]:
# Connect db1 (the old v1.1.0 schema) to the production database.
# The Config prefix/suffix pair selects the production read-only settings, and
# the privileges passed to init_db request read access only.
config_prefix, config_suffix = "PROD_DATABASE", "_READ"
prod_conf = Config(
    prefix=config_prefix,
    suffix=config_suffix,
    debug=True,
    try_secrets=False,
)
read_only_privileges = {"read": True, "write": False, "validate": False}
db1.init_db(prod_conf, privileges=read_only_privileges)

In [33]:
from grdb.database.v1_1_0.models import Author
from grdb.database.v1_1_0.models import SemFile
from grdb.database.v1_1_0.models import RamanFile
from grdb.database.v1_1_0.models import RamanSet
from grdb.database.v1_1_0.models import RamanSpectrum
from grdb.database.models import Author as Author2
from grdb.database.models import Furnace as Furnace2
from grdb.database.models import Experiment as Experiment2
from grdb.database.models import Recipe as Recipe2
from grdb.database.models import PreparationStep as PreparationStep2
from grdb.database.models import Substrate as Substrate2
from grdb.database.models import EnvironmentConditions as EnvironmentConditions2
from grdb.database.models import Properties as Properties2
from grdb.database.models import SemFile as SemFile2
from grdb.database.models import SemAnalysis as SemAnalysis2
from grdb.database.models import RamanFile as RamanFile2
from grdb.database.models import RamanAnalysis as RamanAnalysis2

# Define the queries used by the migration cells below. They all run against
# the production (v1.1.0) session; a Query object is only executed when it is
# iterated or indexed, so defining them here does not hit the database.
ses1 = db1.Session()

# Distinct furnace geometries found in the old Recipe table. Rows where every
# furnace field is NULL are excluded.
# (`!= None` is deliberate: SQLAlchemy renders it as `IS NOT NULL`.)
unique_furnace_query = (
    ses1.query(Recipe.tube_diameter, Recipe.cross_sectional_area, Recipe.tube_length)
    .distinct()
    .filter(
        or_(
            Recipe.tube_diameter != None,
            Recipe.cross_sectional_area != None,
            Recipe.tube_length != None,
        )
    )
)

# Distinct substrate descriptions found in the old Recipe table. Rows where
# every substrate field is NULL are excluded.
unique_substrate_query = (
    ses1.query(
        Recipe.catalyst,
        Recipe.thickness,
        Recipe.diameter,
        Recipe.length,
        Recipe.sample_surface_area,
    )
    .distinct()
    .filter(
        or_(
            Recipe.catalyst != None,
            Recipe.thickness != None,
            Recipe.diameter != None,
            Recipe.sample_surface_area != None,
            Recipe.length != None,
        )
    )
)

# Distinct (first name, last name, institution) triples from the old Author table.
unique_authors_query = ses1.query(
    Author.first_name, Author.last_name, Author.institution
).distinct()

# Whole-table queries for the file and Raman tables.
# The original cell also called pd.read_sql() on the SemFile and RamanFile
# queries here, but discarded the resulting DataFrames (they were not the last
# expression of the cell, so nothing was displayed). Those two full-table
# reads were removed. To preview a table, run e.g.
#     pd.read_sql(sem_file_query.statement, ses1.bind).head()
# as the last line of its own cell.
sem_file_query = ses1.query(SemFile)
raman_file_query = ses1.query(RamanFile)
raman_set_query = ses1.query(RamanSet)
raman_spectrum_query = ses1.query(RamanSpectrum)
unique_raman_spectrum_query = ses1.query(
    RamanSpectrum.xcoord, RamanSpectrum.ycoord
).distinct()


In [34]:
from sqlalchemy import inspect
# Module-level inspector for the Author2 model. createC3Typedef (below) builds
# its own local inspector, so no cell shown in this notebook reads this variable.
insp = inspect(Author2)
from sqlalchemy_utils import get_hybrid_properties

def mapToC3Primative(str_in):
    """Map a column type to the name of the matching C3 primitive type.

    The argument is converted with ``str()`` and searched for a known marker
    substring (for example ``"VARCHAR(64)"`` contains ``"VAR"``).

    Args:
        str_in: A column type object or type name; only its ``str()`` form is used.

    Returns:
        One of ``"string"``, ``"int"``, ``"double"``, ``"datetime"`` or
        ``"boolean"``.

    Raises:
        ValueError: If the type name contains none of the known markers.
            (The original implementation failed with ``UnboundLocalError``
            in this case because its result variable was never assigned.)
    """
    type_name = str(str_in)
    # Ordered from highest to lowest precedence. The original was a chain of
    # independent ``if`` statements in which the LAST matching marker won, so
    # the first hit in this reversed order gives the same answer.
    marker_to_primitive = (
        ("BOOL", "boolean"),
        ("DATE", "datetime"),
        ("FLOA", "double"),
        ("INT", "int"),
        ("VAR", "string"),
    )
    for marker, primitive in marker_to_primitive:
        if marker in type_name:
            return primitive
    raise ValueError(f"No C3 primitive mapping for column type {type_name!r}")

import re

def anti_vowel(s,keep_first=False):
    """Return ``s`` with the ASCII vowels a, e, i, o, u removed (case-insensitive).

    Args:
        s: The input string.
        keep_first: When True, the first character is kept even if it is a
            vowel; vowels are only stripped from the rest of the string.

    Returns:
        The string with vowels removed. An empty input yields an empty string.
    """
    if keep_first:
        # Slicing (s[:1]) instead of indexing (s[0]) keeps an empty string
        # from raising IndexError.
        return s[:1] + re.sub(r'[AEIOU]', '', s[1:], flags=re.IGNORECASE)
    return re.sub(r'[AEIOU]', '', s, flags=re.IGNORECASE)

def _snake_to_camel(name):
    """Convert a snake_case identifier to lowerCamelCase (``first_name`` -> ``firstName``)."""
    head, *tail = name.split('_')
    return head + ''.join(part.title() for part in tail)


def createC3Typedef(modelClass,fileOut=False):
    """Generate the text of a C3 ``.c3typ`` entity definition for a SQLAlchemy model.

    The generated text contains, in this order:
      * a header comment and the ``entity type <Name> schema name '<ABBR>'{`` line,
        where the abbreviation is the upper-cased class name with all vowels
        except a leading one removed;
      * one commented-out placeholder line per relationship;
      * one typed field per column (the ``id`` column is skipped), prefixed
        with ``!`` when the column's ``info`` dict has a truthy ``required``
        entry, followed by a ``<field>Info: json`` field;
      * one commented-out placeholder line per hybrid property.

    Args:
        modelClass: The mapped SQLAlchemy model class to describe.
        fileOut: When True, the text is also written to ``<ClassName>.c3typ``
            in the current working directory, overwriting an existing file.

    Returns:
        The generated type definition as a single newline-joined string.

    Raises:
        Whatever ``mapToC3Primative`` raises for a column type it cannot map.
    """
    from sqlalchemy import inspect
    insp = inspect(modelClass)
    typeName = insp.class_.__name__
    fileName = typeName + ".c3typ"
    schemaName = anti_vowel(typeName,True).upper()

    lines = ["/*","*",f"* {typeName}.c3typ","*","*/"]
    lines.append(f"entity type {typeName} schema name '{schemaName}'"+"{")

    # Relationships are emitted as commented-out stubs to be filled in by hand.
    lines.extend(["    /*","    *  Relationship fields","    */"])
    for r in insp.relationships.keys():
        lines.append(f"    //{_snake_to_camel(r)}: ")

    lines.extend(["    /*","    *  Data fields","    */"])
    for c in insp.columns:
        if c.name == "id":
            continue
        hasInfo = hasattr(c,"info")
        isRequired = bool(hasInfo and 'required' in c.info and c.info['required'])
        cc_name = _snake_to_camel(c.name)
        # A leading "!" is emitted for columns flagged as required.
        requiredMarker = "!" if isRequired else ""
        lines.append(f"    {cc_name}: {requiredMarker}{mapToC3Primative(c.type)}")
        if hasInfo:
            lines.append(f"    {cc_name}Info: json")

    # Hybrid properties are emitted as commented-out stubs, like relationships.
    lines.extend(["    /*","    *  Hybrid Property Fields","    */"])
    for h in list(get_hybrid_properties(modelClass).keys()):
        lines.append(f"    //{_snake_to_camel(h)}: ")

    lines.append("}")
    str_out = '\n'.join(lines)
    if fileOut:
        # Explicit encoding so the file does not depend on the platform default.
        with open(fileName,'w',encoding='utf-8') as f:
            f.write(str_out)

    return str_out

In [35]:
# Preview the C3 type definition generated from one SQLAlchemy model.
# With fileOut=False nothing is written to disk; the text is only printed.
print(createC3Typedef(Author2,False))
# Passing True as the second argument writes "<ModelName>.c3typ" into the
# current working directory and overwrites an existing file of that name.
# Un-comment the lines below to (re)generate the files:
# td = createC3Typedef(Author2,True)
# td = createC3Typedef(Furnace2,True)
# td = createC3Typedef(Experiment2,True)
# td = createC3Typedef(Recipe2,True)
# td = createC3Typedef(PreparationStep2,True)
# td = createC3Typedef(Substrate2,True)
# td = createC3Typedef(EnvironmentConditions2,True)
# td = createC3Typedef(Properties2,True)
# td = createC3Typedef(SemFile2,True)
# td = createC3Typedef(SemAnalysis2,True)
# td = createC3Typedef(RamanFile2,True)
# td = createC3Typedef(RamanAnalysis2,True)
#print(td)
# td = createC3Typedef(Furnace2)
# print(td)

/*
*
* Author.c3typ
*
*/
entity type Author schema name 'ATHR'{
    /*
    *  Relationship fields
    */
    //submittedExperiments: 
    //authoredExperiments: 
    /*
    *  Data fields
    */
    firstName: string
    firstNameInfo: json
    lastName: string
    lastNameInfo: json
    institution: string
    institutionInfo: json
    nanohubUserid: int
    nanohubUseridInfo: json
    /*
    *  Hybrid Property Fields
    */
    //fullNameAndInstitution: 
}


In [132]:
# Master switch for the cells that write to C3.
c3commit = True
if c3commit:
    from c3python import get_c3

    # The auth token is a credential and must not be stored in the notebook.
    # It is read from the C3_AUTH_TOKEN environment variable instead.
    # NOTE(review): the token that used to be hardcoded in this cell is in the
    # notebook history and should be treated as exposed (revoke / re-issue it).
    c3_auth_token = os.environ.get("C3_AUTH_TOKEN")
    if not c3_auth_token:
        raise RuntimeError(
            "Set the C3_AUTH_TOKEN environment variable before running this cell."
        )
    c3 = get_c3(
        'http://localhost:8080', 'grdb', 'devgrdb',
        auth = c3_auth_token
    )

keyfile=None keystring=None username=None
Getting C3 client with auth token for http://localhost:8080...


In [133]:
# `furnaces` is the distinct-furnace Query itself (not a materialized list);
# the migration cell below iterates it. `f` is its first row, kept for
# interactive inspection.
furnaces = unique_furnace_query
f = furnaces[0]

In [134]:
# Show the metadata dict attached to the new-schema Furnace.tube_diameter
# column; the migration copies these dicts into the "<field>Info" fields.
Furnace2.tube_diameter.info

{'verbose_name': 'Tube Diameter',
 'std_unit': 'mm',
 'conversions': {'mm': 1, 'inches': 25.4},
 'required': False,
 'tooltip': 'Diameter of the furnace tube'}

In [135]:
# `substrates` is the distinct-substrate Query itself; `s` is its first row,
# kept for interactive inspection in the next cells.
substrates = unique_substrate_query
s = substrates[0]

In [136]:
# Show the metadata dict attached to the new-schema Substrate.surface_area column.
Substrate2.surface_area.info

{'verbose_name': 'Sample Surface Area',
 'std_unit': 'mm2',
 'conversions': {'mm2': 1, 'um2': 1e-06, 'cm2': 100.0},
 'required': False,
 'tooltip': 'Surface area of the substrate'}

In [137]:
# List the column names of the old-schema substrate row. Note that the old
# column is `sample_surface_area`, while the new model calls it `surface_area`.
s.keys()

RMKeyView(['catalyst', 'thickness', 'diameter', 'length', 'sample_surface_area'])

In [138]:
######################################################################################
# Migration script
# ToDo:
# - TESTING!!!
# - nanohub_userid
# - environment_conditions
# - sem_files + analysis
# - raman_files + analysis
# - software versions
######################################################################################
# This part of the cell copies the distinct furnace, substrate and author rows
# of the old schema into the C3 types Furnace, Substrate and Author, then loads
# the old Sample/Recipe/Author rows needed for matching further down.

from math import isclose

# When True, each target C3 type is emptied with removeAll() before the new
# rows are merged, so re-running the cell replaces the rows instead of adding to them.
remove_to = True

print("Adding furnace rows...")
if c3commit:
    print("C3 commit...")
    # `furnaces` was bound to unique_furnace_query in an earlier cell.
    # Each "<field>Info" value is the metadata dict of the matching new-schema column.
    new_c3furnaces = [
        c3.Furnace(
            tubeDiameter =  f.tube_diameter,
            tubeDiameterInfo =  Furnace2.tube_diameter.info,
            crossSectionalArea = f.cross_sectional_area,
            crossSectionalAreaInfo = Furnace2.cross_sectional_area.info,
            tubeLength = f.tube_length,
            tubeLengthInfo = Furnace2.tube_length.info
        ) for f in furnaces
        ]
    if remove_to:
        c3.Furnace.removeAll()
    c3.Furnace.mergeBatch(new_c3furnaces)
print("done.")

print("Adding substrate rows...")
if c3commit: 
    print("C3 commit...")
    substrates = unique_substrate_query
    # The old column `sample_surface_area` is stored as `surfaceArea` in C3.
    new_substrates = [
            c3.Substrate(
                catalyst = s.catalyst,
                catalystInfo = Substrate2.catalyst.info,
                thickness = s.thickness,
                thicknessInfo = Substrate2.thickness.info,
                diameter = s.diameter,
                diameterInfo = Substrate2.diameter.info,
                surfaceArea = s.sample_surface_area,
                surfaceAreaInfo = Substrate2.surface_area.info,
                length = s.length,
                lengthInfo = Substrate2.length.info
            ) for s in substrates
        ]
    if remove_to:
        c3.Substrate.removeAll()
    c3.Substrate.mergeBatch(new_substrates)
    
print("Adding author rows...")
if c3commit: 
    print("C3 commit...")
    authors = unique_authors_query
    # One C3 Author per distinct (first name, last name, institution) triple.
    new_authors = [
        c3.Author(
            firstName = a.first_name,
            firstNameInfo = Author2.first_name.info,
            lastName = a.last_name,
            lastNameInfo = Author2.last_name.info,
            institution = a.institution,
            institutionInfo = Author2.institution.info
        ) for a in authors
    ]
    if remove_to:
        c3.Author.removeAll()
    c3.Author.mergeBatch(new_authors)
    
print("Getting old sample data...")
# Each result row of `samples` exposes both entities as row.Sample and row.Recipe.
# `samples` stays a Query, so it is re-executed every time it is iterated.
samples = ses1.query(Sample,Recipe).filter(Sample.id==Recipe.sample_id)
oauthors = ses1.query(Author).all()
print("done.")

def _numeric_field_matches(new_value, old_value, tolerance):
    """Return True when a migrated numeric field agrees with the old recipe value.

    Falsy values (None, and also 0) count as "not set": two unset values match,
    a set value never matches an unset one, and two set values match when
    ``math.isclose`` accepts them with ``tolerance`` used as both the relative
    and the absolute tolerance.
    """
    if new_value and old_value:
        return isclose(new_value, old_value, rel_tol=tolerance, abs_tol=tolerance)
    return not new_value and not old_value


def _text_field_matches(new_value, old_value):
    """Return True when two text fields are equal or are both unset (falsy)."""
    if new_value:
        return new_value == old_value
    return not old_value


# Re-read the parent rows from C3 so that they carry their C3 ids.
# NOTE: these fetches need the `c3` client even when c3commit is False.
new_furnaces = c3.Furnace.fetch().objs
new_substrates = c3.Substrate.fetch().objs
new_authors = c3.Author.fetch().objs

# Maps from old Sample.id to the matching C3 furnace id / substrate id / list
# of C3 Author objects. A sample without a match is mapped to None (or to an
# empty list for authors).
furnacemap = {}
substratemap = {}
authormap = {}
furnace_tol = 0.001  # tolerance for the furnace geometry fields
tol = 0.01           # tolerance for the substrate geometry fields
print ("Matching parent tables...")
for s in samples:
    sample_id = s.Sample.id
    old_recipe = s.Sample.recipe

    #################################################################################
    # Match to a row in the furnace table
    #################################################################################
    # First furnace whose three fields all agree with the old recipe. The
    # default of None also covers an empty furnace list, which previously left
    # the sample without any entry in the map.
    furnacemap[sample_id] = next(
        (
            f.id
            for f in new_furnaces
            if _numeric_field_matches(f.tubeDiameter, old_recipe.tube_diameter, furnace_tol)
            and _numeric_field_matches(f.tubeLength, old_recipe.tube_length, furnace_tol)
            and _numeric_field_matches(f.crossSectionalArea, old_recipe.cross_sectional_area, furnace_tol)
        ),
        None,
    )

    #################################################################################
    # Match to row in Substrate table
    #################################################################################
    substratemap[sample_id] = next(
        (
            sb.id
            for sb in new_substrates
            if _text_field_matches(sb.catalyst, old_recipe.catalyst)
            and _numeric_field_matches(sb.thickness, old_recipe.thickness, tol)
            and _numeric_field_matches(sb.diameter, old_recipe.diameter, tol)
            and _numeric_field_matches(sb.length, old_recipe.length, tol)
            and _numeric_field_matches(sb.surfaceArea, old_recipe.sample_surface_area, tol)
        ),
        None,
    )

    #################################################################################
    # Match to row(s) in Author table
    #################################################################################
    authormap[sample_id] = []
    for oa in oauthors:
        if oa.sample_id != sample_id:
            continue
        name_matches = [
            na for na in new_authors
            if oa.first_name == na.firstName and oa.last_name == na.lastName
        ]
        # The C3 Author rows were created distinct on (first name, last name,
        # institution), so prefer the row whose institution also agrees. Fall
        # back to the first name-only match, which is what the original code did.
        exact_matches = [
            na for na in name_matches
            if _text_field_matches(getattr(na, "institution", None), oa.institution)
        ]
        authormap[sample_id].extend((exact_matches or name_matches)[:1])

#Print out the maps

# print (f"fmap: {furnacemap}")
# print (f"sbmap: {substratemap}")
# print(f"amap: {authormap}")
print ("Maps Done.")

Adding furnace rows...
C3 commit...
done.
Adding substrate rows...
C3 commit...
Adding author rows...
C3 commit...
Getting old sample data...
done.
Matching parent tables...
Maps Done.


In [139]:
# Value written to ambientTemperature for every migrated sample.
# NOTE(review): this looks like a placeholder rather than migrated data (the
# migration ToDo list still names environment_conditions) - confirm the real source.
PLACEHOLDER_AMBIENT_TEMPERATURE = 10

# Old Sample.id -> C3 EnvironmentConditions object. The objects are only built
# here; a later cell attaches them to the fetched experiments.
environment_condition_map = {}
print("Adding Experiment rows...")
if c3commit:
    if remove_to:
        c3.PreparationStep.removeAll()
        c3.Recipe.removeAll()
        c3.Experiment.removeAll()
    for s in samples:
        sample_id = s.Sample.id
        print(f"{sample_id}")
        # Reading carbon_source can raise IndexError; such samples get None.
        # NOTE(review): presumably the old model derives it from the
        # preparation steps and fails when there are none - confirm.
        try:
            carbon_source = s.Recipe.carbon_source
        except IndexError:
            carbon_source = None
        environment_condition_map[sample_id] = c3.EnvironmentConditions(
            dewPoint = s.Sample.recipe.dewpoint,
            ambientTemperature = PLACEHOLDER_AMBIENT_TEMPERATURE
        )
        # One C3 Recipe per old sample; it is merged first and then referenced
        # by that sample's preparation steps and experiment.
        recipe = c3.Recipe(
                    carbonSource=carbon_source,
                    basePressure=s.Recipe.base_pressure
        ).merge()
        preparation_steps = [
        c3.PreparationStep(
                            recipe=recipe,
                            name=step.name,
                            step=step.step,
                            duration=step.duration,
                            furnacePressure=step.furnace_pressure,
                            furnaceTemperature=step.furnace_temperature,
                            sampleLocation=step.sample_location,
                            heliumFlowRate=step.helium_flow_rate,
                            hydrogenFlowRate=step.hydrogen_flow_rate,
                            carbonSourceFlowRate=step.carbon_source_flow_rate,
                            argonFlowRate=step.argon_flow_rate,
                            coolingRate=step.cooling_rate
                        ) for step in s.Recipe.preparation_steps
        ]
        c3.PreparationStep.mergeBatch(preparation_steps)

        # Add the new experiment based on the old sample.
        # .get() is used for the lookups because the matching cell stores no
        # furnace/substrate entry at all when the fetched C3 list was empty;
        # None is the value it stores for "no match" otherwise.
        newexp = c3.Experiment(
                experimentId=sample_id,
                sampleId=sample_id,
                recipe = recipe,
                furnace = furnacemap.get(sample_id),
                substrate = substratemap.get(sample_id),
                authors = authormap.get(sample_id, []),
                materialName=s.Sample.material_name,
                experimentDate=s.Sample.experiment_date
                ).merge()

print("done.")

Adding Experiment rows...
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
done.


In [140]:
# Re-read the experiments from C3 and index them by the old sample id, so that
# child rows can be attached to the right experiment.
new_experiments = c3.Experiment.fetch().objs
new_experiments_map = {
    o.sampleId:o for o in new_experiments
}

####################################################################################
if c3commit:
    print("C3 commit...")
    print("Adding Properties rows...")
    if remove_to:
        c3.Properties.removeAll()
    new_properties = [
        c3.Properties(
            experiment = new_experiments_map[s.Sample.properties.sample_id],
            experimentId = s.Sample.properties.sample_id,
            averageThicknessOfGrowth = s.Sample.properties.average_thickness_of_growth,
            standardDeviationOfGrowth = s.Sample.properties.standard_deviation_of_growth,
            numberOfLayers = s.Sample.properties.number_of_layers,
            growthCoverage = s.Sample.properties.growth_coverage,
            domainSize = s.Sample.properties.domain_size,
            shape = s.Sample.properties.shape,
        ) for s in samples
    ]

    c3.Properties.mergeBatch(new_properties)

    print("done.")

    #####################################################################################
    print("Adding Environment Conditions Field...")
    # Attach the EnvironmentConditions objects built in the experiment cell.
    for e in new_experiments:
        e.environmentConditions = environment_condition_map[e.sampleId]
    c3.Experiment.mergeBatch(new_experiments)

    print("done.")

    #####################################################################################
    print("Adding SemFile rows...")

    sem_files = ses1.query(SemFile).all()

    # Bug fix: the experiment used to be looked up through `s`, which is not
    # bound by this comprehension. It resolved to the leftover global loop
    # variable, so every SEM file was linked to the same experiment. The lookup
    # now uses the file's own sample id. .get() yields None for a file whose
    # sample was not migrated; experimentId still records the id in that case.
    new_sem_files = [
        c3.SemFile(
            id = f.id,
            experimentId=f.sample_id,
            filename=f.filename,
            boxUrl=f.url,
            experiment = new_experiments_map.get(f.sample_id)
        )
        for f in sem_files
    ]
    c3.SemFile.mergeBatch(new_sem_files)
    print("done.")

    #####################################################################################

    print("Adding RamanFile rows...")
    raman_files = ses1.query(RamanFile).all()
    # NOTE(review): unlike SemFile, no `experiment` reference is set here, only
    # experimentId - confirm whether that is intended.
    new_raman_files = [
        c3.RamanFile(
            id = f.id,
            experimentId=f.sample_id,
            filename=f.filename,
            boxUrl=f.url,
            wavelength=f.wavelength,
        )
        for f in raman_files
    ]
    c3.RamanFile.mergeBatch(new_raman_files)
    print("done.")

C3 commit...
Adding Properties rows...
done.
Adding Environment Conditions Field...
done.
Adding SemFile rows...
done.
Adding RamanFile rows...
done.


In [None]:
# Scratch cell: re-fetch the Furnace rows from C3 (rebinds the global `new_furnaces`).
new_furnaces = c3.Furnace.fetch().objs

In [None]:
# Scratch cell: display the tube diameter of the first fetched furnace.
new_furnaces[0].tubeDiameter