# Create a database of organic molecules based on TCI scraped data

So far:
* An AWS Aurora server has already been created. See https://mi-6.docbase.io/posts/2582931
* A RDKit cartridge has already been setup in the database following https://mi-6.docbase.io/posts/2547303
* TCI data was cleaned, preprocessed and the naming of properties was taken care of (avoid using SQL special characters). Jupyter notebook https://github.com/stephanyvargas/Learning_DataScience/blob/master/WebScrapping_data/test_data_TCI.ipynb

## Imports

In [1]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

## Functions

In [222]:
def build_descriptions_table(df, list_properties=False):
    if list_properties:
        df=df[list_properties]
    
    for value in df.keys():
        a=1
        try:
            if df[value].unique()[1]:
                example = df[value].unique()[1]
            else:
                example = df[value].unique()[0]

            print('| {0} | TEXT | - | {1} | {2} |'.format(value, \
                                                              df[value].notnull().sum(), \
                                                              example))
        except AttributeError:
            print('Error', value)

# Load the data

In [2]:
%cd data
!ls 

/home/stephy/Learning_DataScience/Chemical_db/TCI_database_app/data
TCIAvailableStock.json		    TCI.smi
TCICompoundIdentifications.json     TCISpecifications.json
TCIGeneralInformation.json	    TCISpecificProperties.json
TCIGHSprecautionaryStatements.json  TCITransportationInformation.json
TCIRealtedLaws.json		    TCI.txt


## Compound Identifications Data

In [3]:
df_identifications = pd.read_json('TCICompoundIdentifications.json', orient ='split', compression = 'infer')

In [4]:
df_identifications.sample(3)

Unnamed: 0,name,CAS,code,grade,ProductNumber,CasRN,reaxysRegistryNumber,pubchemSubstanceId,SMILESPubChem,merckIndex14,mdlNumber,sdbsAistSpectralDB,relatedCasRN,colourIndex,enzymeCommissionNumber
M1356,Methyl-beta-cyclodextrin,128446-36-6,M1356,,m1356,128446-36-6,,87573311.0,,,mfcd00074980,,,,
B5371,sec-Butyl Methacrylate,2998-18-7,B5371,,b5371,2998-18-7,,,CCC(C)OC(=O)C(=C)C,,mfcd00048637,,,,
D4283,Dipropylene Glycol Dimethyl Ether,111109-77-4,D4283,,d4283,111109-77-4,,172088909.0,,,mfcd00210047,,,,


In [5]:
df_identifications.drop(['ProductNumber'], axis=1, inplace=False)

Unnamed: 0,name,CAS,code,grade,CasRN,reaxysRegistryNumber,pubchemSubstanceId,SMILESPubChem,merckIndex14,mdlNumber,sdbsAistSpectralDB,relatedCasRN,colourIndex,enzymeCommissionNumber
A0001,Abietic Acid,514-10-3,A0001,,514-10-3,2221451.0,87561707.0,CC(C)C1=CC2=CCC3C(C2CC1)(CCCC3(C)C(=O)O)C,7.0,mfcd03423567,1471.0,,,
A0002,Ethyl Abietate,631-71-0,A0002,,631-71-0,,87561708.0,CCOC(=O)C1(CCCC2(C1CC=C3C2CCC(=C3)C(C)C)C)C,,mfcd00028860,,,,
A0003,Acenaphthene,83-32-9,A0003,,83-32-9,386081.0,87561709.0,C1CC2=CC=CC3=C2C1=CC=C3,28.0,mfcd00003807,863.0,,,
A0004,Acenaphthenequinone,82-86-0,A0004,,82-86-0,879172.0,87561710.0,C1=CC2=C3C(=C1)C(=O)C(=O)C3=CC=C2,,mfcd00003805,3313.0,,,
A0005,Acenaphthylene,208-96-8,A0005,,208-96-8,774092.0,87561711.0,C1=CC2=C3C(=C1)C=CC3=CC=C2,,mfcd00003806,1349.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C2157,Carbon Nanotube Multi-walled,308068-56-6,C2157,,308068-56-6,,87559343.0,[C],,,,,,
C2158,Carbon Nanotube Multi-walled,308068-56-6,C2158,,308068-56-6,,87559344.0,[C],,,,,,
C3133,Carbon Nanotube Single-walled,308068-56-6,C3133,,308068-56-6,,253660889.0,[C],,,,,,
D6015,5-Iodo-2'-deoxytubercidin,166247-63-8,D6015,,166247-63-8,7626194.0,,,,mfcd07778650,,,,


### Details

| column name | data type | table constraint | description |
| ---- | ---- | ---- | ---- |
|name | TEXT | - | Name of the compound |
|CAS|TEXT|-|CAS identification|
|reaxysRegistryNumber|TEXT|-|Identifier for chemical substance in Reaxys (Beilstein Registry Number)|
|pubchemSubstanceId|TEXT|-|Identifier from database of chemical molecules and their activities in biological assays PubChem ID|
|sdbsAistSpectralDB|TEXT|-|Spectral Database for Organic Compounds|
|merckIndex14|TEXT|-| Merck Index for authoritative information on chemicals, drugs and biologicals| 
|mdlNumber|TEXT|-|MLD unique identification number for each reaction and variation.|
|SMILESPubChem|TEXT|-|SMILES identifier from Pubchem|
|code|TEXT|PRIMARY KEY|TCI unique id|
|grade|TEXT|-|Grade refering to the purity of the chemical|
|CasRn|TEXT|-|CAS RN unique identifier that provides an unambiguous means to distinguish chemical substances or molecular structures| 
|colourIndex|TEXT|-|Colour Index Generic Name describes a commercial product by its recognised usage class, its hue and a serial number|
|relatedCasRn|TEXT|-|CAS RN unique identifier that provides an unambiguous means to distinguish chemical substances or molecular structures| 
|enzymeCommissionNumber|TEXT|-|Enzyme Commission numerical classification scheme for enzymes, based on the chemical reactions they catalyze|


### Create table and upload to server

In [7]:
host ='molecule-db-instance-1.czixbih3kolx.us-west-2.rds.amazonaws.com'
port = 5432
database = 'smallmoleculedb'
user = 'MoleculeMaster'
password = 'UXT7nljK3!R791Tlz!KAgHu'

In [8]:
from sqlalchemy import create_engine
engine = create_engine(f'postgresql://{user}:{password}@{host}:{port}/{database}')
engine

Engine(postgresql://MoleculeMaster:***@molecule-db-instance-1.czixbih3kolx.us-west-2.rds.amazonaws.com:5432/smallmoleculedb)

In [9]:
# Upload the dataframe -> changed everything to kumoji!!!
df_identifications.to_sql('tci_compound_identifications', engine, index=False)

In [10]:
# define a primary Key
with engine.connect() as con:
    con.execute('ALTER TABLE tci_compound_identifications ADD PRIMARY KEY ("code");')

### Test the connection

In [11]:
%%time
table_retrieved = engine.execute("SELECT * FROM tci_compound_identifications;").fetchall()

CPU times: user 151 ms, sys: 63 ms, total: 214 ms
Wall time: 5.36 s


In [12]:
%%time
df_retrieved = pd.read_sql("SELECT * FROM tci_compound_identifications;", engine)

CPU times: user 239 ms, sys: 38.6 ms, total: 277 ms
Wall time: 3.21 s


The *CPU or execution time*, measures how much time a CPU spent on executing a program.

  - The *user time* corresponds to the time spent executing the process.

  - The *System time* corresponds to the time that the CPU is answering to system calls from the process. The system time can change depending on the operating system.

*Wall time or wall-clock time*, measures the total time to execute a program in a computer. 

If *(CPU time)/(wall clock time) < 1* means the program was just waiting and not executing the program. It can be affected by 
* Processes running on the machine, if other processes are keeping the CPU busy, there might be the need to wait for some free CPU.
* Unstable or slow network.
* Waiting for the server response.

More information on `%%time` [here](https://ipython.readthedocs.io/en/stable/interactive/magics.html?highlight=%25time#magic-time). And [here](https://pythonspeed.com/articles/blocking-cpu-or-io/) is some discussion regarding time bottle necks.

In [13]:
table_retrieved[:3]

[('Abietic Acid', '514-10-3', 'A0001', None, 'a0001', '514-10-3', 2221451.0, 87561707.0, 'CC(C)C1=CC2=CCC3C(C2CC1)(CCCC3(C)C(=O)O)C', 7.0, 'mfcd03423567', 1471.0, None, None, None),
 ('Ethyl Abietate', '631-71-0', 'A0002', None, 'a0002', '631-71-0', None, 87561708.0, 'CCOC(=O)C1(CCCC2(C1CC=C3C2CCC(=C3)C(C)C)C)C', None, 'mfcd00028860', None, None, None, None),
 ('Acenaphthene', '83-32-9', 'A0003', None, 'a0003', '83-32-9', 386081.0, 87561709.0, 'C1CC2=CC=CC3=C2C1=CC=C3', 28.0, 'mfcd00003807', 863.0, None, None, None)]

In [14]:
df_retrieved.sample(3)

Unnamed: 0,name,CAS,code,grade,ProductNumber,CasRN,reaxysRegistryNumber,pubchemSubstanceId,SMILESPubChem,merckIndex14,mdlNumber,sdbsAistSpectralDB,relatedCasRN,colourIndex,enzymeCommissionNumber
21190,Methyl-2-nitrosopropane Dimer,31107-20-7,M1164,,m1164,31107-20-7,,,CC(C)(C)N=O.CC(C)(C)N=O,,mfcd00002065,23542.0,6841-96-9,,
13587,"Dibenzothiophene 5,5-Dioxide",1016-05-3,D4153,,d4153,1016-05-3,146515.0,160870915.0,C1=CC=C2C(=C1)C3=CC=CC=C3S2(=O)=O,,mfcd00004970,,,,
28692,Tetrakis(dimethylsilyl)silane,2003-85-2,T1753,,t1753,2003-85-2,2074444.0,87577487.0,C[Si](C)[Si]([Si](C)C)([Si](C)C)[Si](C)C,,mfcd01631315,,,,


## Available Stock

In [15]:
df_stock = pd.read_json('TCIAvailableStock.json', orient ='split', compression = 'infer')

In [16]:
df_stock.sample(3).dropna(axis=1, how='all')

Unnamed: 0,code,OtherWH1G,price1G,Hyogo1G,Saitama1G,OtherWH5G,price5G,Hyogo5G,Saitama5G,lowestPriceOption
14285,D5147,Contact Company,3900.0,Contact Company,4,Contact Company,13200.0,2,10,3900.0
5780,B4143,Contact Company,4200.0,1,1,Contact Company,14400.0,2,Contact Company,4200.0
21413,M1626,Contact Company,21900.0,2,3,Contact Company,67600.0,Contact Company,1,21900.0


### Details

| column name | data type | table constraint | description |
| ---- | ---- | ---- | ---- |
|code|TEXT|PRIMARY KEY|TCI unique id|
|lowestPriceOption|REAL|-|Lowest avaliable price for a given compound|
|price[Amount][Units]|TEXT|-|Price for the specified amount of a product|
|Saitama[Amount][Units]|TEXT|-|Number of goods available in Saitama_Kawaguchi for a given amount|
|Hyogo[Amount][Units]|TEXT|-|Number of goods available in Hyogo_Amagasaki for a given amount|
|OtherWH[Amount][Units]|TEXT|-| Number of goods in other Warehouses for a given amount|

In [17]:
# Upload the dataframe -> changed everything to kumoji!!!
df_stock.to_sql('tci_available_stock', engine,  index=True)

In [18]:
# define a primary Key
with engine.connect() as con:
    con.execute('ALTER TABLE tci_available_stock ADD PRIMARY KEY ("code");')

In [19]:
%%time
df_retrieved_2 = pd.read_sql("SELECT * FROM tci_available_stock;", engine)

CPU times: user 2.03 s, sys: 104 ms, total: 2.13 s
Wall time: 12.4 s


In [20]:
df_retrieved_2.sample(2).dropna(axis=1, how='all')

Unnamed: 0,index,code,OtherWH1G,price1G,Hyogo1G,Saitama1G,OtherWH25G,price25G,Hyogo25G,Saitama25G,OtherWH5G,price5G,Hyogo5G,Saitama5G,lowestPriceOption
2123,2123,A2845,,,,,18,52000.0,1,3,Contact Company,14600.0,1,4,14600.0
25403,25403,P1625,20.0,5600.0,19.0,20.0,20,53700.0,1,10,20,16300.0,20,1,5600.0


## General Information

In [21]:
df_general = pd.read_json('TCIGeneralInformation.json', orient ='split', compression = 'infer')

In [22]:
df_general.sample(3).dropna(axis=1, how='all')

Unnamed: 0,code,MolecularFormula,Molecular Weight,purityAnalysisMethod,appearance,solubilityWater,storeUnderInertGas,purity
T1916,T1916,C__2__1H__2__2Si,302.49,>98.0%(gc),white to light yellow powder to crystal,practically insoluble,store under inert gas,98.0
T0241,T0241,C__1__5H__1__5NO__4,273.29,>98.0%(t),white to almost white powder to crystal,,,98.0
M1983,M1983,C__7H__1__2O,112.17,>95.0%(gc),colorless to light orange to yellow clear liquid,,,95.0


### Details

| column name | data type | table constraint | description | Non-Null Count | Example output |
| ---- | ---- | ---- | ---- | ---- |  ---- |
|code|TEXT|PRIMARY KEY|TCI unique id| - |  'A0001' |
| MolecularWeight |  NUMERIC | - | - | 29682 | 302.46 |
| purity |  NUMERIC | - | - | 27969 | 80. |
| MolecularFormula | TEXT | - | - | 29819 | 'C__2__0H__3__0O__2' |
| purityAnalysisMethod | TEXT | - | - | 27969 | '>80.0%(gc)' | 
| appearance | TEXT | - | - | 29773 | 'white to light yellow powder to crystal' |
| solubilityWater | TEXT | - | - | 5378 | 'decomposes in contact with water,practically insoluble' |
| rangeMolecularWeight | TEXT | - | - | 19 | '5000 to 150000(calcd.on dried substance)' |
| sensitiveness* | TEXT | - | - | 82 | 'colorimetric test : turn red with 1ppm aluminium' |
| averageActiveOxygen | TEXT | - | - | 1 | 'min. 98.0 %' |
| averageN | TEXT | - | - | 42 | '40.0 to 50.0' |
| averageMN | TEXT | - | - | 1 | '3.5 to 4.5' |
| averageNM | TEXT | - | - | 3 | '2.5 to 3.5' |
| contentDryingSubstance | TEXT | - | - | 2 | '6.0 to 8.5 %' |
| etherificationValueDryingSubstance | TEXT | - | - | 2 |  '0.5 to 0.8' |
| cw | TEXT | - | - | 34 | 'first-class designated chemicals (precursor)' |
| AssayMonoEster | TEXT | - | - | 6 | '35.0 to 47.0 %' | 
| AssayDiester | TEXT | - | - | 6 | '53.0 to 65.0 %' |
| suitabilityAbsorptiometry | TEXT | - | - | 1 | 'to pass test' |
| suitabilityAldehydeAnalysis | TEXT | - | - | 2 | 'abs min 0.450(near 635nm) in the presence of formaldehyde(0.2ppm)' |
| suitabilityAminoAcidAnalysis | TEXT | - | - | 3 | 'to pass test' |
| suitabilityArsenicAnalysis | TEXT | - | - | 1 | 'to pass test' |
| suitabilityBeryliumAnalysis | TEXT | - | - | 1 | 'to pass test' |
| suitabilityCalciumAnalysis | TEXT | - | - | 1 | 'to pass test' |
| suitabilityChromeAnalysis | TEXT | - | - | 1 | 'abs min.0.42(near 540nm)in the presence of dichromate(1 ppm)' |
| suitabilityCobaltAnalysis | TEXT | - | - | 2 | 'to pass test' |
| suitabilityCyanAnalysis | TEXT | - | - | 2 | 'to pass test' |
| suitabilityElectrophoresis | TEXT | - | - | 1 | 'to pass test' |
| suitabilityFormaldehydeAnalysys | TEXT | - | - | 1 | 'abs min. 0.300(near 580nm) in the presence of formaldehyde(0.6ppm)' |
| suitabilityGCAnalysis | TEXT | - | - | 2 | 'to pass test' |
| suitabilityIronAnalysis | TEXT | - | - | 6 | 'min. 0.50(535 nm)' |
| suitabilityLcMsAnalysis | TEXT | - | - | 2 | 'to pass test' |
| suitabilityMagnesiumAnalysis | TEXT | - | - | 1 | 'abs min. 0.30(540nn) in the presence of mg(10ppm)' |
| suitabilityMassAnalysisCalibratio | TEXT | - | - | 3 | 'to pass test' |
| suitabilityMelamineAnalysis | TEXT | - | - | 1 | 'to pass test' |
| suitabilityNitrateAnalysis | TEXT | - | - | 2 | 'abs min. 0.08(near 410nm) in the presence of nitrate ion(6 ppm)' |
| suitabilityNMRAnalysis | TEXT | - | - | 6 | 'effective as chiral shift reagent for 1-phenylethylamine' |
| suitabilitypCresolAnalysis | TEXT | - | - | 1 | 'to pass test' |
| suitabilityProteinAnalysisEcoli | TEXT | - | - | 1 | 'to pass test' |
| suitabilityProteinAnalysisYeast | TEXT | - | - | 1 | 'to pass test' |
| suitabilityRedoxReagent | TEXT | - | - | 1 | 'to pass test' | 
| suitabilitySilverAnalysis | TEXT | - | - | 1 | 'to pass test' |
| suitabilitySulfateAnalysis | TEXT | - | - | 1 | 'abs min. 0.40(near 530nm) in the presence of sulfate ion(200 ppm)' |
| suitabilitySulfideAnalysis | TEXT | - | - | 2 | 'abs min. 0.37(near 668nm) in the presence of sulfide' |
| suitabilitySulfiteAnalysis | TEXT | - | - | 1 | 'to pass test' |
| suitabilityTitaniumAnalysis | TEXT | - | - | 2 | 'abs min.0.28(near 390nm) in the presence of ti(1 ppm)' |
| suitabilityElisaTests | TEXT | - | - | 7 | 'min. 1.0 (human lactoferrin, 10 micro g/ml, od450)' |
| suitabilityVanadiumAnalysis | TEXT | - | - | 2 | 'to pass test' |
| suitabilityVCMAnalysis | TEXT | - | - | 1 | 'to pass test' |
| suitabilityTest | TEXT | - | - | 5 | 'to pass test(detection of primary and secondary amines)' |
| suitabilityTestProteinAnalysis | TEXT | - | - | 1 | 'to pass test' |
| elementalAnalysisNitrogen | TEXT | - | - | 340 | '24.00% to 25.50% (calcd.on anh.substance)' |
| elementalAnalysisCarbon | TEXT | - | - | 88 | '39.80 to 43.00 %' |
| elementalAnalysisOxygen | TEXT | - | - | 2 | '45 to 55 %(in 50deg-c, after drying in a vacuum)' |
| elementalAnalysisSulfuride | TEXT | - | - | 13 | '65.00 to 69.00 %' |
| elementalAnalysisHydrogen | TEXT | - | - | 3 | '6.80 to 7.50 %' |
| storeUnderInertGas | TEXT | - | - | 11072 | 'store under inert gas' |
| storageTemperature | TEXT | - | - | 4855 | '0-10°c' |
| suitabilityProteinAnalysis | TEXT | - | - | 37 | 'Noneto pass test' |

(*) May have mixed unicode characters in the output 

In [34]:
df_general.to_sql('tci_general_information', engine,  index=False)

In [35]:
# define a primary Key
with engine.connect() as con:
    con.execute('ALTER TABLE tci_general_information ADD PRIMARY KEY ("code");')

In [36]:
%%time
df_retrieved_3 = pd.read_sql("SELECT * FROM tci_general_information;", engine)

CPU times: user 633 ms, sys: 88.9 ms, total: 722 ms
Wall time: 17.2 s


In [38]:
df_retrieved_3.sample(3).dropna(axis=1, how='all')

Unnamed: 0,code,MolecularFormula,Molecular Weight,purityAnalysisMethod,appearance,solubilityWater,storeUnderInertGas,storageTemperature,purity
11511,D1408,C__5H__1__0Br__2,229.94,>95.0%(gc),colorless to light orange to yellow clear liquid,,,0-10°c,95.0
170,A0205,,,,white to light yellow powder to crystal,soluble,,,
7455,B6269,C__8H__1__8O__2Si,174.32,>95.0%(gc),colorless to light yellow clear liquid,,store under inert gas,0-10°c,95.0


##  GHS precautionary Statements

In [40]:
df_ghs = pd.read_json('TCIGHSprecautionaryStatements.json', orient ='split', compression = 'infer')

In [63]:
df_ghs.dropna(axis=0, how='any').sample(3)

Unnamed: 0,code,conditionsToAvoid,signalWord,poisonousOrDeleterious
C2580,C2580,heat sensitive,warning,deleterious substance
C1410,C1410,air sensitive,danger,deleterious substance
T2023,T2023,hygroscopic,danger,poisonous substance


### Details

| column name | data type | table constraint | description | Non-Null Count | Example output |
| ---- | ---- | ---- | ---- | ---- |  ---- |
|code|TEXT|PRIMARY KEY|TCI unique id| 30669 |  'A0001' |
| conditionsToAvoid | TEXT | - | - | 13784 | 'heat sensitive' |
| signalWord | TEXT | - | - | 21106 | 'warning' |
| poisonousOrDeleterious | TEXT | - | - | 1494 | '	poisonous substance' |

In [65]:
df_ghs.to_sql('tci_ghs', engine,  index=False)

In [66]:
# define a primary Key
with engine.connect() as con:
    con.execute('ALTER TABLE tci_ghs ADD PRIMARY KEY ("code");')

In [67]:
%%time
df_retrieved_4 = pd.read_sql("SELECT * FROM tci_ghs;", engine)

CPU times: user 53 ms, sys: 1.25 ms, total: 54.3 ms
Wall time: 3.4 s


In [71]:
df_retrieved_4.sample(3)

Unnamed: 0,code,conditionsToAvoid,signalWord,poisonousOrDeleterious
18553,H1395,,warning,
16431,F0532,,warning,deleterious substance
28579,T1612,moisture sensitive,danger,


## Related Laws

In [73]:
df_laws = pd.read_json('TCIRealtedLaws.json', orient ='split', compression = 'infer')

In [94]:
df_laws.drop('code', axis=1).dropna(axis=0, how = 'all').sample(5).dropna(axis=1, how = 'all')

Unnamed: 0,code,ChemicalSubstanceLawNumber,RTECS,fireDefenseLaw,prtrLawNewSpecificChemical,ishl
D3813,D3813,,,group-4-3-iii,,
N1202,N1202,1-417,qr7040000,,specified class 1 designated chemical substances,
G0409,G0409,"3-559, 3-594",,group-4-3-iii,class 1 designated chemical substances,mutagens
M2422,M2422,,,,,
D1955,D1955,3-1250,am7700000,,,


### Details

| column name | data type | table constraint | description | Non-Null Count | Example output |
| ---- | ---- | ---- | ---- | ---- |  ---- |
|code|TEXT|PRIMARY KEY|TCI unique id| 30669 |  'A0001' |
| ChemicalSubstanceLawNumber | TEXT | - | - | 8258 | '4-675' |
| RTECS | TEXT | - | - | 8231 | 'tp8580000' |
| fireDefenseLaw | TEXT | - | - | 8951 | 'group-4-3-iii' |
| prtrLawNewSpecificChemical | TEXT | - | - | 1959 | 'class 1 designated chemical substances' |
| narcoticsAndPsychotropicsControlLaw | TEXT | - | - | 72 | 'narcotic or psychotropic raw material' |
| ishl | TEXT | - | - | 389 | 'mutagens' |
| chemicalSubstanceLawEncs | TEXT | - | - | 49 | 'priority assessment chemical substance' |
| pharmaceuticalAffairsLawScheduled | TEXT | - | - | 12 | 'designated substance' |
| protectionOfTheOzoneLayerLawTypeSpecifiedMaterial | TEXT | - | - | 7 | appendix 1-1' |

In [98]:
df_laws.to_sql('tci_laws', engine,  index=False)

In [99]:
# define a primary Key
with engine.connect() as con:
    con.execute('ALTER TABLE tci_laws ADD PRIMARY KEY ("code");')

In [100]:
%%time
df_retrieved_5 = pd.read_sql("SELECT * FROM tci_laws;", engine)

CPU times: user 56.8 ms, sys: 9.11 ms, total: 65.9 ms
Wall time: 2.74 s


In [102]:
df_retrieved_5.dropna(axis=0, how = 'all').sample(5).dropna(axis=1, how = 'all')

Unnamed: 0,code,ChemicalSubstanceLawNumber,RTECS,fireDefenseLaw,prtrLawNewSpecificChemical
25235,P1319,,,group-4-1-ii,
10358,D0093,,,,
20124,L0403,9-2424,oj6360000,,
7508,C0110,3-194,bx0350000,group-4-3-iii,class 1 designated chemical substances
10841,D0696,3-2227,,group-4-1-ii,


## Transportation Information

In [106]:
df_transportation = pd.read_json('TCITransportationInformation.json', orient ='split', compression = 'infer')

In [127]:
df_transportation.drop('code', axis=1).dropna(axis=0, how = 'all').sample(5)

Unnamed: 0,unNumber,PackagingAndContainer,packingGroup,class,airTransportation
T2426,un1993,,iii,3,
B5015,,1G-Glass Bottle with Plastic Insert,,,
C1541,un2734,,ii,8 / 3,
T0078,un3439,,iii,6.1,
D2408,,100MG-Glass Bottle with Plastic Insert,,,


### Details

| column name | data type | table constraint | description | Non-Null Count | Example output |
| ---- | ---- | ---- | ---- | ---- |  ---- |
|code|TEXT|PRIMARY KEY|TCI unique id| 30669 |  'A0001' |
| unNumber |TEXT| - | - | 7432 | 'un3077' |
| PackagingAndContainer* |TEXT| - | - | 2627 | '1G-Glass Bottle with Plastic Insert' |
| packingGroup |TEXT| - | - | 7263 | 'ii' |
| class |TEXT| - | - | 7439 | 3.0 |
| airTransportation |TEXT| - | - | 186 | 'airmail banned' |

In [135]:
df_transportation.to_sql('tci_transportation', engine,  index=False)

In [136]:
# define a primary Key
with engine.connect() as con:
    con.execute('ALTER TABLE tci_transportation ADD PRIMARY KEY ("code");')

In [137]:
%%time
df_retrieved_6 = pd.read_sql("SELECT * FROM tci_transportation;", engine)

CPU times: user 70.3 ms, sys: 28.7 ms, total: 99 ms
Wall time: 2.52 s


In [138]:
df_retrieved_6.drop('code', axis=1).dropna(axis=0, how = 'all').sample(5)

Unnamed: 0,unNumber,PackagingAndContainer,packingGroup,class,airTransportation
7372,,1G-Glass Bottle with Plastic Insert,,,
12770,un3077,,iii,9,
20692,un1224,,iii,3,
28236,un2924,,ii,3 / 8,
28980,un3077,,iii,9,


## Specific Properties

In [139]:
df_properties = pd.read_json('TCISpecificProperties.json', orient ='split', compression = 'infer')

In [152]:
df_properties.drop('code', axis=1).dropna(axis=0, how = 'all').sample(6).dropna(axis=1, how = 'all')

Unnamed: 0,maximumAbsorptionWavelength,solubilitySolubleIn,solubilityInHotToluene,solubilityInHotDmf,boilingPoint,meltingPoint
B5739,313(meoh) nm,toluene,,,192.0,124.0
D5207,,acetone,,,,118.0
T0167,,,almost transparency,,,225.0
D2842,,methanol,,,,
N0603,,,,almost transparency,,
B1385,,methanol,,,231.0,74.0


### Details

| column name | data type | table constraint | description | Non-Null Count | Example output |
| ---- | ---- | ---- | ---- | ---- |  ---- |
| code | TEXT | PRIMARY KEY | - | 30669 | A0002 |
| flashPoint | NUMERIC | - | - | 6944 | 178.0 |
| boilingPoint | NUMERIC | - | - | 10179 | 350.0 |
| meltingPoint | NUMERIC | - | - | 18276 | 164.0 |
| refractiveIndex | NUMERIC | - | - | 8034 | 1.5299999714 |
| refractiveIndexn20d | TEXT | - | - | 45 | 1.4480 to 1.4520 |
| solubilityMiscibleWith | TEXT | - | - | 532 | ether,alcohol |
| solubilityInsolubleIn | TEXT | - | - | 518 | ether |
| solubilityVerySolubleIn | TEXT | - | - | 601 | alcohol |
| solubilitySolubleIn | TEXT | - | - | 6428 | acetone,benzene,alcohol,chloroform,ether |
| solubilityVerySlightly | TEXT | - | - | 416 | benzene |
| solubilitySlightlySolIN | TEXT | - | - | 733 | ether |
| degreeOfSolubilityInWater | TEXT | - | - | 1086 | 16.1 mg/l   25 °c |
| specificGravity | TEXT | - | - | 8336 | 1.03 |
| maximumAbsorptionWavelength | TEXT | - | - | 1923 | 493(h2o) nm |
| density20degC | TEXT | - | - | 12 | 0.8460 to 0.8560 g/ml |
| transitionIntervalPH | TEXT | - | - | 81 | (pale yellow)ph10　to　ph12(deep yellow) |
| viscosity | TEXT | - | - | 38 | 500.0 to 900.0 mpa-s(2 %, h2o, 25 deg-c) |
| averageMolecularWeight | TEXT | - | - | 28 | 285 to 310 |
| freezingPoint | TEXT | - | - | 87 | 36.0 to 42.0  °c |
| bindingCapacity | TEXT | - | - | 10 | to pass test(min. 5 mg/ml gel, human lactoferrin) |
| specificRotation | TEXT | - | - | 2581 | -75° (c=1,etoh) |
| specificRotationA20perD | TEXT | - | - | 2272 | +31.0 to +35.0 deg(c=1, etoh) |
| specificRotationValue | TEXT | - | - | 8 | 95.0 to 115.0 deg |
| specificRotationA25perD | TEXT | - | - | 3 | -34.0 to -39.5 deg(c=2, dioxane) |
| specificRotationA25 | TEXT | - | - | 1 | +108 to +115 deg(365nm, c=2, methanol) |
| solubilityInMethanol | TEXT | - | - | 735 | almost transparency |
| solubilityINhotEtoh | TEXT | - | - | 29 | almost transparency |
| solubilityINetoh | TEXT | - | - | 48 | almost transparency |
| solubilityInHotWater | TEXT | - | - | 169 | almost transparency |
| solubilityInHot1MolperLofHCl | TEXT | - | - | 2 | almost transparency |
| solubilityInSodiumHydroxideSolution | TEXT | - | - | 14 | almost transparency |
| solubilityInHCl1plus1 | TEXT | - | - | 4 | very faint turbidity |
| solubilityInToluene | TEXT | - | - | 105 | almost transparency |
| solubilityInHotToluene | TEXT | - | - | 58 | very faint turbidity |
| solubilityInHotMethanol | TEXT | - | - | 206 | almost transparency |
| solubilityInDiluteHCl | TEXT | - | - | 13 | almost transparency |
| solubilityInHCl | TEXT | - | - | 6 | almost transparency |
| solubilityInHotDiluteHCl | TEXT | - | - | 3 | very faint turbidity |
| solubilityInHCl1plus3 | TEXT | - | - | 2 | almost transparency |
| solubilityInHotHCl1plus3 | TEXT | - | - | 1 | almost transparency |
| solubilityIn1molperLofHCl | TEXT | - | - | 16 | almost transparency |
| solubilityIn1molperLofNaOH | TEXT | - | - | 27 | within almost transparency |
| solubilityInHCl1plus10 | TEXT | - | - | 2 | almost transparency |
| solubilityInAceticAcid | TEXT | - | - | 13 | almost transparency |
| solubilityInAcetone | TEXT | - | - | 37 | almost transparency |
| solubilityInPyridine | TEXT | - | - | 11 | very faint turbidity |
| solubilityInNNdmf | TEXT | - | - | 36 | very faint turbidity |
| solubilityIn5molperLofHCl | TEXT | - | - | 2 | almost transparency |
| solubilityInAcetonitrile | TEXT | - | - | 7 | very faint turbidity |
| solubilityInChloroform | TEXT | - | - | 47 | almost transparency |
| solubilityInThf | TEXT | - | - | 23 | almost transparency |
| solubilityInHotAcetonitrile | TEXT | - | - | 9 | almost transparency |
| solubilityIn1mmolperLofNaOH | TEXT | - | - | 2 | almost transparency |
| solubilityInHotAceticAcid | TEXT | - | - | 6 | almost transparency |
| solubilityInHotPyridine | TEXT | - | - | 2 | within almost transparency |
| solubilityInHotEtoh50volpercent | TEXT | - | - | 2 | almost transparency |
| solubilityInNa2CO3 | TEXT | - | - | 2 | almost transparency |
| solubilityInEthylacetate | TEXT | - | - | 5 | almost transparency |
| solubilityInHotAcetone | TEXT | - | - | 11 | almost transparency |
| solubilityInHotDioxane | TEXT | - | - | 3 | within almost transparency |
| solubilityInHot1molperLNaOH | TEXT | - | - | 1 | very faint turbidity |
| solubilityInHotHCl | TEXT | - | - | 2 | almost transparency |
| solubilityInHot1mmOfHCl | TEXT | - | - | 1 | almost transparency |
| solubilityInEtoh95volpercent | TEXT | - | - | 8 | almost transparency |
| solubilityIn5mmolperLofHydrochloricAcid | TEXT | - | - | 1 | almost transparency |
| solubilityInDioxane | TEXT | - | - | 7 | lmost transparency |
| solubilityInEtoh50volpercent | TEXT | - | - | 4 | almost transparency |
| solubilityInHotDmf | TEXT | - | - | 9 | almost transparency |
| solubilityInHotMek | TEXT | - | - | 1 | almost transparency |
| solubilityInDichloromethane | TEXT | - | - | 2 | almost transparency |
| solubilityInNH3AP2plus3 | TEXT | - | - | 1 | almost transparency |
| solubilityInHotChloroform | TEXT | - | - | 3 | very faint turbidity |
| solubilityInH2SO4 | TEXT | - | - | 1 | almost transparency |
| solubilityInHotThf | TEXT | - | - | 2 | almost transparency |
| solubilityInNH3Aq | TEXT | - | - | 2 | almost transparency |
| solubilityInFormicAcid | TEXT | - | - | 1 | almost transparency |
| solubilityInNaOH100GperL | TEXT | - | - | 2 | very faint turbidity |
| solubilityIn2mmolperLofNaOH | TEXT | - | - | 1 | almost transparency |
| solubilityIn1methyl2pyrrolidone | TEXT | - | - | 4 | faint turbidity |
| solubilityInTolueneEtohMix | TEXT | - | - | 3 | almost transparency |
| solubilityIn2propanol | TEXT | - | - | 2 | almost transparency |
| absorbance275nm | TEXT | - | - | 1 | max. 0.25 |
| absorbance260nm | TEXT | - | - | 13 | max. 0.300 |
| absorbance270nm | TEXT | - | - | 22 | max. 0.020 |
| absorbance280nm | TEXT | - | - | 8 | max. 0.010 |
| absorbance400nm | TEXT | - | - | 62 | max. 0.010 |
| absorbance330nm | TEXT | - | - | 4 | max. 1.000 |
| absorbance340nm | TEXT | - | - | 7 | max. 0.100 |
| absorbance350nm | TEXT | - | - | 4 | max. 0.020 |
| absorbance360nm | TEXT | - | - | 5 | max. 0.010 |
| absorbance1cmE1 | TEXT | - | - | 209 | min. 900(h2o, 490.0 to 494.0 nm,calcd.on dried substance) |
| absorbance254nm | TEXT | - | - | 37 | max. 1.000 |
| absorbance300nm | TEXT | - | - | 5 | max. 0.030 |
| absorbance310nm | TEXT | - | - | 4 | max. 0.020 |
| absorbance320nm | TEXT | - | - | 4 | max. 0.010 |
| absorbance290nm | TEXT | - | - | 8 | max. 0.030 |
| absorbance210nm | TEXT | - | - | 35 | max. 0.100 |
| absorbance220nm | TEXT | - | - | 23 | max. 0.020 |
| absorbance230nm | TEXT | - | - | 43 | max. 0.010 |
| absorbance1cmE1x2 | TEXT | - | - | 12 | min. 320(meoh, 526.0 to 530.0 nm) |
| absorbance370nm | TEXT | - | - | 2 | max. 0.100 |
| absorbance380nm | TEXT | - | - | 3 | max. 0.050 |
| absorbance390nm | TEXT | - | - | 3 | max. 0.030 |
| absorbance450nm | TEXT | - | - | 2 | max. 0.010 |
| absorbance240nm | TEXT | - | - | 23 | max. 0.100 |
| absorbance250nm | TEXT | - | - | 12 | max. 0.050 |
| absorbance1cmE10 | TEXT | - | - | 2 | min. 30(diethyl ether, 660.0 to 670.0 nm) |
| esterValue | TEXT | - | - | 3 | 430.0 to 470.0 |
| meltingPointDecomposition | TEXT | - | - | 13 | 170.0 to 177.0  °c |
| biotinylationRatio | TEXT | - | - | 4 | to pass test |
| lumoLevel | TEXT | - | - | 24 | -4.0 ev |
| holeMobilityMuFet | TEXT | - | - | 11 | min. 0.10 cm2/vs(hmds si/sio2 substrate) |
| exchangeCapacity | TEXT | - | - | 1 | 0.50 to 0.90 meq/g(calcd.on anh.substance) |
| absorbance | TEXT | - | - | 113 | min. 0.460(50 time dilution, h2o, 492.0 to 496.0 nm) |
| absorbanceOfCuComplex | TEXT | - | - | 2 | min. 12000 |
| molarAbsorbanceAlComplex | TEXT | - | - | 1 | min. 6,000(518.0 to 530.0 nm) |
| absorbance2 | TEXT | - | - | 9 | min. 0.530(20 mg/l, 280.0 to 284.0 nm) |
| lambdaMax | TEXT | - | - | 307 | 490.0 to 494.0 nm(h2o) |
| lambdaMax2 | TEXT | - | - | 14 | 526.0 to 530.0 nm(meoh) |
| lambdaMax1 | TEXT | - | - | 1 | 330.0 to 335.0 nm(h2o) |
| lambdaMax3 | TEXT | - | - | 1 | 421.0 to 425.0 nm(isooctane) |
| activeAlkaliLibu | TEXT | - | - | 2 | 14.0 to 16.0% |
| freePnitrophenol | TEXT | - | - | 2 | max. 0.1 % |
| nitorogenOfCyanamide | TEXT | - | - | 1 | min. 19.5 % |
| oxiraneOxygen | TEXT | - | - | 3 | 8.0 to 11.5 % |
| opticalIsomer | TEXT | - | - | 1 | max. 0.5 % |
| petroleumEtherSolubleMatter | TEXT | - | - | 3 | max. 2.5 % |
| disodiumAlphaGlycerophosphate | TEXT | - | - | 2 | max. 4.0 % |
| reducingSuger | TEXT | - | - | 2 | max. 5.0 % |
| humanSerumAlbminHSaBindingActivity | TEXT | - | - | 3 | s/n ratio min. 10 |
| competitiveAvtivityInCompetitiveDrug | TEXT | - | - | 4 | remaining fluorescein inensity max. 20 % |
| NonCompetitiveAvtivityInNonCompetitiveDrug | TEXT | - | - | 4 | remaining fluoresein intensity min. 80 % |
| se | TEXT | - | - | 1 | min. 99.9 % |

In [159]:
df_properties.to_sql('tci_specificproperties', engine,  index=False)

In [160]:
# define a primary Key
with engine.connect() as con:
    con.execute('ALTER TABLE tci_specificproperties ADD PRIMARY KEY ("code");')

In [161]:
%%time
df_retrieved_7 = pd.read_sql("SELECT * FROM tci_specificproperties;", engine)

CPU times: user 965 ms, sys: 59.7 ms, total: 1.02 s
Wall time: 10.7 s


In [163]:
df_retrieved_7.drop('code', axis=1).dropna(axis=0, how = 'all').sample(5).dropna(axis=1, how = 'all')

Unnamed: 0,specificGravity,refractiveIndex,degreeOfSolubilityInWater,solubilitySolubleIn,flashPoint,boilingPoint,meltingPoint
5070,1.22,1.45,,,,,
1434,,,,,,,238.0
7825,0.95,1.45,87 g/l 20 °c,"alcohol,ether,acetone",44.0,157.0,
4331,,,,,,,115.0
20585,1.1,,,,129.0,243.0,14.0


## Specifications

In [164]:
df_specifications = pd.read_json('TCISpecifications.json', orient ='split', compression = 'infer')

In [165]:
df_specifications.drop('code', axis=1).dropna(axis=0, how = 'all').sample(6).dropna(axis=1, how = 'all')

Unnamed: 0,purityNonaqueousTitration,purityHPLC,purityGC,purityNeutralizationTitration,physicalState20degC
D1134,,,,,liquid
M2283,,,min. 98.0 %,,solid
B4356,,min. 98.0 area%,,min. 98.0 %,solid
T3702,min. 98.0 %,,min. 98.0 %,,solid
S0966,,,,,liquid
T2512,,,min. 98.0 %,,liquid


In [218]:
list_properties = ['code', 'concentrationRedoxTitration', 'arsenic', 'concentrationNeutralizationTitrationUnit1']

df_prop = df_specifications[list_properties]

df_prop.drop('code', axis=1).dropna(axis=0, how='all')

Unnamed: 0,concentrationRedoxTitration,concentrationRedoxTitration.1,arsenic,arsenic.1,concentrationNeutralizationTitrationUnit1,concentrationNeutralizationTitrationUnit1.1
C0377,,,max. 2 ppm,,,
D1719,,,,max. 2 ppm,,
D5885,,,,,7.5 to 8.5 %,7.5 to 8.5 %
G0124,,,,max. 1ppm,,
O0601,,47.0 to 53.0 w/w%,,,,
T0909,,,,max. 2.0ppm(as as2o3),,
U0147,8.0 to 9.0 w/w%,,,,,
U0148,2.4 to 3.0 w/w%,,,,,


### Details

| column name | data type | table constraint | description | Non-Null Count | Example output |
| ---- | ---- | ---- | ---- | ---- |  ---- |
| code | TEXT | - | - | 30669 | A0002 |
| physicalState20degC | TEXT | - | - | 30458 | liquid |
| purityHPLC | TEXT | - | - | 6716 | min. 97.0 area% |
| purityNonaqueousTitration | TEXT | - | - | 4488 | min. 95.0 %(after deducting nh4cl) |
| purityArgentometricTitration | TEXT | - | - | 1125 | min. 95.0 %(after deducting nh4cl) |
| dryingLoss | TEXT | - | - | 371 | 14.0 to 16.0 % |
| purityGC | TEXT | - | - | 17658 | min. 99.0 % |
| purityNeutralizationTitration | TEXT | - | - | 5016 | min. 99.0 % |
| purityWithTotalNitrogen | TEXT | - | - | 1789 | min. 98.0 % |
| purityPotassiumIodateMethod | TEXT | - | - | 81 | min. 98.0 %
| opticalPurityLC | TEXT | - | - | 221 | min. 98.0 ee% |
| opticalPurityGC | TEXT | - | - | 151 | min. 98.0 ee% |
| ph | TEXT | - | - | 38 | 5.0 to 7.0(50 g/l, 25 deg-c) |
| loading | TEXT | - | - | 25 | 3.1 to 3.5 mmol/g |
| meltingRange | TEXT | - | - | 4 | max. 5.0 deg-c |
| acidValue | TEXT | - | - | 93 | max. 1.0 |
| mesomorphicRange | TEXT | - | - | 80 | 97.0 to 130.0 deg-c |
| viscosityWater | TEXT | - | - | 4 | 100 to 200 mpa-s |
| phWater | TEXT | - | - | 4 | 6.0 to 8.0 |
| saponificationValue | TEXT | - | - | 23 | 230.0 to 245.0 |
| degreeOfDeuteration | TEXT | - | - | 44 | min. 98.0 atom%d |
| evaporationResidue | TEXT | - | - | 9 | 19.6 to 21.6 % |
| specificGravity | TEXT | - | - | 1 | 0.7000 to 0.7100 |
| sulfate | TEXT | - | - | 13 | max. 5.0 % |
| homoLevel | TEXT | - | - | 33 | -4.7 ev |
| entantiometricExcess | TEXT | - | - | 4 | >98.0%(hplc) |
| molarRatioMaleimido | TEXT | - | - | 2 | 15-35 moles of maleimide per mole of bsa |
| deacetylationValue | TEXT | - | - | 3 | min. 80.0 %(after drying) |
| averageAlkylchainLength | TEXT | - | - | 2 | 11.0 to 13.0 |
| limitingViscosity | TEXT | - | - | 4 | 0.16 to 0.19 cp(after drying) |
| transmittance | TEXT | - | - | 1 | min. 96.0 % |
| loadingASpd | TEXT | - | - | 3 | 0.15 to 0.25 mmol/g |
| electronMobilityMufet | TEXT | - | - | 6 | min. 0.4 cm2/vs (ots si/sio2 substrate) |
| massConcentrationAU | TEXT | - | - | 3 | to pass test (1.8 to 2.1 mg/ml) |
| particleConcentration | TEXT | - | - | 3 | to pass test (2.4 to 4.0 x 10^12/ml) |
| opticalDensity | TEXT | - | - | 3 | 50.0 to 55.0 |
| puritySodiumHypochloriteMethod | TEXT | - | - | 12 | min. 98.0%(after drying) | |
| purityPeriodicAcidMethod | TEXT | - | - | 5 | min. 97.0 % |
| purityHPLCcad | TEXT | - | - | 174 | min. 98.0 area% |
| purityBrominationMethod | TEXT | - | - | 75 | min. 98.0 % |
| purityTPBNAmethod | TEXT | - | - | 5 | min. 98.0 %(calcd.on anh.substance) |
| purityChelometricTitration | TEXT | - | - | 430 | min. 99.0 % |
| purityVolhardMethod | TEXT | - | - | 4 | min. 98.0 % |
| purityIodometricTitration | TEXT | - | - | 325 | min. 97.0 % |
| purityVolumetricAnalysis) | TEXT | - | - | 112 | min. 98.0 % |
| purityIonExchangeTitration | TEXT | - | - | 281 | min. 98.0 %(calcd.on dried substance) |
| purityUvVisMethod | TEXT | - | - | 35 | min. 90.0 %(after drying) |
| purityPrecipitationTitration | TEXT | - | - | 67 | min. 98.0 % |
| purityMethanolysisMethod | TEXT | - | - | 64 | min. 97.0 % |
| purityMorpholineMethod | TEXT | - | - | 96 | min. 98.0 % |
| purityWithIgnitionResidue | TEXT | - | - | 9 | min. 95.0 % |
| purityPotassiumPermanganateMethod | TEXT | - | - | 8 | min. 93.0 % |
| purityQnmr | TEXT | - | - | 64 | min. 98.0 % |
| purityNh4scnMethod | TEXT | - | - | 4 | min. 98.0 % |
| purityHPLCri | TEXT | - | - | 6 | min. 75.0 area% |
| purityAnilinMethod | TEXT | - | - | 10 | min. 96.0 % |
| purityIodometricBackTitration | TEXT | - | - | 11 | min. 97.0 % |
| specificPurity | TEXT | - | - | 65 | min. 99.0 % |
| purityFormolTitration | TEXT | - | - | 2 | min. 99.5 % |
| purityGasburetMethod | TEXT | - | - | 2 | min. 85.0 % |
| purityChelometricBackTitration | TEXT | - | - | 5 | min. 90.0 % |
| purityPotassiumBromateMethod | TEXT | - | - | 3 | min. 97.0 % |
| purityEsterValue | TEXT | - | - | 39 | min. 98.0 % |
| purityGravimetric | TEXT | - | - | 7 | 98.0 to 102.0 % |
| purityWithTotalSulfur | TEXT | - | - | 1 | min. 98.0 % |
| purityGravimetricMethod | TEXT | - | - | 27 | min. 98.0 % |
| purityRedoxMethod | TEXT | - | - | 14 | min. 96.0 % |
| purityNeutralizationBackTitration | TEXT | - | - | 27 | min. 97.0 % |
| purityCeriumRedoxMethod | TEXT | - | - | 12 | min. 97.0 % |
| purityButylamineMethod | TEXT | - | - | 12 | min. 97.0 % |
| purityNMR | TEXT | - | - | 10 | min. 95.0 atom% |
| purityOximeFormation | TEXT | - | - | 14 | min. 96.0 % |
| purityPhaseSplittingMethod | TEXT | - | - | 3 | min. 95.0 % |
| puritySaponificationValue | TEXT | - | - | 2 | min. 97.0 % |
| purityTraceMetalBasis | TEXT | - | - | 18 | min. 99.99 % |
| purityTPOmethod | TEXT | - | - | 7 | min. 97 % |
| purity1Hqnmr | TEXT | - | - | 2 | min. 95.0 % |
| purityCouplingTitration | TEXT | - | - | 1 | min. 96.0 % |
| purityRedoxTitration | TEXT | - | - | 9 | min. 95.0 % |
| purityAnilideMethod | TEXT | - | - | 1 | min. 98.0 % |
| specificConcentration | TEXT | - | - | 12 | 0.3 to 15.0 mg/ml |
| concentrationBYabs | TEXT | - | - | 1 | 0.30 to 15.00 mg/ml |
| concentrationOFnah | TEXT | - | - | 1 | 57.0 to 65.0 % |
| concentrationPotassiumIodateMethod | TEXT | - | - | 3 | 79.0 to 82.0 w/w% |
| concentrationArgentometricBackTitration | TEXT | - | - | 1 | 52.0 to 57.2 w/w% |
| concentrationArgentometricTitration | TEXT | - | - | 23 | 10.0 to 12.0 w/w% |
| concentrationBrominationMethod | TEXT | - | - | 5 | 74.0 to 76.0 w/w% |
| concentrationBYmeasurementOFdryingWeight | TEXT | - | - | 1 | 1.0 to 20.0 mg/ml |
| concentrationChelometricMethod | TEXT | - | - | 16 | 40.0 to 44.0 w/w% |
| concentrationChelometricMethodUnit2 | TEXT | - | - | 1 | 15.0 to 16.5 w/w% |
| concentrationChelometricTitration | TEXT | - | - | 1 | 14.5 to 16.0 w/w% |
| concentrationGasburetMethod | TEXT | - | - | 1 | 9.5 to 11.5 w/w% |
| concentrationGC | TEXT | - | - | 4 | 47.0 to 53.0 w/w% |
| concentrationGravimetricMethod | TEXT | - | - | 10 | 49.0 to 54.0 w/w% |
| concentrationHnmr | TEXT | - | - | 41 | 1.9 to 2.5 mol/l |
| concentrationIodometricBackTitration | TEXT | - | - | 1 | 18.0 to 20.0 w/w% |
| concentrationIodometricTitration | TEXT | - | - | 17 | 40.0 to 44.0 w/w% |
| concentrationLowryMethod | TEXT | - | - | 18 | 0.9 to 1.5 mg/ml |
| concentrationMorpholineMethod | TEXT | - | - | 4 | 7.7 to 8.6 w/w% |
| concentrationNBSmethod | TEXT | - | - | 1 | 68.0 to 77.0 w/w% |
| concentrationNeutralizationBackTitration | TEXT | - | - | 2 | min. 28.0 w/w% |
| concentrationNeutralizationTitration | TEXT | - | - | 126 | 4.0 to 4.5 w/w% |
| concentrationNonaqueousTitration | TEXT | - | - | 30 | 50.0 to 55.0 w/w% |
| concentrationOximeFormation | TEXT | - | - | 6 | 2.0 to 2.3 w/w% |
| concentrationPhaseSplittingMethod | TEXT | - | - | 1 | 62.0 to 68.0 w/w% |
| concentrationPotassiumPermanganateMethod | TEXT | - | - | 2 | 29.0 to 33.0 w/w% |
| concentrationPrecipitationTitration | TEXT | - | - | 2 | 9.5 to 11.0 w/w% |
| concentrationRedoxMethod | TEXT | - | - | 1 | 35.0 to 36.0 w/w% |
| concentrationSECbutanolMethod | TEXT | - | - | 37 | 12.5 to 14.0 w/w% |
| concentrationTitration | TEXT | - | - | 1 | 0.075 to 0.105 mol/l |
| concentrationWithEvaporationResidue | TEXT | - | - | 2 | 9.5 to 10.4 mg/ml |
| concentrationWithTotalNitrogen | TEXT | - | - | 5 | 34.5 to 38.0 % |
| functionalityTest | TEXT | - | - | 57 | to pass test |
| colorTest | TEXT | - | - | 5 | a yellow spot appears on 4-dimethylaminobenzaldehyde. |
| effectTestOFtmsDerivatization | TEXT | - | - | 7 | effective as derivatization agent dl-threonine |
| enzymeInhibitionTest | TEXT | - | - | 8 | to pass test |
| blottingTest | TEXT | - | - | 1 | to pass test |
| proteinStainingTest | TEXT | - | - | 3 | to pass test |
| dnaStainingTest | TEXT | - | - | 7 | to pass test |
| fucoseInhibitionTest | TEXT | - | - | 1 | to pass test |
| proteinStabilizationTest | TEXT | - | - | 2 | to pass test |
| performanceTest | TEXT | - | - | 1 | to pass test |
| proteinLabelingTestQualitativeMethod | TEXT | - | - | 3 | to pass test |
| betaGalactosidaseDetectionTest | TEXT | - | - | 6 | to pass test |
| betaGlucuronidaseDetectionTest | TEXT | - | - | 4 | to pass test |
| proteinImpurityTestSDSpage | TEXT | - | - | 20 | to pass test |
| chemiluminescenceTestLuminolPodH2O2 | TEXT | - | - | 3 | to pass test |
| chemiluminescenceTestSuperoxideRadical | TEXT | - | - | 1 | to pass test |
| conjugateTest | TEXT | - | - | 5 | to pass test(confirmed reactivity with bsa) |
| proteinDeterminationTest | TEXT | - | - | 3 | to pass test |
| enzymeDetectionTest | TEXT | - | - | 8 | to pass test |
| ethanolInsolubleMatter | TEXT | - | - | 2 | max. 20.0 % |
| effectTestOFhplcDerivatization | TEXT | - | - | 2 | to pass test |
| highBoilingImpurityOFsilylationAgent | TEXT | - | - | 9 | no impurity higher than isopropyl myristate(50 ppm) after 5 min + hold up time |
| effectTestOFgcDerivalization | TEXT | - | - | 16 | effective as derivatization agent　n-methylaniline |
| alkalinePhosphataseDetectionTest | TEXT | - | - | 13 | to pass test |
| substancesDarkendByH2SO4 | TEXT | - | - | 1 | to pass test |
| functionalityTestTLC | TEXT | - | - | 2 | a blue spot appears on 4-methoxyphenol |
| delayedEmission | TEXT | - | - | 1 | to pass test |
| massSpectroscopy | TEXT | - | - | 2 | to pass test |
| aminoAcidSequencing | TEXT | - | - | 2 | to pass test |
| interferencePigment | TEXT | - | - | 1 | to pass test |
| inhibitorMixedTest | TEXT | - | - | 2 | to pass test |
| germfreeTest | TEXT | - | - | 2 | to pass test |
| titerTest | TEXT | - | - | 2 | to pass test |
| effectTestOFesterDerivatization | TEXT | - | - | 2 | effective as derivatization agent myristic acid |
| phosphataseInhibitionTest | TEXT | - | - | 3 | to pass test |
| nitriteIonDetectionTestFluorescenceMethod | TEXT | - | - | 1 | to pass test |
| collagenDetectionTest | TEXT | - | - | 1 | to pass test |
| ir | TEXT | - | - | 37 | to pass test |
| alkali | TEXT | - | - | 2 | to pass test |
| fluorescenceTest | TEXT | - | - | 1 | emit fluorescence in the presence of glycine |
| aldehyde | TEXT | - | - | 4 | max. 2.0 % |
| freeAcid | TEXT | - | - | 2 | min. 63.0 % |
| proteinDenaturationTest | TEXT | - | - | 4 | to pass test |
| ammoniumChloride | TEXT | - | - | 2 | max. 5.0 % |
| water | TEXT | - | - | 825 | max. 0.1 % |
| totalNitrogen | TEXT | - | - | 35 | 14 to 17 % |
| histidine | TEXT | - | - | 1 | max. 0.50 % |
| hydrogendioxideDetectionTestPeroxidaseMethod | TEXT | - | - | 12 | to pass test |
| sodiumChloride | TEXT | - | - | 2 | max. 25.0 % |
| ashContent | TEXT | - | - | 6 | max. 0.1 % |
| contentOFmagnesium | TEXT | - | - | 3 | 3.8 to 4.0% (calcd.on anh.substance) |
| sulfuricAcid | TEXT | - | - | 3 | max. 3.0 % |
| iron | TEXT | - | - | 14 | max. 5 ppm |
| heavyMetalsASpb | TEXT | - | - | 13 | max. 5 ppm |
| contentOFcalcium | TEXT | - | - | 1 | 7.0 to 10.0 %(calcd.on dried substance) |
| residualSolvent | TEXT | - | - | 1 | max. 10.0 % |
| peroxidaseDetectionTest | TEXT | - | - | 14 | to pass test |
| calcium | TEXT | - | - | 1 | 20.0 to 24.0 % |
| contentPalladium | TEXT | - | - | 3 | 18.0 to 24.0 % |
| cobalt | TEXT | - | - | 4 | min. 10.0 % |
| ethanol | TEXT | - | - | 2 | 0.3 to 1.0 % |
| ammonium | TEXT | - | - | 1 | max. 0.01 % |
| totalSulfur | TEXT | - | - | 2 | 5.5 to 7.5 %(after drying) |
| contentCU | TEXT | - | - | 1 | 11.0 to 14.0 % |
| zinc | TEXT | - | - | 4 | 9.5 to 11.6 % |
| benzeneSulfonicAcid | TEXT | - | - | 1 | max. 0.5 % |
| acidASpyromelliticAcid | TEXT | - | - | 1 | max. 1.0 % |
| triuret | TEXT | - | - | 1 | max. 10.0 w/w% |
| chloride | TEXT | - | - | 19 | max. 1.5 % |
| boricAcid | TEXT | - | - | 3 | max. 0.3 % |
| sodiumSulfate | TEXT | - | - | 2 | max. 1.0 %(calcd.on dried substance) |
| baseASkoh | TEXT | - | - | 2 | max. 0.5 % |
| waterVaporizationMethod | TEXT | - | - | 14 | max. 5.0 % |
| diphenylSulfone | TEXT | - | - | 1 | max. 2.0 area% |
| bleomycina2 | TEXT | - | - | 1 | 55.0 to 70.0 area% |
| bleomycinb2 | TEXT | - | - | 1 | 25.0 to 32.0 area% |
| bismuth | TEXT | - | - | 1 | 30.0 to 36.0 % |
| toluene | TEXT | - | - | 1 | max. 0.5 w/w% |
| fluorescein | TEXT | - | - | 1 | max. 0.05 % |
| bisChloromethylEtherGC | TEXT | - | - | 2 | max. 1.0 % |
| heavyMetals | TEXT | - | - | 2 | max. 10 ppm |
| phosphate | TEXT | - | - | 2 | max. 0.5 % |
| dicyandiamide | TEXT | - | - | 1 | max. 0.5 % |
| creatinine | TEXT | - | - | 1 | max. 0.5 % |
| Mcresol | TEXT | - | - | 1 | 50.0 to 60.0 % |
| Pcresol | TEXT | - | - | 1 | 25.0 to 35.0 % |
| iodineValue | TEXT | - | - | 1 | 100 to 120 |
| nonVolatileMatter | TEXT | - | - | 3 | max. 0.001 % |
| freeChlorine | TEXT | - | - | 1 | max. 0.0001 % |
| thiamazole | TEXT | - | - | 1 | max. 0.5 w/w% |
| residualChloroform | TEXT | - | - | 1 | max. 10.0 % |
| 3chloro1butene | TEXT | - | - | 1 | max. 38.0 % |
| contentCopper | TEXT | - | - | 1 | 30.0 to 40.0 % |
| cellStainingTest | TEXT | - | - | 5 | to pass test |
| rhodium | TEXT | - | - | 2 | 50.30 to 55.60 % |
| contentOFnAg | TEXT | - | - | 1 | max. 50.0 % |
| benzene | TEXT | - | - | 1 | 4.0 to 10.0 w/w% |
| iridium | TEXT | - | - | 3 | 65.0 to 71.0 % |
| contentOFcobalt | TEXT | - | - | 1 | 12.8 to 14.3 % |
| contentSodium | TEXT | - | - | 2 | 6.5 to 9.5 %(calcd.on dried substance) |
| contentOFcarbon | TEXT | - | - | 2 | min. 99.9 % |
| ignitionResidueAsSNO2 | TEXT | - | - | 5 | 23.4 to 24.6 % |
| 2methyl2butene | TEXT | - | - | 2 | 0.001 to 0.005 % |
| freeAmineASdimethylamine | TEXT | - | - | 1 | max. 0.5 % |
| ammonia | TEXT | - | - | 1 | max. 0.1 % |
| methylamine | TEXT | - | - | 1 | max. 0.1 % |
| trimethylamine | TEXT | - | - | 1 | max. 0.05 % |
| phenol | TEXT | - | - | 1 | 3.0 and le. 6.0 w/w% |
| highBoilingImpurities | TEXT | - | - | 5 | to pass test |
| bromide | TEXT | - | - | 9 | max. 0.2 % |
| contentOFsulfur | TEXT | - | - | 3 | max. 1.0 ppm |
| polychlorinatedBiphenyl | TEXT | - | - | 1 | max. 50 ppm |
| ethanolNMR | TEXT | - | - | 2 | 4.3 to 6.0 % |
| chlorideContent | TEXT | - | - | 1 | 23.0 to 26.0 % |
| calciumCarbonate | TEXT | - | - | 1 | 57.0 to 60.0 % |
| sulfur | TEXT | - | - | 2 | 15.0 to 20.0%(calcd.on dried substance) |
| contentOFhfNeutralizationTitration | TEXT | - | - | 3 | 65.0 to 69.0 % |
| sodium | TEXT | - | - | 3 | 7.1 to 8.1% (as na) (calcd. anh. sub.) |
| potassium | TEXT | - | - | 3 | 8.5 to 9.5 %(calcd.on anh.substance) |
| beta1316glucan | TEXT | - | - | 1 | min. 80 % |
| oxalicAcid | TEXT | - | - | 2 | max. 4.0 % |
| graphiteResidue | TEXT | - | - | 1 | max. 1 % |
| manganese | TEXT | - | - | 3 | max. 1 % |
| otherAminoAcids | TEXT | - | - | 1 | to pass test |
| lGlutamicAcid | TEXT | - | - | 1 | max. 0.5 % |
| glycine | TEXT | - | - | 1 | max. 0.6 % |
| totalAcid | TEXT | - | - | 1 | 68.0 to 76.0 % |
| methyleneChlorideNMR | TEXT | - | - | 1 | max. 3.0 % |
| hexane | TEXT | - | - | 1 | max. 5.0 % |
| haloid | TEXT | - | - | 2 | max. 0.1% |
| waterValue | TEXT | - | - | 4 | 17.0 to 19.0 % |
| bromideContent | TEXT | - | - | 1 | min. 99.0 % |
| iodineContent | TEXT | - | - | 1 | min. 99.0 % |
| methanol | TEXT | - | - | 3 | 6.5 to 7.5 % |
| fattyAcidComposition | TEXT | - | - | 4 | min. 90.0 %(palmitic acid) |
| stearicAcidCompositionOFfattyAcid | TEXT | - | - | 7 | min. 95.0 % |
| ignitionResidue | TEXT | - | - | 15 | 33.0 to 37.0 % |
| ignitionResidueSulfate | TEXT | - | - | 75 | 20.0 to 30.0 %(after drying) |
| nmr1h | TEXT | - | - | 1 | confirm to structure |
| nmr13c | TEXT | - | - | 1 | confirm to structure |
| nmr | TEXT | - | - | 494 | confirm to structure |
| absRatioab | TEXT | - | - | 21 | 0.81 to 0.88(e250/e260) |
| absRatio2a2b | TEXT | - | - | 15 | 0.19 to 0.24(e280/e260) |
| absRatio3a3b | TEXT | - | - | 4 | 1.30 to 1.50(e455/e434) |
| absRatio4a4b | TEXT | - | - | 1 | 1.05 to 1.25(e455/e483) |
| titer | TEXT | - | - | 30 | min. 200 unit/mg |
| titerElisa | TEXT | - | - | 87 | to pass test |
| titerFCM | TEXT | - | - | 1 | to pass test |
| opticalPurity | TEXT | - | - | 2 | min. 98.0 ee% |
| opticalPurityGCmosher | TEXT | - | - | 1 | min. 93.0 ee% |
| opticalPurityHplc | TEXT | - | - | 1 | min. 98.0 ee% |
| diameter | TEXT | - | - | 10 | to pass test (40.0 ± 2.5 nm) |
| length | TEXT | - | - | 7 | 5 to 15 micro_m |
| medianSize | TEXT | - | - | 1 | 30.0 to 60.0 micro_m |
| shape | TEXT | - | - | 2 | to pass test |
| particleSizeD50 | TEXT | - | - | 3 | 5.0 to 9.0 micro_m |
| thick | TEXT | - | - | 1 | 6 to 8 nm |
| averageSize | TEXT | - | - | 4 | equal to 5 micro_m |
| specificSurface | TEXT | - | - | 5 | 300.0 to 350.0 m2/g |
| neutralizationValue | TEXT | - | - | 6 | 200.0 to 260.0(calcd.on dried substance) |
| molarExtinctionCoefficient2 | TEXT | - | - | 6 | min. 33000(h2o, 368.0 to 369.0 nm) |
| molarExtinctionCoefficient | TEXT | - | - | 74 | min. 7,200(410.0 nm) |
| molarExtinctionCoefficient3 | TEXT | - | - | 2 | min. 170,000(isooctane, 421.0 to 425.0 nm) |
| molarExtinctionCoefficientCOcomplex | TEXT | - | - | 2 | min. 100000(568.0 to 571.0 nm) |


In [166]:
df_specifications.to_sql('tci_productspecifications', engine,  index=False)

In [167]:
# define a primary Key
with engine.connect() as con:
    con.execute('ALTER TABLE tci_productspecifications ADD PRIMARY KEY ("code");')

In [168]:
%%time
df_retrieved_8 = pd.read_sql("SELECT * FROM tci_productspecifications;", engine)

CPU times: user 2.73 s, sys: 142 ms, total: 2.87 s
Wall time: 14.1 s


In [169]:
df_retrieved_8.drop('code', axis=1).dropna(axis=0, how = 'all').sample(5).dropna(axis=1, how = 'all')

Unnamed: 0,purityArgentometricTitration,purityHPLC,purityGC,purityNMR,nmr,physicalState20degC
26734,,,min. 99.0 %,,,liquid
23398,,min. 98.0 area%,,,,solid
25164,,,min. 95.0 %,,,liquid
6067,min. 95.0 %,min. 97.0 area%,,,,solid
15977,,,,min. 95.0 atom%,confirm to structure,solid
