Skip to content

Commit

Permalink
update v6 final paths and results for jcim
Browse files Browse the repository at this point in the history
  • Loading branch information
dswigh committed Feb 18, 2024
1 parent ae0e1b0 commit 90c468d
Show file tree
Hide file tree
Showing 4 changed files with 404 additions and 87 deletions.
200 changes: 199 additions & 1 deletion notebooks/inspect_orderly_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1445,6 +1445,204 @@
"source": [
"not_uspto_retro['reactant_000'].describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Non-USPTO datasets"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Directory that contains the non_uspto_data/ folder. Set the ORDERLY_PARENT\n",
"# environment variable to run this notebook on another machine; the fallback\n",
"# is the location that was previously hard-coded here.\n",
"parent = os.environ.get('ORDERLY_PARENT', '/Users/dsw46/Projects_local/ORDerly_jcim_response/')\n",
"# os.path.join accepts the root with or without a trailing slash, which the\n",
"# earlier plain string concatenation did not.\n",
"path_to_data = os.path.join(parent, 'non_uspto_data', 'non_uspto_orderly_forward.parquet')\n",
"df = pd.read_parquet(path_to_data)\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"C1CCOC1.[Pd].CCOC(=O)c1ccc(-c2nnc(-c3ccccc3[N+](=O)[O-])o2)cc1>\n"
]
}
],
"source": [
"# Drop the single-space separators between tokens and print the compact string.\n",
"find_string = ''.join('C 1 C C O C 1 . [Pd] . C C O C ( = O ) c 1 c c c ( - c 2 n n c ( - c 3 c c c c c 3 [N+] ( = O ) [O-] ) o 2 ) c c 1 >'.split(' '))\n",
"print(find_string)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>original_index</th>\n",
" <th>agent_000</th>\n",
" <th>agent_001</th>\n",
" <th>agent_002</th>\n",
" <th>date_of_experiment</th>\n",
" <th>extracted_from_file</th>\n",
" <th>grant_date</th>\n",
" <th>is_mapped</th>\n",
" <th>procedure_details</th>\n",
" <th>product_000</th>\n",
" <th>...</th>\n",
" <th>reactant_001</th>\n",
" <th>reactant_002</th>\n",
" <th>rxn_str</th>\n",
" <th>rxn_time</th>\n",
" <th>solvent_000</th>\n",
" <th>solvent_001</th>\n",
" <th>solvent_002</th>\n",
" <th>temperature</th>\n",
" <th>yield_000</th>\n",
" <th>yield_001</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>649</th>\n",
" <td>85434</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaT</td>\n",
" <td>ord_dataset-5481550056a14935b76e031fb94b88be</td>\n",
" <td>NaT</td>\n",
" <td>False</td>\n",
" <td></td>\n",
" <td>CCOC(=O)c1ccc(-c2nnc(-c3ccccc3N)o2)cc1</td>\n",
" <td>...</td>\n",
" <td>[Pd]</td>\n",
" <td>CCOC(=O)c1ccc(-c2nnc(-c3ccccc3[N+](=O)[O-])o2)cc1</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1 rows × 22 columns</p>\n",
"</div>"
],
"text/plain": [
" original_index agent_000 agent_001 agent_002 date_of_experiment \\\n",
"649 85434 None None None NaT \n",
"\n",
" extracted_from_file grant_date is_mapped \\\n",
"649 ord_dataset-5481550056a14935b76e031fb94b88be NaT False \n",
"\n",
" procedure_details product_000 ... \\\n",
"649 CCOC(=O)c1ccc(-c2nnc(-c3ccccc3N)o2)cc1 ... \n",
"\n",
" reactant_001 reactant_002 rxn_str \\\n",
"649 [Pd] CCOC(=O)c1ccc(-c2nnc(-c3ccccc3[N+](=O)[O-])o2)cc1 None \n",
"\n",
" rxn_time solvent_000 solvent_001 solvent_002 temperature yield_000 \\\n",
"649 NaN None None None NaN NaN \n",
"\n",
" yield_001 \n",
"649 NaN \n",
"\n",
"[1 rows x 22 columns]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Keep only the rows whose reactant_002 column equals this exact string.\n",
"target_reactant = 'CCOC(=O)c1ccc(-c2nnc(-c3ccccc3[N+](=O)[O-])o2)cc1'\n",
"df2 = df.loc[df['reactant_002'].eq(target_reactant)]\n",
"df2"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"original_index 85434\n",
"agent_000 None\n",
"agent_001 None\n",
"agent_002 None\n",
"date_of_experiment NaT\n",
"extracted_from_file ord_dataset-5481550056a14935b76e031fb94b88be\n",
"grant_date NaT\n",
"is_mapped False\n",
"procedure_details \n",
"product_000 CCOC(=O)c1ccc(-c2nnc(-c3ccccc3N)o2)cc1\n",
"product_001 None\n",
"reactant_000 C1CCOC1\n",
"reactant_001 [Pd]\n",
"reactant_002 CCOC(=O)c1ccc(-c2nnc(-c3ccccc3[N+](=O)[O-])o2)cc1\n",
"rxn_str None\n",
"rxn_time NaN\n",
"solvent_000 None\n",
"solvent_001 None\n",
"solvent_002 None\n",
"temperature NaN\n",
"yield_000 NaN\n",
"yield_001 NaN\n",
"Name: 649, dtype: object"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show every column of the row labelled 649 (the row listed in the previous cell's output).\n",
"df.loc[649, :]"
]
}
],
"metadata": {
Expand All @@ -1463,7 +1661,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.10.8"
},
"orig_nbformat": 4
},
Expand Down
Loading

0 comments on commit 90c468d

Please sign in to comment.