In [1]:
import pandas as pd

In [4]:
# Lookup tables: each pairs a default name with its SMILES string.
basemap = pd.read_csv(filepath_or_buffer="direct_arylation/base-list.csv")
solventmap = pd.read_csv(filepath_or_buffer="direct_arylation/solvent-list.csv")
ligandmap = pd.read_csv(filepath_or_buffer="direct_arylation/ligand-list.csv")

# Experiment table: SMILES keys, numeric conditions and yield per entry.
experiment = pd.read_csv(filepath_or_buffer="direct_arylation/experiment_index.csv")

In [7]:
from IPython.display import display

# Show the complete name -> SMILES lookup tables (not just their heads): the
# names listed here are the exact strings the name-based lookups compare against.
print("Ligand list:")
display(ligandmap[["Ligand", "Ligand_SMILES"]])

print("Base list:")
display(basemap[["Base", "Base_SMILES"]])

print("Solvent list:")
display(solventmap[["Solvent", "Solvent_SMILES"]])

# Distinct numeric conditions present in the experiment table
print(experiment['Temp_C'].unique())
print(experiment['Concentration'].unique())
print("Experiment index (head):")
display(experiment.head())

# Merge default names into experiment by SMILES keys.
# validate="many_to_one" makes pandas raise a MergeError if a lookup table
# lists the same SMILES more than once, instead of silently duplicating
# experiment rows.
experiment_named = (
	experiment
		.merge(ligandmap[["Ligand_SMILES", "Ligand"]], on="Ligand_SMILES", how="left", validate="many_to_one")
		.merge(basemap[["Base_SMILES", "Base"]], on="Base_SMILES", how="left", validate="many_to_one")
		.merge(solventmap[["Solvent_SMILES", "Solvent"]], on="Solvent_SMILES", how="left", validate="many_to_one")
)

# A left merge leaves NaN in the name column for any SMILES missing from its
# lookup table; such rows can never be found by name, so report them.
_unnamed = experiment_named[["Ligand", "Base", "Solvent"]].isna().sum()
if _unnamed.any():
	print("WARNING: experiments without a matching name:", _unnamed[_unnamed > 0].to_dict())

# Reorder columns for readability: names, conditions and yield first,
# every remaining column afterwards in its existing order.
_front = ["Ligand", "Base", "Solvent", "Concentration", "Temp_C", "yield"]
_rest = [c for c in experiment_named.columns if c not in _front]
experiment_named = experiment_named[_front + _rest]


def fetch_yield_by_names(
	ligand: str,
	base: str,
	solvent: str,
	temp_c: float,
	concentration: float,
) -> pd.Series:
	"""
	Return the yield values of all experiments matching the given setup.

	Rows of the module-level `experiment_named` frame are selected by default
	names (not SMILES) and by the two numeric conditions.

	Parameters
	----------
	ligand, base, solvent : str
		Names compared for exact string equality against the "Ligand",
		"Base" and "Solvent" columns.
	temp_c : float
		Value matched against the "Temp_C" column.
	concentration : float
		Value matched against the "Concentration" column.

	Returns
	-------
	pandas.Series
		The "yield" entries of the matching rows (original row index kept).
		If no rows match, an empty Series is returned.

	Notes
	-----
	The numeric conditions are matched within a small absolute tolerance
	rather than with `==`, so that values differing only by floating-point
	rounding (e.g. 0.1 + 0.2 vs 0.3) still match.
	"""
	# Far smaller than any meaningful difference between conditions, far
	# larger than double-precision rounding noise.
	tol = 1e-9
	mask = (
		(experiment_named["Ligand"] == ligand)
		& (experiment_named["Base"] == base)
		& (experiment_named["Solvent"] == solvent)
		& ((experiment_named["Temp_C"] - temp_c).abs() <= tol)
		& ((experiment_named["Concentration"] - concentration).abs() <= tol)
	)
	return experiment_named.loc[mask, "yield"]


Ligand list (head):


Unnamed: 0,Ligand,Ligand_SMILES
0,BrettPhos,CC(C)C1=CC(C(C)C)=C(C(C(C)C)=C1)C2=C(P(C3CCCCC...
1,PPhtBu2,CC(C)(C)P(C1=CC=CC=C1)C(C)(C)C
2,tBPh-CPhos,CN(C)C1=CC=CC(N(C)C)=C1C2=CC=CC=C2P(C(C)(C)C)C...
3,PCy3 HBF4,P(C1CCCCC1)(C2CCCCC2)C3CCCCC3
4,PPh3,P(C1=CC=CC=C1)(C2=CC=CC=C2)C3=CC=CC=C3
5,X-Phos,CC(C1=C(C2=CC=CC=C2P(C3CCCCC3)C4CCCCC4)C(C(C)C...
6,P(fur)3,P(C1=CC=CO1)(C2=CC=CO2)C3=CC=CO3
7,PPh2Me,CP(C1=CC=CC=C1)C2=CC=CC=C2
8,GorlosPhos HBF4,CC(OC1=C(P(C2CCCCC2)C3CCCCC3)C(OC(C)C)=CC=C1)C
9,JackiePhos,FC(F)(F)C1=CC(P(C2=C(C3=C(C(C)C)C=C(C(C)C)C=C3...


Base list (head):


Unnamed: 0,Base,Base_SMILES
0,KOAc,O=C([O-])C.[K+]
1,KOPiv,O=C([O-])C(C)(C)C.[K+]
2,CsOAc,O=C([O-])C.[Cs+]
3,CsOPiv,O=C([O-])C(C)(C)C.[Cs+]


Solvent list (head):


Unnamed: 0,Solvent,Solvent_SMILES
0,BuOAc,CCCCOC(C)=O
1,p-Xylene,CC1=CC=C(C)C=C1
2,BuCN,CCCC#N
3,DMAc,CC(N(C)C)=O


[105  90 120]
[0.1   0.057 0.153]
Experiment index (head):


Unnamed: 0,entry,Base_SMILES,Ligand_SMILES,Solvent_SMILES,Concentration,Temp_C,yield
0,0,O=C([O-])C.[K+],CC(C)C1=CC(C(C)C)=C(C(C(C)C)=C1)C2=C(P(C3CCCCC...,CC(N(C)C)=O,0.1,105,5.47
1,1,O=C([O-])C.[K+],CC(C)(C)P(C1=CC=CC=C1)C(C)(C)C,CC(N(C)C)=O,0.1,105,0.0
2,2,O=C([O-])C.[K+],CN(C)C1=CC=CC(N(C)C)=C1C2=CC=CC=C2P(C(C)(C)C)C...,CC(N(C)C)=O,0.1,105,78.95
3,3,O=C([O-])C.[K+],P(C1CCCCC1)(C2CCCCC2)C3CCCCC3,CC(N(C)C)=O,0.1,105,7.26
4,4,O=C([O-])C.[K+],P(C1=CC=CC=C1)(C2=CC=CC=C2)C3=CC=CC=C3,CC(N(C)C)=O,0.1,105,28.15


In [8]:
# Global accumulator for fetched experiments. It starts out empty and has one
# column per component name, per numeric condition, and for the yield.
_fetch_cols = [
	"Ligand",
	"Base",
	"Solvent",
	"Concentration",
	"Temp_C",
	"yield",
]
fetched_experiments_df = pd.DataFrame(data=None, columns=_fetch_cols)


def fetch_store(
	base: str,
	ligand: str,
	solvent: str,
	temp_c: float,
	concentration: float,
	*,
	clear: bool = False,
) -> pd.DataFrame:
	"""
	Look up experiments by names and conditions and add them to the store.

	Rows of `experiment_named` matching all five criteria are appended to the
	module-level `fetched_experiments_df`; exact duplicate rows are dropped,
	so repeating a call does not grow the store. The updated store is returned.

	NOTE: the positional order here is (base, ligand, ...), whereas
	`fetch_yield_by_names` takes (ligand, base, ...). Passing keyword
	arguments avoids mixing the two up.

	Parameters
	----------
	base, ligand, solvent : str
		Default names (not SMILES), compared for exact string equality.
	temp_c, concentration : float
		Numeric conditions, matched within a small absolute tolerance so that
		values differing only by floating-point rounding still match.
	clear : bool, keyword-only
		If True, reset the store to empty before appending.

	Returns
	-------
	pandas.DataFrame
		The store, with columns Ligand, Base, Solvent, Concentration, Temp_C,
		yield and a gap-free 0..n-1 index after every successful append.

	Warns
	-----
	UserWarning
		If no experiment matches. The store is then returned unchanged
		(apart from a requested `clear`).
	"""
	global fetched_experiments_df
	import warnings  # local import keeps this definition self-contained

	if clear:
		fetched_experiments_df = pd.DataFrame(columns=_fetch_cols)

	# Far smaller than any meaningful difference between conditions, far
	# larger than double-precision rounding noise.
	tol = 1e-9
	mask = (
		(experiment_named["Ligand"] == ligand)
		& (experiment_named["Base"] == base)
		& (experiment_named["Solvent"] == solvent)
		& ((experiment_named["Temp_C"] - temp_c).abs() <= tol)
		& ((experiment_named["Concentration"] - concentration).abs() <= tol)
	)
	rows = experiment_named.loc[mask, _fetch_cols]
	if rows.empty:
		# A misspelled name would otherwise be indistinguishable from a
		# successful fetch, because the store is returned either way.
		warnings.warn(
			"fetch_store: no experiment matches "
			f"ligand={ligand!r}, base={base!r}, solvent={solvent!r}, "
			f"temp_c={temp_c!r}, concentration={concentration!r}; store unchanged",
			stacklevel=2,
		)
		return fetched_experiments_df

	# Do not concatenate onto an empty store: recent pandas releases deprecate
	# concat with empty frames, and the empty frame's object-typed columns
	# would otherwise take part in dtype inference.
	if fetched_experiments_df.empty:
		combined = rows
	else:
		combined = pd.concat([fetched_experiments_df, rows], ignore_index=True)
	# ignore_index=True renumbers after de-duplication so the index has no gaps.
	fetched_experiments_df = combined.drop_duplicates(ignore_index=True)
	return fetched_experiments_df

In [12]:
# Seed the store with five experiments. Keyword arguments are used because
# fetch_store takes (base, ligand, ...) positionally.
fetch_store(base='CsOPiv', ligand='JackiePhos', solvent='DMAc', temp_c=120, concentration=0.10)
fetch_store(base='CsOAc', ligand='BrettPhos', solvent='p-Xylene', temp_c=120, concentration=0.10)
fetch_store(base='KOAc', ligand='X-Phos', solvent='DMAc', temp_c=105, concentration=0.153)
# The ligand list spells this name with a space ('PCy3 HBF4'); with a hyphen
# nothing matches and no row is added.
fetch_store(base='CsOAc', ligand='PCy3 HBF4', solvent='BuOAc', temp_c=120, concentration=0.153)
fetch_store(base='KOPiv', ligand='PPh3', solvent='BuCN', temp_c=90, concentration=0.057)

Unnamed: 0,Ligand,Base,Solvent,Concentration,Temp_C,yield
0,JackiePhos,CsOPiv,DMAc,0.1,120,13.46
1,BrettPhos,CsOAc,p-Xylene,0.1,120,28.93
2,PCy3 HBF4,CsOAc,BuOAc,0.153,120,0.0
3,PPh3,KOPiv,BuCN,0.057,90,5.92
4,X-Phos,KOAc,DMAc,0.153,105,48.86


In [14]:
# Add five more experiments, all at 120 C and concentration 0.153; the value
# of the last call (the whole store) is the cell output.
fetch_store(base='CsOAc', ligand='X-Phos', solvent='DMAc', temp_c=120, concentration=0.153)
fetch_store(base='KOAc', ligand='X-Phos', solvent='DMAc', temp_c=120, concentration=0.153)
fetch_store(base='CsOPiv', ligand='X-Phos', solvent='DMAc', temp_c=120, concentration=0.153)
fetch_store(base='KOAc', ligand='BrettPhos', solvent='DMAc', temp_c=120, concentration=0.153)
fetch_store(base="KOAc", ligand='X-Phos', solvent='p-Xylene', temp_c=120, concentration=0.153)

Unnamed: 0,Ligand,Base,Solvent,Concentration,Temp_C,yield
0,JackiePhos,CsOPiv,DMAc,0.1,120,13.46
1,BrettPhos,CsOAc,p-Xylene,0.1,120,28.93
2,PCy3 HBF4,CsOAc,BuOAc,0.153,120,0.0
3,PPh3,KOPiv,BuCN,0.057,90,5.92
4,X-Phos,KOAc,DMAc,0.153,105,48.86
5,X-Phos,CsOAc,DMAc,0.153,120,84.49
6,X-Phos,KOAc,DMAc,0.153,120,84.67
7,X-Phos,CsOPiv,DMAc,0.153,120,48.34
8,BrettPhos,KOAc,DMAc,0.153,120,5.95
9,X-Phos,KOAc,p-Xylene,0.153,120,61.67
