In [1]:
from pydantic import BaseModel, Field
from typing import List, Tuple
import dspy


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

lm = dspy.LM('openai/gpt-4o', temperature=0)
dspy.configure(lm=lm)
teacher = dspy.LM("openai/gpt-4o",temperature=1)


In [4]:

class DataProperty(BaseModel):
    """Extracted data properties from text for ontology creation. These should be value-based properties, not entities."""
    name: str = Field(description="Provide a clear and concise name for the data property.")
    information: str = Field(description="Offer a complete and comprehensive sentence detailing the data property from the text.")

class Entity(BaseModel):
    """Extracted entities from text for ontology creation, including classes and individuals, excluding data properties."""
    name: str = Field(description="Provide a clear and concise name for the entity. Use the format [Full name]([Abbreviation]) if applicable.")
    information: str = Field(description="Offer a complete and comprehensive sentence detailing the entity from the text.")

class ObjectProperty(BaseModel):
    """Extracted object properties from text for ontology creation."""
    name: str = Field(description="Provide a clear and concise name for the object property. It should be separated by underscores between words.")
    domain: str = Field(
        description="Domain entity name of the object property, it should be an existing entity in the Entity list"
    )
    range: str = Field(
        description="Range entity name of the object property, it should be an existing entity in the Entity list"
    )
    restriction: str = Field(
        description="Specify 'only' to indicate a universal restriction (owl:allValuesFrom), meaning all possible values of the property must belong to the specified range. Specify 'some' to indicate an existential restriction (owl:someValuesFrom), meaning at least one value of the property must belong to the specified range."

    )
    information: str = Field(
        description="Offer a complete and comprehensive sentence detailing the object property from the text."
    )

class Ontology(BaseModel):
    """Graph representation of the ontology for text."""

    entities: List[Entity] = Field(
        description="List of entities in the knowledge graph"
    )
    data_properties: List[DataProperty] = Field(
        description="List of data properties in the ontology"
    )
    object_properties: List[ObjectProperty] = Field(
        description="List of object properties in the ontology"
    )

def ontology_to_string(ontology: Ontology) -> str:
    result = []
    
    result.append("Entities:")
    for entity in ontology.entities:
        result.append(f"  - Name: {entity.name}")
        result.append(f"    Information: {entity.information}")
    
    result.append("\nData Properties:")
    for prop in ontology.data_properties:
        result.append(f"  - Name: {prop.name}")
        result.append(f"    Information: {prop.information}")
        
    result.append("\nObject Properties:")
    for prop in ontology.object_properties:
        result.append(f"  - Name: {prop.name}")
        result.append(f"    Domain: {prop.domain}")
        result.append(f"    Range: {prop.range}")
        result.append(f"    Restriction: {prop.restriction}")
        result.append(f"    Information: {prop.information}")
        
    return "\n".join(result)

In [54]:
class ExtractOntologyElements(dspy.Signature):
    """Analyze the provided text from research papers in the field of chemistry to identify all chemistry-related entities, data properties, and object properties within an ontological framework.

    Follow these Step-by-Step Analysis:

    1. Extract Chemistry-Related Entities:
      - Identify all significant nouns, proper nouns, and technical terminologies that represent chemistry-related concepts, such as molecules, reactions, compounds, processes, or any substantial entities.
      - Ensure that you capture entities across different levels of detail, from broad chemical categories to specific molecular structures, to create a comprehensive representation of the subject matter.
      - Choose names for entities that are specific enough to indicate their meaning without additional context, avoiding overly generic terms.
      - Consolidate similar entities to avoid redundancy, ensuring each represents a distinct concept at appropriate granularity levels.

    2. Identify Data Properties:
      - Extract attributes or characteristics of the identified entities that can be classified as data properties, ensuring they are value-based and not entities themselves.
      - Clearly define each data property, ensuring it accurately describes an attribute of an entity.

    3. Establish Object Properties:
      - Carefully examine the text to identify all relationships between entities, ensuring each relationship is correctly captured with accurate details about the interactions.
      - Analyze the context and interactions between the identified entities to determine how they are interconnected, focusing on actions, associations, dependencies, or similarities.
      - Clearly define the relationships, ensuring accurate directionality that reflects the logical or functional dependencies among entities.

    Objective: Produce a detailed and comprehensive ontology that captures the full spectrum of chemistry-related entities, data properties, and object properties mentioned in the text, along with their interrelations, reflecting both broad concepts and intricate details specific to the chemistry domain.

    """

    text: str = dspy.InputField(
        desc="a paragraph of text to extract entities, data properties, and object properties to form an ontology"
    )
    ontology: Ontology = dspy.OutputField(
        desc="List representation of the ontology extracted from the text."
    )

class ChemOntology(dspy.Module):
    def __init__(self):
        super().__init__()

        self.extractor = dspy.ChainOfThought(ExtractOntologyElements)
    
    def forward(self, context):
        return self.extractor(text=context)


In [31]:
import json
import os

def load_json_files(folder_path, template_str):
    combined_data = []
    
    # 遍历文件夹中的所有文件
    for filename in os.listdir(folder_path):
        # 检查文件是否是json且名称包含模板字符串
        if filename.endswith('.json') and template_str in filename:
            file_path = os.path.join(folder_path, filename)
            # 读取json文件
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
                combined_data.extend(data)
                
    return combined_data


In [35]:

# 使用示例
folder_path = r'./data'
template_str = 'content_list'
data = load_json_files(folder_path, template_str)
len(data)


6924

In [152]:
entity_accuracy = """Entity Accuracy Score (0-5 points):
Award 1 point for each criterion met:
- Entity names are specific and meaningful without being overly generic (1 point)
- Entity definitions align with established chemical concepts (1 point) 
- Entity relationships reflect valid chemical principles (1 point)
- Entities are properly consolidated without redundancy (1 point)
- Entity hierarchy captures appropriate chemical classification (1 point)"""

data_property_correctness = """Data Property Correctness Score (0-5 points):
Award 1 point for each criterion met:
- Data properties are truly value-based attributes (1 point)
- Each data property describes a single measurable characteristic (1 point)
- Data property units and ranges are chemically valid (1 point)
- Data property dependencies are accurately captured (1 point)
- Data properties maintain consistency across the ontology (1 point)"""

object_property_completeness = """Object Property Completeness Score (0-5 points):
Award 1 point for each criterion met:
- Object properties capture all key chemical interactions (1 point)
- Domain and range specifications reflect valid chemical relationships (1 point)
- Relationship restrictions ('only'/'some') are properly applied (1 point)
- Object property chains represent complex chemical processes (1 point)
- Inverse relationships are correctly identified where applicable (1 point)
"""

ontology_structure = """Ontology Structure Score (0-4 points):
Award 1 point for each criterion met:
- Entities span appropriate levels of chemical granularity (1 point)
- Properties and relationships form a coherent chemical knowledge graph (1 point)
- The ontology maintains semantic clarity independent of source text (1 point)
- Cross-references between concepts are meaningful and accurate (1 point)"""

overall_score = """Overall Score (0-8 points):
Each criterion is evaluated on three levels - excellent (2 points), adequate (1 point), or poor (0 points):

- Extraction Accuracy (2 points):
  * Excellent: No errors in entity and property extraction
  * Adequate: Minor non-critical errors present 
  * Poor: Some extraction errors exist

- Professional Validity (2 points):
  * Excellent: Extractions fully align with chemical expertise
  * Adequate: Most extractions align with chemical knowledge
  * Poor: Some deviations from chemical principles

- Comprehensiveness (2 points):
  * Excellent: Complete extraction of all relevant information
  * Adequate: Most key information captured
  * Poor: Some key information missing

- Knowledge Independence (2 points):
  * Excellent: Entities and properties can be accurately understood without source text context
  * Adequate: Most entities and properties are clear without background text
  * Poor: Some understanding requires source text context"""

class Assess(dspy.Signature):
    """Assess the quality of a tweet along the specified dimension."""

    assessed_text = dspy.InputField()
    assessment_ontology = dspy.InputField()
    assessment_criteria = dspy.InputField()
    assessment_score: int = dspy.OutputField(
        desc="Score with extreme rigor - only award full points when the ontology achieves perfect alignment with assessment criteria and would be deemed flawless by expert chemists"
    )
    assessment_reason: str = dspy.OutputField(
        desc="Fill this field with a space if score is the full score."
    )

class Assessment(dspy.Module):
    def __init__(self):
        super().__init__()
        self.assessor = dspy.ChainOfThought(Assess)
    
    def forward(self, assessed_text, assessment_ontology, verbose=False):
        score_list = [
            self.assessor(assessed_text=assessed_text, assessment_ontology=ontology_to_string(assessment_ontology), assessment_criteria=criteria).assessment_score for criteria in [entity_accuracy, data_property_correctness, object_property_completeness, ontology_structure, overall_score]
        ]
        reason_list = [
            self.assessor(assessed_text=assessed_text, assessment_ontology=ontology_to_string(assessment_ontology), assessment_criteria=criteria).assessment_reason for criteria in [entity_accuracy, data_property_correctness, object_property_completeness, ontology_structure, overall_score]
        ]
        if verbose:
            return f"""
Entity Accuracy Score: {int(score_list[0])}
Data Property Correctness Score: {int(score_list[1])}
Object Property Completeness Score: {int(score_list[2])}
Ontology Structure Score: {int(score_list[3])}
Overall Score: {int(score_list[4])}

Total Score: {int(sum(score_list))}
Percentage Score: {(sum(score_list)/27.0)*100:.2f}%

Reason:
entity accuracy: {reason_list[0]}
data property correctness: {reason_list[1]}
object property completeness: {reason_list[2]}
ontology structure: {reason_list[3]}
overall score: {reason_list[4]}
"""
        return sum(score_list)/27.0
    
def assessor_metric(gold, pred, trace=None):
    standard_score = gold['score']
    assessor_score = pred['assessment_score']
    return standard_score == assessor_score

def metric(gold, pred, trace=None, verbose=False):
    assessment = Assessment()
    return assessment(assessed_text=gold['context'], assessment_ontology=pred['ontology'], verbose=verbose)


In [116]:
test_context = """
###### How electron donation and withdrawal change chemical shifts  \nWe can get an idea of the effect of electron distribution by looking at a series of benzene rings\nwith the same substituent in the 1 and 4 positions. This pattern makes all four hydrogens on\nthe ring identical. Here are a few compounds listed in order of chemical shift: largest shift\n(lowest fi eld; most deshielded) fi rst. Conjugation is shown by the usual curly arrows, and\ninductive effects by a straight arrow by the side of the group. Only one hydrogen atom and\none set of arrows are shown.  \nConjugation, as discussed in\nChapter 7, is felt through π bonds,\nwhile inductive effects are the\nresult of electron withdrawal or\ndonation felt simply by polarization\nof the σ bonds of the molecule.\nSee p. 135.  \nthe effect of electron-withdrawing groups\nby conjugation  \nby inductive effects  \n**H**  \n**O**  \n**O**  \n**HO**  \n**N**  \nδH 8.48 δH 8.10 **C** δH 8.10 δH 8.07 δH 7.78  \n**N**  \n**O**  \n**O**  \n**OH**  \n**C**  \n**N**  \n**O**  \n**H**  \n**F** **F**  \n**F**  \nThe largest shifts come from groups that withdraw electrons by conjugation. Nitro is the\nmost powerful—this should not surprise you as we saw the same in non-aromatic compounds\nin both [13]C and [1]H NMR spectra. Then come the carbonyl and nitrile group followed by groups\nshowing simple inductive withdrawal. CF3 is an important example of this kind of group—\nthree fl uorine atoms combine to exert a powerful effect.  \n-----  \nIn the middle of our sequence, around the position of benzene itself at 7.27 ppm, come\nthe halogens, whose inductive electron withdrawal and lone pair donation are nearly\nbalanced.  \nbalance between withdrawal by inductive effect and donation of lone pairs by conjugation  \n**I** δH 7.40 **Br** δH 7.32 δH 7.27 **Cl** δH 7.24 **F** δH 7.00  \n**I**  \n**Br**  \n**Cl**  \n**F**  \nAlkyl groups are weak inductive donators, but the groups which give the most shielding—\nperhaps surprisingly—are those containing the electronegative atoms O and N. Despite being\ninductively electron withdrawing (the C–O and C–N σ bonds are polarized with δ + C), on\nbalance conjugation of their lone pairs with the ring (as you saw on p. 278) makes them net\nelectron donors. They increase the shielding at the ring hydrogens. Amino groups are the best.\nNote that one nitrogen-based functional group (NO2) is the best electron withdrawer while\nanother (NH2) is the best electron donor.  \nthe effect of electron-donating groups  \nby inductive effect  \nbalance between withdrawal by inductive effect and donation\nof lone pairs by conjugation—electron donation wins  \n**H**  \nδH 7.03  \n**H**  \n**H**  \n**CH3**  \nδH 6.80 **O**  \n**H** **H**  \nδH 6.59 **N**  \n**H**  \nδH 6.35  \n**H**  \n**H**  \n**CH3**  \n**O**  \n**CH3**  \n**H**  \n**H**  \n**N**  \n**O**  \nδH 7.27  \n**H**  \n**H**  \nδH 7.27  \nδH 5.68  \n**H**  \n**H**  \nδH 5.68  \n**O**  \nδH 6.0  \n**H**  \n**H**  \nδH 7.0  \nδH 4.65  \n**H**  \n**H**  \nδH 6.35  \nAs far as the donors with lone pairs are concerned (the halogens plus O and N), two factors\nare important—the size of the lone pairs and the electronegativity of the element. If we look\nat the four halides at the top of this page the lone pairs are in 2p (F), 3p (Cl), 4p (Br), and 5p (I)\norbitals. In all cases the orbitals on the benzene ring are 2p so the fl uorine orbital is of the\nright size to interact well and the others too large. Even though fl uorine is the most electronegative, it is still the best donor. The others don’t pull so much electron density away, but\nthey can’t give so much back either.\nIf we compare the fi rst row of the p block elements—F, OH, and NH2—all have lone pairs\nin 2p orbitals so now electronegativity is the only variable. As you would expect, the most\nelectronegative element, F, is now the weakest donor.
"""





总共分割出50个句子:

1. ###### How electron donation and withdrawal change chemical shifts  
We can get an idea of the effect of electron distribution by looking at a series of benzene rings
with the same substituent in the 1 and 4 positions

2. This pattern makes all four hydrogens on
the ring identical

3. Here are a few compounds listed in order of chemical shift: largest shift
(lowest fi eld; most deshielded) fi rst

4. Conjugation is shown by the usual curly arrows, and
inductive effects by a straight arrow by the side of the group

5. Only one hydrogen atom and
one set of arrows are shown

6. Conjugation, as discussed in
Chapter 7, is felt through π bonds,
while inductive effects are the
result of electron withdrawal or
donation felt simply by polarization
of the σ bonds of the molecule

7. See p

8. 135

9. the effect of electron-withdrawing groups
by conjugation  
by inductive effects  
**H**  
**O**  
**O**  
**HO**  
**N**  
δH 8

10. 48 δH 8

11. 10 **C** δH 8

12. 10 δH 8

13. 07 δ

In [57]:
import random

# 设置开发集和训练集的大小
dev_size = 10
train_size = 10
chunk_size = 300
max_attempts = 5000

# 创建两个列表分别存储开发集和训练集数据
dev_data = []
train_data = []
used_texts = set()  # 用于检查重复

attempts = 0

# 先采样开发集
while len(dev_data) < dev_size and attempts < max_attempts:
    item = random.choice(data)
    if item.get('type') == 'text':
        text = item['text']
        if len(text) > chunk_size and text not in used_texts:
            dev_data.append(item)
            used_texts.add(text)
    attempts += 1

# 再采样训练集
while len(train_data) < train_size and attempts < max_attempts:
    item = random.choice(data)
    if item.get('type') == 'text':
        text = item['text']
        if len(text) > chunk_size and text not in used_texts:
            train_data.append(item)
            used_texts.add(text)
    attempts += 1

print(f"随机抽取到{len(dev_data)}个开发集文本:")
for i, item in enumerate(dev_data, 1):
    print(f"\n{i}. 文本长度: {len(item['text'])}")
    print(f"文本内容: {item['text'][:100]}...")

print(f"\n随机抽取到{len(train_data)}个训练集文本:")
for i, item in enumerate(train_data, 1):
    print(f"\n{i}. 文本长度: {len(item['text'])}")
    print(f"文本内容: {item['text'][:100]}...")



随机抽取到10个开发集文本:

1. 文本长度: 435
文本内容: The situation with magnesium chloride is similar. In magnesium chloride, found in trace quantities i...

2. 文本长度: 337
文本内容: For the elements $\mathsf{K},\mathsf{C}\mathsf{I}.$ and $\bigcirc$ in the following equations, the r...

3. 文本长度: 557
文本内容: (a) A $5.00\;\mathrm{{mL}}$ sample of vinegar, a small quantity of water, and a few drops of  phenol...

4. 文本长度: 483
文本内容:  Reactions involving gases as reactants or products (or both) are no strangers to us. We now have a ...

5. 文本长度: 645
文本内容: The equation $p=h/\lambda$ also helps us understand the effect of a transfer of momen- tum in a coll...

6. 文本长度: 439
文本内容: 55. We want to determine the acetyl s ali cyclic acid content  of a series of aspirin tablets by tit...

7. 文本长度: 347
文本内容: Example 4-17 illustrates the calculation of the extent of reaction and introduces a tabular approach...

8. 文本长度: 466
文本内容:  34. Explain why these reactions cannot occur as written. (a) (b) $\begin{array}{r}{\mathrm{Fe^{3

In [64]:
extractor = ChemOntology()
extractors = [extractor.load(location) for location in ['v1', 'v2', 'medium_100trainset_v1']]


In [100]:
temp = ChemOntology()
temp1 = ChemOntology()
temp2 = ChemOntology()

temp.load('v1.json')
temp1.load('v2')
temp2.load('medium_100trainset_v1')

extractors = []

extractors.append(temp)
extractors.append(temp1)
extractors.append(temp2)

In [101]:
extractors

[extractor = Predict(StringSignature(text -> reasoning, ontology
     instructions='Analyze the provided text from research papers in the field of chemistry to identify all chemistry-related entities, data properties, and object properties within an ontological framework.\n\nFollow these Step-by-Step Analysis:\n\n1. Extract Chemistry-Related Entities:\n  - Identify all significant nouns, proper nouns, and technical terminologies that represent chemistry-related concepts, such as molecules, reactions, compounds, processes, or any substantial entities.\n  - Ensure that you capture entities across different levels of detail, from broad chemical categories to specific molecular structures, to create a comprehensive representation of the subject matter.\n  - Choose names for entities that are specific enough to indicate their meaning without additional context, avoiding overly generic terms.\n  - Consolidate similar entities to avoid redundancy, ensuring each represents a distinct concept a

In [104]:
# 创建devset
devset = []

for sample in dev_data:
    context = sample['text']
    ontology = random.choice(extractors)(context)
    score = 0

    # 添加到devset
    devset.append(dspy.Example(
        context=context,
        ontology=ontology,
        score=score
    ).with_inputs("context", "ontology"))

print(f"生成了{len(devset)}个开发集样本")


生成了10个开发集样本


In [109]:
# 创建trainset
trainset = []

for sample in train_data:
    context = sample['text']
    ontology = random.choice(extractors)(context)
    score = 0
    
    # 添加到trainset
    trainset.append(dspy.Example(
        context=context,
        ontology=ontology,
        score=score
    ).with_inputs("context", "ontology"))
    print("生成了一个样本")

print(f"生成了{len(trainset)}个训练集样本")


生成了一个样本
生成了一个样本
生成了一个样本
生成了一个样本
生成了一个样本
生成了一个样本




生成了一个样本
生成了一个样本
生成了一个样本
生成了一个样本
生成了10个训练集样本


In [113]:
# 将trainset和devset保存为json格式
import json

# 将数据集转换为可序列化的格式
def convert_to_serializable(dataset):
    serializable_data = []
    for example in dataset:
        serializable_data.append({
            'context': example.context,
            'ontology': ontology_to_string(example.ontology.ontology),
            'score': example.score
        })
    return serializable_data


In [114]:

# 保存trainset
with open('data/trainset.json', 'w', encoding='utf-8') as f:
    json.dump(convert_to_serializable(trainset), f, ensure_ascii=False, indent=2)

# 保存devset
with open('data/devset.json', 'w', encoding='utf-8') as f:
    json.dump(convert_to_serializable(devset), f, ensure_ascii=False, indent=2)

print("数据集已保存为JSON格式到data目录")


数据集已保存为JSON格式到data目录


In [None]:

# 从JSON文件读取数据集
def load_datasets_from_json():
    # 读取trainset
    with open('data/trainset.json', 'r', encoding='utf-8') as f:
        trainset_data = json.load(f)
    
    # 读取devset  
    with open('data/devset.json', 'r', encoding='utf-8') as f:
        devset_data = json.load(f)
        
    # 转换回dspy.Example格式
    trainset = []
    devset = []
    
    for item in trainset_data:
        example = dspy.Example(
            context=item['context'],
            ontology=Ontology(item['ontology']),
            score=item['score']
        ).with_inputs("context", "ontology")
        trainset.append(example)
        
    for item in devset_data:
        example = dspy.Example(
            context=item['context'], 
            ontology=Ontology(item['ontology']),
            score=item['score']
        ).with_inputs("context", "ontology")
        devset.append(example)
        
    return trainset, devset

# 加载数据集
trainset, devset = load_datasets_from_json()
print("已从JSON文件加载数据集")


In [107]:
print(devset[0])

Example({'context': 'The situation with magnesium chloride is similar. In magnesium chloride, found in trace quantities in table salt, magnesium atoms lose two electrons to become magnesium ions, $\\mathrm{Mg}^{2+}$ $\\mathrm{Mg}$ is in group 2). To obtain an electrically neutral formula unit, there must be two $\\mathrm{Cl}^{-}$ ions, each with a charge of $^{-1}$ for every $\\mathrm{Mg}^{2+}$ ion. The formula of magnesium chloride is $\\mathrm{MgCl}_{2}$ ', 'ontology': Prediction(
    reasoning='The text describes the chemical compound magnesium chloride, focusing on its composition and the ionic charges involved. The key entities include magnesium chloride, magnesium ions, and chloride ions. The data properties involve the charges of these ions and the formula of magnesium chloride. The object properties describe the relationship between magnesium ions and chloride ions in forming the compound.\n\n1. **Entities**: \n   - Magnesium Chloride (MgCl2) is the compound being discussed.\n 

In [108]:
# 提取并格式化输出devset的context和ontology
for dev in devset:
    print("Context:")
    print(dev.context)
    print("\nOntology:")
    print(ontology_to_string(dev.ontology.ontology))
    print("\n" + "-"*50 + "\n")


Context:
The situation with magnesium chloride is similar. In magnesium chloride, found in trace quantities in table salt, magnesium atoms lose two electrons to become magnesium ions, $\mathrm{Mg}^{2+}$ $\mathrm{Mg}$ is in group 2). To obtain an electrically neutral formula unit, there must be two $\mathrm{Cl}^{-}$ ions, each with a charge of $^{-1}$ for every $\mathrm{Mg}^{2+}$ ion. The formula of magnesium chloride is $\mathrm{MgCl}_{2}$ 

Ontology:
Entities:
  - Name: Magnesium Chloride(MgCl2)
    Information: Magnesium chloride is a compound found in trace quantities in table salt, with the formula MgCl2.
  - Name: Magnesium Ion(Mg2+)
    Information: Magnesium ions are formed when magnesium atoms lose two electrons, resulting in a charge of +2.
  - Name: Chloride Ion(Cl-)
    Information: Chloride ions have a charge of -1 and pair with magnesium ions to form magnesium chloride.

Data Properties:
  - Name: charge_of_magnesium_ion
    Information: The charge of the magnesium ion is 

In [86]:
scores = []
extractor = ChemOntology()
for dev in devset[:1]:
    pred = extractor(dev['context'])
    score = metric(dev, pred, verbose=True)
    print("完成一个样本")
    scores.append(score)


完成一个样本


In [87]:

for score in scores:
    print(score)
    print("-------------------------------------------------")





Entity Accuracy Score: 5
Data Property Correctness Score: 4
Object Property Completeness Score: 3
Ontology Structure Score: 3
Overall Score: 8

Total Score: 23
Percentage Score: 85.19%

-------------------------------------------------


In [44]:
from dspy.evaluate import Evaluate

evaluate = Evaluate(devset=devset, num_threads=8, display_progress=True, display_table=10)

In [45]:
import dspy.evaluate


evaluate(ChemOntology(), devset=devset, metric=metric)


  0%|          | 0/20 [00:00<?, ?it/s]

Average Metric: 6.41 / 8 (80.1%):  40%|████      | 8/20 [00:31<00:28,  2.34s/it]



Average Metric: 15.52 / 20 (77.6%): 100%|██████████| 20/20 [01:16<00:00,  3.81s/it]

2024/11/22 15:35:00 INFO dspy.evaluate.evaluate: Average Metric: 15.518518518518519 / 20 (77.6%)





Unnamed: 0,context,reasoning,ontology,metric
0,"To some, stoichiometry is no more exciting than the law of conserv...",The text discusses the concept of stoichiometry and its importance...,"entities=[Entity(name='Stoichiometry', information='Stoichiometry ...",✔️ [0.519]
1,If a glass tube that is open at both ends stands upright in a cont...,The text describes a physical experiment involving mercury and a g...,"entities=[Entity(name='Glass Tube', information='A glass tube used...",✔️ [0.926]
2,This is the Schrodinger equation of a free particle moving in one ...,The text describes a scenario involving a particle in quantum mech...,"entities=[Entity(name='Schrodinger Equation', information='The Sch...",✔️ [0.852]
3,It is interesting to note that the nitrogen atom also shows little...,The text discusses the stability of the nitrogen atom and its anio...,"entities=[Entity(name='Nitrogen_Atom(N)', information='The nitroge...",✔️ [0.852]
4,generally the central focus of a problem is the conversion of a ma...,"The text discusses the conversion of mass to moles and vice versa,...","entities=[Entity(name='Mass', information='Mass is a measure of th...",✔️ [0.963]
5,The three subatomic particles considered in this section are the o...,"The text discusses subatomic particles, specifically electrons, ne...","entities=[Entity(name='Electron', information='The electron is bel...",✔️ [0.741]
6,"As a result of experiments of the type just described, we find tha...",The text describes a phenomenon related to the photoelectric effec...,"entities=[Entity(name='Stopping Voltage(V_s)', information='The st...",✔️ [0.667]
7,of blood. Estimates of BAC can be obtained from breath samples by ...,The text discusses the estimation of Blood Alcohol Concentration (...,"entities=[Entity(name='Blood Alcohol Concentration (BAC)', informa...",✔️ [0.889]
8,Electrostatic potential maps are a powerful way of displaying the ...,The text discusses the use of electrostatic potential maps to disp...,"entities=[Entity(name='Electrostatic Potential Maps', information=...",✔️ [0.741]
9,Our major undertaking in this section will be to describe the thre...,The text discusses the three-dimensional probability density distr...,"entities=[Entity(name='Hydrogen Atom', information='The hydrogen a...",✔️ [0.926]


77.59

In [46]:
# Import the optimizer
from dspy.teleprompt import MIPROv2

# Initialize optimizer
teleprompter = MIPROv2(
    metric=metric,
    auto="medium",
    teacher_settings=dict(lm=teacher)
)





In [47]:


zeroshot_optimized_program = teleprompter.compile(
    ChemOntology(),
    trainset=trainset,
    max_bootstrapped_demos=0, # ZERO FEW-SHOT EXAMPLES
    max_labeled_demos=0, # ZERO FEW-SHOT EXAMPLES
    requires_permission_to_run=False,
)


2024/11/22 15:35:23 INFO dspy.teleprompt.mipro_optimizer_v2: 
RUNNING WITH THE FOLLOWING MEDIUM AUTO RUN SETTINGS:
num_trials: 25
minibatch: True
num_candidates: 25
valset size: 80

2024/11/22 15:35:23 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==
2024/11/22 15:35:23 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used for informing instruction proposal.

2024/11/22 15:35:23 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=25 sets of demonstrations...


Bootstrapping set 1/25
Bootstrapping set 2/25


 15%|█▌        | 3/20 [01:19<07:28, 26.36s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 3/25


 15%|█▌        | 3/20 [01:32<08:46, 30.96s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 4/25


  5%|▌         | 1/20 [00:00<00:00, 174.29it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 5/25


 15%|█▌        | 3/20 [00:44<04:11, 14.80s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 6/25


 15%|█▌        | 3/20 [00:46<04:24, 15.53s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 7/25


 15%|█▌        | 3/20 [00:54<05:07, 18.07s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 8/25


  5%|▌         | 1/20 [00:00<00:00, 192.04it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 9/25


 15%|█▌        | 3/20 [00:21<02:01,  7.15s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 10/25


 15%|█▌        | 3/20 [00:45<04:15, 15.04s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 11/25


  5%|▌         | 1/20 [00:00<00:00, 115.64it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 12/25


  5%|▌         | 1/20 [00:23<07:22, 23.27s/it]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 13/25


  5%|▌         | 1/20 [00:36<11:41, 36.94s/it]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 14/25


 15%|█▌        | 3/20 [00:59<05:35, 19.72s/it]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 15/25


  5%|▌         | 1/20 [00:00<00:00, 203.13it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 16/25


  5%|▌         | 1/20 [00:00<00:00, 195.99it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 17/25


 15%|█▌        | 3/20 [00:00<00:00, 159.00it/s]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 18/25


 15%|█▌        | 3/20 [00:00<00:00, 181.10it/s]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 19/25


 15%|█▌        | 3/20 [00:00<00:00, 160.25it/s]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 20/25


 15%|█▌        | 3/20 [00:00<00:00, 164.47it/s]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 21/25


  5%|▌         | 1/20 [00:00<00:00, 175.29it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 22/25


 15%|█▌        | 3/20 [00:00<00:00, 141.14it/s]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 23/25


 15%|█▌        | 3/20 [00:00<00:00, 103.46it/s]


Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.
Bootstrapping set 24/25


  5%|▌         | 1/20 [00:00<00:00, 115.48it/s]


Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.
Bootstrapping set 25/25


 10%|█         | 2/20 [00:00<00:00, 96.10it/s]
2024/11/22 15:43:47 INFO dspy.teleprompt.mipro_optimizer_v2: 
==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==
2024/11/22 15:43:47 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.


Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.


2024/11/22 15:43:58 INFO dspy.teleprompt.mipro_optimizer_v2: 
Proposing instructions...

2024/11/22 15:49:47 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:

2024/11/22 15:49:47 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Analyze the provided text from research papers in the field of chemistry to identify all chemistry-related entities, data properties, and object properties within an ontological framework.

Follow these Step-by-Step Analysis:

1. Extract Chemistry-Related Entities:
  - Identify all significant nouns, proper nouns, and technical terminologies that represent chemistry-related concepts, such as molecules, reactions, compounds, processes, or any substantial entities.
  - Ensure that you capture entities across different levels of detail, from broad chemical categories to specific molecular structures, to create a comprehensive representation of the subject matter.
  - Choose names for entities that are specific enough to indicate their mean

  0%|          | 0/80 [00:00<?, ?it/s]



Average Metric: 13.04 / 15 (86.9%):  19%|█▉        | 15/80 [01:16<04:47,  4.42s/it]



Average Metric: 38.59 / 44 (87.7%):  55%|█████▌    | 44/80 [03:30<02:43,  4.54s/it]



Average Metric: 42.26 / 48 (88.0%):  60%|██████    | 48/80 [03:44<01:53,  3.55s/it]



Average Metric: 70.37 / 80 (88.0%): 100%|██████████| 80/80 [06:00<00:00,  4.51s/it]

2024/11/22 15:55:48 INFO dspy.evaluate.evaluate: Average Metric: 70.37037037037037 / 80 (88.0%)
2024/11/22 15:55:48 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 87.96

2024/11/22 15:55:48 INFO dspy.teleprompt.mipro_optimizer_v2: ==> STEP 3: FINDING OPTIMAL PROMPT PARAMETERS <==
2024/11/22 15:55:48 INFO dspy.teleprompt.mipro_optimizer_v2: We will evaluate the program over a series of trials with different combinations of instructions and few-shot examples to find the optimal combination using Bayesian Optimization.

2024/11/22 15:55:48 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 1 / 25 ==



  0%|          | 0/25 [00:00<?, ?it/s]



Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [01:53<00:00,  4.53s/it]

2024/11/22 15:57:41 INFO dspy.evaluate.evaluate: Average Metric: 22.0 / 25 (88.0%)
2024/11/22 15:57:41 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 12'].
2024/11/22 15:57:41 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0]
2024/11/22 15:57:41 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96]
2024/11/22 15:57:41 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 87.96


2024/11/22 15:57:41 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 2 / 25 ==



  0%|          | 0/25 [00:00<?, ?it/s]



Average Metric: 21.44 / 25 (85.8%): 100%|██████████| 25/25 [02:10<00:00,  5.20s/it]

2024/11/22 15:59:51 INFO dspy.evaluate.evaluate: Average Metric: 21.444444444444443 / 25 (85.8%)
2024/11/22 15:59:51 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 85.78 on minibatch of size 25 with parameters ['Predictor 0: Instruction 1'].
2024/11/22 15:59:51 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78]
2024/11/22 15:59:51 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96]
2024/11/22 15:59:51 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 87.96


2024/11/22 15:59:51 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 3 / 25 ==



Average Metric: 4.19 / 5 (83.7%):  20%|██        | 5/25 [00:23<01:14,  3.71s/it]



Average Metric: 14.52 / 17 (85.4%):  68%|██████▊   | 17/25 [01:24<00:33,  4.21s/it]



Average Metric: 21.63 / 25 (86.5%): 100%|██████████| 25/25 [02:08<00:00,  5.15s/it]

2024/11/22 16:02:00 INFO dspy.evaluate.evaluate: Average Metric: 21.62962962962963 / 25 (86.5%)
2024/11/22 16:02:00 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 86.52 on minibatch of size 25 with parameters ['Predictor 0: Instruction 14'].
2024/11/22 16:02:00 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52]
2024/11/22 16:02:00 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96]
2024/11/22 16:02:00 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 87.96


2024/11/22 16:02:00 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 4 / 25 ==



Average Metric: 2.63 / 3 (87.7%):  12%|█▏        | 3/25 [00:32<02:54,  7.91s/it]



Average Metric: 3.56 / 4 (88.9%):  16%|█▌        | 4/25 [00:36<02:11,  6.25s/it]



Average Metric: 5.33 / 6 (88.9%):  24%|██▍       | 6/25 [00:43<01:31,  4.82s/it]



Average Metric: 21.96 / 25 (87.9%): 100%|██████████| 25/25 [02:26<00:00,  5.87s/it]

2024/11/22 16:04:26 INFO dspy.evaluate.evaluate: Average Metric: 21.962962962962962 / 25 (87.9%)
2024/11/22 16:04:26 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 87.85 on minibatch of size 25 with parameters ['Predictor 0: Instruction 8'].
2024/11/22 16:04:26 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85]
2024/11/22 16:04:26 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96]
2024/11/22 16:04:26 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 87.96


2024/11/22 16:04:26 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 5 / 25 ==



  0%|          | 0/25 [00:00<?, ?it/s]



Average Metric: 21.74 / 25 (87.0%): 100%|██████████| 25/25 [02:16<00:00,  5.46s/it]

2024/11/22 16:06:43 INFO dspy.evaluate.evaluate: Average Metric: 21.74074074074074 / 25 (87.0%)
2024/11/22 16:06:43 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 86.96 on minibatch of size 25 with parameters ['Predictor 0: Instruction 13'].
2024/11/22 16:06:43 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96]
2024/11/22 16:06:43 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96]
2024/11/22 16:06:43 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 87.96


2024/11/22 16:06:43 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 6 / 25 ==



Average Metric: 4.07 / 5 (81.5%):  20%|██        | 5/25 [00:30<01:21,  4.09s/it]



Average Metric: 22.11 / 25 (88.4%): 100%|██████████| 25/25 [02:04<00:00,  5.00s/it]

2024/11/22 16:08:48 INFO dspy.evaluate.evaluate: Average Metric: 22.11111111111111 / 25 (88.4%)
2024/11/22 16:08:48 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.44 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4'].
2024/11/22 16:08:48 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44]
2024/11/22 16:08:48 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96]
2024/11/22 16:08:48 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 87.96


2024/11/22 16:08:48 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 7 / 25 ==



Average Metric: 9.85 / 12 (82.1%):  48%|████▊     | 12/25 [00:48<00:24,  1.86s/it]



Average Metric: 21.11 / 25 (84.4%): 100%|██████████| 25/25 [01:43<00:00,  4.12s/it]

2024/11/22 16:10:31 INFO dspy.evaluate.evaluate: Average Metric: 21.11111111111111 / 25 (84.4%)
2024/11/22 16:10:31 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 84.44 on minibatch of size 25 with parameters ['Predictor 0: Instruction 10'].
2024/11/22 16:10:31 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44]
2024/11/22 16:10:31 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96]
2024/11/22 16:10:31 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 87.96


2024/11/22 16:10:31 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 8 / 25 ==



Average Metric: 0.74 / 1 (74.1%):   0%|          | 0/25 [00:00<?, ?it/s]



Average Metric: 11.48 / 13 (88.3%):  52%|█████▏    | 13/25 [00:58<00:59,  4.92s/it]



Average Metric: 21.93 / 25 (87.7%): 100%|██████████| 25/25 [02:21<00:00,  5.66s/it]

2024/11/22 16:12:52 INFO dspy.evaluate.evaluate: Average Metric: 21.925925925925924 / 25 (87.7%)
2024/11/22 16:12:52 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 87.7 on minibatch of size 25 with parameters ['Predictor 0: Instruction 14'].
2024/11/22 16:12:52 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7]
2024/11/22 16:12:52 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96]
2024/11/22 16:12:52 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 87.96


2024/11/22 16:12:52 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 9 / 25 ==



Average Metric: 10.67 / 13 (82.1%):  48%|████▊     | 12/25 [01:51<03:28, 16.04s/it]



Average Metric: 13.37 / 16 (83.6%):  60%|██████    | 15/25 [02:23<01:48, 10.87s/it]



Average Metric: 19.67 / 23 (85.5%):  92%|█████████▏| 23/25 [03:19<00:15,  7.83s/it]



Average Metric: 21.48 / 25 (85.9%): 100%|██████████| 25/25 [03:36<00:00,  8.66s/it]

2024/11/22 16:16:29 INFO dspy.evaluate.evaluate: Average Metric: 21.48148148148148 / 25 (85.9%)
2024/11/22 16:16:29 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 85.93 on minibatch of size 25 with parameters ['Predictor 0: Instruction 10'].
2024/11/22 16:16:29 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93]
2024/11/22 16:16:29 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96]
2024/11/22 16:16:29 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 87.96


2024/11/22 16:16:29 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 10 / 25 ==



Average Metric: 4.41 / 5 (88.1%):  16%|█▌        | 4/25 [00:00<00:00, 746.88it/s] 



Average Metric: 22.00 / 25 (88.0%): 100%|██████████| 25/25 [01:23<00:00,  3.35s/it]

2024/11/22 16:17:53 INFO dspy.evaluate.evaluate: Average Metric: 22.0 / 25 (88.0%)
2024/11/22 16:17:53 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.0 on minibatch of size 25 with parameters ['Predictor 0: Instruction 10'].
2024/11/22 16:17:53 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0]
2024/11/22 16:17:53 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96]
2024/11/22 16:17:53 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 87.96


2024/11/22 16:17:53 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Full Eval 1 =====
2024/11/22 16:17:53 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 88.44) from minibatch trials...



Average Metric: 29.22 / 33 (88.6%):  40%|████      | 32/80 [02:04<02:05,  2.61s/it]



Average Metric: 39.04 / 44 (88.7%):  55%|█████▌    | 44/80 [02:37<01:25,  2.37s/it]



Average Metric: 39.81 / 45 (88.5%):  56%|█████▋    | 45/80 [02:43<01:57,  3.36s/it]



Average Metric: 49.19 / 56 (87.8%):  69%|██████▉   | 55/80 [03:01<00:40,  1.60s/it]



Average Metric: 70.44 / 80 (88.1%): 100%|██████████| 80/80 [04:24<00:00,  3.30s/it]

2024/11/22 16:22:17 INFO dspy.evaluate.evaluate: Average Metric: 70.44444444444444 / 80 (88.1%)
2024/11/22 16:22:17 INFO dspy.teleprompt.mipro_optimizer_v2: [92mNew best full eval score![0m Score: 88.06
2024/11/22 16:22:17 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06]
2024/11/22 16:22:17 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 88.06
2024/11/22 16:22:17 INFO dspy.teleprompt.mipro_optimizer_v2: 

2024/11/22 16:22:17 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 11 / 25 ==



Average Metric: 21.63 / 25 (86.5%): 100%|██████████| 25/25 [00:00<00:00, 3807.19it/s]

2024/11/22 16:22:17 INFO dspy.evaluate.evaluate: Average Metric: 21.62962962962963 / 25 (86.5%)
2024/11/22 16:22:17 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 86.52 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4'].
2024/11/22 16:22:17 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0, 86.52]
2024/11/22 16:22:17 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06]
2024/11/22 16:22:17 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 88.06


2024/11/22 16:22:17 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 12 / 25 ==



Average Metric: 4.52 / 5 (90.4%):  16%|█▌        | 4/25 [00:00<00:00, 1832.17it/s]



Average Metric: 22.04 / 25 (88.1%): 100%|██████████| 25/25 [01:05<00:00,  2.62s/it]

2024/11/22 16:23:23 INFO dspy.evaluate.evaluate: Average Metric: 22.037037037037038 / 25 (88.1%)
2024/11/22 16:23:23 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.15 on minibatch of size 25 with parameters ['Predictor 0: Instruction 12'].
2024/11/22 16:23:23 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0, 86.52, 88.15]
2024/11/22 16:23:23 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06]
2024/11/22 16:23:23 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 88.06


2024/11/22 16:23:23 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 13 / 25 ==



Average Metric: 22.04 / 25 (88.1%): 100%|██████████| 25/25 [01:59<00:00,  4.77s/it]

2024/11/22 16:25:22 INFO dspy.evaluate.evaluate: Average Metric: 22.037037037037038 / 25 (88.1%)
2024/11/22 16:25:22 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.15 on minibatch of size 25 with parameters ['Predictor 0: Instruction 2'].
2024/11/22 16:25:22 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0, 86.52, 88.15, 88.15]
2024/11/22 16:25:22 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06]
2024/11/22 16:25:22 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 88.06


2024/11/22 16:25:22 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 14 / 25 ==



Average Metric: 6.11 / 7 (87.3%):  24%|██▍       | 6/25 [00:00<00:00, 704.85it/s] 



Average Metric: 16.89 / 20 (84.4%):  80%|████████  | 20/25 [00:26<00:05,  1.08s/it]



Average Metric: 21.41 / 25 (85.6%): 100%|██████████| 25/25 [00:53<00:00,  2.15s/it]

2024/11/22 16:26:16 INFO dspy.evaluate.evaluate: Average Metric: 21.40740740740741 / 25 (85.6%)
2024/11/22 16:26:16 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 85.63 on minibatch of size 25 with parameters ['Predictor 0: Instruction 12'].
2024/11/22 16:26:16 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0, 86.52, 88.15, 88.15, 85.63]
2024/11/22 16:26:16 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06]
2024/11/22 16:26:16 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 88.06


2024/11/22 16:26:16 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 15 / 25 ==



Average Metric: 21.48 / 25 (85.9%): 100%|██████████| 25/25 [01:48<00:00,  4.32s/it]

2024/11/22 16:28:04 INFO dspy.evaluate.evaluate: Average Metric: 21.48148148148148 / 25 (85.9%)
2024/11/22 16:28:04 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 85.93 on minibatch of size 25 with parameters ['Predictor 0: Instruction 9'].
2024/11/22 16:28:04 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0, 86.52, 88.15, 88.15, 85.63, 85.93]
2024/11/22 16:28:04 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06]
2024/11/22 16:28:04 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 88.06


2024/11/22 16:28:04 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 16 / 25 ==



Average Metric: 5.33 / 6 (88.9%):  24%|██▍       | 6/25 [00:29<00:51,  2.72s/it]



Average Metric: 21.30 / 25 (85.2%): 100%|██████████| 25/25 [01:49<00:00,  4.38s/it]

2024/11/22 16:29:53 INFO dspy.evaluate.evaluate: Average Metric: 21.296296296296298 / 25 (85.2%)
2024/11/22 16:29:53 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 85.19 on minibatch of size 25 with parameters ['Predictor 0: Instruction 18'].
2024/11/22 16:29:53 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0, 86.52, 88.15, 88.15, 85.63, 85.93, 85.19]
2024/11/22 16:29:53 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06]
2024/11/22 16:29:53 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 88.06


2024/11/22 16:29:53 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 17 / 25 ==



Average Metric: 11.56 / 13 (88.9%):  52%|█████▏    | 13/25 [01:03<00:51,  4.29s/it]



Average Metric: 18.59 / 21 (88.5%):  84%|████████▍ | 21/25 [01:28<00:10,  2.61s/it]



Average Metric: 22.07 / 25 (88.3%): 100%|██████████| 25/25 [01:47<00:00,  4.29s/it]

2024/11/22 16:31:41 INFO dspy.evaluate.evaluate: Average Metric: 22.074074074074073 / 25 (88.3%)
2024/11/22 16:31:41 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.3 on minibatch of size 25 with parameters ['Predictor 0: Instruction 11'].
2024/11/22 16:31:41 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0, 86.52, 88.15, 88.15, 85.63, 85.93, 85.19, 88.3]
2024/11/22 16:31:41 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06]
2024/11/22 16:31:41 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 88.06


2024/11/22 16:31:41 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 18 / 25 ==



Average Metric: 21.59 / 25 (86.4%): 100%|██████████| 25/25 [00:00<00:00, 4162.01it/s]

2024/11/22 16:31:41 INFO dspy.evaluate.evaluate: Average Metric: 21.59259259259259 / 25 (86.4%)
2024/11/22 16:31:41 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 86.37 on minibatch of size 25 with parameters ['Predictor 0: Instruction 4'].
2024/11/22 16:31:41 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0, 86.52, 88.15, 88.15, 85.63, 85.93, 85.19, 88.3, 86.37]
2024/11/22 16:31:41 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06]
2024/11/22 16:31:41 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 88.06


2024/11/22 16:31:41 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 19 / 25 ==



Average Metric: 2.52 / 3 (84.0%):   8%|▊         | 2/25 [00:00<00:00, 2075.87it/s]



Average Metric: 17.04 / 21 (81.1%):  84%|████████▍ | 21/25 [00:53<00:05,  1.37s/it]



Average Metric: 20.70 / 25 (82.8%): 100%|██████████| 25/25 [01:10<00:00,  2.82s/it]

2024/11/22 16:32:51 INFO dspy.evaluate.evaluate: Average Metric: 20.703703703703702 / 25 (82.8%)
2024/11/22 16:32:51 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 82.81 on minibatch of size 25 with parameters ['Predictor 0: Instruction 11'].
2024/11/22 16:32:51 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0, 86.52, 88.15, 88.15, 85.63, 85.93, 85.19, 88.3, 86.37, 82.81]
2024/11/22 16:32:51 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06]
2024/11/22 16:32:51 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 88.06


2024/11/22 16:32:51 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 20 / 25 ==



Average Metric: 14.67 / 17 (86.3%):  68%|██████▊   | 17/25 [01:20<00:24,  3.09s/it]



Average Metric: 21.85 / 25 (87.4%): 100%|██████████| 25/25 [02:07<00:00,  5.11s/it]

2024/11/22 16:34:59 INFO dspy.evaluate.evaluate: Average Metric: 21.85185185185185 / 25 (87.4%)
2024/11/22 16:34:59 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 87.41 on minibatch of size 25 with parameters ['Predictor 0: Instruction 23'].
2024/11/22 16:34:59 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0, 86.52, 88.15, 88.15, 85.63, 85.93, 85.19, 88.3, 86.37, 82.81, 87.41]
2024/11/22 16:34:59 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06]
2024/11/22 16:34:59 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 88.06


2024/11/22 16:34:59 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Full Eval 2 =====
2024/11/22 16:34:59 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 88.15) from minibatch trials...



Average Metric: 2.74 / 3 (91.4%):   2%|▎         | 2/80 [00:00<00:00, 1842.03it/s]



Average Metric: 10.67 / 12 (88.9%):  14%|█▍        | 11/80 [00:27<02:11,  1.90s/it]



Average Metric: 39.33 / 44 (89.4%):  55%|█████▌    | 44/80 [02:16<02:23,  3.98s/it]



Average Metric: 40.26 / 45 (89.5%):  56%|█████▋    | 45/80 [02:24<03:05,  5.30s/it]



Average Metric: 46.59 / 52 (89.6%):  65%|██████▌   | 52/80 [02:39<01:11,  2.55s/it]



Average Metric: 52.78 / 59 (89.5%):  74%|███████▍  | 59/80 [03:01<00:53,  2.57s/it]



Average Metric: 71.33 / 80 (89.2%): 100%|██████████| 80/80 [03:52<00:00,  2.91s/it]

2024/11/22 16:38:52 INFO dspy.evaluate.evaluate: Average Metric: 71.33333333333333 / 80 (89.2%)
2024/11/22 16:38:52 INFO dspy.teleprompt.mipro_optimizer_v2: [92mNew best full eval score![0m Score: 89.17
2024/11/22 16:38:52 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06, 89.17]
2024/11/22 16:38:52 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.17
2024/11/22 16:38:52 INFO dspy.teleprompt.mipro_optimizer_v2: 

2024/11/22 16:38:52 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 21 / 25 ==



  0%|          | 0/25 [00:00<?, ?it/s]



Average Metric: 17.00 / 19 (89.5%):  76%|███████▌  | 19/25 [01:44<00:25,  4.19s/it]



Average Metric: 22.26 / 25 (89.0%): 100%|██████████| 25/25 [02:08<00:00,  5.15s/it]

2024/11/22 16:41:01 INFO dspy.evaluate.evaluate: Average Metric: 22.25925925925926 / 25 (89.0%)
2024/11/22 16:41:01 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 89.04 on minibatch of size 25 with parameters ['Predictor 0: Instruction 17'].
2024/11/22 16:41:01 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0, 86.52, 88.15, 88.15, 85.63, 85.93, 85.19, 88.3, 86.37, 82.81, 87.41, 89.04]
2024/11/22 16:41:01 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06, 89.17]
2024/11/22 16:41:01 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.17


2024/11/22 16:41:01 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 22 / 25 ==



Average Metric: 3.52 / 4 (88.0%):  12%|█▏        | 3/25 [00:00<00:00, 1579.97it/s]



Average Metric: 22.44 / 25 (89.8%): 100%|██████████| 25/25 [01:19<00:00,  3.18s/it]

2024/11/22 16:42:20 INFO dspy.evaluate.evaluate: Average Metric: 22.444444444444443 / 25 (89.8%)
2024/11/22 16:42:20 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 89.78 on minibatch of size 25 with parameters ['Predictor 0: Instruction 17'].
2024/11/22 16:42:20 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0, 86.52, 88.15, 88.15, 85.63, 85.93, 85.19, 88.3, 86.37, 82.81, 87.41, 89.04, 89.78]
2024/11/22 16:42:20 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06, 89.17]
2024/11/22 16:42:20 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.17


2024/11/22 16:42:20 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 23 / 25 ==



Average Metric: 9.78 / 11 (88.9%):  40%|████      | 10/25 [00:26<00:58,  3.89s/it]



Average Metric: 14.26 / 16 (89.1%):  60%|██████    | 15/25 [00:28<00:15,  1.53s/it]



Average Metric: 15.04 / 17 (88.5%):  68%|██████▊   | 17/25 [00:33<00:11,  1.45s/it]



Average Metric: 22.11 / 25 (88.4%): 100%|██████████| 25/25 [01:01<00:00,  2.44s/it]

2024/11/22 16:43:21 INFO dspy.evaluate.evaluate: Average Metric: 22.11111111111111 / 25 (88.4%)
2024/11/22 16:43:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.44 on minibatch of size 25 with parameters ['Predictor 0: Instruction 17'].
2024/11/22 16:43:21 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0, 86.52, 88.15, 88.15, 85.63, 85.93, 85.19, 88.3, 86.37, 82.81, 87.41, 89.04, 89.78, 88.44]
2024/11/22 16:43:21 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06, 89.17]
2024/11/22 16:43:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.17


2024/11/22 16:43:21 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 24 / 25 ==



Average Metric: 22.30 / 25 (89.2%): 100%|██████████| 25/25 [01:00<00:00,  2.41s/it] 

2024/11/22 16:44:22 INFO dspy.evaluate.evaluate: Average Metric: 22.296296296296298 / 25 (89.2%)
2024/11/22 16:44:22 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 89.19 on minibatch of size 25 with parameters ['Predictor 0: Instruction 17'].
2024/11/22 16:44:22 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0, 86.52, 88.15, 88.15, 85.63, 85.93, 85.19, 88.3, 86.37, 82.81, 87.41, 89.04, 89.78, 88.44, 89.19]
2024/11/22 16:44:22 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06, 89.17]
2024/11/22 16:44:22 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.17


2024/11/22 16:44:22 INFO dspy.teleprompt.mipro_optimizer_v2: == Minibatch Trial 25 / 25 ==



Average Metric: 22.11 / 25 (88.4%): 100%|██████████| 25/25 [00:42<00:00,  1.70s/it] 

2024/11/22 16:45:04 INFO dspy.evaluate.evaluate: Average Metric: 22.11111111111111 / 25 (88.4%)
2024/11/22 16:45:04 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 88.44 on minibatch of size 25 with parameters ['Predictor 0: Instruction 17'].
2024/11/22 16:45:04 INFO dspy.teleprompt.mipro_optimizer_v2: Minibatch scores so far: [88.0, 85.78, 86.52, 87.85, 86.96, 88.44, 84.44, 87.7, 85.93, 88.0, 86.52, 88.15, 88.15, 85.63, 85.93, 85.19, 88.3, 86.37, 82.81, 87.41, 89.04, 89.78, 88.44, 89.19, 88.44]
2024/11/22 16:45:04 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06, 89.17]
2024/11/22 16:45:04 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.17


2024/11/22 16:45:04 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Full Eval 3 =====
2024/11/22 16:45:04 INFO dspy.teleprompt.mipro_optimizer_v2: Doing full eval on next top averaging program (Avg Score: 88.978) from minibatch trials...



Average Metric: 26.81 / 30 (89.4%):  36%|███▋      | 29/80 [00:00<00:00, 671.64it/s]



Average Metric: 40.26 / 46 (87.5%):  56%|█████▋    | 45/80 [00:22<00:20,  1.74it/s] 



Average Metric: 41.19 / 47 (87.6%):  59%|█████▉    | 47/80 [00:23<00:12,  2.55it/s]



Average Metric: 65.63 / 75 (87.5%):  92%|█████████▎| 74/80 [00:48<00:05,  1.18it/s]



Average Metric: 70.15 / 80 (87.7%): 100%|██████████| 80/80 [01:11<00:00,  1.11it/s]

2024/11/22 16:46:16 INFO dspy.evaluate.evaluate: Average Metric: 70.14814814814815 / 80 (87.7%)
2024/11/22 16:46:16 INFO dspy.teleprompt.mipro_optimizer_v2: Full eval scores so far: [87.96, 88.06, 89.17, 87.69]
2024/11/22 16:46:16 INFO dspy.teleprompt.mipro_optimizer_v2: Best full score so far: 89.17
2024/11/22 16:46:16 INFO dspy.teleprompt.mipro_optimizer_v2: 

2024/11/22 16:46:16 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 89.17!





In [48]:

# Save optimize program for future use
zeroshot_optimized_program.save(f"medium_100trainset_v1")


In [49]:
evaluate(zeroshot_optimized_program, devset=devset, metric=metric)

  0%|          | 0/20 [00:00<?, ?it/s]



Average Metric: 12.22 / 14 (87.3%):  70%|███████   | 14/20 [00:47<00:12,  2.12s/it]



Average Metric: 13.15 / 15 (87.7%):  75%|███████▌  | 15/20 [00:50<00:11,  2.30s/it]



Average Metric: 15.81 / 20 (79.1%): 100%|██████████| 20/20 [01:11<00:00,  3.58s/it]

2024/11/22 16:54:52 INFO dspy.evaluate.evaluate: Average Metric: 15.814814814814815 / 20 (79.1%)





Unnamed: 0,context,reasoning,ontology,metric
0,"To some, stoichiometry is no more exciting than the law of conserv...",The text discusses the concept of stoichiometry and its importance...,"entities=[Entity(name='Stoichiometry', information='Stoichiometry ...",✔️ [0.889]
1,If a glass tube that is open at both ends stands upright in a cont...,The text describes a physical experiment involving a glass tube an...,"entities=[Entity(name='Glass_Tube', information='A glass tube that...",✔️ [0.889]
2,This is the Schrodinger equation of a free particle moving in one ...,The text describes a scenario involving a particle subjected to a ...,"entities=[Entity(name='Particle', information='A particle moving i...",✔️ [0.889]
3,It is interesting to note that the nitrogen atom also shows little...,The text discusses the stability of the nitrogen atom and its anio...,"entities=[Entity(name='Nitrogen_Atom(N)', information='The nitroge...",✔️ [0.926]
4,generally the central focus of a problem is the conversion of a ma...,The text discusses the process of converting mass in grams to mole...,"entities=[Entity(name='Mass', information='Mass is a measure of th...",✔️ [0.926]
5,The three subatomic particles considered in this section are the o...,"The text discusses subatomic particles, specifically electrons, ne...","entities=[Entity(name='Electron', information='The electron is bel...",✔️ [0.704]
6,"As a result of experiments of the type just described, we find tha...",The text describes a phenomenon related to the photoelectric effec...,"entities=[Entity(name='StoppingVoltage(Vs)', information='The stop...",✔️ [0.741]
7,of blood. Estimates of BAC can be obtained from breath samples by ...,The text discusses the estimation of Blood Alcohol Concentration (...,"entities=[Entity(name='Blood Alcohol Concentration(BAC)', informat...",✔️ [0.889]
8,Electrostatic potential maps are a powerful way of displaying the ...,The text discusses the concept of electrostatic potential maps and...,"entities=[Entity(name='Electrostatic Potential Map', information='...",✔️ [0.889]
9,Our major undertaking in this section will be to describe the thre...,The text discusses the three-dimensional probability density distr...,"entities=[Entity(name='Hydrogen Atom', information='The hydrogen a...",✔️ [0.926]


79.07

In [139]:
zeroshot_optimized_program = ChemOntology()
zeroshot_optimized_program.load(f"medium_100trainset_v1")


In [143]:
tests = ["""
###### How electron donation and withdrawal change chemical shifts  \nWe can get an idea of the effect of electron distribution by looking at a series of benzene rings\nwith the same substituent in the 1 and 4 positions. This pattern makes all four hydrogens on\nthe ring identical. Here are a few compounds listed in order of chemical shift: largest shift\n(lowest fi eld; most deshielded) fi rst. Conjugation is shown by the usual curly arrows, and\ninductive effects by a straight arrow by the side of the group. Only one hydrogen atom and\none set of arrows are shown. 
""",
"""
\nConjugation, as discussed in\nChapter 7, is felt through π bonds,\nwhile inductive effects are the\nresult of electron withdrawal or\ndonation felt simply by polarization\nof the σ bonds of the molecule.\nSee p. 135.  \nthe effect of electron-withdrawing groups\nby conjugation  \nby inductive effects  \n**H**  \n**O**  \n**O**  \n**HO**  \n**N**  \nδH 8.48 δH 8.10 **C** δH 8.10 δH 8.07 δH 7.78  \n**N**  \n**O**  \n**O**  \n**OH**  \n**C**  \n**N**  \n**O**  \n**H**  \n**F** **F**  \n**F**  \nThe largest shifts come from groups that withdraw electrons by conjugation. 
""",
"""
Nitro is the\nmost powerful—this should not surprise you as we saw the same in non-aromatic compounds\nin both [13]C and [1]H NMR spectra. Then come the carbonyl and nitrile group followed by groups\nshowing simple inductive withdrawal. CF3 is an important example of this kind of group—\nthree fl uorine atoms combine to exert a powerful effect.  \n-----  \nIn the middle of our sequence, around the position of benzene itself at 7.27 ppm, come\nthe halogens, whose inductive electron withdrawal and lone pair donation are nearly\nbalanced.  \nbalance between withdrawal by inductive effect and donation of lone pairs by conjugation  \n**I** δH 7.40 **Br** δH 7.32 δH 7.27 **Cl** δH 7.24 **F** δH 7.00  \n**I**  \n**Br**  \n**Cl**  \n**F**  
""",
"""
\nAlkyl groups are weak inductive donators, but the groups which give the most shielding—\nperhaps surprisingly—are those containing the electronegative atoms O and N. Despite being\ninductively electron withdrawing (the C–O and C–N σ bonds are polarized with δ + C), on\nbalance conjugation of their lone pairs with the ring (as you saw on p. 278) makes them net\nelectron donors. They increase the shielding at the ring hydrogens. 
""",
"""
Amino groups are the best.\nNote that one nitrogen-based functional group (NO2) is the best electron withdrawer while\nanother (NH2) is the best electron donor.  \nthe effect of electron-donating groups  \nby inductive effect  \nbalance between withdrawal by inductive effect and donation\nof lone pairs by conjugation—electron donation wins  \n**H**  \nδH 7.03  \n**H**  \n**H**  \n**CH3**  \nδH 6.80 **O**  \n**H** **H**  \nδH 6.59 **N**  \n**H**  \nδH 6.35  \n**H**  \n**H**  \n**CH3**  \n**O**  \n**CH3**  \n**H**  \n**H**  \n**N**  \n**O**  \nδH 7.27  \n**H**  \n**H**  \nδH 7.27  \nδH 5.68  \n**H**  \n**H**  \nδH 5.68  \n**O**  \nδH 6.0  \n**H**  \n**H**  \nδH 7.0  \nδH 4.65  \n**H**  \n**H**  \nδH 6.35  \nAs far as the donors with lone pairs are concerned (the halogens plus O and N), two factors\nare important—the size of the lone pairs and the electronegativity of the element. 
""",
"""
If we look\nat the four halides at the top of this page the lone pairs are in 2p (F), 3p (Cl), 4p (Br), and 5p (I)\norbitals. In all cases the orbitals on the benzene ring are 2p so the fl uorine orbital is of the\nright size to interact well and the others too large. Even though fl uorine is the most electronegative, it is still the best donor. The others don’t pull so much electron density away, but\nthey can’t give so much back either.\nIf we compare the fi rst row of the p block elements—F, OH, and NH2—all have lone pairs\nin 2p orbitals so now electronegativity is the only variable. As you would expect, the most\nelectronegative element, F, is now the weakest donor.
"""
]



In [145]:
for test in tests:
    print(ontology_to_string(zeroshot_optimized_program(test).ontology))





Entities:
  - Name: Electron Donation
    Information: Electron donation refers to the process by which an electron-rich species donates electrons to another species, affecting chemical shifts.
  - Name: Electron Withdrawal
    Information: Electron withdrawal is the process by which an electron-deficient species attracts electrons from another species, affecting chemical shifts.
  - Name: Chemical Shift
    Information: Chemical shift is a change in the resonant frequency of a nucleus due to the electronic environment, often measured in NMR spectroscopy.
  - Name: Benzene Ring
    Information: A benzene ring is a cyclic hydrocarbon with alternating double bonds, often used as a model to study electron distribution effects.
  - Name: Substituent
    Information: A substituent is an atom or group of atoms that replaces hydrogen in a hydrocarbon, affecting the chemical properties of the molecule.
  - Name: Conjugation
    Information: Conjugation refers to the overlap of p-orbitals acros

In [153]:
for test in tests:
    print(metric(dspy.Prediction(context=test),zeroshot_optimized_program(test), verbose=True))



Entity Accuracy Score: 5
Data Property Correctness Score: 4
Object Property Completeness Score: 3
Ontology Structure Score: 4
Overall Score: 8

Total Score: 24
Percentage Score: 88.89%

Reason:
entity accuracy: 
data property correctness: The ontology lacks explicit capture of dependencies for the chemical shift value, which affects the assessment score.
object property completeness: The ontology lacks object property chains and inverse relationships, and the use of 'some' restrictions may not fully capture the specificity of interactions.
ontology structure: 
overall score: 


Entity Accuracy Score: 4
Data Property Correctness Score: 4
Object Property Completeness Score: 3
Ontology Structure Score: 4
Overall Score: 6

Total Score: 21
Percentage Score: 77.78%

Reason:
entity accuracy: The ontology does not fully capture an appropriate chemical classification hierarchy, which prevents it from achieving a perfect score.
data property correctness: The ontology lacks explicit mention of t

In [154]:
print(metric(dspy.Prediction(context=test_context),zeroshot_optimized_program(test_context), verbose=True))



Entity Accuracy Score: 5
Data Property Correctness Score: 5
Object Property Completeness Score: 2
Ontology Structure Score: 4
Overall Score: 7

Total Score: 23
Percentage Score: 85.19%

Reason:
entity accuracy: 
data property correctness: 
object property completeness: The ontology lacks detailed relationship restrictions, complex process representation, and inverse relationships.
ontology structure: 
overall score: The ontology is mostly comprehensive but could include more details on the balance of effects for halogens.

