In [1]:
import pandas as pd
import numpy as np
from dataclasses import dataclass
import os
import re
dir = os.getcwd()
os.chdir("../profbot")
from llmtest import Llm
os.chdir(dir)

In [2]:
llm = Llm(model_identifier="gpt-4o")
llm.prompt("poke, please reply")

{'model': 'gpt-4o', 'messages': [{'role': 'user', 'content': 'poke, please reply'}]}


'Hello! How can I assist you today?'

In [3]:
objects = pd.read_csv("Objects.csv")
equivalences = pd.read_csv("Equivalences.csv")
relations = pd.read_csv("Relations.csv")
users = pd.read_csv("Users.csv")


In [4]:
from dataclasses import dataclass

@dataclass
class Schema:
    relation_id: int
    user_id: int
    object1_id: int
    object2_id: int
    equivalence_id: int
    value: bool
    explanation: str

In [5]:
relations[["relation_id","user_id","object1_id","object2_id","equivalence_id","value","explanation"]].iloc[0]

relation_id                                                       0
user_id                                                           1
object1_id                                                        2
object2_id                                                        3
equivalence_id                                                    2
value                                                          True
explanation       "Both objects perform the function statistical...
Name: 0, dtype: object

In [6]:
n = len(relations)
records = []
for i in range(n):
    args = relations.iloc[i].values
    record = Schema(*args)
    records.append(record)

print(records)


[Schema(relation_id=0, user_id=1, object1_id=2, object2_id=3, equivalence_id=2, value=' True', explanation='"Both objects perform the function statistical regression by they do so according to different algorithms."'), Schema(relation_id=1, user_id=1, object1_id=2, object2_id=3, equivalence_id=3, value=' False', explanation='"These two regressions use different algorithms so they are not logically equivalent."'), Schema(relation_id=2, user_id=1, object1_id=12, object2_id=13, equivalence_id=2, value='True', explanation='"Both objects perform the function of decision-making through different forms of aggregation."'), Schema(relation_id=3, user_id=1, object1_id=12, object2_id=13, equivalence_id=3, value='False', explanation='"The objects use distinct algorithms and methods (trees vs. nodes) and are not logically equivalent."'), Schema(relation_id=4, user_id=1, object1_id=47, object2_id=7, equivalence_id=1, value='True', explanation='"Both objects are conceptually related in the context of

In [7]:
preprompt = """ We are running an experiment to collect data bout how different agents concieve of the relations between objects."""
postprompt = """please provide the response in the format of a markdown table with the following columns<type>: relation_id<int>,user_id<int>, object1_id<int>,object2_id<int>,equivalence_id<int>, value<bool>, explanation<str>"""
def prompt_maker(ob1,ob2, objects, relations, equivalences):
    prompt = f"""Consider the following objects: {objects.to_csv()} and the following relations: {relations.to_csv()} and equivalences: {equivalences.to_csv()}.
    compare {ob1.to_csv()} to {ob2.to_csv()} and tell me how they are related by constructing records that would fit in the relations table. There should be one record for equivalence in the equivalences table."""

    return preprompt + prompt + postprompt

In [8]:
test_prompt = prompt_maker(objects.loc[11],objects.loc[12],objects,relations,equivalences)

In [9]:
test_response = llm.prompt(test_prompt)

{'model': 'gpt-4o', 'messages': [{'role': 'user', 'content': ' We are running an experiment to collect data bout how different agents concieve of the relations between objects.Consider the following objects: ,object_id,name,details\n0,1,Roberts rules of order,A manual of parliamentary procedure.\n1,2,least squares regression,A statistical method for estimating relationships.\n2,3,lasso regression,A regression method that uses shrinkage.\n3,4,deliberation,The process of discussion before making a decision.\n4,5,voting,A method for a group to make a decision.\n5,6,anomaly detection,Identifying rare items or events.\n6,7,sense-making,The process of giving meaning to experiences.\n7,8,a ballot box,A container used to collect ballots in an election.\n8,19,association rule learning,A method for finding relationships in data.\n9,10,rule-based decision-making,A method for making decisions based on rules.\n10,11,decision tree,A method for making decisions based on a tree-like model.\n11,12,rand

In [10]:
print(test_response)

Certainly! Here are the records that fit into the relations table for the comparison between **random forest** (object_id: 12) and **neural network** (object_id: 13):

### Relations Table

| relation_id | user_id | object1_id | object2_id | equivalence_id | value | explanation |
|-------------|---------|------------|------------|----------------|-------|-------------|
| 9           | 3       | 12         | 13         | 2              | True  | Both objects perform the function of decision-making through different forms of aggregation. |
| 10          | 3       | 12         | 13         | 3              | False | The objects use distinct algorithms and methods (trees vs. nodes) and are not logically equivalent. |

### Equivalences Table

There is no new equivalence record needed as the "Functional" (equivalence_id: 2) equivalence already exists.


In [11]:
from dataclasses import fields

def parse_markdown_table_from_text(text, schema):
    try:
        # Extract the markdown table from the text
        lines = text.strip().split('\n')
        
        # Find the start and end of the table
        start = end = None
        for i, line in enumerate(lines):
            if '|' in line:
                if start is None:
                    start = i
                end = i
        
        # Extract table lines
        table_lines = lines[start:end+1]
        
        # Extract headers
        headers = [header.strip() for header in table_lines[0].split('|') if header.strip()]
        
        # Ensure headers match schema fields
        schema_fields = [field.name for field in fields(schema)]
        if not all(header in schema_fields for header in headers):
            raise ValueError("Markdown table headers do not match schema fields")
        
        # Extract rows
        rows = [line.strip() for line in table_lines[2:] if line.strip()]
        
        records = []
        for row in rows:
            values = [value.strip() for value in row.split('|') if value.strip()]
            record_dict = dict(zip(headers, values))
            
            # Convert record_dict to match schema types
            record = schema(
                relation_id=int(record_dict['relation_id']),
                user_id=int(record_dict['user_id']),
                object1_id=int(record_dict['object1_id']),
                object2_id=int(record_dict['object2_id']),
                equivalence_id=int(record_dict['equivalence_id']),
                value=record_dict['value'].lower() == 'true',
                explanation=record_dict['explanation']
            )
            records.append(record)
        
        return records
    except:
        return []

# Example usage:
text = test_response

records = parse_markdown_table_from_text(text, Schema)
for record in records:
    print(record)


Schema(relation_id=9, user_id=3, object1_id=12, object2_id=13, equivalence_id=2, value=True, explanation='Both objects perform the function of decision-making through different forms of aggregation.')
Schema(relation_id=10, user_id=3, object1_id=12, object2_id=13, equivalence_id=3, value=False, explanation='The objects use distinct algorithms and methods (trees vs. nodes) and are not logically equivalent.')


In [12]:
#append these records to the relations table
relations

Unnamed: 0,relation_id,user_id,object1_id,object2_id,equivalence_id,value,explanation
0,0,1,2,3,2,True,"""Both objects perform the function statistical..."
1,1,1,2,3,3,False,"""These two regressions use different algorithm..."
2,2,1,12,13,2,True,"""Both objects perform the function of decision..."
3,3,1,12,13,3,False,"""The objects use distinct algorithms and metho..."
4,4,1,47,7,1,True,"""Both objects are conceptually related in the ..."
5,5,1,47,7,2,False,"""The maintainer manages a project, which is fu..."
6,6,1,47,7,3,False,"""The procedures carried out by a maintainer an..."
7,7,1,47,7,4,False,"""A maintainer and the process of sense-making ..."
8,8,4,174,205,7,True,"""Both objects involve the movement of informat..."


In [13]:
pd.DataFrame(records)

Unnamed: 0,relation_id,user_id,object1_id,object2_id,equivalence_id,value,explanation
0,9,3,12,13,2,True,Both objects perform the function of decision-...
1,10,3,12,13,3,False,The objects use distinct algorithms and method...


In [14]:
relations = pd.concat([relations,pd.DataFrame(records)],axis=0)

In [15]:
relations

Unnamed: 0,relation_id,user_id,object1_id,object2_id,equivalence_id,value,explanation
0,0,1,2,3,2,True,"""Both objects perform the function statistical..."
1,1,1,2,3,3,False,"""These two regressions use different algorithm..."
2,2,1,12,13,2,True,"""Both objects perform the function of decision..."
3,3,1,12,13,3,False,"""The objects use distinct algorithms and metho..."
4,4,1,47,7,1,True,"""Both objects are conceptually related in the ..."
5,5,1,47,7,2,False,"""The maintainer manages a project, which is fu..."
6,6,1,47,7,3,False,"""The procedures carried out by a maintainer an..."
7,7,1,47,7,4,False,"""A maintainer and the process of sense-making ..."
8,8,4,174,205,7,True,"""Both objects involve the movement of informat..."
0,9,3,12,13,2,True,Both objects perform the function of decision-...


In [16]:
T= 500

for t in range(T):
    ob1 = objects.sample()
    ob2 = objects.sample()
    prompt = prompt_maker(ob1,ob2,objects,relations,equivalences)
    response = llm.prompt(prompt)
    records = parse_markdown_table_from_text(response, Schema)
    relations = pd.concat([relations,pd.DataFrame(records)],axis=0)

{'model': 'gpt-4o', 'messages': [{'role': 'user', 'content': ' We are running an experiment to collect data bout how different agents concieve of the relations between objects.Consider the following objects: ,object_id,name,details\n0,1,Roberts rules of order,A manual of parliamentary procedure.\n1,2,least squares regression,A statistical method for estimating relationships.\n2,3,lasso regression,A regression method that uses shrinkage.\n3,4,deliberation,The process of discussion before making a decision.\n4,5,voting,A method for a group to make a decision.\n5,6,anomaly detection,Identifying rare items or events.\n6,7,sense-making,The process of giving meaning to experiences.\n7,8,a ballot box,A container used to collect ballots in an election.\n8,19,association rule learning,A method for finding relationships in data.\n9,10,rule-based decision-making,A method for making decisions based on rules.\n10,11,decision tree,A method for making decisions based on a tree-like model.\n11,12,rand

In [17]:
relations

Unnamed: 0,relation_id,user_id,object1_id,object2_id,equivalence_id,value,explanation
0,0,1,2,3,2,True,"""Both objects perform the function statistical..."
1,1,1,2,3,3,False,"""These two regressions use different algorithm..."
2,2,1,12,13,2,True,"""Both objects perform the function of decision..."
3,3,1,12,13,3,False,"""The objects use distinct algorithms and metho..."
4,4,1,47,7,1,True,"""Both objects are conceptually related in the ..."
5,5,1,47,7,2,False,"""The maintainer manages a project, which is fu..."
6,6,1,47,7,3,False,"""The procedures carried out by a maintainer an..."
7,7,1,47,7,4,False,"""A maintainer and the process of sense-making ..."
8,8,4,174,205,7,True,"""Both objects involve the movement of informat..."
0,9,3,12,13,2,True,Both objects perform the function of decision-...


In [18]:
relations.reset_index(drop=True,inplace=True)

In [19]:
relations["relation_id"] = relations.index

In [20]:
relations

Unnamed: 0,relation_id,user_id,object1_id,object2_id,equivalence_id,value,explanation
0,0,1,2,3,2,True,"""Both objects perform the function statistical..."
1,1,1,2,3,3,False,"""These two regressions use different algorithm..."
2,2,1,12,13,2,True,"""Both objects perform the function of decision..."
3,3,1,12,13,3,False,"""The objects use distinct algorithms and metho..."
4,4,1,47,7,1,True,"""Both objects are conceptually related in the ..."
5,5,1,47,7,2,False,"""The maintainer manages a project, which is fu..."
6,6,1,47,7,3,False,"""The procedures carried out by a maintainer an..."
7,7,1,47,7,4,False,"""A maintainer and the process of sense-making ..."
8,8,4,174,205,7,True,"""Both objects involve the movement of informat..."
9,9,3,12,13,2,True,Both objects perform the function of decision-...


In [21]:
relations.to_csv("Relations_extended.csv",index=False)