### Data generation for Linking Latin Philosophical Expressions project

In [1]:
from langchain_openai import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain.load.dump import dumps
from pydantic import BaseModel, Field
from pydantic.config import ConfigDict
from typing import Annotated, Literal
from enum import Enum
from typing import Literal
from typing import List
import pandas as pd
import json
import os
import time
from dotenv import load_dotenv

In [2]:
# Load environment variables from a local .env file, if one is present
# (OPENAI_API_KEY is read from the environment when the model is created).
load_dotenv()

True

In [3]:
# Chat model client shared by every chain built in this file.
model = ChatOpenAI(
    model="gpt-4o-mini",
    # NOTE(review): temperature 0 is presumably chosen to keep the generated
    # rows as repeatable as possible between runs — confirm.
    temperature=0,
    # The API key is taken from the OPENAI_API_KEY environment variable.
    api_key= os.getenv("OPENAI_API_KEY")
)

In [None]:
def generate_expression_id(expression: str) -> str:
    """Return a short identifier derived from a Latin expression.

    The expression is lower-cased, commas and question marks are removed,
    and its first four words are joined with underscores.

    Args:
        expression: The Latin expression as plain text.

    Returns:
        An identifier such as ``"cogito_ergo_sum"``; an empty string when the
        expression contains no words.
    """
    cleaned = expression.lower().translate(str.maketrans("", "", ",?"))
    # split() without an argument ignores leading, trailing and repeated
    # whitespace, so stray blanks never become empty "words" that would show
    # up as doubled or dangling underscores and eat into the 4-word limit.
    words = cleaned.split()
    return "_".join(words[:4])

In [None]:
def get_prompt(base_prompt: str, parser):
    """Build a prompt template that ends with the parser's format instructions.

    Args:
        base_prompt: Task description. It is expected to contain an
            ``{expression}`` placeholder, which is declared as the template's
            only input variable.
        parser: Output parser; the text returned by its
            ``get_format_instructions()`` is pre-filled into the template.

    Returns:
        A ``PromptTemplate`` that takes a single ``expression`` input.
    """
    # The format instructions go on their own line after the task text, with
    # nothing following the placeholder.
    template = base_prompt + "\n" + "{format_instructions}"

    return PromptTemplate(
        template=template,
        input_variables=["expression"],
        partial_variables={
            "format_instructions": parser.get_format_instructions(),
        },
    )

### List of expressions

In [None]:
# Load the Latin expressions to process, one per line of expressions.txt.
# NOTE(review): the file is assumed to be UTF-8, like the CSV files written
# further down — confirm.
with open('./expressions.txt', encoding="utf-8") as f:
    # Surrounding whitespace is trimmed and blank lines are dropped, so a
    # trailing newline at the end of the file does not yield an empty
    # expression.
    expressions = [line.strip() for line in f if line.strip()]

### Definition, translation and branch of philosophy

In [None]:
# Structured answer expected from the model for one Latin expression; it is
# handed to PydanticOutputParser as the target schema.
# NOTE(review): documented with `#` comments on purpose — a class docstring
# would presumably end up in the schema text sent to the model; verify before
# converting these comments into a docstring.
class PhiloExpression(BaseModel):
    # Literal English translation of the expression.
    translation : str
    # Explanation of the expression's meaning in philosophy.
    explanation : str
    # Branch of philosophy, restricted to the labels listed here.
    branch : Literal['Epistemology', 'Metaphysics', 'Logic', 'Aesthetics' , 'Ethics', 'Political Philosophy','Other']

In [None]:
# Task description for the first generation pass: translation, explanation and
# branch of philosophy for a single expression. The {expression} placeholder
# is filled in when the chain is invoked.
base_prompt = """You are an academic professor of philosophy. I will submit to you a Latin expression with a specific meaning in philosophy.
Please provide in output:

1) the literal English translation
2) a detailed explanation of its meaning in philosophy
3) the branch of philosophy it belongs to

This is the expression : {expression}
"""

# The parser supplies the format instructions appended to the prompt and turns
# the model's reply into a PhiloExpression instance.
parser = PydanticOutputParser(pydantic_object = PhiloExpression)
prompt = get_prompt(base_prompt, parser)
# Pipeline: fill the prompt, call the model, parse the reply.
chain = prompt | model | parser

In [None]:
result_file = './Latin-Philosophical-Expressions-ChatGPT.csv'
columns = ['id', 'latin_expression', 'concept', 'branch', 'translation_eng']

# Start from a file that holds only the header row; each result is appended
# as soon as it is available, so rows already written survive a later failure.
df = pd.DataFrame(columns=columns)
df.to_csv(result_file, encoding="utf-8", index=False)

for expression in expressions:
    # A blank entry (e.g. from an empty line in the input) has nothing to
    # look up, so it is skipped instead of being sent to the model.
    if not expression.strip():
        continue

    print(expression)

    try:
        result = chain.invoke({"expression": expression})
        new_row = {
            'id': generate_expression_id(expression),
            'latin_expression': expression,
            'concept': result.explanation,
            'branch': result.branch,
            'translation_eng': result.translation,
        }
        df = pd.DataFrame([new_row], columns=columns)
        df.to_csv(result_file, encoding="utf-8", mode='a', index=False, header=False)
    except Exception as error:
        # Best effort: report the failing expression together with the reason
        # and carry on with the next one. Catching Exception rather than
        # everything lets Ctrl-C (KeyboardInterrupt) stop the run.
        print("Error: ", expression, "-", repr(error))

### Interpretations by philosophers

In [None]:
# One interpretation of a Latin expression, as requested from the model.
class PhiloExpressionSense(BaseModel):
  # Philosopher the interpretation is connected to.
  associatedWith : str
  # Explanation of that interpretation.
  interpretation : str

# Wrapper holding every interpretation returned for one expression; used as
# the target schema for the interpretations parser.
class PhiloExpressionSenses(BaseModel):
  # The prompt asks for 2 to 5 entries, but no length limit is declared here.
  interpretations : list[PhiloExpressionSense]

# Task description for the second generation pass: several interpretations of
# one expression, each tied to a philosopher. The {expression} placeholder is
# filled in when the chain is invoked.
base_concept_prompt = """Given the following Latin philosophical expression: "{expression}",
    provide a list of different interpretations of the expression given by different philosophers or philosophical schools.
    The output should be a list of JSON objects where each element should have:
    - "associatedWith": the philosopher connected to a specific interpretation
    - "interpretation": a detailed explanation of that interpretation
    Provide at least 2 and at most 5 different interpretations."""

# These assignments rebind `parser`, `prompt` and `chain`, so from here on
# `chain` produces PhiloExpressionSenses objects instead of PhiloExpression.
parser = PydanticOutputParser(pydantic_object = PhiloExpressionSenses)
prompt = get_prompt(base_concept_prompt, parser)
# Pipeline: fill the prompt, call the model, parse the reply.
chain = prompt | model | parser

In [None]:
result_file = './Latin-Expressions-Interpretations-ChatGPT.csv'
columns = ['id', 'expression', 'associated_with', 'sense']

# Start from a file that holds only the header row; results are appended per
# expression, so rows already written survive a later failure.
df = pd.DataFrame(columns=columns)
df.to_csv(result_file, encoding="utf-8", index=False)

for expression in expressions:
    # A blank entry (e.g. from an empty line in the input) has nothing to
    # look up, so it is skipped instead of being sent to the model.
    if not expression.strip():
        continue

    print(expression)

    try:
        result = chain.invoke({"expression": expression})
        # The id depends only on the expression, so compute it once.
        expression_id = generate_expression_id(expression)
        rows = [
            {
                'id': expression_id,
                'expression': expression,
                'associated_with': sense.associatedWith,
                'sense': sense.interpretation,
            }
            for sense in result.interpretations
        ]
        # One row per interpretation, appended in a single write; nothing is
        # written when the model returned no interpretations.
        if rows:
            df = pd.DataFrame(rows, columns=columns)
            df.to_csv(result_file, encoding="utf-8", mode='a', index=False, header=False)
    except Exception as error:
        # Best effort: report the failing expression together with the reason
        # and carry on with the next one. Catching Exception rather than
        # everything lets Ctrl-C (KeyboardInterrupt) stop the run.
        print("Error: ", expression, "-", repr(error))