Information extraction: pull structured player statistics out of a passage of text, with the output format given by a typed data specification.

In [1]:
import minichain
from dataclasses import dataclass
from typing import List
from enum import Enum

Data specification

In [2]:
class StatType(Enum):
    """Kinds of statistic that a `Stat` record can hold."""

    POINTS = 1
    REBOUNDS = 2
    ASSISTS = 3

In [3]:
@dataclass
class Stat:
    """A single statistic: a numeric value tagged with its `StatType`."""

    value: int  # the number reported for this statistic
    stat: StatType  # which kind of statistic `value` measures

In [4]:
@dataclass
class Player:
    """One player together with the statistics recorded for them.

    NOTE(review): in the printed extraction result further down, the entries
    of `stats` appear as plain dicts with string values rather than `Stat`
    instances, so these annotations do not seem to be enforced when results
    are parsed -- confirm before relying on attribute access.
    """

    player: str  # player name
    stats: List[Stat]  # statistics attributed to this player

Code

In [5]:
class ExtractionPrompt(minichain.TypedTemplatePrompt):
    """Typed prompt that extracts `Player` records from a passage of text.

    Called with a dict holding the text under the "passage" key; the printed
    result of such a call in this notebook is a list of `Player` objects.
    """

    # Name of the template file for this prompt.
    # presumably looked up relative to the working directory -- TODO confirm
    template_file = "stats.pmpt.tpl"
    # Dataclass describing one output record.
    Out = Player

In [6]:
# Run the extraction prompt over the article stored in "sixers.txt".
with minichain.start_chain("stats") as backend:
    # Build the prompt on top of the chain's OpenAI backend (max_tokens=512).
    p = ExtractionPrompt(backend.OpenAI(max_tokens=512))
    # Read the article inside a context manager so the file handle is closed
    # deterministically instead of being left for the garbage collector.
    with open("sixers.txt") as article_file:
        article = article_file.read()
    # The article is handed to the prompt under the "passage" key.
    print(p({"passage": article}))

{'player': 'str', 'stats': {'_t_': 'list', 't': {'value': 'int', 'stat': {'POINTS': 1, 'REBOUNDS': 2, 'ASSISTS': 3}}}}


[Player(player='Joel Embiid', stats=[{'value': '35', 'stat': 'POINTS'}, {'value': '8', 'stat': 'REBOUNDS'}]), Player(player='James Harden', stats=[{'value': '29', 'stat': 'POINTS'}, {'value': '13', 'stat': 'ASSISTS'}]), Player(player='Georges Niang', stats=[{'value': '16', 'stat': 'POINTS'}]), Player(player='Julius Randle', stats=[{'value': '35', 'stat': 'POINTS'}])]


In [7]:
# Show the prompt for a small hand-written example input/output pair.
# "stats" is written as a JSON list because Player.stats is declared as
# List[Stat]; a bare object here would not match the data specification.
ExtractionPrompt().show({"passage": "Harden had 10 rebounds."},
                        '[{"player": "Harden", "stats": [{"value": 10, "stat": 2}]}]')

{'player': 'str', 'stats': {'_t_': 'list', 't': {'value': 'int', 'stat': {'POINTS': 1, 'REBOUNDS': 2, 'ASSISTS': 3}}}}
{'player': 'str', 'stats': {'_t_': 'list', 't': {'value': 'int', 'stat': {'POINTS': 1, 'REBOUNDS': 2, 'ASSISTS': 3}}}}
