In [1]:
from openai import OpenAI
import os
from dotenv import load_dotenv
from vega import Vega
import json

load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI()

In [2]:
ideation = "Lundgard and Satyanarayan (2022) identifies four levels of semantic content: L1 content enumerates aspects of the chart’s construction (e.g., axis ranges); L2 content reports summary statistics and relations (e.g., extrema); L3 content synthesizes perceptual and cognitive phenomena (e.g., complex trends); and, L4 content describes domain-specific insights (e.g., sociopolitical context).\n\nExample: [1] This is a scatter plot entitled “Born in 2016: Life Expectancy Gap by Gender and Income” that plots Women Life Expectancy at Birth (Years) by Men Life Expectancy at Birth (Years). The Women Life Expectancy at Birth is plotted on the vertical y-axis from 40 to 90 years. The Men Life Expectancy at Birth is plotted on the horizontal x-axis from 40 to 90 years. High Income Countries are plotted in dark green. Low Income Countries are plotted in light green. A 45 degree line from the origin represents Equal Life Expectancy. [2] For low in- come countries, the average life expectancy is 60 years for men and 65 years for women. For high income countries, the average life expectancy is 77 years for men and 82 years for women. [3] Overall, women have a slightly higher life expectancy than men. Women live around 5 to 10 years longer than men. The low income countries are more scattered than the high income countries. There is a visible gap between high and low income countries, indicated by the Income-Age Divide line. [4] People living in low-income countries tend to have a lower life expectancy than the people living in high-income countries, likely due to many societal factors, including access to healthcare, food, other resources, and overall quality of life. People who live in lower income countries are more likely to\n\nCome up with a scenario where a chart might be displayed, and generate its four levels of semantic content captions."
generation = "Carefully generate a vega visualization based on the description and captions. Make sure to be faithful to the description. Also use data that could plausibly be found in real-world contexts. Only output the vega visualization and nothing else, in plain text."

def get_caption():
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "developer", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": ideation
            }
        ],
        temperature=0.5,
    )
    return completion.choices[0].message.content

def get_vega(caption):
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "developer", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": ideation
            },
            {
                "role": "assistant",
                "content": caption
            },
            {
                "role": "user",
                "content": generation
            }
        ],
        temperature=0.5
    )
    return completion.choices[0].message.content

def get_chart(vega):
    vega_spec = json.loads(vega)
    Vega(vega_spec)

In [3]:
caption = get_caption()
print(caption)
vega = get_vega(caption)
print(vega)
get_chart(vega)

**Scenario:** A bar chart titled “Annual CO2 Emissions by Country (2022)” is displayed in a climate change conference. The chart compares the total CO2 emissions (in million metric tons) of various countries, highlighting the top 10 emitters.

**L1 Content:** This is a bar chart entitled “Annual CO2 Emissions by Country (2022)” that displays the total CO2 emissions in million metric tons for selected countries. The vertical y-axis represents CO2 emissions ranging from 0 to 5,000 million metric tons, while the horizontal x-axis lists the countries. Each bar represents a different country, with colors differentiating between developed and developing nations. The chart includes a legend indicating that blue bars represent developed countries and green bars represent developing countries.

**L2 Content:** In 2022, China emitted the highest CO2 emissions at approximately 2,500 million metric tons, followed by the United States with around 1,200 million metric tons. Other significant emitter

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [4]:
get_chart(vega[8:-4])

In [5]:
vega_spec = json.loads(vega[8:-4])
print(vega_spec)
Vega(vega_spec)

{'$schema': 'https://vega.github.io/schema/vega/v5.json', 'width': 500, 'height': 300, 'padding': 5, 'data': [{'name': 'table', 'values': [{'country': 'China', 'emissions': 2500, 'type': 'Developing'}, {'country': 'United States', 'emissions': 1200, 'type': 'Developed'}, {'country': 'India', 'emissions': 800, 'type': 'Developing'}, {'country': 'Russia', 'emissions': 500, 'type': 'Developed'}, {'country': 'Japan', 'emissions': 400, 'type': 'Developed'}, {'country': 'Germany', 'emissions': 300, 'type': 'Developed'}, {'country': 'Brazil', 'emissions': 250, 'type': 'Developing'}, {'country': 'Indonesia', 'emissions': 200, 'type': 'Developing'}, {'country': 'Canada', 'emissions': 150, 'type': 'Developed'}, {'country': 'Mexico', 'emissions': 100, 'type': 'Developing'}]}], 'scales': [{'name': 'xscale', 'type': 'band', 'domain': {'data': 'table', 'field': 'country'}, 'range': 'width', 'padding': 0.1}, {'name': 'yscale', 'domain': [0, 5000], 'nice': True, 'range': 'height'}, {'name': 'color', '

<vega.vega.Vega at 0x7f6f08fbbdc0>

In [8]:
%%javascript
if (typeof require === "undefined") {
    window.require = function(moduleName) {
        return window[moduleName];
    };
}

<IPython.core.display.Javascript object>

In [9]:
Vega({
  "$schema": "https://vega.github.io/schema/vega/v5.json",
  "width": 400,
  "height": 200,
  "padding": 5,

  "data": [
    {
      "name": "table",
      "values": [
        {"category": "A", "amount": 28},
        {"category": "B", "amount": 55},
        {"category": "C", "amount": 43},
        {"category": "D", "amount": 91},
        {"category": "E", "amount": 81},
        {"category": "F", "amount": 53},
        {"category": "G", "amount": 19},
        {"category": "H", "amount": 87}
      ]
    }
  ],

  "signals": [
    {
      "name": "tooltip",
      "value": {},
      "on": [
        {"events": "rect:mouseover", "update": "datum"},
        {"events": "rect:mouseout",  "update": "{}"}
      ]
    }
  ],

  "scales": [
    {
      "name": "xscale",
      "type": "band",
      "domain": {"data": "table", "field": "category"},
      "range": "width",
      "padding": 0.05,
      "round": True
    },
    {
      "name": "yscale",
      "domain": {"data": "table", "field": "amount"},
      "nice": True,
      "range": "height"
    }
  ],

  "axes": [
    { "orient": "bottom", "scale": "xscale" },
    { "orient": "left", "scale": "yscale" }
  ],

  "marks": [
    {
      "type": "rect",
      "from": {"data":"table"},
      "encode": {
        "enter": {
          "x": {"scale": "xscale", "field": "category"},
          "width": {"scale": "xscale", "band": 1},
          "y": {"scale": "yscale", "field": "amount"},
          "y2": {"scale": "yscale", "value": 0}
        },
        "update": {
          "fill": {"value": "steelblue"}
        },
        "hover": {
          "fill": {"value": "red"}
        }
      }
    },
    {
      "type": "text",
      "encode": {
        "enter": {
          "align": {"value": "center"},
          "baseline": {"value": "bottom"},
          "fill": {"value": "#333"}
        },
        "update": {
          "x": {"scale": "xscale", "signal": "tooltip.category", "band": 0.5},
          "y": {"scale": "yscale", "signal": "tooltip.amount", "offset": -2},
          "text": {"signal": "tooltip.amount"},
          "fillOpacity": [
            {"test": "datum === tooltip", "value": 0},
            {"value": 1}
          ]
        }
      }
    }
  ]
})

<vega.vega.Vega at 0x7f6f08deb460>