In [1]:
import pandas as pd
from transformers import TapexTokenizer, BartForConditionalGeneration
import pandas as pd

# Olympic host table, written one row per Games so each record reads
# left-to-right; it is transposed into column lists below.
# NOTE(review): figures (participants, GDP) look illustrative — confirm
# against a real source before relying on them.
_COLUMNS = (
    "Year",
    "City",
    "Country",
    "Season",
    "Participants",
    "Sports",
    "GDP_Country_BillionUSD",
)

_ROWS = [
    # Summer Games
    (1896, "Athens", "Greece", "Summer", 241, 9, 1.5),
    (1900, "Paris", "France", "Summer", 997, 19, 10.2),
    (1904, "St. Louis", "USA", "Summer", 651, 17, 30.0),
    (2004, "Athens", "Greece", "Summer", 10625, 28, 280.0),
    (2008, "Beijing", "China", "Summer", 10942, 28, 4600.0),
    (2012, "London", "UK", "Summer", 10568, 26, 2700.0),
    (2016, "Rio de Janeiro", "Brazil", "Summer", 11238, 28, 1700.0),
    (2020, "Tokyo", "Japan", "Summer", 11656, 33, 5000.0),
    (2024, "Paris", "France", "Summer", 10500, 32, 2900.0),
    # Winter Games
    (1924, "Chamonix", "France", "Winter", 258, 6, 10.0),
    (1928, "St. Moritz", "Switzerland", "Winter", 464, 8, 10.5),
    (1932, "Lake Placid", "USA", "Winter", 252, 7, 800.0),
    (1936, "Garmisch-Partenkirchen", "Germany", "Winter", 646, 8, 50.0),
    (1948, "St. Moritz", "Switzerland", "Winter", 669, 9, 100.0),
    (1952, "Oslo", "Norway", "Winter", 694, 8, 10.0),
]

# Column name -> list of cell values, in the row order given above.
data = {
    column: list(values)
    for column, values in zip(_COLUMNS, zip(*_ROWS))
}

# Benchmark questions for the table-QA model. Each spec is
# (question text, expected answer as a string, category label) and is
# expanded below into a dict with the keys "query", "expected", "type".
_QUERY_FIELDS = ("query", "expected", "type")

_QUERY_SPECS = [
    (
        "In which year did Beijing host the Olympic Games?",
        "2008",
        "simple lookup",
    ),
    (
        "Which city hosted the Olympics in 2016?",
        "Rio de Janeiro",
        "simple lookup",
    ),
    (
        "Which Winter Olympics had more than 600 participants?",
        # Garmisch-Partenkirchen (646), St. Moritz (669), Oslo (694)
        "1936, 1948, 1952",
        "filtering",
    ),
    (
        "List all Summer Olympics held in France",
        # Paris both times
        "1900, 2024",
        "filtering",
    ),
    (
        "What is the average number of participants in Winter Olympics?",
        # (258 + 464 + 252 + 646 + 669 + 694) / 6 ~= 497
        "497",
        "aggregation",
    ),
    (
        "Which Olympics had higher participation, Rio 2016 or London 2012?",
        # 11238 > 10568
        "Rio 2016",
        "comparison",
    ),
    (
        "Which country hosted both Summer and Winter Olympics?",
        # France: Paris / Chamonix; USA: St. Louis / Lake Placid
        "France, USA",
        "semantic",
    ),
    (
        "How many times has Paris hosted the Olympic Games?",
        # 1900 and 2024
        "2",
        "counting",
    ),
    (
        "What was the economic size (GDP) of China when Beijing hosted the Olympics?",
        # GDP value in the 2008 row
        "4600.0",
        "semantic lookup",
    ),
]

queries = [dict(zip(_QUERY_FIELDS, spec)) for spec in _QUERY_SPECS]

# Convert every cell to its string form before building the DataFrame.
# NOTE(review): presumably needed because the TAPEX tokenizer linearizes the
# table as text — confirm against the Transformers TAPEX documentation.
data = {column: list(map(str, values)) for column, values in data.items()}
table = pd.DataFrame.from_dict(data)

# Tokenizer and model are both loaded from the same pretrained checkpoint.
tokenizer = TapexTokenizer.from_pretrained("microsoft/tapex-large-finetuned-wtq")
model = BartForConditionalGeneration.from_pretrained("microsoft/tapex-large-finetuned-wtq")

# Put each question to the model and print its answer next to the expected
# one; no automatic comparison is made, the output is meant to be read.
for query in queries:
    question = query["query"]
    print(f"\nQuestion: {question}")
    print(f"Type:{query['type']}")

    # Encode the table together with the question, then generate and decode.
    encoding = tokenizer(table=table, query=question, return_tensors="pt")
    generated_ids = model.generate(**encoding)
    decoded = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
    answer = decoded[0]  # a single query yields a single decoded sequence

    print(f"Answer: {answer}")
    print(f"Expected:{query['expected']}")

  from .autonotebook import tqdm as notebook_tqdm



Question: In which year did Beijing host the Olympic Games?
Type:simple lookup
Answer:  2008
Expected:2008

Question: Which city hosted the Olympics in 2016?
Type:simple lookup
Answer:  rio de janeiro
Expected:Rio de Janeiro

Question: Which Winter Olympics had more than 600 participants?
Type:filtering
Answer:  1936, 1948, 1952
Expected:1936, 1948, 1952

Question: List all Summer Olympics held in France
Type:filtering
Answer:  1900, 2024
Expected:1900, 2024

Question: What is the average number of participants in Winter Olympics?
Type:aggregation
Answer:  669
Expected:497

Question: Which Olympics had higher participation, Rio 2016 or London 2012?
Type:comparison
Answer:  london 2012
Expected:Rio 2016

Question: Which country hosted both Summer and Winter Olympics?
Type:semantic
Answer:  usa
Expected:France, USA

Question: How many times has Paris hosted the Olympic Games?
Type:counting
Answer:  2
Expected:2

Question: What was the economic size (GDP) of China when Beijing hosted the