# Explore how to change prompts

In [6]:
#%%
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [7]:
from pathlib import Path

import folktexts
from folktexts import prompting

#load the dataset
from folktexts.acs.acs_tasks import ACSTaskMetadata
from folktexts.acs.acs_dataset import ACSDataset

folktexts.__version__


'0.0.22'

## Load ACS Data

In [8]:
# Names of all ACS prediction tasks.
ACS_TASKS = (
    "ACSIncome", "ACSEmployment", "ACSMobility",
    "ACSTravelTime", "ACSPublicCoverage",
)

# Directory handed to the dataset loader as its cache location.
data_dir = Path("./data")

Pre-defined task:

In [9]:
# Select the first entry of ACS_TASKS; `task_name` and `acs_task` are reused by later cells.
task_name = ACS_TASKS[0]
# folktables tasks are created via ACSTaskMetadata.make_folktables_task()
acs_task = ACSTaskMetadata.get_task(task_name)
acs_task

ACSTaskMetadata(name='ACSIncome', description="predict whether an individual's income is above $50,000", features=['AGEP', 'COW', 'SCHL', 'MAR', 'OCCP', 'POBP', 'RELP', 'WKHP', 'SEX', 'RAC1P'], target='PINCP', cols_to_text={'AGEP': <folktexts.col_to_text.ColumnToText object at 0x117322250>, 'COW': <folktexts.col_to_text.ColumnToText object at 0x117323110>, 'SCHL': <folktexts.col_to_text.ColumnToText object at 0x117323810>, 'MAR': <folktexts.col_to_text.ColumnToText object at 0x117338510>, 'OCCP': <folktexts.col_to_text.ColumnToText object at 0x117338810>, 'POBP': <folktexts.col_to_text.ColumnToText object at 0x117338850>, 'RELP': <folktexts.col_to_text.ColumnToText object at 0x117338890>, 'WKHP': <folktexts.col_to_text.ColumnToText object at 0x117339210>, 'SEX': <folktexts.col_to_text.ColumnToText object at 0x117339250>, 'RAC1P': <folktexts.col_to_text.ColumnToText object at 0x117339410>, 'PINCP': <folktexts.col_to_text.ColumnToText object at 0x117339950>, 'PINCP>50000': <folktexts.col

**Customize a task**

```
def make_task(
        cls,
        name: str,
        description: str,
        features: list[str],
        target: str,
        sensitive_attribute: str = None,
        target_threshold: Threshold = None,
        population_description: str = None,
        folktables_obj: BasicProblem = None,
        multiple_choice_qa: MultipleChoiceQA = None,
        direct_numeric_qa: DirectNumericQA = None,
    ) -> ACSTaskMetadata:
```

- possible to change the target column, the corresponding threshold and the question
- possible to change the sensitive attribute
- possible to change the features used


**Load the ACS data**

In [10]:
# Start from a copy of the benchmark's dataset settings so the class attribute is not mutated.
acs_dataset_configs = folktexts.benchmark.Benchmark.ACS_DATASET_CONFIGS.copy()

dataset = ACSDataset.make_from_task(task_name, cache_dir=data_dir, **acs_dataset_configs)

X_train, y_train = dataset.get_train()
X_test, y_test = dataset.get_test()

# Sensitive-attribute values restricted to the test rows, or None when the task defines none.
s_test = (
    dataset.get_sensitive_attribute_data().loc[y_test.index]
    if dataset.task.sensitive_attribute is not None
    else None
)

Loading ACS data...


All features are currently stored as `ColumnToText` objects:

In [11]:
import folktexts.acs.acs_columns as acs_cols
from folktexts.col_to_text import ColumnToText as _ColumnToText

# Collect the name of every ColumnToText instance found in the acs_columns module namespace.
mapper_names = [
    obj.name
    for obj in vars(acs_cols).values()
    if isinstance(obj, _ColumnToText)
]
# Each name is followed by ", " and no newline is emitted, matching a per-item print(..., end=', ').
print("".join(str(name) + ', ' for name in mapper_names), end='')

AGEP, COW, SCHL, MAR, OCCP, POBP, RELP, WKHP, SEX, RAC1P, PINCP, PINCP>50000, PUBCOV, PUBCOV==1, DIS, ESP, CIT, MIG, MIG!=1, MIL, ANC, NATIVITY, DEAR, DEYE, DREM, ESR, ESR==1, ST, FER, JWMNP, JWMNP>20, JWTR, POVPIP, POVPIP<250, GCL, PUMA, POWPUMA, HINS2, HINS2==1, 

Each `ColumnToText` object is constructed with the following signature:

In [12]:
from folktexts.acs.acs_tasks import acs_columns_map
from inspect import signature
# Show the constructor parameters of the 'AGEP' column mapper.
signature(acs_columns_map['AGEP'].__init__)

<Signature (name: 'str', short_description: 'str', value_map: 'dict[object, str] | Callable' = None, question: 'QAInterface' = None, connector_verb: 'str' = 'is:', verbalize: 'Callable' = None, missing_value_fill: 'str' = 'N/A', use_value_map_only: 'bool' = False)>

In [13]:
# `_connector_verb` is a private attribute (leading underscore); it is read here for exploration only.
acs_columns_map['AGEP']._connector_verb

'is:'

## Prompt Construction

The prompt is composed of:
- [system prompt, if used for chat-based prompting]
- a task description
- a textual representation of the row in the data set
- a question (multiple choice or direct numeric)

In [14]:
# First training row; used as the running example in the cells below.
example_row = X_train.iloc[0]

### Task Description

In [15]:
# Task descriptions for the single-respondent prompt and for the few-shot prompt.
prompting.ACS_TASK_DESCRIPTION, prompting.ACS_FEW_SHOT_TASK_DESCRIPTION
# NOTE: the survey year (2018) is hard-coded in both description strings.

('The following data corresponds to a survey respondent. The survey was conducted among US residents in 2018. Please answer the question based on the information provided. The data provided is enough to reach an approximate answer.\n',
 'The following data corresponds to different survey respondents. The survey was conducted among US residents in 2018. Please answer each question based on the information provided. The data provided is enough to reach an approximate answer for each person.\n')

### Row Encoding

In [16]:
# Textual encoding of the example row: one "- ..." line per task feature.
print(acs_task.get_row_description(example_row))

- The age is: 53 years old.
- The class of worker is: Owner of non-incorporated business, professional practice, or farm.
- The highest educational attainment is: Bachelor's degree.
- The marital status is: Married.
- The occupation is: Musicians and singers.
- The place of birth is: New York.
- The relationship to the reference person in the survey is: The reference person itself.
- The usual number of hours worked per week is: 20 hours.
- The sex is: Male.
- The race is: White.


```
def get_row_description(self, row: pd.Series) -> str:
        """Encode a description of a given data row in textual form."""
        row = row[self.features]
        return (
            "\n".join(
                "- " + self.cols_to_text[col].get_text(val)
                for col, val in row.items()
            )
        )
```

In [17]:
# Text produced for a single AGEP value (passed here as the string '44').
acs_task.cols_to_text['AGEP'].get_text('44')

'The age is: 44 years old.'

```
def get_text(self, value: object) -> str:
        """Returns the natural text representation of the given data value."""
        if self._use_value_map_only:
            return self[value]
        return f"The {self.short_description} {self._connector_verb} {self[value]}."
```

In [18]:
# Private attribute holding the feature's short description; read for exploration only.
acs_task.cols_to_text['AGEP']._short_description

'age'

In [19]:
# Private attribute holding the connector placed between description and value; read for exploration only.
acs_task.cols_to_text['AGEP']._connector_verb

'is:'

In [20]:
# For AGEP the value map is callable: it turns a raw value into its text form.
acs_task.cols_to_text['AGEP'].value_map('53')

'53 years old'

### Question

Types of Questions:

In [21]:
# The task's multiple-choice question: its text, number of forward passes, and answer choices.
acs_task.question.text, acs_task.question.num_forward_passes, acs_task.question.choices

("What is this person's estimated yearly income?",
 1,
 (Choice(text='Below $50,000', data_value=0, numeric_value=None),
  Choice(text='Above $50,000', data_value=1, numeric_value=None)))

In [22]:
# The task's direct numeric question: its text and number of forward passes.
acs_task.direct_numeric_qa.text, acs_task.direct_numeric_qa.num_forward_passes

("What is the probability that this person's estimated yearly income is above $50,000 ?",
 2)

### Complete Prompt

Complete zero-shot prompt
- using task.get_row_description
- using question.get_question_prompt

In [28]:
# Options for the zero-shot prompt: bullet list with "=" between feature and value.
# (`prompt_full_sentence=True` is intentionally not passed here.)
prompt_options = dict(
    question=None,
    custom_prompt_prefix=None,
    add_task_description=True,
    prompt_connector='=',
    prompt_style='bullet',
)

prompt = prompting.encode_row_prompt(row=example_row, task=acs_task, **prompt_options)

print(prompt)

get_row_description bullet =
The following data corresponds to a survey respondent. The survey was conducted among US residents in 2018. Please answer the question based on the information provided. The data provided is enough to reach an approximate answer.

Information:
- age = 53 years old
- class of worker = Owner of non-incorporated business, professional practice, or farm
- highest educational attainment = Bachelor's degree
- marital status = Married
- occupation = Musicians and singers
- place of birth = New York
- relationship to the reference person in the survey = The reference person itself
- usual number of hours worked per week = 20 hours
- sex = Male
- race = White

Question: What is this person's estimated yearly income?
A. Below $50,000.
B. Above $50,000.
Answer:


In [24]:
# Display the bound method without calling it; its repr shows the owning MultipleChoiceQA and its fields.
acs_task.question.get_answer_from_model_output

<bound method MultipleChoiceQA.get_answer_from_model_output of MultipleChoiceQA(column='PINCP>50000', text="What is this person's estimated yearly income?", num_forward_passes=1, choices=(Choice(text='Below $50,000', data_value=0, numeric_value=None), Choice(text='Above $50,000', data_value=1, numeric_value=None)), _answer_keys_source=('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'))>

## Prompt LLM

```bash
python -m folktexts.cli.launch_experiments_htcondor --executable-path ./folktexts/cli/run_acs_benchmark.py --results-dir ./results/vary-seeds --task ACSMobility --model google/gemma-2-27b-it seed=457
```

In [65]:
from folktexts.benchmark import BenchmarkConfig
from folktexts.benchmark import Benchmark


# Defaults passed to BenchmarkConfig in the next cell (batch_size, context_size, seed).
DEFAULT_BATCH_SIZE = 16
DEFAULT_CONTEXT_SIZE = 600
DEFAULT_SEED = 42

In [66]:
# Zero-shot configuration (few_shot=False) without numeric risk prompting.
config = BenchmarkConfig(
    # Prompting mode
    numeric_risk_prompting=False,
    few_shot=False,
    reuse_few_shot_examples=False,
    correct_order_bias=False,
    # Data selection
    feature_subset=None,
    population_filter=None,
    # Execution settings
    batch_size=DEFAULT_BATCH_SIZE,
    context_size=DEFAULT_CONTEXT_SIZE,
    seed=DEFAULT_SEED,
)

In [67]:
# Show the full config, including the fields that were left at their defaults.
config

BenchmarkConfig(numeric_risk_prompting=False, few_shot=False, reuse_few_shot_examples=False, batch_size=16, context_size=600, correct_order_bias=False, feature_subset=None, population_filter=None, seed=42, randomize_feature_order=False, prompt_style='bullet', prompt_connector='is', prompt_full_sentence=False)

In [68]:
from folktexts.llm_utils import load_model_tokenizer
# Load the model and tokenizer for 'gpt2'.
# NOTE(review): presumably chosen as a small model for a quick local test — confirm.
model, tokenizer = load_model_tokenizer('gpt2')



In [89]:
# Build the benchmark for the same task and data directory used when loading the
# dataset, so that `example_row` / `acs_task` cannot drift out of sync with `bench`
# when `task_name` or `data_dir` is changed earlier in the notebook.
bench = Benchmark.make_acs_benchmark(
    task_name=task_name,
    model=model,
    tokenizer=tokenizer,
    data_dir=data_dir,
    config=config,
    subsampling=0.01,
    max_api_rpm=False,
    # Override the config's prompt settings: full-sentence text instead of bullets.
    prompt_style='text',
    prompt_full_sentence=True,
)



Loading ACS data...
Using zero-shot prompting.


In [90]:
# Check that the prompt-style overrides ended up in the benchmark's config.
bench.config

BenchmarkConfig(numeric_risk_prompting=False, few_shot=False, reuse_few_shot_examples=False, batch_size=16, context_size=600, correct_order_bias=False, feature_subset=None, population_filter=None, seed=42, randomize_feature_order=False, prompt_style='text', prompt_connector='is', prompt_full_sentence=True)

In [91]:
# Copy the example row and blank out WKHP to simulate a missing value.
# The None is stored as NaN in this float64 Series (see the displayed output).
corrupted_row = example_row.copy()
corrupted_row['WKHP'] = None
corrupted_row

AGEP       53.0
COW         6.0
SCHL       21.0
MAR         1.0
OCCP     2752.0
POBP       36.0
RELP        0.0
WKHP        NaN
SEX         1.0
RAC1P       1.0
Name: 1080377, dtype: float64

In [92]:
# Encoding still succeeds when a feature value is missing (WKHP is NaN in this row).
print(bench.llm_clf.encode_row(corrupted_row))

The following data corresponds to a survey respondent. The survey was conducted among US residents in 2018. Please answer the question based on the information provided. The data provided is enough to reach an approximate answer.

Information:
The person is 53 years old. The person is the owner of non-incorporated business, professional practice, or farm. The highest eductational attainment of the person is the Bachelor's degree. The person is married. The person's occupation is 'Musicians and singers'. The person is born in New York. Relative to the reference person in the survey, the person is the reference person itself. The person is less that 16 years old or did not work during the past 12 months. The person identifies as male. The person identifies as White. 

Question: What is this person's estimated yearly income?
A. Below $50,000.
B. Above $50,000.
Answer:


In [84]:
# Run the benchmark with ./results/test as the results root directory.
# NOTE(review): `fit_threshold=0` presumably disables threshold fitting — confirm against `Benchmark.run`.
bench.run(results_root_dir='./results/test', fit_threshold=0)

Computing risk estimates:   0%|          | 0/105 [00:00<?, ?it/s]



{'threshold': 0.5,
 'n_samples': 1665,
 'n_positives': 605,
 'n_negatives': 1060,
 'model_name': 'gpt2',
 'accuracy': 0.6366366366366366,
 'tpr': 0.0,
 'fnr': 1.0,
 'fpr': 0.0,
 'tnr': 1.0,
 'balanced_accuracy': 0.5,
 'precision': 0,
 'ppr': 0.0,
 'num_samples': 1665,
 'num_positives': 605,
 'num_negatives': 1060,
 'num_pred_positives': 0,
 'num_pred_negatives': 1665,
 'log_loss': 1.156190266337441,
 'brier_score_loss': 0.33261174529872095,
 'fnr_ratio': 1.0,
 'fnr_diff': 0.0,
 'ppr_ratio': 0,
 'ppr_diff': 0.0,
 'precision_ratio': 0,
 'precision_diff': 0,
 'tnr_ratio': 1.0,
 'tnr_diff': 0.0,
 'tpr_ratio': 0,
 'tpr_diff': 0.0,
 'balanced_accuracy_ratio': 1.0,
 'balanced_accuracy_diff': 0.0,
 'accuracy_ratio': 0.694110576923077,
 'accuracy_diff': 0.233058608058608,
 'fpr_ratio': 0,
 'fpr_diff': 0.0,
 'equalized_odds_ratio': 0,
 'equalized_odds_diff': 0.0,
 'accuracy_group=1': 0.6187161639597835,
 'tpr_group=1': 0.0,
 'fnr_group=1': 1.0,
 'fpr_group=1': 0.0,
 'tnr_group=1': 1.0,
 'balance

In [None]:
import pprint

# Pretty-print `bench.results` with a 4-space indent and dictionary keys in sorted order.
results_printer = pprint.PrettyPrinter(indent=4, sort_dicts=True)
results_printer.pprint(bench.results)

## Testing code snippets

In [1]:
from folktexts.task import TaskMetadata
from folktexts.qa_interface import MultipleChoiceQA, Choice
from folktexts.col_to_text import ColumnToText

In [2]:
# Two answer options; the second positional argument of Choice is the associated data value.
yes_no_choices = (
    Choice("Yes", 0),
    Choice("No", 1),
)

# Minimal multiple-choice question over the 'target' column.
question = MultipleChoiceQA(
    column='target',
    text='What is the answer?',
    choices=yes_no_choices,
)

In [3]:
# Minimal task used to try out the TaskMetadata constructor.
# Note that `cols_to_text` holds plain strings here, not ColumnToText objects.
test_task = TaskMetadata(
    name="task name",
    description="this is a test task",
    features=['t1', 't2'],
    target='target',
    cols_to_text={
        't1': 'test 1',
        't2': 'test 2',
    },
    multiple_choice_qa=question,
)

In [5]:
# Raw value -> text used for the example column.
t1_value_map = {0: 'val0', 1: 'val1'}

# Example column mapper with a custom connector and a custom verbalizer.
col2text = ColumnToText(
    name='t1',
    short_description="test variable 1",
    value_map=t1_value_map,
    question=question,
    connector_verb="=",
    verbalize=lambda x: f"This is a sentence with {x}",
)

In [41]:
# Prefix every item with "- " and print one item per line.
print("\n".join("- " + item for item in ['a', 'b', 'c']))

- a
- b
- c


In [68]:
style = 'comma'

# How each supported style decorates a single snippet:
#   "comma"  -> snippet followed by ", "
#   "bullet" -> snippet on its own "- " line
#   "text"   -> snippet unchanged (full_sentence and style == 'text')
STRUCTURE_BY_STYLE = {
    "text": lambda s: s,
    "bullet": lambda s: "\n- " + s,
    "comma": lambda s: s + ", ",
}
assert style in STRUCTURE_BY_STYLE
apply_structure = STRUCTURE_BY_STYLE[style]

text_snippets = [apply_structure(letter) for letter in ['a', 'b', 'c']]
text_snippets

['a, ', 'b, ', 'c, ']

In [69]:
# Concatenate the styled snippets into a single string.
text = "".join(text_snippets)
print(text)

a, b, c, 


In [67]:
# Drop one leading newline (as produced by the bullet style) ...
text = text[1:] if text.startswith('\n') else text
# ... and one trailing ", " separator (as produced by the comma style).
text = text[:-2] if text.endswith(', ') else text

print(text)

a, b, c


In [66]:
# Last character of `text`.
# NOTE(review): the recorded output (' ') comes from execution count 66, i.e. before
# the trimming cell (execution count 67) ran; re-run in order to refresh it.
text[-1]

' '