In [1]:
import json
import pandas as pd
import tiktoken

from discovery_child_development import PROJECT_DIR, config
from discovery_child_development.getters import openalex as oa
from discovery_child_development.getters import taxonomy
from discovery_child_development.utils.openai_utils import client, print_prompt, MessageTemplate
from discovery_child_development.notebooks.labelling.prodigy import utils

pd.set_option('max_colwidth', 400)

MODEL = "gpt-3.5-turbo-1106"
MODEL_COST = 0.001 # based on https://openai.com/blog/new-models-and-developer-products-announced-at-devday
SEED = config["seed"]
SAMPLE_SIZE = 20
NO_CONCEPT_SAMPLE_SIZE = SAMPLE_SIZE * 10

PATH_TO_PROMPTS = (
    PROJECT_DIR / "discovery_child_development/notebooks/labelling/prompts/taxonomy"
)
PATH_USER = PATH_TO_PROMPTS / "user.json"
PATH_SYSTEM = PATH_TO_PROMPTS / "system.json"
PATH_FUNCTION = PATH_TO_PROMPTS / "function.json"

encoding = tiktoken.encoding_for_model(MODEL)

2023-12-08 13:32:40,662 - botocore.credentials - INFO - Found credentials in environment variables.


In [2]:
def clean_openalex_id(df, column_name='id'):
  """Cleans the OpenAlex ID to remove the prefix"""
  df[column_name] = df[column_name].str.extract(r'/(W\d+)$')
  return df

def sample_per_category(group, sample_size = SAMPLE_SIZE):
    return group.sample(min(sample_size, len(group)), random_state=SEED)

# these functions came from: https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb

def num_tokens_from_string(string: str, encoding=encoding):
  return len(encoding.encode(string))

def num_tokens_from_messages(messages, model=MODEL):
    """Return the number of tokens used by a list of messages."""
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        print("Warning: model not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")
    if model in {
        "gpt-3.5-turbo-0613",
        "gpt-3.5-turbo-16k-0613",
        "gpt-4-0314",
        "gpt-4-32k-0314",
        "gpt-4-0613",
        "gpt-4-32k-0613",
        }:
        tokens_per_message = 3
        tokens_per_name = 1
    elif model == "gpt-3.5-turbo-0301":
        tokens_per_message = 4  # every message follows <|start|>{role/name}\n{content}<|end|>\n
        tokens_per_name = -1  # if there's a name, the role is omitted
    elif "gpt-3.5-turbo" in model:
        print("Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613.")
        return num_tokens_from_messages(messages, model="gpt-3.5-turbo-0613")
    elif "gpt-4" in model:
        print("Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613.")
        return num_tokens_from_messages(messages, model="gpt-4-0613")
    else:
        raise NotImplementedError(
            f"""num_tokens_from_messages() is not implemented for model {model}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."""
        )
    num_tokens = 0
    for message in messages:
        num_tokens += tokens_per_message
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>
    return num_tokens

## Prepare categories

In [3]:
categories_flat = utils.load_categories()

## Prepare data

In [4]:
openalex_data = oa.get_abstracts()
openalex_data = clean_openalex_id(openalex_data, 'id')
openalex_data.head()

Unnamed: 0,id,title,abstract,text
0,W4249228678,REPRINT OF: Relationship of Childhood Abuse and Household Dysfunction to Many of the Leading Causes of Death in Adults: The Adverse Childhood Experiences (ACE) Study,"Background The relationship of health risk behavior and disease in adulthood to the breadth of exposure to childhood emotional, physical, or sexual abuse, and household dysfunction during childhood has not previously been described. Methods A questionnaire about adverse childhood experiences was mailed to 13,494 adults who had completed a standardized medical evaluation at a large HMO; 9,508 (...","REPRINT OF: Relationship of Childhood Abuse and Household Dysfunction to Many of the Leading Causes of Death in Adults: The Adverse Childhood Experiences (ACE) Study. Background The relationship of health risk behavior and disease in adulthood to the breadth of exposure to childhood emotional, physical, or sexual abuse, and household dysfunction during childhood has not previously been describ..."
1,W2996407267,Childhood Adversity and Neural Development: A Systematic Review,An extensive literature on childhood adversity and neurodevelopment has emerged over the past decade. We evaluate two conceptual models of adversity and neurodevelopment—the dimensional model of adversity and stress acceleration model—in a systematic review of 109 studies using MRI-based measures of neural structure and function in children and adolescents. Consistent with the dimensional mode...,Childhood Adversity and Neural Development: A Systematic Review. An extensive literature on childhood adversity and neurodevelopment has emerged over the past decade. We evaluate two conceptual models of adversity and neurodevelopment—the dimensional model of adversity and stress acceleration model—in a systematic review of 109 studies using MRI-based measures of neural structure and function ...
2,W2911956244,Association Between Screen Time and Children’s Performance on a Developmental Screening Test,"Excessive screen time is associated with delays in development; however, it is unclear if greater screen time predicts lower performance scores on developmental screening tests or if children with poor developmental performance receive added screen time as a way to modulate challenging behavior.To assess the directional association between screen time and child development in a population of m...","Association Between Screen Time and Children’s Performance on a Developmental Screening Test. Excessive screen time is associated with delays in development; however, it is unclear if greater screen time predicts lower performance scores on developmental screening tests or if children with poor developmental performance receive added screen time as a way to modulate challenging behavior.To ass..."
3,W2961245124,Delayed childhood neurodevelopment and neurosensory alterations in the second year of life in a prospective cohort of ZIKV-exposed children,"We report neurodevelopmental outcomes in 216 infants followed since the time of PCR-confirmed maternal Zika virus (ZIKV) infection in pregnancy during the Rio de Janeiro epidemic of 2015-2016 (refs. 1,2). Neurodevelopment was assessed by Bayley Scales of Infant and Toddler Development, third edition (Bayley-III; cognitive, language and motor domains) in 146 children and through neurodevelopmen...","Delayed childhood neurodevelopment and neurosensory alterations in the second year of life in a prospective cohort of ZIKV-exposed children. We report neurodevelopmental outcomes in 216 infants followed since the time of PCR-confirmed maternal Zika virus (ZIKV) infection in pregnancy during the Rio de Janeiro epidemic of 2015-2016 (refs. 1,2). Neurodevelopment was assessed by Bayley Scales of ..."
4,W2915674157,Prenatal Developmental Origins of Future Psychopathology: Mechanisms and Pathways,The developmental origins of health and disease hypothesis applied to neurodevelopmental outcomes asserts that the fetal origins of future development are relevant to mental health. There is a third pathway for the familial inheritance of risk for psychiatric illness beyond shared genes and the quality of parental care: the impact of pregnant women's distress—defined broadly to include perceiv...,Prenatal Developmental Origins of Future Psychopathology: Mechanisms and Pathways. The developmental origins of health and disease hypothesis applied to neurodevelopmental outcomes asserts that the fetal origins of future development are relevant to mental health. There is a third pathway for the familial inheritance of risk for psychiatric illness beyond shared genes and the quality of parent...


In [5]:
test_data, _ = oa.get_labelled_data(score_threshold=0.0, train=False)
test_data = clean_openalex_id(test_data, 'openalex_id')
test_ids = test_data['openalex_id'].unique()
test_ids[0:10]

array(['W2915674157', 'W2953569589', 'W2975483058', 'W3003584905',
       'W2954041294', 'W2922956615', 'W2941240728', 'W2919110057',
       'W2908833839', 'W2909371360'], dtype=object)

In [6]:
openalex_data_subset = openalex_data[~openalex_data['id'].isin(test_ids)]
len(openalex_data) - len(openalex_data_subset)

3931

## Construct a data sample

In [7]:
openalex_concepts = oa.get_concepts_metadata()
openalex_concepts = clean_openalex_id(openalex_concepts, 'openalex_id')
openalex_concepts = openalex_concepts[~openalex_concepts['openalex_id'].isin(test_ids)]

In [8]:
taxonomy_data = taxonomy.get_taxonomy()

taxonomy_concept_ids = taxonomy_data["concept_id"].unique()

openalex_concepts_subset = openalex_concepts[
        openalex_concepts["concept_id"].isin(taxonomy_concept_ids)
    ].copy()

In [9]:
# merge taxonomy
openalex_concepts_subset = pd.merge(
        openalex_concepts_subset,
        taxonomy_data[["sub_category", "concept_id"]],
        how="left",
        on="concept_id",
    )

In [10]:
openalex_data_merged = (
        openalex_concepts_subset[
            [
                "openalex_id",
                "concept_id",
                "sub_category",
                "display_name",
                "level",
                "score",
            ]
        ]
        .merge(
            openalex_data[["id", "text"]],
            left_on="openalex_id",
            right_on="id",
            how="outer",
        )
    )

In [11]:
openalex_data_no_concepts = openalex_data_merged[openalex_data_merged['concept_id'].isna()].sample(n=NO_CONCEPT_SAMPLE_SIZE, random_state=SEED)
openalex_data_no_concepts.head()

Unnamed: 0,openalex_id,concept_id,sub_category,display_name,level,score,id,text
217353,,,,,,,W4306391565,Investigate Chinese Immigrant Parent’s Perceptions of Their Children’s Literacy and Numeracy Acquisition and Development in Australia. Literacy and numeracy are the fundamental elements that need to be developed in early childhood to enhance children's skills and future personal attainment. This research study focuses on investigating how Chinese immigrant parents think of conducting literacy ...
219038,,,,,,,W4237217949,"Prosperity Theology and African Traditional Religion. Studies of Prosperity Theology in Africa have increased as research into Pentecostalism has burgeoned, but few theological analyses have explored the significance of African Traditional Religions and their role in shaping Prosperity Theology. While some studies have explored the resonance of Prosperity Theology and African Traditional Relig..."
216623,,,,,,,W3133761906,PARENTAL INVOLVEMENT IN CHILDREN'S LEARNING ACTIVITIES DURING THE COVID-19 PANDEMIC. This research aims to find out parental involvement or parental involvement in early childhood learning activities during the Covid 19 pandemic. Early childhood is still in desperate need of mentoring during the learning process from home and therefore the role of parents becomes very important because it has ...
220390,,,,,,,W4205752608,JSE 35:4 Winter 2021 Whole Issue PDF. Journal of Scientific Exploration Whole Issue PDF
216426,,,,,,,W3161410937,"Developing Scales Based On Montessori Method AS A Learning Media Of Mathematics For Primary School. This study aims to develop, analyze the feasibility and effectiveness of learning with scales media based on Montessori methods. The type of this study was Research and Development with ADDIE development model Analysis, Design, Development, Implementation, Evaluation. The techniques of data coll..."


In [12]:
openalex_sample = openalex_data_merged.groupby('sub_category', group_keys=False).apply(sample_per_category)
openalex_sample = openalex_sample.rename(columns={'sub_category': 'label'})

In [13]:
openalex_sample = openalex_sample[['id', 'text', "label"]].groupby(["id", "text"])["label"].agg(lambda x: list(set(x))).reset_index()

In [14]:
openalex_data_no_concepts = openalex_data_no_concepts.rename(columns={'sub_category': 'label'})
openalex_data_no_concepts = openalex_data_no_concepts[['id', 'text', "label"]].groupby(["id", "text"])["label"].agg(lambda x: list(set(x))).reset_index()

In [15]:
openalex_sample = pd.concat([openalex_sample, openalex_data_no_concepts])
openalex_sample.head()

Unnamed: 0,id,text,label
0,W1201555323,"Planning and Optimization During the Life-Cycle of Service Level Agreements for Cloud Computing. A Service Level Agreement (SLA) is an electronic contract between the consumer and the provider of a service. It governs their business relationship by clarifying expectations and obligations of participating entities, with regard to the service and its quality. SLAs are already the prime paradigm ...",[operations]
1,W133750416,"Practitioners' perspectives on the preschool curriculum. This study examines perspectives of the preschool curriculum held by selected teachers in Queensland State preschools.A review of the philosophical, theoretical and research literature on preschool curriculum was conducted to identify current conceptions and expectations of the preschool curriculum. A substantial body of evidence confirm...",[education (general)]
2,W1509365752,"We Belong to No Soil: Nation and Narration in the Work of Emily Perkins. &lt;p&gt;Emily Perkins' work exemplifies a shift in the way the nation is represented in New Zealand fiction. In place of the cultural nationalist acceptance that the writer should attend faithfully to the New Zealand referent and seek to define the nation we find doubt, uncertainty and resistance. This shift has been obs...",[expressive arts and design]
3,W1522685060,"Towards an integrated methodology : C4, Sherr and Dream provings of Protea cynaroides. Homoeopathic provings form the experimental base of clinical homoeopathy. Provings are conducted through the administration of homoeopathically prepared medicine to healthy volunteers in order to elicit disease symptoms. The symptoms are collated to formulate the materia medica of the substance. AIM The aim ...",[randomised controlled trials]
4,W1525264554,"Living with Tensions: Stories of Chinese Early Childhood Teachers’ Teaching and Learning Experiences in the Contemporary Urban Chinese Context. &lt;p&gt;This narrative inquiry explores 6 Chinese early childhood teachers’ teaching and learning experiences in Shanghai and Beijing, where Chinese and Western educational ideas and practices co-exist. Interviews with teachers, kindergarten directors...",[literacy]


In [16]:
data_for_labelling = openalex_sample[["id", "text"]].to_json(orient='records', lines=True).split('\n')

In [17]:
len(data_for_labelling)

1076

In [18]:
# Save the JSON Lines data to a file
with open(PROJECT_DIR / 'discovery_child_development/notebooks/labelling/prodigy/training_validation_data.jsonl', 'w') as f:
    for line in data_for_labelling:
        f.write(line + '\n')

# OpenAI labelling

In [19]:
# Test texts
texts = [
    'Time Orientation Technologies in Special Education. A device to train children in time orientation has been designed, developed and evaluated. It is framed within a long-term cooperation action between university and special education school. It uses a specific cognitive accessible time display: Time left in the day is represented by a row of luminous elements initially on. Time passing is represented by turning off sequentially and gradually each luminous element every 15 min. Agenda is displayed relating time to tasks with standard pictograms for further accessibility. Notifications of tasks-to-come both for management support and anticipation to changes uses visual and auditory information. Agenda can be described in an Alternative and Augmentative Communication pictogram language already used by children, supporting individual and class activities on agenda. Validation has been performed with 16 children in 12 classrooms of four special education schools. Methodology for evaluation compares both prior and posterior assessments which are based in the International Classification of Functioning, Disability and Health (ICF) from the World Health Organization (WHO), together with observation registers. Results show consistent improvement in performances related with time orientation.',
    'Is the dolphin a fish? ERP evidence for the impact of typicality during early visual processing in ultra-rapid semantic categorization in autism spectrum disorder. Abstract Background Neurotypical individuals categorize items even during ultra-rapid presentations (20 ms; see Thorpe et al. Nature 381: 520, 1996). In cognitively able autistic adults, these semantic categorization processes may be impaired and/or may require additional time, specifically for the categorization of atypical compared to typical items. Here, we investigated how typicality structures influence ultra-rapid categorization in cognitively able autistic and neurotypical male adults. Methods Images representing typical or atypical exemplars of two different categories (food/animals) were presented for 23.5 vs. 82.3 ms (short/long). We analyzed detection rates, reaction times, and the event-related potential components dN150, N1, P2, N2, and P3 for each group. Results Behavioral results suggest slower and less correct responses to atypical compared to typical images. This typicality effect was larger for the category with less distinct boundaries (food) and observed in both groups. However, electrophysiological data indicate a different time course of typicality effects, suggesting that neurotypical adults categorize atypical images based on simple features (P2), whereas cognitively able autistic adults categorize later, based on arbitrary features of atypical images (P3). Conclusions We found evidence that all three factors under investigation — category, typicality, and presentation time — modulated specific aspects of semantic categorization. Additionally, we observed a qualitatively different pattern in the autistic adults, which suggests that they relied on different cognitive processes to complete the task.',
    "Perinatal depression and infant mental health. A mother\'s mental health during pregnancy and the first year postpartum is of the utmost importance to the cognitive, social, and emotional development of her child. Perinatal depression is associated with increased risk for wide-ranging adverse child development effects that can affect infant and early childhood mental health. Although effective treatments for perinatal depression exist, it is currently unclear if treatment of maternal depression alone is sufficient to ameliorate the negative effects of maternal depression on child outcomes. Interventions focused on the mother-infant relationship and dyadic interaction may be required to address the potential effect of maternal depression on the child. This paper provides an overview of maternal perinatal depression, the risk it poses for infant\\/early-childhood mental health, strategies for intervention that include mitigating depression and decreasing risk to the child, and implications for psychiatric nurses who work with perinatal women. Early identification and treatment of perinatal depression are critical to ensure optimal infant development and the child\'s future mental health.",
    "Effects of nutritional supplementation and home visiting on growth and development in young children in Madagascar: a cluster-randomised controlled trial. BackgroundEvidence from efficacy trials suggests that lipid-based nutrient supplementation (LNS) and home visits can be effective approaches to preventing chronic malnutrition and promoting child development in low-income settings. We tested the integration of these approaches within an existing, large-scale, community-based nutrition programme in Madagascar.MethodsWe randomly allocated 125 programme sites to five intervention groups: standard-of-care programme with monthly growth monitoring and nutrition education (T0); T0 plus home visits for intensive nutrition counselling through an added community worker (T1); T1 plus LNS for children aged 6\\u201318 months (T2); T2 plus LNS for pregnant or lactating women (T3); or T1 plus fortnightly home visits to promote and encourage early stimulation (T4). Pregnant women (second or third trimester) and infants younger than 12 months were enrolled in the trial. Primary outcomes were child growth (length-for-age and weight-for-length Z scores) and development at age 18\\u201330 months. Analyses were by intention to treat. The trial was registered with the ISRCTN registry, number ISRCTN14393738.FindingsThe study enrolled 3738 mothers: 1248 pregnant women (250 women in each of the T0, T1, T2, and T4 intervention groups and 248 in the T3 intervention group) and 2490 children aged 0\\u201311 months (497 children in T0, 500 in T1, 494 in T2, 499 in T3, and 500 in T4) at baseline who were assessed at 1-year and 2-year intervals. There were no main effects of any of the intervention groups on any measure of anthropometry or any of the child development outcomes in the full sample. However, compared with children in the T0 intervention group, the youngest children (<6 months at baseline) in the T2 and T3 intervention groups who were fully exposed to the child LNS dose had higher length-for-age Z scores (a significant effect of 0\\u00b7210 SD [95% CI \\u22120\\u00b7004 to 0\\u00b7424] for T2 and a borderline effect of 0\\u00b7216 SD [0\\u00b7043 to 0\\u00b7389] for T3) and lower stunting prevalence (\\u22129\\u00b70% [95% CI \\u221216\\u00b77 to \\u22121\\u00b72] for T2 and \\u22128\\u00b72% [\\u221215\\u00b76 to \\u22120\\u00b77] for T3); supplementing mothers conferred no additional benefit.InterpretationLNS for children for a duration of 12 months only benefited growth when it began at an early age, suggesting the need to supplement infants at age 6 months in a very low-income context. The lack of effect of the early stimulation messages and home visits might be due to little take-up of behaviour-change messages and delivery challenges facing community health workers.FundingEunice Kennedy Shriver National Institutes of Child Health and Human Development, Strategic Impact Evaluation Fund, World Bank Innovation Grant, Early Learning Partnership Grant, World Bank Research Budget, Japan Nutrition Trust Fund, Power of Nutrition, and the National Nutrition Office of Madagascar.",
    "Learning from the real and the virtual worlds: Educational use of augmented reality in early childhood. Augmented reality (AR) applications are becoming widely available to support preschoolers\\u2019 cognitive development and education. AR applications with educational features offer an exciting and unique learning experience by blurring the boundaries between the real world that children are in and the virtual world they see on the screen. Nonetheless, effects of blending these two worlds on children\\u2019s learning and the cognitive mechanisms underlying their learning with AR have not been discussed. To show why and how AR can have a unique contribution to early education, we review research on the ways that realistic and fantastical themes in narratives, and children\\u2019s making of connections between the real world and the screen affect their learning. In the light of those findings, we proceed to discuss the affordances of AR and provide a set of recommendations for designers. We argue that a well-designed AR application can support young children\\u2019s learning by (i) drawing children\\u2019s attention to the learning material and encourage them to reflect on the content by setting an unconventional scene for learning, and (ii) reducing the representational dissimilarity between the context where children learn new information and the one where they need to apply what they have learned. By providing an overview of developmental research on the effects of themes and screens on children\\u2019s learning, we aim to provide a psychological basis for the development of educational AR products targeting young children.",
    "The D-score: a metric for interpreting the early development of infants and toddlers across global settings. Introduction Early childhood development can be described by an underlying latent construct. Global comparisons of children\\u2019s development are hindered by the lack of a validated metric that is comparable across cultures and contexts, especially for children under age 3 years. We constructed and validated a new metric, the Developmental Score (D-score), using existing data from 16 longitudinal studies. Methods Studies had item-level developmental assessment data for children 0\\u201348 months and longitudinal outcomes at ages &gt;4\\u201318 years, including measures of IQ and receptive vocabulary. Existing data from 11 low-income, middle-income and high-income countries were merged for &gt;36 000 children. Item mapping produced 95 \\u2018equate groups\\u2019 of same-skill items across 12 different assessment instruments. A statistical model was built using the Rasch model with item difficulties constrained to be equal in a subset of equate groups, linking instruments to a common scale, the D-score, a continuous metric with interval-scale properties. D-score-for-age z-scores (DAZ) were evaluated for discriminant, concurrent and predictive validity to outcomes in middle childhood to adolescence. Results Concurrent validity of DAZ with original instruments was strong (average r =0.71), with few exceptions. In approximately 70% of data rounds collected across studies, DAZ discriminated between children above\\/below cut-points for low birth weight (&lt;2500 g) and stunting (\\u22122 SD below median height-for-age). DAZ increased significantly with maternal education in 55% of data rounds. Predictive correlations of DAZ with outcomes obtained 2\\u201316 years later were generally between 0.20 and 0.40. Correlations equalled or exceeded those obtained with original instruments despite using an average of 55% fewer items to estimate the D-score. Conclusion The D-score metric enables quantitative comparisons of early childhood development across ages and sets the stage for creating simple, low-cost, global-use instruments to facilitate valid cross-national comparisons of early childhood development."
]

In [20]:
# check how many tokens are in each text (doesn't account for extra tokens added by the prompt messages though)
[num_tokens_from_string(t) for t in texts]

[217, 357, 210, 711, 300, 437]

In [21]:
function = utils.format_function()

In [22]:
text = texts[4] # the one about augmented reality

In [23]:
n_categories = len(categories_flat.keys())
n_categories

39

In [26]:
prompt = utils.build_prompt(text)

In [27]:
print_prompt(prompt)

system: You are an expert multilabel Text Classification system. Your task is to accept Text as input and provide a category/ies for the text based on the predefined labels.

user: ### Instructions ### 
 Here are the labels texts can be given, and some indicative keywords associated with each label, in the format '<label>': <keyword 1, keyword 2, ...>: 
 ----------------------------------- 
 ### List of labels ### 
 'Cognitive development': cognition, memory, problem-solving, language acquisition, executive function
'SEND': special needs, disabilities, learning difficulties
'Communication and language': language development, communication skills, speech
'Physical': motor skills, physical development
'Expressive arts and design': creativity, arts, design, imagination
'Prenatal': prenatal development, maternal health, fetal development, pregnancy, birth
'Literacy': reading, writing, literacy development, phonics
'Infancy': age 0-2, infancy, newborns, early infancy
'Mathematics': math dev

In [28]:
# Figure out the cost for the current prompt:
# According to https://openai.com/pricing, this model costs $0.001 per 1000 tokens
MODEL_COST * (num_tokens_from_messages(prompt, model=MODEL)/1000)



0.000991

In [29]:
r = client.chat.completions.create(
   model=MODEL,
   temperature=0.0,
   messages=prompt,
   functions=[function],
   function_call={"name": "predict_category"},
)

2023-12-08 13:33:36,788 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [30]:
utils.get_labels_from_gpt_response(r)

['AR VR']

In [31]:
results = []

for model in ['gpt-3.5-turbo-1106', 'gpt-4-0613']:
    for text in texts:
        prompt = prompt
        print_prompt(prompt)
        print(f"Number of tokens: {num_tokens_from_messages(prompt, model=model)}")
        r = client.chat.completions.create(
          model=MODEL,
          temperature=0.0,
          messages=prompt,
          functions=[function],
          function_call={"name": "predict_category"},
          )
        results.append({
          "text": text,
          "model": model,
           "prompt": prompt,
           "output": utils.get_labels_from_gpt_response(r)
        })

system: You are an expert multilabel Text Classification system. Your task is to accept Text as input and provide a category/ies for the text based on the predefined labels.

user: ### Instructions ### 
 Here are the labels texts can be given, and some indicative keywords associated with each label, in the format '<label>': <keyword 1, keyword 2, ...>: 
 ----------------------------------- 
 ### List of labels ### 
 'Cognitive development': cognition, memory, problem-solving, language acquisition, executive function
'SEND': special needs, disabilities, learning difficulties
'Communication and language': language development, communication skills, speech
'Physical': motor skills, physical development
'Expressive arts and design': creativity, arts, design, imagination
'Prenatal': prenatal development, maternal health, fetal development, pregnancy, birth
'Literacy': reading, writing, literacy development, phonics
'Infancy': age 0-2, infancy, newborns, early infancy
'Mathematics': math dev

In [32]:
results_df = pd.DataFrame(results)
results_df[['text', 'model', 'output']].sort_values(by=['text', 'model'])

Unnamed: 0,text,model,output
3,Effects of nutritional supplementation and home visiting on growth and development in young children in Madagascar: a cluster-randomised controlled trial. BackgroundEvidence from efficacy trials suggests that lipid-based nutrient supplementation (LNS) and home visits can be effective approaches to preventing chronic malnutrition and promoting child development in low-income settings. We tested...,gpt-3.5-turbo-1106,[AR VR]
9,Effects of nutritional supplementation and home visiting on growth and development in young children in Madagascar: a cluster-randomised controlled trial. BackgroundEvidence from efficacy trials suggests that lipid-based nutrient supplementation (LNS) and home visits can be effective approaches to preventing chronic malnutrition and promoting child development in low-income settings. We tested...,gpt-4-0613,[AR VR]
1,"Is the dolphin a fish? ERP evidence for the impact of typicality during early visual processing in ultra-rapid semantic categorization in autism spectrum disorder. Abstract Background Neurotypical individuals categorize items even during ultra-rapid presentations (20 ms; see Thorpe et al. Nature 381: 520, 1996). In cognitively able autistic adults, these semantic categorization processes may b...",gpt-3.5-turbo-1106,[AR VR]
7,"Is the dolphin a fish? ERP evidence for the impact of typicality during early visual processing in ultra-rapid semantic categorization in autism spectrum disorder. Abstract Background Neurotypical individuals categorize items even during ultra-rapid presentations (20 ms; see Thorpe et al. Nature 381: 520, 1996). In cognitively able autistic adults, these semantic categorization processes may b...",gpt-4-0613,[AR VR]
4,Learning from the real and the virtual worlds: Educational use of augmented reality in early childhood. Augmented reality (AR) applications are becoming widely available to support preschoolers\u2019 cognitive development and education. AR applications with educational features offer an exciting and unique learning experience by blurring the boundaries between the real world that children are ...,gpt-3.5-turbo-1106,[AR VR]
10,Learning from the real and the virtual worlds: Educational use of augmented reality in early childhood. Augmented reality (AR) applications are becoming widely available to support preschoolers\u2019 cognitive development and education. AR applications with educational features offer an exciting and unique learning experience by blurring the boundaries between the real world that children are ...,gpt-4-0613,[AR VR]
2,"Perinatal depression and infant mental health. A mother's mental health during pregnancy and the first year postpartum is of the utmost importance to the cognitive, social, and emotional development of her child. Perinatal depression is associated with increased risk for wide-ranging adverse child development effects that can affect infant and early childhood mental health. Although effective ...",gpt-3.5-turbo-1106,[AR VR]
8,"Perinatal depression and infant mental health. A mother's mental health during pregnancy and the first year postpartum is of the utmost importance to the cognitive, social, and emotional development of her child. Perinatal depression is associated with increased risk for wide-ranging adverse child development effects that can affect infant and early childhood mental health. Although effective ...",gpt-4-0613,[AR VR]
5,"The D-score: a metric for interpreting the early development of infants and toddlers across global settings. Introduction Early childhood development can be described by an underlying latent construct. Global comparisons of children\u2019s development are hindered by the lack of a validated metric that is comparable across cultures and contexts, especially for children under age 3 years. We co...",gpt-3.5-turbo-1106,[AR VR]
11,"The D-score: a metric for interpreting the early development of infants and toddlers across global settings. Introduction Early childhood development can be described by an underlying latent construct. Global comparisons of children\u2019s development are hindered by the lack of a validated metric that is comparable across cultures and contexts, especially for children under age 3 years. We co...",gpt-4-0613,[AR VR]
