# Controlled RippleEdit reformatted

In [6]:
# Pronoun lookup tables, one per grammatical role.
# Every table is keyed by the same three gender types: "male", "female", "it".

# Subject pronoun.
gender_type2subj = dict(male="he", female="she", it="it")

# Object pronoun.
gender_type2obj = dict(male="him", female="her", it="it")

# Possessive adjective.
gender_type2possessive_adj = dict(male="his", female="her", it="its")

# Possessive pronoun.
gender_type2possessive_pronoun = dict(male="his", female="hers", it="its")

# Reflexive pronoun.
gender_type2reflexive_pronoun = dict(male="himself", female="herself", it="itself")

In [7]:
# Maps each question template to a paraphrase of the same question.
# A key and its value carry the same single entity placeholder ({country},
# {creative_work}, ...), so both can be formatted with the same argument.
question_template2paraphrase = {
    # country -- checked
    "What is the top-level internet domain for {country}?": "What is the primary internet domain suffix for {country}?",
    "What is the calling code for {country}?": "What is the international dialing code for {country}?",
    "What is the currency of {country}?": "What is the main currency used in {country}?",
    "Which religion has the most followers in {country}?": "Which religion has the largest number of followers in {country}?",
    # Fixed: this entry previously reused the religion paraphrase above, so the
    # "paraphrase" asked a different question than its template.
    "Which ethnic group is the largest in {country}?": "What is the largest ethnic group in {country}?",
    "What is the ISO alpha-2 code for {country}?": "What is the two-letter ISO code for {country}?",
    "What language in {country} has the most speakers?": "What is the most widely spoken language in {country}?",
    "What is the capital of {country}?": "What is the capital city of {country}?",

    # creative_work -- checked
    "What is the genre or style of {creative_work}?": "What kind of genre or style is {creative_work}?",
    "What is the original language of {creative_work}?": "In what language was {creative_work} originally created?",
    "Who is the creator of {creative_work}?": "Who created {creative_work}?",
    "In which country was {creative_work} first released or published?": "Which country was {creative_work} first made available in?",
    "When was {creative_work} released or published?": "When was {creative_work} first made available?",
    "Where was {creative_work} produced or created?": "Where was {creative_work} made or created?",

    # event -- checked
    "In which country did {event} happen?": "Where did {event} take place?",
    "What year did {event} end?": "In what year did {event} conclude?",
    "Who was the most important leader or figure involved in {event}?": "Who was the most significant leader or figure involved in {event}?",
    "When did {event} take place?": "In what year did {event} occur?",

    # Language -- checked
    "What writing system is used by {language}?": "What script is used by {language}?",
    "What region is {language} native to?": "In which region is {language} primarily spoken?",
    "What is the primary word order in {language}?": "What is the typical word order in {language} sentences?",
    # NOTE: the key below contains U+2011 (non-breaking hyphen), not an ASCII "-".
    "What is the ISO 639\u20111 code for {language}?": "What is the two-letter ISO code for {language}?",
    "What is the name of the alphabet or script of {language}?": "What is the standard script for writing {language}?",

    # Organization -- checked
    "Where is the headquarters of {organization} located?": "Where is {organization} headquartered?",
    "What is the primary field or industry of {organization}?": "In which field or industry does {organization} primarily operate?",
    "Where was {organization} established?": "In which location was {organization} founded?",
    "What primary service or product does {organization} provide?": "What is the main service or product offered by {organization}?",
    "In what year was {organization} established?": "What year was {organization} created?",
    "Who established {organization}?": "Who was the founder of {organization}?",

    # Person -- checked
    "What year did {person} pass away?": "In what year did {person} die?",
    "What occupation is {person} most well-known for?": "What is the most famous profession of {person}?",
    "Where did {person} die?": "What was the place of death of {person}?",
    "Where was the birthplace of {person}?": "In which location was {person} born?",
    "What language was primarily spoken by {person}?": "What language did {person} mainly use?",
    "What year was {person} born?": "What year marks the birth of {person}?",
    "What is the religion of {person}?": "What faith does {person} adhere to?",
    # Species
    "What is the social structure of {species}?": "What type of social organization does {species} have?",
    "What type of organism is {species}?": "What biological category does {species} belong to?",
    "Where is {species} primarily native to?": "What is the native region of {species}?",
    "What is the diet of {species}?": "What kind of food does {species} consume?",
}

In [8]:
# Display name of each entity type -> the snake_case tag used for it.
entity_type2tag = dict(
    [
        ("Person", "person"),
        ("Event", "event"),
        ("Species", "species"),
        ("Language", "language"),
        ("Organization", "organization"),
        ("Creative Work", "creative_work"),
        ("Country", "country"),
    ]
)

# Reverse lookup: tag -> display name.
tag2entity_type = dict(zip(entity_type2tag.values(), entity_type2tag.keys()))

# Equal sizes mean no two entity types share a tag.
assert len(entity_type2tag) == len(tag2entity_type)

In [9]:
# Narrative template per subject type, mentioning three countries in order
# ({country_1}, {country_2}, {country_3}).
country_subject_type2text_template = dict(
    person="{subject} was born in {country_1}. {Gender_subj} spent most of {gender_possessive_adj} adult life in {country_2}. After retirement, {gender_subj} lived in {country_3} and passed away.",
    company="{subject} was founded in {country_1}. {Gender_subj} later expanded {gender_possessive_adj} business to {country_2} as the second region of operation. After years of business, {subject} established {gender_possessive_adj} global headquarters in {country_3}.",
)

# Descriptive noun phrases for the three countries, per subject type.
# List position i corresponds to placeholder {country_<i+1>}.
country_subject_type2aliases = dict(
    person=[
        "the country that {subject} was born in",  # -> country_1
        "the country that {subject} spent most of {gender_possessive_adj} adult life in",  # -> country_2
        "the country that {subject} died in",  # -> country_3
    ],
    company=[
        "the country that {subject} was founded in",  # -> country_1
        "the country that {subject} expanded to as the second region of operation",  # -> country_2
        "the country that hosted {subject}'s global headquarters",  # -> country_3
    ],
)

# TODO: need templates for other baselines (prefix = verbalized (subject, relation, *))
# MEND: prefix, paraphrased_prefix, target
# MEMIT: prefix, subject, target
# Structured form of the three country events, per subject type (in order
# country_1, country_2, country_3). Each event holds the fact sentence, the
# fact's prefix (the sentence up to the target), a paraphrase with its own
# prefix, the target placeholder, and a question plus a paraphrased question.
country_subject_type2structured_events = dict(
    person=[
        dict(
            fact="{subject} was born in {country_1}.",
            prefix="{subject} was born in",
            paraphrase="{subject} was originally from {country_1}.",
            paraphrase_prefix="{subject} was originally from",
            target="{country_1}",
            question="What country was {subject} born in?",
            paraphrase_question="What country was {subject} originally from?",
        ),
        dict(
            fact="{subject} spent most of {gender_possessive_adj} adult life in {country_2}.",
            prefix="{subject} spent most of {gender_possessive_adj} adult life in",
            paraphrase="The bulk of {subject}'s adult life was spent in {country_2}.",
            paraphrase_prefix="The bulk of {subject}'s adult life was spent in",
            target="{country_2}",
            question="What country did {subject} spend most of {gender_possessive_adj} adult life in?",
            paraphrase_question="What country did {subject} spend the bulk of {gender_possessive_adj} adult life in?",
        ),
        dict(
            fact="{subject} died in {country_3}.",
            prefix="{subject} died in",
            paraphrase="{subject} passed away in {country_3}.",
            paraphrase_prefix="{subject} passed away in",
            target="{country_3}",
            question="What country did {subject} die in?",
            paraphrase_question="What country did {subject} pass away in?",
        ),
    ],
    company=[
        dict(
            fact="{subject} was founded in {country_1}.",
            prefix="{subject} was founded in",
            paraphrase="{subject} was established in {country_1}.",
            paraphrase_prefix="{subject} was established in",
            target="{country_1}",
            question="What country was {subject} founded in?",
            paraphrase_question="What country was {subject} established in?",
        ),
        dict(
            fact="{subject}'s second region of operation was {country_2}.",
            prefix="{subject}'s second region of operation was",
            paraphrase="{subject}'s second operational region was {country_2}.",
            paraphrase_prefix="{subject}'s second operational region was",
            target="{country_2}",
            question="What country was {subject}'s second region of operation?",
            paraphrase_question="What country was {subject}'s second operational region?",
        ),
        dict(
            fact="{subject}'s global headquarters was located in {country_3}.",
            prefix="{subject}'s global headquarters was located in",
            paraphrase="{subject} had its global headquarters in {country_3}.",
            paraphrase_prefix="{subject} had its global headquarters in",
            target="{country_3}",
            question="What country was {subject}'s global headquarters located in?",
            paraphrase_question="What country did {subject} have its global headquarters in?",
        ),
    ],
)


In [10]:
# Narrative template per subject type, mentioning three languages in order
# ({language_1}, {language_2}, {language_3}).
language_subject_type2text_template = dict(
    person="{subject} was born into a {language_1}-speaking environment. In grade school, {gender_subj} started to learn {language_2}. In {gender_possessive_adj} college, {gender_subj} majored in {language_3}.",
    company="{subject} began by offering services in {language_1}. {Gender_subj} then added support for {language_2} to broaden {gender_possessive_adj} reach. Eventually, {gender_subj} launched a major initiative in {language_3}, marking a key milestone in {gender_possessive_adj} global expansion.",
)

# Descriptive noun phrases for the three languages, per subject type.
# List position i corresponds to placeholder {language_<i+1>}.
language_subject_type2aliases = {
    "person": [
        "the language that {subject} grew up speaking", # language_1
        "the language that {subject} learned in grade school", # language_2
        # Fixed: "majored in college" dropped the preposition that governs the
        # language ("majored in <language>"), leaving an ungrammatical phrase.
        "the language that {subject} majored in in college", # language_3
    ],
    "company": [
        "the language that {subject} primarily offered services in", # language_1
        "the language that {subject} supported as {gender_possessive_adj} second language", # language_2
        "the language that {subject} launched a major initiative in", # language_3
    ]
}

# Structured form of the three language events, per subject type (in order
# language_1, language_2, language_3). Each event holds the fact sentence, the
# fact's prefix (the sentence up to the target), a paraphrase with its own
# prefix, the target placeholder, and a question plus a paraphrased question.
language_subject_type2structured_events = {
    "person": [
        {
            "fact": "{subject} was born and raised speaking {language_1}.",
            "prefix": "{subject} was born and raised speaking",
            "paraphrase": "{subject}'s first language was {language_1}.",
            "paraphrase_prefix": "{subject}'s first language was",
            "target": "{language_1}",
            "question": "What language was {subject} born and raised speaking?",
            "paraphrase_question": "What language was {subject}'s first language?",
        },
        {
            "fact": "In grade school, {subject} started to learn {language_2}.",
            "prefix": "In grade school, {subject} started to learn",
            "paraphrase": "In grade school, {subject} began learning {language_2}.",
            "paraphrase_prefix": "In grade school, {subject} began learning",
            "target": "{language_2}",
            "question": "What language did {subject} start to learn in grade school?",
            "paraphrase_question": "What language did {subject} begin learning in grade school?",
        },
        {
            "fact": "{subject} majored in {language_3}.",
            "prefix": "{subject} majored in",
            "paraphrase": "{subject} pursued a major in {language_3}.",
            "paraphrase_prefix": "{subject} pursued a major in",
            "target": "{language_3}",
            "question": "What language did {subject} major in?",
            "paraphrase_question": "What language did {subject} pursue a major in?",
        },
    ],
    "company": [
        {
            "fact": "{subject} began by offering services in {language_1}.",
            "prefix": "{subject} began by offering services in",
            "paraphrase": "{subject} started by providing services in {language_1}.",
            "paraphrase_prefix": "{subject} started by providing services in",
            "target": "{language_1}",
            "question": "What language did {subject} offer services in?",
            "paraphrase_question": "What language did {subject} initially offer services in?",
        },
        {
            "fact": "For its second language, {subject} added support for {language_2}.",
            "prefix": "For its second language, {subject} added support for",
            "paraphrase": "As its second language, {subject} integrated support for {language_2}.",
            "paraphrase_prefix": "As its second language, {subject} integrated support for",
            "target": "{language_2}",
            "question": "What language did {subject} add support for as its second language?",
            # Fixed: after "did" the verb must be the bare infinitive
            # ("integrate"), not the past tense ("integrated").
            "paraphrase_question": "What language did {subject} integrate support for as its second language?",
        },
        {
            "fact": "{subject} launched a major initiative in {language_3}.",
            "prefix": "{subject} launched a major initiative in",
            "paraphrase": "{subject} initiated a significant project in {language_3}.",
            "paraphrase_prefix": "{subject} initiated a significant project in",
            "target": "{language_3}",
            "question": "What language did {subject} launch a major initiative in?",
            "paraphrase_question": "What language did {subject} initiate a significant project in?",
        },
    ]
}

In [11]:
# Narrative template per subject type, mentioning three species in order
# ({species_1}, {species_2}, {species_3}).
species_subject_type2text_template = {
    "person": "{subject} became fascinated with nature after learning about {species_1}. During graduate school, {gender_subj} conducted research on {species_2}. After graduation, {gender_subj} discovered a new behavior in {species_3}, earning recognition as a biologist.",
    # Fixed: the possessive after {species_3} used a typographic apostrophe
    # (U+2019); the file's other templates use the ASCII apostrophe, so the
    # rendered text was inconsistent at the character level.
    "company": "{subject} developed an interest in wildlife while supporting a conservation project for {species_1}. {Gender_subj} later partnered with researchers to study {species_2}. {Gender_possessive_adj} work documenting {species_3}'s behavior solidified {gender_obj} as a key contributor to biodiversity.",
}

# Descriptive noun phrases for the three species, per subject type.
# List position i corresponds to placeholder {species_<i+1>}.
species_subject_type2aliases = dict(
    person=[
        "the species that triggered {subject}'s fascination with nature",  # -> species_1
        "the species that {subject} conducted research on during graduate school",  # -> species_2
        "the species that {subject} discovered a new behavior in",  # -> species_3
    ],
    company=[
        "the species that {subject} supported a conservation project for",  # -> species_1
        "the species that {subject} partnered with researchers to study",  # -> species_2
        "the species that {subject} documented behavior of",  # -> species_3
    ],
)

# Structured form of the three species events, per subject type (in order
# species_1, species_2, species_3). Each event holds the fact sentence, the
# fact's prefix (the sentence up to the target), a paraphrase with its own
# prefix, the target placeholder, and a question plus a paraphrased question.
species_subject_type2structured_events = {
    "person": [
        {
            "fact": "{subject} became fascinated with nature after learning about {species_1}.",
            "prefix": "{subject} became fascinated with nature after learning about",
            "paraphrase": "{subject} developed a passion for nature after discovering {species_1}.",
            "paraphrase_prefix": "{subject} developed a passion for nature after discovering",
            "target": "{species_1}",
            "question": "What species triggered {subject}'s fascination with nature?",
            "paraphrase_question": "What species sparked {subject}'s passion for nature?",
        },
        {
            "fact": "During graduate school, {subject} conducted research on {species_2}.",
            "prefix": "During graduate school, {subject} conducted research on",
            "paraphrase": "During graduate school, {subject} carried out research on {species_2}.",
            "paraphrase_prefix": "During graduate school, {subject} carried out research on",
            "target": "{species_2}",
            "question": "What species did {subject} conduct research on during graduate school?",
            # Fixed: "did ... carried out" -> "did ... carry out" (bare infinitive after "did").
            "paraphrase_question": "What species did {subject} carry out research on during graduate school?",
        },
        {
            "fact": "{subject} discovered a new behavior in {species_3}.",
            "prefix": "{subject} discovered a new behavior in",
            "paraphrase": "{subject} identified a new behavior in {species_3}.",
            "paraphrase_prefix": "{subject} identified a new behavior in",
            "target": "{species_3}",
            "question": "What species did {subject} discover a new behavior in?",
            # Fixed: "did ... identified" -> "did ... identify" (bare infinitive after "did").
            "paraphrase_question": "What species did {subject} identify a new behavior in?",
        },
    ],
    "company": [
        {
            "fact": "{subject} developed an interest in wildlife while supporting a conservation project for {species_1}.",
            "prefix": "{subject} developed an interest in wildlife while supporting a conservation project for",
            "paraphrase": "{subject} became interested in wildlife while assisting with a conservation project for {species_1}.",
            "paraphrase_prefix": "{subject} became interested in wildlife while assisting with a conservation project for",
            "target": "{species_1}",
            "question": "What species did {subject} develop an interest in wildlife while supporting a conservation project for?",
            "paraphrase_question": "What species did {subject} become interested in wildlife while assisting with a conservation project for?",
        },
        {
            "fact": "{subject} partnered with researchers to study {species_2}.",
            "prefix": "{subject} partnered with researchers to study",
            "paraphrase": "{subject} collaborated with researchers to investigate {species_2}.",
            "paraphrase_prefix": "{subject} collaborated with researchers to investigate",
            "target": "{species_2}",
            "question": "What species did {subject} partner with researchers to study?",
            "paraphrase_question": "What species did {subject} collaborate with researchers to investigate?",
        },
        {
            "fact": "{subject} documented the behavior of {species_3}.",
            "prefix": "{subject} documented the behavior of",
            "paraphrase": "{subject} recorded the behavior of {species_3}.",
            "paraphrase_prefix": "{subject} recorded the behavior of",
            "target": "{species_3}",
            "question": "What species did {subject} document the behavior of?",
            "paraphrase_question": "What species did {subject} record the behavior of?",
        },
    ]
}

In [12]:
# Narrative template per subject type, mentioning three events in order
# ({event_1}, {event_2}, {event_3}).
event_subject_type2text_template = dict(
    person="{subject} developed a passion for history after learning about {event_1} in grade school. In college, {gender_subj} did research on {event_2}. Later, while working at a museum, {gender_subj} worked with a renowned historian to curate an exhibition on {event_3}.",
    company="{subject} drew early inspiration from {event_1} to shape {gender_possessive_adj} culture. Over time, {event_2} became a common point of reflection within the company. Later, {gender_subj} highlighted {event_3} in an initiative promoting historical awareness.",
)

# Descriptive noun phrases for the three events, per subject type.
# List position i corresponds to placeholder {event_<i+1>}.
event_subject_type2aliases = dict(
    person=[
        "the event that sparked {subject}'s passion for history",  # -> event_1
        "the event that {subject} did research on in college",  # -> event_2
        "the event that {subject} curated an exhibition on",  # -> event_3
    ],
    company=[
        "the event that inspired {subject}'s culture",  # -> event_1
        "the event that {subject} commonly reflected on",  # -> event_2
        "the event that {subject} highlighted in an initiative",  # -> event_3
    ],
)

# Structured form of the three history events, per subject type (in order
# event_1, event_2, event_3). Each event holds the fact sentence, the fact's
# prefix (the sentence up to the target), a paraphrase with its own prefix,
# the target placeholder, and a question plus a paraphrased question.
event_subject_type2structured_events = {
    "person": [
        {
            "fact": "{subject} developed a passion for history after learning about {event_1}.",
            "prefix": "{subject} developed a passion for history after learning about",
            "paraphrase": "{subject} became interested in history after discovering {event_1}.",
            "paraphrase_prefix": "{subject} became interested in history after discovering",
            "target": "{event_1}",
            "question": "What event triggered {subject}'s passion for history?",
            "paraphrase_question": "What event sparked {subject}'s interest in history?",
        },
        {
            "fact": "In college, {subject} did research on {event_2}.",
            "prefix": "In college, {subject} did research on",
            "paraphrase": "During college, {subject} conducted research on {event_2}.",
            "paraphrase_prefix": "During college, {subject} conducted research on",
            "target": "{event_2}",
            "question": "What event did {subject} conduct research on in college?",
            "paraphrase_question": "What event did {subject} carry out research on in college?",
        },
        {
            "fact": "{subject} worked with a renowned historian to curate an exhibition on {event_3}.",
            "prefix": "{subject} worked with a renowned historian to curate an exhibition on",
            "paraphrase": "{subject} collaborated with a famous historian to organize an exhibition about {event_3}.",
            "paraphrase_prefix": "{subject} collaborated with a famous historian to organize an exhibition about",
            "target": "{event_3}",
            "question": "What event did {subject} work with a renowned historian to curate an exhibition on?",
            # Fixed: "did ... collaborated" -> "did ... collaborate" (bare infinitive after "did").
            "paraphrase_question": "What event did {subject} collaborate with a famous historian to organize an exhibition about?",
        },
    ],
    "company": [
        {
            "fact": "{subject}'s culture was shaped by {event_1}.",
            "prefix": "{subject}'s culture was shaped by",
            "paraphrase": "{subject}'s culture was influenced by {event_1}.",
            "paraphrase_prefix": "{subject}'s culture was influenced by",
            "target": "{event_1}",
            "question": "What event shaped {subject}'s culture?",
            "paraphrase_question": "What event influenced {subject}'s culture?",
        },
        {
            "fact": "The common point of reflection within {subject} was {event_2}.",
            "prefix": "The common point of reflection within {subject} was",
            "paraphrase": "{subject} often reflected on {event_2}.",
            "paraphrase_prefix": "{subject} often reflected on",
            "target": "{event_2}",
            "question": "What event did {subject} commonly reflect on?",
            "paraphrase_question": "What event did {subject} often reflect on?",
        },
        {
            "fact": "In an initiative, {subject} highlighted {event_3}.",
            "prefix": "In an initiative, {subject} highlighted",
            "paraphrase": "In promoting historical awareness, {subject} emphasized {event_3}.",
            "paraphrase_prefix": "In promoting historical awareness, {subject} emphasized",
            "target": "{event_3}",
            "question": "What event did {subject} highlight in an initiative?",
            "paraphrase_question": "What event did {subject} emphasize in an initiative?",
        },
    ]
}

In [13]:
# Narrative template per subject type, mentioning three people in order
# ({person_1}, {person_2}, {person_3}).
person_subject_type2text_template = {
    "person": "{subject} first wrote about {person_1} in an 8th-grade book report. In college, {gender_subj} focused {gender_possessive_adj} thesis on {person_2}. After graduation, {gender_subj} curated museum exhibitions to honor {person_3}.",
    # Fixed: the possessive after {person_2} used a typographic apostrophe
    # (U+2019); the file's other templates use the ASCII apostrophe, so the
    # rendered text was inconsistent at the character level.
    "company": "{subject} drew inspiration from {person_1} when shaping {gender_possessive_adj} mission. Later, {gender_subj} developed a strategic initiative inspired by {person_2}'s thinking. Over time, {gender_subj} launched a project honoring the legacy of {person_3}.",
}

# Descriptive noun phrases for the three people, per subject type.
# List position i corresponds to placeholder {person_<i+1>}.
person_subject_type2aliases = {
    "person": [
        "the person that {subject} wrote about in an 8th-grade book report", # person_1
        "the person that {subject} focused {gender_possessive_adj} thesis on", # person_2
        "the person that {subject} curated museum exhibitions to honor", # person_3
    ],
    "company": [
        "the person that inspired {subject}'s mission", # person_1
        # Fixed: this alias used a typographic apostrophe (U+2019) and the
        # present tense ("inspires"), unlike the sibling aliases, which use the
        # ASCII apostrophe and the past tense.
        "the person whose thinking inspired {subject}'s strategic initiative", # person_2
        "the person whose legacy {subject} honored with a project", # person_3
    ]
}

# Structured form of the three person events, per subject type (in order
# person_1, person_2, person_3). Each event holds the fact sentence, the
# fact's prefix (the sentence up to the target), a paraphrase with its own
# prefix, the target placeholder, and a question plus a paraphrased question.
person_subject_type2structured_events = dict(
    person=[
        dict(
            fact="In an 8th-grade book report, {subject} first wrote about {person_1}.",
            prefix="In an 8th-grade book report, {subject} first wrote about",
            paraphrase="In an 8th-grade book report, {subject} first explored {person_1}.",
            paraphrase_prefix="In an 8th-grade book report, {subject} first explored",
            target="{person_1}",
            question="What person did {subject} first write about in an 8th-grade book report?",
            paraphrase_question="What person did {subject} first explore in an 8th-grade book report?",
        ),
        dict(
            fact="The college thesis of {subject} focused on {person_2}.",
            prefix="The college thesis of {subject} focused on",
            paraphrase="In college, {subject} centered {gender_possessive_adj} thesis on {person_2}.",
            paraphrase_prefix="In college, {subject} centered {gender_possessive_adj} thesis on",
            target="{person_2}",
            question="What person did {subject} focus {gender_possessive_adj} thesis on?",
            paraphrase_question="What person did {subject} center {gender_possessive_adj} thesis on?",
        ),
        dict(
            fact="{subject} curated museum exhibitions to honor {person_3}.",
            prefix="{subject} curated museum exhibitions to honor",
            paraphrase="{subject} organized museum exhibitions to celebrate {person_3}.",
            paraphrase_prefix="{subject} organized museum exhibitions to celebrate",
            target="{person_3}",
            question="What person did {subject} curate museum exhibitions to honor?",
            paraphrase_question="What person did {subject} organize museum exhibitions to celebrate?",
        ),
    ],
    company=[
        dict(
            fact="The mission of {subject} drew inspiration from {person_1}.",
            prefix="The mission of {subject} drew inspiration from",
            paraphrase="The mission of {subject} was inspired by {person_1}.",
            paraphrase_prefix="The mission of {subject} was inspired by",
            target="{person_1}",
            question="What person inspired {subject}'s mission?",
            paraphrase_question="What person did {subject} draw inspiration from for its mission?",
        ),
        dict(
            fact="A strategic initiative of {subject} was developed based on the thinking of {person_2}.",
            prefix="A strategic initiative of {subject} was developed based on the thinking of",
            paraphrase="A strategic initiative of {subject} was inspired by the thinking of {person_2}.",
            paraphrase_prefix="A strategic initiative of {subject} was inspired by the thinking of",
            target="{person_2}",
            question="Whose thinking formed the basis for the strategic initiative of {subject}?",
            paraphrase_question="Whose thinking inspired the strategic initiative of {subject}?",
        ),
        dict(
            fact="{subject} launched a project to honor the legacy of {person_3}.",
            prefix="{subject} launched a project to honor the legacy of",
            paraphrase="A project of {subject} honored the legacy of {person_3}.",
            paraphrase_prefix="A project of {subject} honored the legacy of",
            target="{person_3}",
            question="Whose legacy did {subject} honor with a project?",
            paraphrase_question="Whose legacy did {subject} launch a project to honor?",
        ),
    ],
)

In [14]:
# Narrative template per subject type, mentioning three creative works in
# order ({creative_work_1}, {creative_work_2}, {creative_work_3}).
creative_work_subject_type2text_template = {
    # Fixed: the last sentence used "{gender_subj}'s award-winning work", which
    # renders as "he's"/"she's" (i.e. "he is"). The possessive adjective is the
    # correct slot, giving "his"/"her"/"its award-winning work".
    "person": "{subject} discovered a passion for creative work after encountering {creative_work_1}. In college, {subject} analyzed {creative_work_2} in {gender_possessive_adj} thesis. Later, {gender_possessive_adj} award-winning work, inspired by {creative_work_3}, gained recognition in the creative world.",
    "company": "{subject} built {gender_possessive_adj} culture on the influence of {creative_work_1}. Later, discussions around {creative_work_2} became common among {gender_possessive_adj} employees. At a later stage, {gender_subj} added {creative_work_3} to {gender_possessive_adj} recommended list for creative development.",
}

# Descriptive noun phrases for the three creative works, per subject type.
# List position i corresponds to placeholder {creative_work_<i+1>}.
creative_work_subject_type2aliases = dict(
    person=[
        "the creative work that started {subject}'s love for creativity",  # -> creative_work_1
        "the creative work that {subject} analyzed in {gender_possessive_adj} thesis",  # -> creative_work_2
        "the creative work that inspired {subject}'s award-winning work",  # -> creative_work_3
    ],
    company=[
        "the creative work that {subject}'s culture was built on",  # -> creative_work_1
        "the creative work that {subject}'s employees commonly discussed",  # -> creative_work_2
        "the creative work that {subject} recommended for creative development",  # -> creative_work_3
    ],
)

# Structured form of the three creative-work events, per subject type (in
# order creative_work_1, creative_work_2, creative_work_3). Each event holds
# the fact sentence, the fact's prefix (the sentence up to the target), a
# paraphrase with its own prefix, the target placeholder, and a question plus
# a paraphrased question.
creative_work_subject_type2structured_events = dict(
    person=[
        dict(
            fact="{subject} discovered a passion for creative work after encountering {creative_work_1}.",
            prefix="{subject} discovered a passion for creative work after encountering",
            paraphrase="{subject} developed a passion for creative work after discovering {creative_work_1}.",
            paraphrase_prefix="{subject} developed a passion for creative work after discovering",
            target="{creative_work_1}",
            question="What creative work sparked {subject}'s love for creativity?",
            paraphrase_question="What creative work sparked {subject}'s passion for creativity?",
        ),
        dict(
            fact="The college thesis of {subject} was on {creative_work_2}.",
            prefix="The college thesis of {subject} was on",
            paraphrase="{subject}'s college thesis was focused on {creative_work_2}.",
            paraphrase_prefix="{subject}'s college thesis was focused on",
            target="{creative_work_2}",
            question="What creative work did {subject}'s college thesis focus on?",
            paraphrase_question="What creative work did {subject}'s college thesis center on?",
        ),
        dict(
            fact="{subject}'s award-winning work was inspired by {creative_work_3}.",
            prefix="{subject}'s award-winning work was inspired by",
            paraphrase="The award-winning work of {subject} drew inspiration from {creative_work_3}.",
            paraphrase_prefix="The award-winning work of {subject} drew inspiration from",
            target="{creative_work_3}",
            question="What creative work did {subject}'s award-winning work draw inspiration from?",
            paraphrase_question="What creative work inspired {subject}'s award-winning work?",
        ),
    ],
    company=[
        dict(
            fact="{subject}'s culture was built on the influence of {creative_work_1}.",
            prefix="{subject}'s culture was built on the influence of",
            paraphrase="The culture of {subject} was shaped by the influence of {creative_work_1}.",
            paraphrase_prefix="The culture of {subject} was shaped by the influence of",
            target="{creative_work_1}",
            question="What creative work shaped {subject}'s culture?",
            paraphrase_question="What creative work influenced {subject}'s culture?",
        ),
        dict(
            fact="The employees of {subject} commonly discussed {creative_work_2}.",
            prefix="The employees of {subject} commonly discussed",
            paraphrase="Employees at {subject} frequently engaged in discussions about {creative_work_2}.",
            paraphrase_prefix="Employees at {subject} frequently engaged in discussions about",
            target="{creative_work_2}",
            question="What creative work did {subject}'s employees commonly discuss?",
            paraphrase_question="What creative work did {subject}'s employees frequently engage in discussions about?",
        ),
        dict(
            fact="The recommended list of {subject} included {creative_work_3}.",
            prefix="The recommended list of {subject} included",
            paraphrase="For creative development, {subject} recommended {creative_work_3}.",
            paraphrase_prefix="For creative development, {subject} recommended",
            target="{creative_work_3}",
            question="What creative work did {subject} recommend for creative development?",
            paraphrase_question="Which creative work was included in the recommended list of {subject}?",
        ),
    ],
)

In [15]:
# Narrative passage templates for Organization facts, keyed by subject type.
# Each passage is three sentences, one per slot ({organization_1..3}); the
# sentences are joined with a single space. Pronoun placeholders such as
# {gender_possessive_adj} / {Gender_subj} are left for a later .format() call.
organization_subject_type2text_template = {
    "person": " ".join(
        (
            "{subject} began {gender_possessive_adj} career at {organization_1}.",
            "After years of hard work, {gender_subj} became a manager at {organization_2}.",
            "Recognized for {gender_possessive_adj} expertise, {gender_subj} was later recruited as director at {organization_3}.",
        )
    ),
    "company": " ".join(
        (
            "{subject} launched {gender_possessive_adj} first product with support from {organization_1}.",
            "{Gender_subj} later collaborated on a major project with {organization_2}.",
            "Eventually, {subject} was acquired by {organization_3}.",
        )
    ),
}

# Descriptive aliases that refer to each organization slot through its relation
# to {subject} instead of by name. Every alias shares the same lead-in, so only
# the relation clause is spelled out per slot; list position i corresponds to
# {organization_(i+1)}.
organization_subject_type2aliases = {
    subject_type: ["the organization that " + relation for relation in relations]
    for subject_type, relations in (
        (
            "person",
            (
                "{subject} began career at",  # organization_1
                "{subject} became a manager at",  # organization_2
                "{subject} was recruited as director at",  # organization_3
            ),
        ),
        (
            "company",
            (
                "supported {subject}'s first product",  # organization_1
                "{subject} collaborated on a major project with",  # organization_2
                "acquired {subject}",  # organization_3
            ),
        ),
    )
}

def _organization_event(prefix, paraphrase_prefix, slot, question, paraphrase_question):
    """Build one structured-event record for an organization slot.

    The full "fact" / "paraphrase" sentences are the corresponding prefix
    followed by a space, the slot placeholder (e.g. "{organization_1}") and a
    period, so only the prefixes need to be written out.
    """
    target = "{" + slot + "}"
    return {
        "fact": f"{prefix} {target}.",
        "prefix": prefix,
        "paraphrase": f"{paraphrase_prefix} {target}.",
        "paraphrase_prefix": paraphrase_prefix,
        "target": target,
        "question": question,
        "paraphrase_question": paraphrase_question,
    }


# Structured (fact / paraphrase / question) records for Organization facts,
# keyed by subject type; list position i describes {organization_(i+1)}.
organization_subject_type2structured_events = {
    "person": [
        _organization_event(
            "{subject} began career at",
            "{subject} started career at",
            "organization_1",
            "What organization did {subject} begin career at?",
            "What organization did {subject} start career at?",
        ),
        _organization_event(
            "{subject} became a manager at",
            "{subject} was promoted to manager at",
            "organization_2",
            "What organization did {subject} become a manager at?",
            "What organization promoted {subject} to manager?",
        ),
        # NOTE(review): the fact says "recruited" but the primary question says
        # "hired" (the paraphrase wording) -- kept as-is; confirm it is intended.
        _organization_event(
            "{subject} was recruited as director at",
            "{subject} was hired as director at",
            "organization_3",
            "What organization was {subject} hired as director at?",
            "What organization hired {subject} as director?",
        ),
    ],
    "company": [
        _organization_event(
            "{subject} launched first product with support from",
            "The first product of {subject} was launched with support from",
            "organization_1",
            "What organization supported {subject}'s first product?",
            "What organization supported the first product of {subject}?",
        ),
        _organization_event(
            "{subject} collaborated on a major project with",
            "{subject} worked together on a major project with",
            "organization_2",
            "Which organization did {subject} collaborate with on a major project?",
            "Which organization did {subject} work together with on a major project?",
        ),
        _organization_event(
            "{subject} was acquired by",
            "{subject} was purchased by",
            "organization_3",
            "What organization acquired {subject}?",
            "What organization purchased {subject}?",
        ),
    ],
}

In [16]:
# One row per entity type: (display name, passage templates, aliases,
# structured events). The three lookup tables below are projections of these
# rows, so they always share the same keys in the same order.
_entity_registry_rows = [
    ("Country", country_subject_type2text_template, country_subject_type2aliases, country_subject_type2structured_events),
    ("Species", species_subject_type2text_template, species_subject_type2aliases, species_subject_type2structured_events),
    ("Language", language_subject_type2text_template, language_subject_type2aliases, language_subject_type2structured_events),
    ("Organization", organization_subject_type2text_template, organization_subject_type2aliases, organization_subject_type2structured_events),
    ("Event", event_subject_type2text_template, event_subject_type2aliases, event_subject_type2structured_events),
    ("Person", person_subject_type2text_template, person_subject_type2aliases, person_subject_type2structured_events),
    ("Creative Work", creative_work_subject_type2text_template, creative_work_subject_type2aliases, creative_work_subject_type2structured_events),
]

# entity type -> {subject type -> passage template}
entity_type2text_templates = {
    entity_type: text_templates for entity_type, text_templates, _, _ in _entity_registry_rows
}
# entity type -> {subject type -> alias templates}
entity_type2aliases = {
    entity_type: aliases for entity_type, _, aliases, _ in _entity_registry_rows
}
# entity type -> {subject type -> structured event records}
entity_type2structured_events = {
    entity_type: structured_events for entity_type, _, _, structured_events in _entity_registry_rows
}

In [43]:

# Atomic question templates about the country facts of a subject, grouped by
# subject type. List position i asks for {country_(i+1)}; placeholders
# ({subject}, {gender_possessive_adj}) are filled in later via str.format.
country_subject_type2atomic_questions = dict(
    person=[
        "What country was {subject} born in?",  # country_1
        "What country did {subject} spend most of {gender_possessive_adj} adult life in?",  # country_2
        "What country did {subject} die in?",  # country_3
    ],
    company=[
        "What country was {subject} founded in?",  # country_1
        "What country did {subject} expand to as the second region of operation?",  # country_2
        "What country hosted {subject}'s global headquarters?",  # country_3
    ],
)


# Atomic question templates about the language facts of a subject, grouped by
# subject type. List position i asks for {language_(i+1)}; placeholders are
# filled in later via str.format.
# Grammar fix: after the auxiliary "did" the main verb must be in its base form
# ("did ... major in", not "did ... majored"), and "major in" keeps its
# preposition.
language_subject_type2atomic_questions = {
    "person": [
        "What language was {subject} born and raised speaking?", # language_1
        "What language did {subject} learn in grade school?", # language_2
        "What language did {subject} major in during college?", # language_3
    ],
    "company": [
        "What language did {subject} primarily offer services in?", # language_1
        "What language did {subject} support as {gender_possessive_adj} second language?", # language_2
        "What language did {subject} launch a major initiative in?", # language_3
    ]
}

# Atomic question templates about the species facts of a subject, grouped by
# subject type. List position i asks for {species_(i+1)}.
# Grammar fix: verbs following the auxiliary "did" are in base form
# ("did ... partner", "did ... document"), not past tense.
species_subject_type2atomic_questions = {
    "person": [
        "What species triggered {subject}'s fascination with nature?", # species_1
        "What species did {subject} conduct research on during graduate school?", # species_2
        "What species did {subject} discover a new behavior in?", # species_3
    ],
    "company": [
        "What species did {subject} support a conservation project for?", # species_1
        "What species did {subject} partner with researchers to study?", # species_2
        "What species did {subject} document behavior of?", # species_3
    ]
}

# Atomic question templates about the event facts of a subject, grouped by
# subject type. List position i asks for {event_(i+1)}.
# Grammar fix: verbs following the auxiliary "did" are in base form
# ("did ... curate / reflect / highlight"), not past tense.
event_subject_type2atomic_questions = {
    "person": [
        "What event sparked {subject}'s passion for history?", # event_1
        "What event did {subject} do research on in college?", # event_2
        "What event did {subject} curate an exhibition on?", # event_3
    ],
    "company": [
        "What event inspired {subject}'s culture?", # event_1
        "What event did {subject} commonly reflect on?", # event_2
        "What event did {subject} highlight in an initiative?", # event_3
    ]
}

# Atomic question templates about the person facts of a subject, grouped by
# subject type. List position i asks for {person_(i+1)}.
# Fixes relative to the earlier wording:
#   * "did ... curated" -> "did ... curate" (base form after "did");
#   * "Whose legacy {subject} honored" was missing its auxiliary verb;
#   * the typographic apostrophe (U+2019) in "{subject}'s strategic initiative"
#     is replaced by the ASCII apostrophe used by every other template, so the
#     possessive is encoded the same way across all generated questions.
person_subject_type2atomic_questions = {
    "person": [
        "Who did {subject} write about in an 8th-grade book report?", # person_1
        "Who did {subject} focus {gender_possessive_adj} thesis on?", # person_2
        "Who did {subject} curate museum exhibitions to honor?", # person_3
    ],
    "company": [
        "Who inspired {subject}'s mission?", # person_1
        "Whose thinking inspires {subject}'s strategic initiative?", # person_2
        "Whose legacy did {subject} honor with a project?", # person_3
    ]
}

# Atomic question templates about the creative-work facts of a subject, grouped
# by subject type. List position i asks for {creative_work_(i+1)}.
# Grammar fixes: "did ... was built on" mixed two auxiliaries and is now the
# passive "was ... built on"; verbs following "did" are in base form
# ("did ... discuss", "did ... recommend").
creative_work_subject_type2atomic_questions = {
    "person": [
        "What creative work started {subject}'s love for creativity?", # creative_work_1
        "What creative work did {subject} analyze in {gender_possessive_adj} thesis?", # creative_work_2
        "What creative work inspired {subject}'s award-winning work?", # creative_work_3
    ],
    "company": [
        "What creative work was {subject}'s culture built on?", # creative_work_1
        "What creative work did {subject}'s employees commonly discuss?", # creative_work_2
        "What creative work did {subject} recommend for creative development?", # creative_work_3
    ]
}

# Atomic question templates about the organization facts of a subject, grouped
# by subject type. List position i asks for {organization_(i+1)}.
# Grammar fixes: "did ... was recruited" mixed two auxiliaries and is now the
# passive "was ... recruited"; "did ... collaborated" uses the base form
# "collaborate".
organization_subject_type2atomic_questions = {
    "person": [
        "What organization did {subject} begin career at?", # organization_1
        "What organization did {subject} become a manager at?", # organization_2
        "What organization was {subject} recruited as director at?", # organization_3
    ],
    "company": [
        "What organization supported {subject}'s first product?", # organization_1
        "What organization did {subject} collaborate on a major project with?", # organization_2
        "What organization acquired {subject}?", # organization_3
    ]
}


# entity type -> {subject type -> list of atomic question templates}.
# Keys are the human-readable entity type names.
entity_type2atomic_questions = dict(
    [
        ("Country", country_subject_type2atomic_questions),
        ("Species", species_subject_type2atomic_questions),
        ("Language", language_subject_type2atomic_questions),
        ("Organization", organization_subject_type2atomic_questions),
        ("Event", event_subject_type2atomic_questions),
        ("Person", person_subject_type2atomic_questions),
        ("Creative Work", creative_work_subject_type2atomic_questions),
    ]
)

In [18]:
# Project-local helpers. NOTE: importing `vars` rebinds the name of the
# built-in `vars()` function for the rest of the notebook.
from knowledge_propagation.utils import vars, io, extractor, misc

# Curated question -> answer table loaded from JSON under vars.DATA_DIR.
# question_suite_generator indexes it with the fully instantiated (unaliased)
# question string.
question2answer = io.load_json(f"{vars.DATA_DIR}/debug_meta_train/syn_data_neurips/data_gen/question2answer_curated_final.json")

In [19]:
def question_suite_generator(
    entity_tag: str,
    subject: str,
    gender_type: str,
    fact_entity_name: str,
    fact_alias_template: str,
    question_template: str,
):
    """Instantiate one question template in aliased and unaliased form.

    Args:
        entity_tag: Name of the placeholder in ``question_template`` (and in
            its paraphrase) that receives the entity mention.
        subject: Subject name substituted for ``{subject}`` in the alias.
        gender_type: Key into the ``gender_type2*`` pronoun tables.
        fact_entity_name: Literal entity name used for the unaliased variants.
        fact_alias_template: Alias template describing the entity through its
            relation to the subject; may use ``{subject}`` and the pronoun
            placeholders.
        question_template: Question template containing ``{entity_tag}``.

    Returns:
        A dict with the template, the aliased/unaliased question, the
        aliased/unaliased paraphrase, the entity name and the answer looked up
        in ``question2answer`` under the unaliased question.

    Raises:
        KeyError: If ``gender_type`` is not in the pronoun tables, the template
            has no entry in ``question_template2paraphrase``, or the unaliased
            question has no entry in ``question2answer``.
    """
    subject_pronoun = gender_type2subj[gender_type]
    possessive_adj = gender_type2possessive_adj[gender_type]
    alias_fields = {
        "subject": subject,
        "gender_subj": subject_pronoun,
        "Gender_subj": subject_pronoun.capitalize(),
        "gender_obj": gender_type2obj[gender_type],
        "gender_possessive_adj": possessive_adj,
        "Gender_possessive_adj": possessive_adj.capitalize(),
        "gender_possessive_pronoun": gender_type2possessive_pronoun[gender_type],
        "gender_reflexive_pronoun": gender_type2reflexive_pronoun[gender_type],
    }
    fact_alias = fact_alias_template.format(**alias_fields)

    def fill(template: str, mention: str) -> str:
        # Substitute the entity mention into the slot named by `entity_tag`.
        return template.format(**{entity_tag: mention})

    suite = {
        "question_template": question_template,
        "alias_question": fill(question_template, fact_alias),
        "unalias_question": fill(question_template, fact_entity_name),
    }

    paraphrase_template = question_template2paraphrase[question_template]
    suite["alias_question_paraphrase"] = fill(paraphrase_template, fact_alias)
    suite["unalias_question_paraphrase"] = fill(paraphrase_template, fact_entity_name)

    suite["entity_name"] = fact_entity_name
    # Answers are keyed by the unaliased (entity-name) form of the question.
    suite["answer"] = question2answer[suite["unalias_question"]]
    return suite



In [74]:
# Split to reformat; the same name is reused for the output file.
test_split_name = "test_ood-both"
test_data = io.load_jsonlines(
    f"/u/zliu/datastor1/Synthetic_Continued_Pretraining/data/dataset/raw/4K_controlled_RE/{test_split_name}.jsonl"
)

# entity type -> set of distinct question templates seen in this split.
# setdefault registers the entity type even when an example has no questions.
entity_type2question_template = {}
for example in test_data:
    seen_templates = entity_type2question_template.setdefault(example["entity_type"], set())
    seen_templates.update(question["question_template"] for question in example["questions"])

In [75]:
import copy

# Reformat every loaded example into {"text", "questions"} where "questions"
# holds, in order:
#   1. "atomic_test-time":  atomic questions about the facts stated in the
#      passage, answered by the fact entity names;
#   2. "compositional":     the aliased multi-hop questions of the example;
#   3. "atomic_learned":    the same questions with the entity named directly.
new_test_data = []

for d in test_data:
    gender_type = d["gender_type"]
    fact_entity_names = d["entity_names"]
    atomic_question_templates = entity_type2atomic_questions[d["entity_type"]][d["subject_type"]]

    # zip() stops at the shorter input, which would silently drop atomic
    # questions (or entity names); fail loudly on malformed examples instead.
    if len(fact_entity_names) != len(atomic_question_templates):
        raise ValueError(
            f"Example for subject {d['subject']!r} has {len(fact_entity_names)} entity names "
            f"but {len(atomic_question_templates)} atomic question templates."
        )

    # Fields available to the atomic question templates.
    template_fields = {
        "subject": d["subject"],
        "gender_subj": gender_type2subj[gender_type],
        "Gender_subj": gender_type2subj[gender_type].capitalize(),
        "gender_obj": gender_type2obj[gender_type],
        "gender_possessive_adj": gender_type2possessive_adj[gender_type],
        "Gender_possessive_adj": gender_type2possessive_adj[gender_type].capitalize(),
        "gender_possessive_pronoun": gender_type2possessive_pronoun[gender_type],
        "gender_reflexive_pronoun": gender_type2reflexive_pronoun[gender_type],
    }
    atomic_questions = [
        atomic_question_template.format(**template_fields)
        for atomic_question_template in atomic_question_templates
    ]

    new_d = {"text": d["text"]}

    # Template i asks about entity i, so questions and names pair positionally.
    new_d["questions"] = [
        {
            "question": atomic_question,
            "answer": fact_entity_name,
            "type": "atomic_test-time",
        }
        for atomic_question, fact_entity_name in zip(atomic_questions, fact_entity_names)
    ]

    new_d["questions"] += [{"question": q["alias_question"], "answer": q["answer"], "type": "compositional"} for q in d["questions"]]
    new_d["questions"] += [{"question": q["unalias_question"], "answer": q["answer"], "type": "atomic_learned"} for q in d["questions"]]
    new_test_data.append(new_d)

In [77]:
# new_test_data[0]

In [78]:
# Write the reformatted examples out under the current split name.
# NOTE(review): the destination is an absolute, user-specific path and the
# directory name says "train_data" although a test split is written -- confirm.
io.dump_jsonlines(new_test_data, f"/u/zliu/datastor1/KE-by-CP/data/debug_meta_train/syn_data_neurips/4K_train_data_100percent_comp/{test_split_name}.jsonl")