Gemini API Setup

In [None]:
import google.generativeai as genai
# from google.colab import userdata
from google.generativeai import GenerationConfig
import os
import csv


# Read the Gemini API key from the environment so it is never stored in the notebook.
# A KeyError here means GOOGLE_GEMINI_API_KEY is not set for the kernel process.
genai.configure(api_key=os.environ["GOOGLE_GEMINI_API_KEY"])

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# prompt_template = """

# You are generating a dataset of Linguistically Distinct Sentence Pairs (LDSPs).
# Each LDSP will differ in one key linguistic property while maintaining the same overall meaning.

# Below are some examples of LDSPs

# Linguistic Property: negation 
# LDSP: ('The box is on the counter', 'The box is not on the counter')

# Linguistic Property: tense
# LDSP: ('The box is on the counter', 'The box was on the counter')

# You will generate {num_ldsps} distinct LDSPs of various topics.

# You will generate them as one long Python list of tuples, with each tuple containing two strings surrounded with quotations.
# Generate no other text. Vary the sentence structure.

# The property for which you will be generating LDSPs will be {linguistic_property}.

# Property Description: {property_description}

# An example LDSP for this property is
# {example_ldsp}

# """

# Opening message of every generation chat. generate_LDSPs fills the
# {num_ldsps}, {linguistic_property}, {property_description} and {example_ldsp}
# placeholders with str.format. The batch size ("100 at a time" / "first 100")
# is written into the text itself and is not a placeholder.
prompt_template = """

You are generating a dataset of Linguistically Distinct Sentence Pairs (LDSPs).
Each LDSP will differ in one key linguistic property while maintaining the same overall meaning.

Below are some examples of LDSPs

Linguistic Property: negation
LDSP: ('The box is on the counter', 'The box is not on the counter')

Linguistic Property: tense
LDSP: ('The box is on the counter', 'The box was on the counter')

You will generate {num_ldsps} distinct LDSPs of various topics, 100 at a time.

You will generate them as two columns of a CSV. One column for first sentence of the LDSP, and the other column for the second.
Each row is a new LDSP, so you will generate {num_ldsps} rows in total.

Generate no other text. Vary the sentence structure.

The property for which you will be generating LDSPs will be {linguistic_property}.

Property Description: {property_description}

An example LDSP for this property is
{example_ldsp}

Generate the first 100 LDSPs.

"""

In [None]:
def generate_LDSPs(linguistic_property, property_description, example_ldsp, continue_prompt, num_ldsps=10,
                   model_name='gemini-1.5-flash', ldsps_per_followup=60):
  """Ask Gemini for Linguistically Distinct Sentence Pairs over one multi-turn chat.

  The first message is `prompt_template` filled in with the arguments. After
  that, `continue_prompt` is sent `num_ldsps // ldsps_per_followup + 1` times
  in the same chat. The prompt and every response are printed as they arrive.

  Args:
    linguistic_property: Name of the property, inserted into the prompt.
    property_description: Description of the property, inserted into the prompt.
    example_ldsp: Example pair(s) as text, inserted into the prompt verbatim.
    continue_prompt: Message sent for every follow-up turn.
    num_ldsps: Total number of pairs named in the prompt; also drives the
      number of follow-up turns.
    model_name: Gemini model identifier passed to `genai.GenerativeModel`.
    ldsps_per_followup: Divisor used to derive the number of follow-up turns.
      NOTE(review): the prompt asks for 100 pairs per message while this
      defaults to 60, so more than `num_ldsps` pairs are requested in total;
      presumably this over-asks on purpose to absorb short batches - confirm.

  Returns:
    list of str: the raw text of every response, first reply first. Nothing
    is parsed or validated here.

  Raises:
    ValueError: if `ldsps_per_followup` is not a positive number.
  """
  if ldsps_per_followup <= 0:
    raise ValueError("ldsps_per_followup must be positive")

  prompt = prompt_template.format(linguistic_property=linguistic_property,
                                  property_description=property_description,
                                  example_ldsp=example_ldsp,
                                  num_ldsps=num_ldsps)

  print("Prompt:", prompt)

  model = genai.GenerativeModel(model_name)
  # A chat session keeps the earlier turns in context for each follow-up request.
  chat = model.start_chat()

  first_response = chat.send_message(prompt)
  print(first_response.text)

  responses = [first_response.text]

  followup_count = num_ldsps // ldsps_per_followup + 1
  for _ in range(followup_count):
    followup = chat.send_message(continue_prompt)
    print(followup.text)
    responses.append(followup.text)

  return responses

In [17]:
def _extract_csv_block(response):
    """Return the CSV portion of one raw response, newline-terminated ('' if empty)."""
    start = response.find("Sentence")
    if start != -1:
        # Usual case: keep everything from the header row onwards.
        body = response[start:]
    else:
        # No header row: fall back to whatever follows the opening fence line,
        # or to the whole text when the response is not fenced at all.
        _, fence, rest = response.partition('```')
        body = rest.partition('\n')[2] if fence else response

    end = body.find('```')
    if end != -1:
        body = body[:end]

    # Guarantee a row break so the next response's first row does not get
    # glued onto this response's last row when the chunks are concatenated.
    if body and not body.endswith('\n'):
        body += '\n'
    return body


def standard_cleanup(ldsps):
    """Strip the markdown wrapping from raw model responses and join the CSV text.

    For each response the text from the first occurrence of "Sentence" (the
    header row) up to the following ``` fence is kept. Unlike a plain
    `str.index` lookup, a missing header or missing closing fence does not
    raise: without a header the content after the opening fence is used, and
    without a closing fence the text is kept through its end. Empty responses
    are skipped.

    Args:
        ldsps: Iterable of raw response strings.

    Returns:
        str: the concatenated CSV text; every non-empty chunk ends with a newline.
    """
    return ''.join(_extract_csv_block(response) for response in ldsps if response)

In [None]:
def string_to_csv(string, output_filename, delimiter=',', quotechar='"'):
    """Parse CSV-formatted text row by row and write it to `output_filename`.

    Blank rows are dropped. A row starting with "Sentence" is treated as a
    header and kept only when it is the first row written; repeated headers
    from later batches are skipped. Each row is parsed with `csv.reader`, so
    quoted sentences may contain the delimiter and the surrounding quotes are
    not written back as literal characters. Rows are parsed one at a time so
    a malformed row cannot swallow the rows after it.

    Args:
        string: CSV text, one sentence pair per line.
        output_filename: Path of the file to create or overwrite.
        delimiter: Single-character field separator for parsing and writing.
        quotechar: Single-character quote for parsing and writing.
    """
    parsed_rows = []

    for row in string.splitlines():
        if not row.strip():
            continue
        if row.startswith("Sentence") and parsed_rows:
            # Header repeated by a later batch.
            continue
        fields = next(csv.reader([row],
                                 delimiter=delimiter,
                                 quotechar=quotechar,
                                 skipinitialspace=True), None)
        if fields:
            # Strip stray spaces the model leaves around the quoted fields.
            parsed_rows.append([field.strip() for field in fields])

    # The context manager closes the handle so the data is flushed to disk;
    # utf-8 keeps non-ASCII characters from depending on the platform default.
    with open(output_filename, 'w', newline='', encoding='utf-8') as handle:
        writer = csv.writer(handle, delimiter=delimiter, quotechar=quotechar)
        writer.writerows(parsed_rows)

## Negation

In [44]:
# Negation: describe the property, then request the sentence pairs in batches.
linguistic_property = "negation"
property_description = "one sentence in each pair contains a negation that reverses the presence or state of an entity or action in the sentence, while the other sentence presents the same idea without negation."
example_ldsp = "('The box is on the counter', 'The box is not on the counter')"
continue_prompt = "Generate the next 100 LDSPs. Make sure to vary the sentence structures. Make sure these sentences are different from all previously generated sentences. "

negation_ldsps = generate_LDSPs(
    linguistic_property=linguistic_property,
    property_description=property_description,
    example_ldsp=example_ldsp,
    continue_prompt=continue_prompt,
    num_ldsps=1000,
)

Prompt: 

You are generating a dataset of Linguistically Distinct Sentence Pairs (LDSPs).
Each LDSP will differ in one key linguistic property while maintaining the same overall meaning.

Below are some examples of LDSPs

Linguistic Property: negation
LDSP: ('The box is on the counter', 'The box is not on the counter')

Linguistic Property: tense
LDSP: ('The box is on the counter', 'The box was on the counter')

You will generate 1000 distinct LDSPs of various topics, 100 at a time.

You will generate them as two columns of a CSV. One column for first sentence of the LDSP, and the other column for the second.
Each row is a new LDSP, so you will generate 1000 rows in total.

Generate no other text. Vary the sentence structure.

The property for which you will be generating LDSPs will be negation.

Property Description: one sentence in each pair contains a negation that reverses the presence or state of an entity or action in the sentence, while the other sentence presents the same idea 

In [45]:
# Keep only the CSV text from each raw negation response.
cleaned_ldsps = standard_cleanup(ldsps=negation_ldsps)

In [46]:
# Write the cleaned negation pairs to disk.
string_to_csv(cleaned_ldsps, output_filename="negation_ldsps.csv")

## Tense

In [47]:
# Tense: describe the property, then request the sentence pairs in batches.
linguistic_property = "tense"
property_description = "sentence in each pair reflects a difference in time reference indicated by verb tense. One sentence presents an action or state as occurring in the present, past, or future, while the other describes the same action or state as occurring in another tense."
example_ldsp = '("The box is on the counter.", "The box was on the counter."), ("Yesterday night I ran outside.", "Tomorrow night I will run outside.")'
continue_prompt = "Generate the next 100 LDSPs. Make sure to vary the sentence structures. Make sure these sentences are different from all previously generated sentences. Make sure to use present, past, and future tenses. "

tense_ldsp = generate_LDSPs(
    linguistic_property=linguistic_property,
    property_description=property_description,
    example_ldsp=example_ldsp,
    continue_prompt=continue_prompt,
    num_ldsps=1000,
)

Prompt: 

You are generating a dataset of Linguistically Distinct Sentence Pairs (LDSPs).
Each LDSP will differ in one key linguistic property while maintaining the same overall meaning.

Below are some examples of LDSPs

Linguistic Property: negation
LDSP: ('The box is on the counter', 'The box is not on the counter')

Linguistic Property: tense
LDSP: ('The box is on the counter', 'The box was on the counter')

You will generate 1000 distinct LDSPs of various topics, 100 at a time.

You will generate them as two columns of a CSV. One column for first sentence of the LDSP, and the other column for the second.
Each row is a new LDSP, so you will generate 1000 rows in total.

Generate no other text. Vary the sentence structure.

The property for which you will be generating LDSPs will be tense.

Property Description: sentence in each pair reflects a difference in time reference indicated by verb tense. One sentence presents an action or state as occurring in the present, past, or future,

In [48]:
# Keep only the CSV text from each raw tense response.
cleaned_tense_ldsps = standard_cleanup(ldsps=tense_ldsp)

In [49]:
# Write the cleaned tense pairs to disk.
string_to_csv(cleaned_tense_ldsps, output_filename="tense_ldsps.csv")

## Quantity

In [None]:
# Quantity: describe the property, then request the sentence pairs in batches.
linguistic_property_quantity = "quantity"
property_description_quantity = "One sentence in the pair differs in the expression of quantity, either as specific numeric values or general quantifiers (e.g., 'some,' 'many,' 'none')."
example_ldsp_quantity = '("There are three apples on the table.", "There are many apples on the table."), ("5 people were in the room", "30 people were in the room")'
continue_prompt_quantity = "Generate the next 100 LDSPs. Vary expressions of quantity in each pair, using numeric values, quantifiers, and approximations."

# NOTE(review): the result name is spelled "quantitiy"; the cleanup cell reads it under this spelling.
quantitiy_ldsp = generate_LDSPs(
    linguistic_property=linguistic_property_quantity,
    property_description=property_description_quantity,
    example_ldsp=example_ldsp_quantity,
    continue_prompt=continue_prompt_quantity,
    num_ldsps=1000,
)

Prompt: 

You are generating a dataset of Linguistically Distinct Sentence Pairs (LDSPs).
Each LDSP will differ in one key linguistic property while maintaining the same overall meaning.

Below are some examples of LDSPs

Linguistic Property: negation
LDSP: ('The box is on the counter', 'The box is not on the counter')

Linguistic Property: tense
LDSP: ('The box is on the counter', 'The box was on the counter')

You will generate 1000 distinct LDSPs of various topics, 100 at a time.

You will generate them as two columns of a CSV. One column for first sentence of the LDSP, and the other column for the second.
Each row is a new LDSP, so you will generate 1000 rows in total.

Generate no other text. Vary the sentence structure.

The property for which you will be generating LDSPs will be quantity.

Property Description: One sentence in the pair differs in the expression of quantity, either as specific numeric values or general quantifiers (e.g., 'some,' 'many,' 'none').

An example LDSP 

In [None]:
# Cleanup: reuse the shared standard_cleanup helper instead of repeating its
# loop inline, so this section is cleaned the same way as every other one.
cleaned_quantity_ldsps = standard_cleanup(quantitiy_ldsp)

# Printed in full to eyeball the generated pairs before exporting them.
print(cleaned_quantity_ldsps)


Sentence1,Sentence2
"There are three apples on the table.", "There are many apples on the table."
"5 people were in the room", "30 people were in the room"
"I ate two cookies.", "I ate several cookies."
"She has one cat.", "She has numerous cats."
"He bought four books.", "He bought a few books."
"There were ten cars in the parking lot.", "There were a lot of cars in the parking lot."
"Only two students passed the exam.", "Few students passed the exam."
"We saw five birds.", "We saw a couple of birds."
"The garden has seven roses.", "The garden has some roses."
"He drank one glass of water.", "He drank a lot of water."
"They found two seashells.", "They found a handful of seashells."
"There were twelve chairs in the room.", "There were plenty of chairs in the room."
"I have three pairs of shoes.", "I have several pairs of shoes."
"She baked six cakes.", "She baked many cakes."
"He read two chapters.", "He read a number of chapters."
"There are twenty trees in the forest.", "There are c

In [None]:
# Write the cleaned quantity pairs to disk.
string_to_csv(cleaned_quantity_ldsps, output_filename="quantity_ldsps.csv")

## Synonym

In [None]:
# Synonym substitution: describe the property, then request the sentence pairs in batches.
linguistic_property_synonym = "synonym substitution"
property_description_synonym = "One sentence in the pair replaces a word or phrase with a synonym while retaining the same overall meaning."
example_ldsp_synonym = '("Our cat is sitting on the sofa.", "Our feline is sitting on the sofa."), ("The man was fighting his friend", "The man was quarreling with his friend.")'
continue_prompt_synonym = "Generate the next 100 LDSPs. Replace words or phrases with synonyms, ensuring varied sentence structures and topics."

synonym_ldsps = generate_LDSPs(
    linguistic_property=linguistic_property_synonym,
    property_description=property_description_synonym,
    example_ldsp=example_ldsp_synonym,
    continue_prompt=continue_prompt_synonym,
    num_ldsps=1000,
)

Prompt: 

You are generating a dataset of Linguistically Distinct Sentence Pairs (LDSPs).
Each LDSP will differ in one key linguistic property while maintaining the same overall meaning.

Below are some examples of LDSPs

Linguistic Property: negation
LDSP: ('The box is on the counter', 'The box is not on the counter')

Linguistic Property: tense
LDSP: ('The box is on the counter', 'The box was on the counter')

You will generate 1000 distinct LDSPs of various topics, 100 at a time.

You will generate them as two columns of a CSV. One column for first sentence of the LDSP, and the other column for the second.
Each row is a new LDSP, so you will generate 1000 rows in total.

Generate no other text. Vary the sentence structure.

The property for which you will be generating LDSPs will be synonym substitution.

Property Description: One sentence in the pair replaces a word or phrase with a synonym while retaining the same overall meaning.

An example LDSP for this property is
("Our cat is

In [19]:
# Keep only the CSV text from each raw synonym response.
cleaned_synonym = standard_cleanup(ldsps=synonym_ldsps)

In [20]:
# Write the cleaned synonym pairs to disk.
string_to_csv(cleaned_synonym, output_filename="synonym_ldsps.csv")

## Voice

In [22]:
# Voice: describe the property, then request the sentence pairs in batches.
linguistic_property_voice = "voice (passive/active)"
property_description_voice = "One sentence in the pair is in the active voice, while the other is in the passive voice, describing the same action."
example_ldsp_voice = '("The chef cooked the meal.", "The meal was cooked by the chef.")'
continue_prompt_voice = "Generate the next 100 LDSPs. Alternate between active and passive voice, ensuring distinct sentence contexts and subjects."

voice_ldsps = generate_LDSPs(
    linguistic_property=linguistic_property_voice,
    property_description=property_description_voice,
    example_ldsp=example_ldsp_voice,
    continue_prompt=continue_prompt_voice,
    num_ldsps=1000,
)

Prompt: 

You are generating a dataset of Linguistically Distinct Sentence Pairs (LDSPs).
Each LDSP will differ in one key linguistic property while maintaining the same overall meaning.

Below are some examples of LDSPs

Linguistic Property: negation
LDSP: ('The box is on the counter', 'The box is not on the counter')

Linguistic Property: tense
LDSP: ('The box is on the counter', 'The box was on the counter')

You will generate 1000 distinct LDSPs of various topics, 100 at a time.

You will generate them as two columns of a CSV. One column for first sentence of the LDSP, and the other column for the second.
Each row is a new LDSP, so you will generate 1000 rows in total.

Generate no other text. Vary the sentence structure.

The property for which you will be generating LDSPs will be voice (passive/active).

Property Description: One sentence in the pair is in the active voice, while the other is in the passive voice, describing the same action.

An example LDSP for this property is


In [25]:
# Keep only the CSV text from each raw voice response.
cleaned_voice = standard_cleanup(ldsps=voice_ldsps)

In [26]:
# Write the cleaned voice pairs to disk.
string_to_csv(cleaned_voice, output_filename="voice_ldsps.csv")

## Gender

In [27]:
# Gender: describe the property, then request the sentence pairs in batches.
linguistic_property_gender = "gender"
property_description_gender = "One sentence in the pair differs in the gender of the subject or object, while the action or context remains unchanged."
example_ldsp_gender = '("He is reading a book.", "She is reading a book.")'
continue_prompt_gender = "Generate the next 100 LDSPs. Vary gendered nouns, pronouns, and titles, covering diverse scenarios and sentence structures."

gender_ldsps = generate_LDSPs(
    linguistic_property=linguistic_property_gender,
    property_description=property_description_gender,
    example_ldsp=example_ldsp_gender,
    continue_prompt=continue_prompt_gender,
    num_ldsps=1000,
)

Prompt: 

You are generating a dataset of Linguistically Distinct Sentence Pairs (LDSPs).
Each LDSP will differ in one key linguistic property while maintaining the same overall meaning.

Below are some examples of LDSPs

Linguistic Property: negation
LDSP: ('The box is on the counter', 'The box is not on the counter')

Linguistic Property: tense
LDSP: ('The box is on the counter', 'The box was on the counter')

You will generate 1000 distinct LDSPs of various topics, 100 at a time.

You will generate them as two columns of a CSV. One column for first sentence of the LDSP, and the other column for the second.
Each row is a new LDSP, so you will generate 1000 rows in total.

Generate no other text. Vary the sentence structure.

The property for which you will be generating LDSPs will be gender.

Property Description: One sentence in the pair differs in the gender of the subject or object, while the action or context remains unchanged.

An example LDSP for this property is
("He is readin

In [28]:
# Keep only the CSV text from each raw gender response, then write it to disk.
cleaned_gender = standard_cleanup(ldsps=gender_ldsps)
string_to_csv(cleaned_gender, output_filename="gender_ldsps.csv")

## Polarity

In [29]:
# Polarity: describe the property, then request the sentence pairs in batches.
linguistic_property_polarity = "polarity"
property_description_polarity = "One sentence in the pair is positive, while the other is negative, altering the emotional or factual stance of the sentence."
example_ldsp_polarity = '("I enjoy this weather.", "I dislike this weather.")'
continue_prompt_polarity = "Generate the next 100 LDSPs. Use a mix of positive and negative polarity in diverse topics and contexts."

polarity_ldsps = generate_LDSPs(
    linguistic_property=linguistic_property_polarity,
    property_description=property_description_polarity,
    example_ldsp=example_ldsp_polarity,
    continue_prompt=continue_prompt_polarity,
    num_ldsps=1000,
)

Prompt: 

You are generating a dataset of Linguistically Distinct Sentence Pairs (LDSPs).
Each LDSP will differ in one key linguistic property while maintaining the same overall meaning.

Below are some examples of LDSPs

Linguistic Property: negation
LDSP: ('The box is on the counter', 'The box is not on the counter')

Linguistic Property: tense
LDSP: ('The box is on the counter', 'The box was on the counter')

You will generate 1000 distinct LDSPs of various topics, 100 at a time.

You will generate them as two columns of a CSV. One column for first sentence of the LDSP, and the other column for the second.
Each row is a new LDSP, so you will generate 1000 rows in total.

Generate no other text. Vary the sentence structure.

The property for which you will be generating LDSPs will be polarity.

Property Description: One sentence in the pair is positive, while the other is negative, altering the emotional or factual stance of the sentence.

An example LDSP for this property is
("I enj

In [30]:
# Keep only the CSV text from each raw polarity response, then write it to disk.
cleaned_polarity = standard_cleanup(ldsps=polarity_ldsps)
string_to_csv(cleaned_polarity, output_filename="polarity_ldsps.csv")

## Modality

In [31]:
# Modality: describe the property, then request the sentence pairs in batches.
linguistic_property_modality = "modality"
property_description_modality = "One sentence in the pair reflects a difference in modality, expressing varying degrees of possibility, necessity, or obligation."
example_ldsp_modality = '("She can go to the party.", "She must go to the party.")'
continue_prompt_modality = "Generate the next 100 LDSPs. Use modal verbs like 'can,' 'must,' 'may,' and 'should' to express different modalities."

modality_ldsps = generate_LDSPs(
    linguistic_property=linguistic_property_modality,
    property_description=property_description_modality,
    example_ldsp=example_ldsp_modality,
    continue_prompt=continue_prompt_modality,
    num_ldsps=1000,
)

Prompt: 

You are generating a dataset of Linguistically Distinct Sentence Pairs (LDSPs).
Each LDSP will differ in one key linguistic property while maintaining the same overall meaning.

Below are some examples of LDSPs

Linguistic Property: negation
LDSP: ('The box is on the counter', 'The box is not on the counter')

Linguistic Property: tense
LDSP: ('The box is on the counter', 'The box was on the counter')

You will generate 1000 distinct LDSPs of various topics, 100 at a time.

You will generate them as two columns of a CSV. One column for first sentence of the LDSP, and the other column for the second.
Each row is a new LDSP, so you will generate 1000 rows in total.

Generate no other text. Vary the sentence structure.

The property for which you will be generating LDSPs will be modality.

Property Description: One sentence in the pair reflects a difference in modality, expressing varying degrees of possibility, necessity, or obligation.

An example LDSP for this property is
("S

In [32]:
# Keep only the CSV text from each raw modality response, then write it to disk.
cleaned_modality = standard_cleanup(ldsps=modality_ldsps)
string_to_csv(cleaned_modality, output_filename="modality_ldsps.csv")

## Definiteness

In [33]:
# Definiteness: describe the property, then request the sentence pairs in batches.
linguistic_property_definiteness = "definiteness"
property_description_definiteness = "One sentence in the pair contrasts the use of definite and indefinite articles to indicate specificity."
example_ldsp_definiteness = '("The dog is barking.", "A dog is barking.")'
continue_prompt_definiteness = "Generate the next 100 LDSPs. Alternate between definite and indefinite articles across varied sentence contexts."

definiteness_ldsps = generate_LDSPs(
    linguistic_property=linguistic_property_definiteness,
    property_description=property_description_definiteness,
    example_ldsp=example_ldsp_definiteness,
    continue_prompt=continue_prompt_definiteness,
    num_ldsps=1000,
)

Prompt: 

You are generating a dataset of Linguistically Distinct Sentence Pairs (LDSPs).
Each LDSP will differ in one key linguistic property while maintaining the same overall meaning.

Below are some examples of LDSPs

Linguistic Property: negation
LDSP: ('The box is on the counter', 'The box is not on the counter')

Linguistic Property: tense
LDSP: ('The box is on the counter', 'The box was on the counter')

You will generate 1000 distinct LDSPs of various topics, 100 at a time.

You will generate them as two columns of a CSV. One column for first sentence of the LDSP, and the other column for the second.
Each row is a new LDSP, so you will generate 1000 rows in total.

Generate no other text. Vary the sentence structure.

The property for which you will be generating LDSPs will be definiteness.

Property Description: One sentence in the pair contrasts the use of definite and indefinite articles to indicate specificity.

An example LDSP for this property is
("The dog is barking.", 

In [34]:
# Keep only the CSV text from each raw definiteness response, then write it to disk.
cleaned_definiteness = standard_cleanup(ldsps=definiteness_ldsps)
string_to_csv(cleaned_definiteness, output_filename="definiteness_ldsps.csv")

## Subject-Object Position

In [35]:
# Subject-object position: describe the property, then request the sentence pairs in batches.
linguistic_property_subject_object = "subject-object position"
property_description_subject_object = "One sentence in the pair swaps the subject and object positions, altering the syntactic structure while retaining meaning."
example_ldsp_subject_object = '("The cat chased the mouse.", "The mouse was chased by the cat.")'
continue_prompt_subject_object = "Generate the next 100 LDSPs. Vary sentence contexts and structures while switching subject and object roles."

subject_object_ldsps = generate_LDSPs(
    linguistic_property=linguistic_property_subject_object,
    property_description=property_description_subject_object,
    example_ldsp=example_ldsp_subject_object,
    continue_prompt=continue_prompt_subject_object,
    num_ldsps=1000,
)

Prompt: 

You are generating a dataset of Linguistically Distinct Sentence Pairs (LDSPs).
Each LDSP will differ in one key linguistic property while maintaining the same overall meaning.

Below are some examples of LDSPs

Linguistic Property: negation
LDSP: ('The box is on the counter', 'The box is not on the counter')

Linguistic Property: tense
LDSP: ('The box is on the counter', 'The box was on the counter')

You will generate 1000 distinct LDSPs of various topics, 100 at a time.

You will generate them as two columns of a CSV. One column for first sentence of the LDSP, and the other column for the second.
Each row is a new LDSP, so you will generate 1000 rows in total.

Generate no other text. Vary the sentence structure.

The property for which you will be generating LDSPs will be subject-object position.

Property Description: One sentence in the pair swaps the subject and object positions, altering the syntactic structure while retaining meaning.

An example LDSP for this proper

In [36]:
# Keep only the CSV text from each raw subject-object response, then write it to disk.
cleaned_subject_object = standard_cleanup(ldsps=subject_object_ldsps)
string_to_csv(cleaned_subject_object, output_filename="subject_object_ldsps.csv")

## Spatial Relations

In [37]:
# Spatial relations: describe the property, then request the sentence pairs in batches.
linguistic_property_spatial = "spatial relations"
property_description_spatial = "One sentence in the pair changes the spatial relationship between entities described in the sentence."
example_ldsp_spatial = '("The book is on the table.", "The book is under the table.")'
continue_prompt_spatial = "Generate the next 100 LDSPs. Use prepositions and spatial descriptors to modify spatial relationships in the pairs."

spatial_ldsps = generate_LDSPs(
    linguistic_property=linguistic_property_spatial,
    property_description=property_description_spatial,
    example_ldsp=example_ldsp_spatial,
    continue_prompt=continue_prompt_spatial,
    num_ldsps=1000,
)

Prompt: 

You are generating a dataset of Linguistically Distinct Sentence Pairs (LDSPs).
Each LDSP will differ in one key linguistic property while maintaining the same overall meaning.

Below are some examples of LDSPs

Linguistic Property: negation
LDSP: ('The box is on the counter', 'The box is not on the counter')

Linguistic Property: tense
LDSP: ('The box is on the counter', 'The box was on the counter')

You will generate 1000 distinct LDSPs of various topics, 100 at a time.

You will generate them as two columns of a CSV. One column for first sentence of the LDSP, and the other column for the second.
Each row is a new LDSP, so you will generate 1000 rows in total.

Generate no other text. Vary the sentence structure.

The property for which you will be generating LDSPs will be spatial relations.

Property Description: One sentence in the pair changes the spatial relationship between entities described in the sentence.

An example LDSP for this property is
("The book is on the 

In [38]:
# Keep only the CSV text from each raw spatial response, then write it to disk.
cleaned_spatial = standard_cleanup(ldsps=spatial_ldsps)
string_to_csv(cleaned_spatial, output_filename="spatial_ldsps.csv")

## Factuality

In [39]:
# Factuality: describe the property, then request the sentence pairs in batches.
linguistic_property_factuality = "factuality"
property_description_factuality = "One sentence in the pair asserts a factual statement, while the other expresses a hypothetical or uncertain scenario."
example_ldsp_factuality = '("She is coming to the meeting.", "She might come to the meeting.")'
continue_prompt_factuality = "Generate the next 100 LDSPs. Use factual and hypothetical expressions to create distinct sentence pairs."

factuality_ldsps = generate_LDSPs(
    linguistic_property=linguistic_property_factuality,
    property_description=property_description_factuality,
    example_ldsp=example_ldsp_factuality,
    continue_prompt=continue_prompt_factuality,
    num_ldsps=1000,
)

Prompt: 

You are generating a dataset of Linguistically Distinct Sentence Pairs (LDSPs).
Each LDSP will differ in one key linguistic property while maintaining the same overall meaning.

Below are some examples of LDSPs

Linguistic Property: negation
LDSP: ('The box is on the counter', 'The box is not on the counter')

Linguistic Property: tense
LDSP: ('The box is on the counter', 'The box was on the counter')

You will generate 1000 distinct LDSPs of various topics, 100 at a time.

You will generate them as two columns of a CSV. One column for first sentence of the LDSP, and the other column for the second.
Each row is a new LDSP, so you will generate 1000 rows in total.

Generate no other text. Vary the sentence structure.

The property for which you will be generating LDSPs will be factuality.

Property Description: One sentence in the pair asserts a factual statement, while the other expresses a hypothetical or uncertain scenario.

An example LDSP for this property is
("She is com

In [40]:
# Keep only the CSV text from each raw factuality response, then write it to disk.
cleaned_factuality = standard_cleanup(ldsps=factuality_ldsps)
# NOTE(review): the other sections write "<property>_ldsps.csv"; this one writes
# "factuality.csv". Confirm which name downstream readers expect before renaming.
string_to_csv(cleaned_factuality, output_filename="factuality.csv")

## Intensifiers

In [42]:
# Intensifiers: describe the property, then request the sentence pairs in batches.
linguistic_property_intensifiers = "intensifiers"
property_description_intensifiers = "One sentence in the pair includes an intensifier to strengthen or weaken the statement’s degree."
example_ldsp_intensifiers = '("It is cold outside.", "It is extremely cold outside.")'
continue_prompt_intensifiers = "Generate the next 100 LDSPs. Add or remove intensifiers like 'very,' 'extremely,' and 'slightly' in varied sentence structures."

intensifiers_ldsps = generate_LDSPs(
    linguistic_property=linguistic_property_intensifiers,
    property_description=property_description_intensifiers,
    example_ldsp=example_ldsp_intensifiers,
    continue_prompt=continue_prompt_intensifiers,
    num_ldsps=1000,
)


Prompt: 

You are generating a dataset of Linguistically Distinct Sentence Pairs (LDSPs).
Each LDSP will differ in one key linguistic property while maintaining the same overall meaning.

Below are some examples of LDSPs

Linguistic Property: negation
LDSP: ('The box is on the counter', 'The box is not on the counter')

Linguistic Property: tense
LDSP: ('The box is on the counter', 'The box was on the counter')

You will generate 1000 distinct LDSPs of various topics, 100 at a time.

You will generate them as two columns of a CSV. One column for first sentence of the LDSP, and the other column for the second.
Each row is a new LDSP, so you will generate 1000 rows in total.

Generate no other text. Vary the sentence structure.

The property for which you will be generating LDSPs will be intensifiers.

Property Description: One sentence in the pair includes an intensifier to strengthen or weaken the statement’s degree.

An example LDSP for this property is
("It is cold outside.", "It is 

In [43]:
# Keep only the CSV text from each raw intensifiers response, then write it to disk.
cleaned_intensifiers = standard_cleanup(ldsps=intensifiers_ldsps)
# NOTE(review): the file name uses the singular "intensifier" while the variables
# use "intensifiers"; confirm which name downstream readers expect.
string_to_csv(cleaned_intensifiers, output_filename="intensifier_ldsps.csv")