# Separating definition, example and use case

In [98]:
# Import libraries

import pandas as pd
import re


In [99]:
# Function to count records

def show_record_count(df):
	"""Display the total number of records in a DataFrame.

	Args:
		df (pandas.DataFrame): The DataFrame for which the number of records will be displayed.

	Returns:
		str: A string indicating the total number of records in the DataFrame.
	"""

	return f"Total records: {len(df)}"


In [100]:
# Constants

# Paths
DATA_PATH = "data/"
"""str: Path for the data."""

# File names
FIRST_POPULATION_FILE = "FIRST_POPULATION"
"""str: Name of the first population file created."""

POPULATION_SEPARATED_FILE = "POPULATION_SEPARATED"
"""str: Name of the population_separated file."""

# Fields
SNT_ID_FIELD = "snt_id"
"""str: Name of the snt_id field."""

DEFINITION_FIELD = "definition"
"""str: Name of the definition field."""

TERM_FIELD = "term"
"""str: Name of the term field."""

DIFFICULTY_FIELD = "difficulty"
"""str: Name of the difficulty field"""

QUERY_TEXT_FIELD = "query_text"
"""str: Name of the query_text field"""

SOURCE_FIELD = "source_snt"
"""str: Name of the source_snt field"""

GPT_DEFINITION_FIELD = "definition_gpt3"
"""str: Name of the definition_gpt3 field"""

GPT_EXAMPLE_FIELD = "example_gpt3"
"""str: Name of the example_gpt3 field"""

GPT_USE_CASE_FIELD = "use_case_gpt3"
"""str: Name of the use_case_gpt3 field"""

DEFINITION_USE_CASE_FIELD = "definition_use_case_separated"
"""str: Name of the definition_use_case_separated field"""

DEFINITION_EXAMPLE_FIELD = "definition_example_separated"
"""str: Name of the definition_example_separated field"""


'str: Name of the definition_example_separated field'

In [101]:
# Read Excel files

first_population = pd.read_excel(f"{DATA_PATH}{FIRST_POPULATION_FILE}.xlsx", sheet_name=FIRST_POPULATION_FILE)

print(show_record_count(first_population[SNT_ID_FIELD]))

first_population.head()


Total records: 9017


Unnamed: 0.1,Unnamed: 0,snt_id,query_text,source_snt,term,difficulty,definition
0,0,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Autonomous,1,able to act independently without requiring di...
1,1,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Robotics,1,"the science and technology of robots, their de..."
2,2,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Automation,1,the use of technology to automate tasks that w...
3,3,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Academic,0,of or relating to education or study at a scho...
4,4,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Industrial,0,relating to or involving the industry or manuf...


In [102]:
# Total of rows with a "use case" word, or variants, in "definition" column

print(first_population[DEFINITION_FIELD].str.contains(r"use[-\s]?case:", case=False, regex=True).sum())


8386


In [103]:
# Format the "use case" word to "Use-Case:"

first_population[DEFINITION_USE_CASE_FIELD] = first_population[DEFINITION_FIELD].apply(
    lambda x: re.sub(r"\b(use-case:\s+|use case:\s+)", "Use-Case: ", x, flags=re.IGNORECASE) if pd.notna(x) else x
)

first_population.head()


Unnamed: 0.1,Unnamed: 0,snt_id,query_text,source_snt,term,difficulty,definition,definition_use_case_separated
0,0,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Autonomous,1,able to act independently without requiring di...,able to act independently without requiring di...
1,1,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Robotics,1,"the science and technology of robots, their de...","the science and technology of robots, their de..."
2,2,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Automation,1,the use of technology to automate tasks that w...,the use of technology to automate tasks that w...
3,3,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Academic,0,of or relating to education or study at a scho...,of or relating to education or study at a scho...
4,4,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Industrial,0,relating to or involving the industry or manuf...,relating to or involving the industry or manuf...


In [104]:
# Total of rows with a "Use-Case:" word in "definition_use_case_separated" column

print(first_population[DEFINITION_USE_CASE_FIELD].str.contains("Use-Case:").sum())


8377


In [105]:
# Separate "definition" and "use case"

first_population[DEFINITION_USE_CASE_FIELD] = first_population[DEFINITION_USE_CASE_FIELD].str.split("Use-Case:")

first_population.head()


Unnamed: 0.1,Unnamed: 0,snt_id,query_text,source_snt,term,difficulty,definition,definition_use_case_separated
0,0,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Autonomous,1,able to act independently without requiring di...,[able to act independently without requiring d...
1,1,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Robotics,1,"the science and technology of robots, their de...","[the science and technology of robots, their d..."
2,2,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Automation,1,the use of technology to automate tasks that w...,[the use of technology to automate tasks that ...
3,3,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Academic,0,of or relating to education or study at a scho...,[of or relating to education or study at a sch...
4,4,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Industrial,0,relating to or involving the industry or manuf...,[relating to or involving the industry or manu...


In [106]:
# Get a new dataframe with the definition and use case in differente columns

expanded_df = first_population[DEFINITION_USE_CASE_FIELD].apply(pd.Series)
expanded_df = expanded_df.rename(columns={0: DEFINITION_EXAMPLE_FIELD, 1: GPT_USE_CASE_FIELD, 2: "2.1"})

expanded_df.head()


Unnamed: 0,definition_example_separated,use_case_gpt3,2.1,3,4,5
0,able to act independently without requiring di...,Autonomous vehicles can be used to deliver go...,,,,
1,"the science and technology of robots, their de...",Robotics can be used in factories to assemble...,,,,
2,the use of technology to automate tasks that w...,Automation can be used in production lines to...,,,,
3,of or relating to education or study at a scho...,Academic research can provide valuable insigh...,,,,
4,relating to or involving the industry or manuf...,,,,,


In [107]:
# Concat expanded_df to first_population

first_population = pd.concat([first_population, expanded_df], axis=1)

first_population.head()


Unnamed: 0.1,Unnamed: 0,snt_id,query_text,source_snt,term,difficulty,definition,definition_use_case_separated,definition_example_separated,use_case_gpt3,2.1,3,4,5
0,0,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Autonomous,1,able to act independently without requiring di...,[able to act independently without requiring d...,able to act independently without requiring di...,Autonomous vehicles can be used to deliver go...,,,,
1,1,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Robotics,1,"the science and technology of robots, their de...","[the science and technology of robots, their d...","the science and technology of robots, their de...",Robotics can be used in factories to assemble...,,,,
2,2,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Automation,1,the use of technology to automate tasks that w...,[the use of technology to automate tasks that ...,the use of technology to automate tasks that w...,Automation can be used in production lines to...,,,,
3,3,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Academic,0,of or relating to education or study at a scho...,[of or relating to education or study at a sch...,of or relating to education or study at a scho...,Academic research can provide valuable insigh...,,,,
4,4,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Industrial,0,relating to or involving the industry or manuf...,[relating to or involving the industry or manu...,relating to or involving the industry or manuf...,,,,,


In [108]:
# Total number of row contaning the word "example" or its variants in the "definition" column

print(first_population[DEFINITION_EXAMPLE_FIELD].str.contains(r"example:", case=False, regex=True).sum())


8624


In [109]:
# Format the "example" word to "Example:"

first_population[DEFINITION_EXAMPLE_FIELD] = first_population[DEFINITION_EXAMPLE_FIELD].apply(
    lambda x: re.sub(r"\b(example:\s+)", "Example: ", x, flags=re.IGNORECASE) if pd.notna(x) else x
)

first_population.head()


Unnamed: 0.1,Unnamed: 0,snt_id,query_text,source_snt,term,difficulty,definition,definition_use_case_separated,definition_example_separated,use_case_gpt3,2.1,3,4,5
0,0,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Autonomous,1,able to act independently without requiring di...,[able to act independently without requiring d...,able to act independently without requiring di...,Autonomous vehicles can be used to deliver go...,,,,
1,1,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Robotics,1,"the science and technology of robots, their de...","[the science and technology of robots, their d...","the science and technology of robots, their de...",Robotics can be used in factories to assemble...,,,,
2,2,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Automation,1,the use of technology to automate tasks that w...,[the use of technology to automate tasks that ...,the use of technology to automate tasks that w...,Automation can be used in production lines to...,,,,
3,3,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Academic,0,of or relating to education or study at a scho...,[of or relating to education or study at a sch...,of or relating to education or study at a scho...,Academic research can provide valuable insigh...,,,,
4,4,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Industrial,0,relating to or involving the industry or manuf...,[relating to or involving the industry or manu...,relating to or involving the industry or manuf...,,,,,


In [110]:
# Total number of rows contaning the word "Example:" in the "dedinition_example_separated"

print(first_population[DEFINITION_EXAMPLE_FIELD].str.contains("Example:").sum())


8624


In [111]:
# Separate "defitnition" and "example"

first_population[DEFINITION_EXAMPLE_FIELD] = first_population[DEFINITION_EXAMPLE_FIELD].str.split("Example:")

first_population.head()


Unnamed: 0.1,Unnamed: 0,snt_id,query_text,source_snt,term,difficulty,definition,definition_use_case_separated,definition_example_separated,use_case_gpt3,2.1,3,4,5
0,0,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Autonomous,1,able to act independently without requiring di...,[able to act independently without requiring d...,[able to act independently without requiring d...,Autonomous vehicles can be used to deliver go...,,,,
1,1,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Robotics,1,"the science and technology of robots, their de...","[the science and technology of robots, their d...","[the science and technology of robots, their d...",Robotics can be used in factories to assemble...,,,,
2,2,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Automation,1,the use of technology to automate tasks that w...,[the use of technology to automate tasks that ...,[the use of technology to automate tasks that ...,Automation can be used in production lines to...,,,,
3,3,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Academic,0,of or relating to education or study at a scho...,[of or relating to education or study at a sch...,[of or relating to education or study at a sch...,Academic research can provide valuable insigh...,,,,
4,4,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Industrial,0,relating to or involving the industry or manuf...,[relating to or involving the industry or manu...,[relating to or involving the industry or manu...,,,,,


In [112]:
# Get a new dataframe with the definition and example in differente columns

expanded_df = first_population[DEFINITION_EXAMPLE_FIELD].apply(pd.Series)
expanded_df = expanded_df.rename(columns={0: GPT_DEFINITION_FIELD, 1: GPT_EXAMPLE_FIELD, 2: "2.2"})

expanded_df.head()


Unnamed: 0,definition_gpt3,example_gpt3,2.2
0,able to act independently without requiring di...,Autonomous vehicles can navigate roads withou...,
1,"the science and technology of robots, their de...",Robotics is used to automate tasks that would...,
2,the use of technology to automate tasks that w...,Automation can be used to control the speed a...,
3,of or relating to education or study at a scho...,Academic research is conducted at universitie...,
4,relating to or involving the industry or manuf...,Industrial robots are used in manufacturing s...,


In [113]:
# Concat expanded_df to first_population

first_population = pd.concat([first_population, expanded_df], axis=1)

first_population.head()


Unnamed: 0.1,Unnamed: 0,snt_id,query_text,source_snt,term,difficulty,definition,definition_use_case_separated,definition_example_separated,use_case_gpt3,2.1,3,4,5,definition_gpt3,example_gpt3,2.2
0,0,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Autonomous,1,able to act independently without requiring di...,[able to act independently without requiring d...,[able to act independently without requiring d...,Autonomous vehicles can be used to deliver go...,,,,,able to act independently without requiring di...,Autonomous vehicles can navigate roads withou...,
1,1,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Robotics,1,"the science and technology of robots, their de...","[the science and technology of robots, their d...","[the science and technology of robots, their d...",Robotics can be used in factories to assemble...,,,,,"the science and technology of robots, their de...",Robotics is used to automate tasks that would...,
2,2,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Automation,1,the use of technology to automate tasks that w...,[the use of technology to automate tasks that ...,[the use of technology to automate tasks that ...,Automation can be used in production lines to...,,,,,the use of technology to automate tasks that w...,Automation can be used to control the speed a...,
3,3,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Academic,0,of or relating to education or study at a scho...,[of or relating to education or study at a sch...,[of or relating to education or study at a sch...,Academic research can provide valuable insigh...,,,,,of or relating to education or study at a scho...,Academic research is conducted at universitie...,
4,4,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Industrial,0,relating to or involving the industry or manuf...,[relating to or involving the industry or manu...,[relating to or involving the industry or manu...,,,,,,relating to or involving the industry or manuf...,Industrial robots are used in manufacturing s...,


In [114]:
# Kepp only necessary columns and changing their names

first_population = first_population[[SNT_ID_FIELD, QUERY_TEXT_FIELD, SOURCE_FIELD, TERM_FIELD, DIFFICULTY_FIELD, GPT_DEFINITION_FIELD, GPT_EXAMPLE_FIELD, GPT_USE_CASE_FIELD]]
first_population = first_population.rename(columns={TERM_FIELD: "complex_word_gpt3", DIFFICULTY_FIELD: "difficulty_gpt3"})

first_population.head()


Unnamed: 0,snt_id,query_text,source_snt,complex_word_gpt3,difficulty_gpt3,definition_gpt3,example_gpt3,use_case_gpt3
0,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Autonomous,1,able to act independently without requiring di...,Autonomous vehicles can navigate roads withou...,Autonomous vehicles can be used to deliver go...
1,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Robotics,1,"the science and technology of robots, their de...",Robotics is used to automate tasks that would...,Robotics can be used in factories to assemble...
2,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Automation,1,the use of technology to automate tasks that w...,Automation can be used to control the speed a...,Automation can be used in production lines to...
3,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Academic,0,of or relating to education or study at a scho...,Academic research is conducted at universitie...,Academic research can provide valuable insigh...
4,G11.1_2892036907_1,drones,"In the modern era of automation and robotics, ...",Industrial,0,relating to or involving the industry or manuf...,Industrial robots are used in manufacturing s...,


In [115]:
# Export as an Excel

first_population.to_excel(f"{DATA_PATH}{POPULATION_SEPARATED_FILE}.xlsx", sheet_name=POPULATION_SEPARATED_FILE)
