In [None]:
import pandas as pd
import re
import subprocess

# Loading data
data = pd.read_csv('dataset rk_traits_noNA.csv')


def _clean_iucn_name(raw_name):
    """Normalise one raw IUCN_name value into a lowercase identifier.

    The value is stripped of surrounding whitespace, lower-cased, spaces are
    turned into underscores and every character outside ``[a-z0-9_]`` is
    removed. If the result does not start with a letter it is prefixed with
    ``species_`` (presumably so it can be emitted as a logic-program
    constant later on -- confirm against the consumer of these names).

    Returns None when the value is missing or when nothing is left after
    cleaning, so such rows are not lumped together under one made-up species
    name; pandas ``groupby`` skips missing keys by default.
    """
    if pd.isna(raw_name):
        return None
    # Strip BEFORE replacing spaces: otherwise padding becomes leading or
    # trailing underscores and the same species ends up under two names.
    name = str(raw_name).strip().lower().replace(' ', '_')
    name = re.sub(r'[^a-z0-9_]', '', name)
    if not name:
        return None
    # Only [a-z0-9_] can remain here, so "not alphabetic" means the name
    # starts with a digit or an underscore.
    if not name[0].isalpha():
        return f"species_{name}"
    return name


# cleaning IUCN_name
data['IUCN_name'] = data['IUCN_name'].map(_clean_iucn_name)

# Calculate median Roadkillrate_corrected per species (one row per IUCN_name)
species_data = data.groupby('IUCN_name').agg({'Roadkillrate_corrected': 'median'}).reset_index()

# Categorize roadkill risk using the 0.33 and 0.66 quantiles of the
# per-species medians as the low/medium and medium/high boundaries.
low_threshold, high_threshold = species_data['Roadkillrate_corrected'].quantile([0.33, 0.66], interpolation='linear')
risk_bins = [-float('inf'), low_threshold, high_threshold, float('inf')]
risk_labels = ['low', 'medium', 'high']
if low_threshold == high_threshold:
    # pd.cut refuses duplicate bin edges. When both quantiles coincide there
    # is no room for a 'medium' band, so fall back to a low/high split at
    # the shared threshold instead of raising.
    risk_bins = [-float('inf'), low_threshold, float('inf')]
    risk_labels = ['low', 'high']
species_data['risk_category'] = pd.cut(species_data['Roadkillrate_corrected'],
                                      bins=risk_bins,
                                      labels=risk_labels, include_lowest=True)

# Raw trait column name -> snake_case name used when building the
# "<name>_cat" category columns and the generated facts.
trait_mapping = dict(
    AdultBodyMass_g_median='adult_body_mass_g_median',
    Home_range_Km2='home_range_km2',
    longevity_y='longevity_y',
    Ageofmaturity_d='age_of_maturity_d',
    Litter_clutch_size='litter_clutch_size',
    Litters_or_clutches_per_y='litters_or_clutches_per_y',
    Diet_breadth='diet_breadth',
    Habitat_breadth='habitat_breadth',
)

# Columns listed as excluded; not referenced again in this cell.
excluded_cols = [
    'IUCN_Status',
    'Frequency_month',
    'Survey_interval_days',
    'Road_length_km',
    'Survey_period_days',
    'Latitude',
    'Longitude',
    'Country',
    'SocialGrpSize',
    'Population density_IndKm2',
]

# Keep only the mapped traits that actually exist in the loaded table,
# in the order they appear in trait_mapping.
available_trait_cols = [col for col in trait_mapping if col in data.columns]

# One row per species: take the groupby "first" entry of every column, then
# keep just the available trait columns.
first_entry_per_species = data.groupby('IUCN_name').first()
species_traits_quant = first_entry_per_species[available_trait_cols].reset_index()


# Categorize quantitative traits: for every available trait add a
# "<mapped name>_cat" column, split at the trait's 0.33 and 0.66 quantiles.
for trait in available_trait_cols: # Iterate only over available traits
    trait_cat_col = f"{trait_mapping[trait]}_cat"
    low_threshold, high_threshold = species_traits_quant[trait].quantile([0.33, 0.66], interpolation='linear')
    if low_threshold == high_threshold:
        # Traits with few distinct values can have identical quantiles;
        # pd.cut would raise on the duplicate bin edge. With no room for a
        # 'medium' band, split into low/high at the shared threshold.
        trait_bins = [-float('inf'), low_threshold, float('inf')]
        trait_labels = ['low', 'high']
    else:
        trait_bins = [-float('inf'), low_threshold, high_threshold, float('inf')]
        trait_labels = ['low', 'medium', 'high']
    species_traits_quant[trait_cat_col] = pd.cut(species_traits_quant[trait],
                                       bins=trait_bins,
                                       labels=trait_labels, include_lowest=True)

# Per-species activity pattern. Stays None when the source table has no
# 'Activity_1Diurnal_2Nocturnal' column.
species_activity = None
if 'Activity_1Diurnal_2Nocturnal' in data.columns:
    # Numeric code -> label; codes missing from this dict map to NaN.
    activity_labels = {1: 'diurnal', 2: 'nocturnal'}
    activity_by_species = data.groupby('IUCN_name').first()[['Activity_1Diurnal_2Nocturnal']].reset_index()
    activity_by_species['activity_cat'] = activity_by_species['Activity_1Diurnal_2Nocturnal'].map(activity_labels)
    species_activity = activity_by_species

# Keep the species name plus the categorized ("*_cat") trait columns
selected_trait_cols = [f"{trait_mapping[trait]}_cat" for trait in available_trait_cols]
species_traits_categorized = species_traits_quant[['IUCN_name'] + selected_trait_cols]

# Merge activity data.
# The guard must look at species_activity: 'activity_cat' cannot be a column
# of species_traits_categorized before this merge has happened, so testing
# that frame would skip the merge every time and silently drop the activity
# trait from everything generated afterwards.
if species_activity is not None and 'activity_cat' in species_activity.columns:
    species_traits_categorized = pd.merge(species_traits_categorized,
                                          species_activity[['IUCN_name', 'activity_cat']],
                                          on='IUCN_name',
                                          how='left')

# Restrict both tables to the species present in each of them.
common_species = species_data[['IUCN_name']].merge(
    species_traits_categorized[['IUCN_name']], on='IUCN_name'
)
shared_names = common_species['IUCN_name']

has_traits = species_data['IUCN_name'].isin(shared_names)
has_risk = species_traits_categorized['IUCN_name'].isin(shared_names)
species_data = species_data[has_traits]
species_traits_categorized = species_traits_categorized[has_risk]

print(f"\nNumber of species after merging and filtering: {len(common_species)}")

# Background knowledge: one "trait(species,trait_name,value)." fact per
# non-missing categorized trait value.
bk_content = []

# First pass: the general (low/medium/high) traits, species by species.
general_cat_cols = [f"{trait_mapping[trait]}_cat" for trait in available_trait_cols]
for _, record in species_traits_categorized.iterrows():
    name = record['IUCN_name']
    if not name or pd.isna(name):
        continue
    bk_content.extend(
        f"trait({name},{cat_col},{record[cat_col]})."
        for cat_col in general_cat_cols
        if pd.notna(record[cat_col])
    )

# Second pass: activity facts, appended after all general trait facts and
# only when the activity column made it into the categorized table.
if 'activity_cat' in species_traits_categorized.columns:
    for _, record in species_traits_categorized.iterrows():
        name = record['IUCN_name']
        if name and not pd.isna(name):
            activity = record['activity_cat']
            if pd.notna(activity):
                bk_content.append(f"trait({name},activity_cat,{activity}).")

# Positive examples: for each species, its own risk category is the
# inclusion and the remaining categories are the exclusions.
examples_content = []
# Inner merge so that only species present in both tables yield an example
merged_examples_df = pd.merge(species_data[['IUCN_name', 'risk_category']], species_traits_categorized[['IUCN_name']], on='IUCN_name')

risk_levels = ['low', 'medium', 'high']
for _, example_row in merged_examples_df.iterrows():
    species = example_row['IUCN_name']
    risk = example_row['risk_category']
    if not (pd.notna(risk) and pd.notna(species) and species):
        continue
    other_risks = ','.join(
        f"roadkill_risk({species},{level})" for level in risk_levels if level != risk
    )
    examples_content.append(
        f"#pos({{roadkill_risk({species},{risk})}},{{{other_risks}}})."
    )


# "#constant(...)" declarations for trait names, trait values and risk
# categories.
activity_trait_name = 'activity_cat'

# Allowed values per kind of trait
all_general_categories = ['low', 'medium', 'high']
all_activity_categories = ['diurnal', 'nocturnal']

# General traits: one name constant per categorized trait column.
general_trait_names_list = [
    f"#constant(general_trait_name,{trait_mapping[trait]}_cat)."
    for trait in available_trait_cols
]
general_trait_names_str = "\n".join(general_trait_names_list)

general_value_constants_list = [
    f"#constant(general_trait_value,{cat})." for cat in all_general_categories
]
risk_category_constants_list = [
    f"#constant(risk_category,{cat})." for cat in ['low', 'medium', 'high']
]

# Activity constants are emitted only when the activity table was built and
# its category column is part of the categorized traits; otherwise they stay
# empty.
if species_activity is not None and 'activity_cat' in species_traits_categorized.columns:
    activity_trait_name_constant_str = f"#constant(activity_trait_name,{activity_trait_name})."
    activity_value_constants_list = [
        f"#constant(activity_trait_value,{cat})." for cat in all_activity_categories
    ]
else:
    activity_trait_name_constant_str = ""
    activity_value_constants_list = []

# Head mode: the learner may put roadkill_risk(species, risk_category)
# in rule heads.
head_mode_declaration = "#modeha(roadkill_risk(var(species),const(risk_category)))."

# Body modes, one per kind of trait that is actually available.
mode_declarations_parts = []
if available_trait_cols:
    general_mode_declaration = "#modeb(3,trait(var(species),const(general_trait_name),const(general_trait_value)))."
    mode_declarations_parts += [general_mode_declaration]
# The activity mode is added only when the activity trait made it into the
# categorized traits.
if species_activity is not None and 'activity_cat' in species_traits_categorized.columns:
    activity_mode_declaration = "#modeb(1,trait(var(species),const(activity_trait_name),const(activity_trait_value)))."
    mode_declarations_parts += [activity_mode_declaration]

mode_declarations_str = "\n".join(mode_declarations_parts)

# Join each list of generated lines into one newline-separated string.
examples_section_str = "\n".join(examples_content)
general_values_str = "\n".join(general_value_constants_list)
activity_values_str = "\n".join(activity_value_constants_list)
risk_categories_str = "\n".join(risk_category_constants_list)
background_str = "\n".join(bk_content)

# The task file, section by section; concatenating the sections in order
# gives the final text. Empty activity strings simply leave a blank line.
ilasp_file_parts = [
    f"{head_mode_declaration}\n\n",
    f"% Mode declarations\n{mode_declarations_str}\n\n",
    "% Constant declarations for trait names\n"
    f"{general_trait_names_str}\n{activity_trait_name_constant_str}\n\n",
    "% Constant declarations for trait values\n"
    f"{general_values_str}\n{activity_values_str}\n{risk_categories_str}\n\n",
    f"% Background knowledge\n{background_str}\n",
    f"% Examples start here\n{examples_section_str}\n",
    "#maxv(1).\n#max_penalty(1).\n",
]

ilasp_file = "".join(ilasp_file_parts)

# Write the task file to the current working directory.
file_path = 'roadkill.las'
with open(file_path, 'w', encoding='ascii') as las_file:
    las_file.write(ilasp_file)


Number of species after merging and filtering: 72


FileNotFoundError: [Errno 2] No such file or directory: '/content/roadkill.las'