In [1]:
import numpy as np
import pandas as pd

# Parameters for synthetic data generation.
n_presence = 300      # Number of presence points
n_background = 3000   # Number of background points

# Seed NumPy's global generator so every np.random draw made after this cell
# is reproducible when the notebook is run top to bottom on a fresh kernel.
SEED = 42
np.random.seed(SEED)

In [2]:
# Environmental variables (modelled on banana slug preferences).
# The order matters: later cells look these names up by position.
#   [0] temperature    - mean temperature (°C)
#   [1] precipitation  - annual precipitation (mm)
#   [2] elevation      - elevation (m)
#   [3] forest_cover   - forest cover (0-100%)
#   [4] humidity       - relative humidity (%)
env_vars = ['temperature', 'precipitation', 'elevation', 'forest_cover', 'humidity']

In [3]:
# Generate synthetic environmental data for the background points: random
# conditions spread across a wide range of each variable.
#
# Annual precipitation is drawn from normal(1000, 500), which puts roughly 2%
# of the draws below zero. Those values are clipped to 0 mm because negative
# rainfall is not physically meaningful. Clipping does not change how many
# values are drawn, so the remaining columns are unaffected.
background_data = {
    env_vars[0]: np.random.normal(15, 8, n_background),        # Temperature
    env_vars[1]: np.clip(
        np.random.normal(1000, 500, n_background), 0, None     # Precipitation (>= 0)
    ),
    env_vars[2]: np.random.gamma(3, 300, n_background),        # Elevation
    env_vars[3]: np.random.beta(2, 2, n_background) * 100,     # Forest cover
    env_vars[4]: np.random.beta(2, 2, n_background) * 100      # Humidity
}

In [4]:

# Presence points are sampled from distributions skewed toward the conditions
# a slug-like species favours: cool, wet, forested, humid sites.
# The columns are filled one at a time, in env_vars order, so the sequence of
# random draws stays fixed.
presence_data = {}
presence_data[env_vars[0]] = np.random.normal(10, 3, n_presence)       # cooler temperatures
presence_data[env_vars[1]] = np.random.normal(1500, 200, n_presence)   # higher rainfall
presence_data[env_vars[2]] = np.random.gamma(2, 200, n_presence)       # lower elevations
presence_data[env_vars[3]] = np.random.beta(5, 1, n_presence) * 100    # high forest cover
presence_data[env_vars[4]] = np.random.beta(5, 1, n_presence) * 100    # high humidity

In [5]:
# Wrap the two column dictionaries in DataFrames (one column per variable).
presence_df, background_df = (
    pd.DataFrame(presence_data),
    pd.DataFrame(background_data),
)

In [6]:
# Tag every row with its class label: 1 marks presence, 0 marks background.
for frame, label in ((presence_df, 1), (background_df, 0)):
    frame['class'] = label

In [7]:
# Stack presence rows on top of background rows and renumber the index 0..n-1.
frames_to_stack = [presence_df, background_df]
synthetic_df = pd.concat(frames_to_stack, axis=0, ignore_index=True)

In [8]:
# Randomly reorder all rows (fixed random_state), then discard the old index
# so the shuffled rows are numbered from 0 again.
shuffled_rows = synthetic_df.sample(frac=1, random_state=42)
synthetic_df = shuffled_rows.reset_index(drop=True)

In [9]:
# Write the dataset to a CSV file in the working directory, omitting the
# row index column.
output_path = 'synthetic_banana_slug_data.csv'
synthetic_df.to_csv(output_path, index=False)