In [306]:
import pandas as pd

In [307]:
# Directories
# dataset_dir: folder the source pickles are read from.
# feed_dataset_dir: folder the prepared datasets are written to.
# Both are relative paths, so they resolve against the kernel's working directory.
dataset_dir = "../datasets"
feed_dataset_dir = "../feed-datasets"

In [308]:
# Utility functions
def get_season(date):
    """
    Map a date to a season based only on its month.

    March-May is spring, June-August is summer, September-November is
    autumn, and every other month is winter.

    Parameters:
    - date: datetime object (only its ``month`` attribute is read)

    Returns:
    - season: str, or None when the date is missing
    """
    if pd.isnull(date):
        return None

    # (first month, last month, season) -- both ends inclusive.
    month_ranges = (
        (3, 5, 'spring'),
        (6, 8, 'summer'),
        (9, 11, 'autumn'),
    )

    month_number = date.month
    for first_month, last_month, season in month_ranges:
        if first_month <= month_number <= last_month:
            return season

    # Anything not matched above (December, January, February) is winter.
    return 'winter'
    
def get_time_of_day(date, day_threshold=6, night_threshold=22):
    """
    Classify a timestamp as 'day' or 'night' from its hour.

    Parameters:
    - date: datetime object (only its ``hour`` attribute is read)
    - day_threshold: int, hour after which the day starts (exclusive, default 6)
    - night_threshold: int, hour at which the night starts (default 22)

    Returns:
    - time_day: str, 'day' when day_threshold < hour < night_threshold,
      otherwise 'night'. With the defaults, hours 7..21 are 'day' and
      hours 22..6 are 'night'. A missing date is reported as 'night'.
    """
    if pd.isnull(date):
        # Missing timestamps fall back to 'night' rather than None.
        return 'night'

    # Both comparisons are strict: the threshold hours themselves count as night.
    if day_threshold < date.hour < night_threshold:
        return 'day'
    return 'night'
  
def temperature_label(temperature):
    """
    Bucket a temperature into a named range.

    Parameters:
    - temperature: float

    Returns:
    - temperature_label: str, or None when the temperature is missing
    """
    if pd.isnull(temperature):
        return None

    # (exclusive upper bound, label) in ascending order: the first bound the
    # value falls below decides the label, so each band is [previous, bound).
    bands = (
        (-10, "extremely_cold"),
        (0, "cold"),
        (5, "cool"),
        (10, "slightly_warm"),
        (15, "moderate"),
        (20, "warm"),
        (25, "very_warm"),
        (30, "hot"),
        (35, "very_hot"),
        (40, "extremely_hot"),
    )
    for upper_bound, label in bands:
        if temperature < upper_bound:
            return label

    # 40 and above.
    return "exceptionally_hot"
    
# Trait groups: an animal is flagged True for every group whose members contain it.
# Built once at definition time instead of on every call.
_ANIMAL_GROUPS = {
    # Mammals (left disabled, as in the original definition)
    # "mammal": ["porcupine", "wild boar", "marten", "badger", "fox", "wolf", "dog", "cat", "hare", "deer", "horse", "squirrel"],

    # Birds
    "bird": ("buzzard", "heron", "mallard"),

    # Water animals
    "water_animal": ("mallard", "heron"),

    # Predatory animals
    "predatory_animal": ("fox", "wolf", "buzzard"),

    # Domestic animals
    "domestic_animal": ("dog", "cat", "horse"),

    # Herbivores
    "herbivore": ("porcupine", "wild boar", "hare", "deer", "horse", "squirrel"),

    # Night animals
    "night_animal": ("fox", "wolf", "hare", "badger"),
}

# Size classes, checked in this order; the first class listing the animal wins.
# NOTE(review): 'cat' is listed under both 'xs' and 's', so cats always resolve
# to 'xs' -- confirm which size is intended.
_ANIMAL_SIZES = {
    "xs": ("mallard", "buzzard", "porcupine", "marten", "cat", "hare", "squirrel"),
    "s": ("fox", "cat", "badger"),
    "m": ("heron", "dog", "wild boar", "deer", "wolf"),
    "l": ("horse",),
}


def classify_animal(animal):
    """
    Describe an animal by boolean trait flags and a size class.

    Parameters:
    - animal: str, animal name (matched case-insensitively); may be missing

    Returns:
    - animal_groups: dict with one bool per trait group ('bird',
      'water_animal', 'predatory_animal', 'domestic_animal', 'herbivore',
      'night_animal') followed by 'size', which is 'xs', 's', 'm', 'l', or
      None when the animal is unknown or missing
    """
    # A missing label (None/NaN) matches nothing instead of failing on .lower().
    name = None if pd.isnull(animal) else animal.lower()

    animal_groups = {
        group: name in members for group, members in _ANIMAL_GROUPS.items()
    }

    # 'size' is added last so the key order matches the other flags first.
    animal_groups["size"] = next(
        (size for size, members in _ANIMAL_SIZES.items() if name in members),
        None,
    )

    return animal_groups

def change_moon(moon):
    """
    Normalise a moon-phase name to lowercase with underscores.

    Parameters:
    - moon: str, e.g. 'Full Moon'

    Returns:
    - moon: str with every space replaced by '_' and all letters lowercased
      (e.g. 'full_moon'), or None when the value is missing
    """
    if pd.isnull(moon):
        return None

    # Splitting on a single space and re-joining swaps each space for '_'.
    return '_'.join(moon.lower().split(' '))

### Final version of the dataset

In [309]:
# Load the source table (one row per image, with label1..label3 columns).
# Unpickling can execute arbitrary code, so only load files from a trusted source.
df = pd.read_pickle(dataset_dir + '/animals.pkl')

In [310]:
# Preview the raw columns before any transformation.
df.head()

Unnamed: 0,image_name,camera,date_time,date,time,moon,temp,sha-256,label1,label2,label3
0,TF_ACULEI_8040_DSCF0129.jpg,1.0,2021-07-22 23:04:07,2021-07-22,23:04:07,Full Moon,25.0,00917733a6aa7e574a2da861dc75b70f5e38db45b50983...,wild boar,,
1,TF_ACULEI_900_DSCF0756.jpg,1.0,2021-05-27 22:01:44,2021-05-27,22:01:44,Full Moon,16.0,01b46bf7f72f5ecd35669d876892619192c50771d44949...,wild boar,,
2,TF_ACULEI_15294_DSCF0133.jpg,6.0,2023-03-30 20:35:58,2023-03-30,20:35:58,First Quarter,9.0,a4080d64cc86785ee8ab82468db78b14e4ddb9838faef6...,porcupine,badger,fox
3,TF_ACULEI_11374_DSCF0064.jpg,6.0,2022-06-09 21:29:59,2022-06-09,21:29:59,Waxing Gibbous,15.0,314a99154f1709a8a6ac99069b1af4cebf0cc9df089ce4...,fox,porcupine,wild boar
4,TF_ACULEI_4106_DSCF4336.jpg,1.0,2021-06-16 23:41:09,2021-06-16,23:41:09,First Quarter,19.0,f1c6a3c629f9fb55975f6e02eeeccbd708477ddcf574e5...,fox,,


In [311]:
# Rearrange the dataset
# Parse the timestamps so the helpers can read .month / .hour from each value.
df['date_time'] = pd.to_datetime(df['date_time'])

df['season'] = df['date_time'].apply(get_season)
# The column is named 'night' here but holds the 'day'/'night' strings.
df['night'] = df['date_time'].apply(get_time_of_day)

df['moon'] = df['moon'].apply(change_moon)

# Expand the per-row classification dicts into columns. The new frame reuses
# df.index because pd.concat(axis=1) aligns on index labels: with a fresh
# 0..n-1 index the rows would be misaligned whenever df's index is not the
# default one.
df_result = pd.DataFrame(df['label1'].apply(classify_animal).tolist(), index=df.index)
df = pd.concat([df, df_result], axis=1)

In [312]:
# Discard the columns that are not kept in the final dataset, then switch to
# the final column names.
df = (
    df
    .drop(columns=['date', 'time', 'label2', 'label3'])
    .rename(columns={
        'temp': 'temperature',
        'label1': 'animal',
        'moon': 'moon_phase',
        'night': 'day_time',
        'camera': 'hunter_camera',
        'sha-256': 'sha256',
        'size': 'animal_size',
    })
)

In [313]:
# Final column layout: the label and context first, then the derived animal
# traits, and the image identifiers last.
custom_order = [
    'animal',
    'hunter_camera',
    'temperature',
    'moon_phase',
    'date_time',
    'season',
    'day_time',
    'bird',
    'water_animal',
    'predatory_animal',
    'domestic_animal',
    'herbivore',
    'night_animal',
    'animal_size',
    'image_name',
    'sha256',
]

# Select exactly these columns, in this order (raises KeyError if one is missing).
df = df.loc[:, custom_order]

In [314]:
# Check the renamed and reordered columns.
df.head()

Unnamed: 0,animal,hunter_camera,temperature,moon_phase,date_time,season,day_time,bird,water_animal,predatory_animal,domestic_animal,herbivore,night_animal,animal_size,image_name,sha256
0,wild boar,1.0,25.0,full_moon,2021-07-22 23:04:07,summer,night,False,False,False,False,True,False,m,TF_ACULEI_8040_DSCF0129.jpg,00917733a6aa7e574a2da861dc75b70f5e38db45b50983...
1,wild boar,1.0,16.0,full_moon,2021-05-27 22:01:44,spring,night,False,False,False,False,True,False,m,TF_ACULEI_900_DSCF0756.jpg,01b46bf7f72f5ecd35669d876892619192c50771d44949...
2,porcupine,6.0,9.0,first_quarter,2023-03-30 20:35:58,spring,day,False,False,False,False,True,False,xs,TF_ACULEI_15294_DSCF0133.jpg,a4080d64cc86785ee8ab82468db78b14e4ddb9838faef6...
3,fox,6.0,15.0,waxing_gibbous,2022-06-09 21:29:59,summer,day,False,False,True,False,False,True,s,TF_ACULEI_11374_DSCF0064.jpg,314a99154f1709a8a6ac99069b1af4cebf0cc9df089ce4...
4,fox,1.0,19.0,first_quarter,2021-06-16 23:41:09,summer,night,False,False,True,False,False,True,s,TF_ACULEI_4106_DSCF4336.jpg,f1c6a3c629f9fb55975f6e02eeeccbd708477ddcf574e5...


In [315]:
# Independent copy used for the one-hot encoded variant, so df itself stays unchanged.
encoded_df = df.copy()

In [316]:
# Persist the final dataset as CSV. With pandas defaults the row index is
# written too, as a first column with an empty header.
df.to_csv(feed_dataset_dir + '/aculei.csv')

### One hot encoded version of the dataset

In [317]:
# Starting point for the one-hot variant: identical to the final dataset at this stage.
encoded_df.head()

Unnamed: 0,animal,hunter_camera,temperature,moon_phase,date_time,season,day_time,bird,water_animal,predatory_animal,domestic_animal,herbivore,night_animal,animal_size,image_name,sha256
0,wild boar,1.0,25.0,full_moon,2021-07-22 23:04:07,summer,night,False,False,False,False,True,False,m,TF_ACULEI_8040_DSCF0129.jpg,00917733a6aa7e574a2da861dc75b70f5e38db45b50983...
1,wild boar,1.0,16.0,full_moon,2021-05-27 22:01:44,spring,night,False,False,False,False,True,False,m,TF_ACULEI_900_DSCF0756.jpg,01b46bf7f72f5ecd35669d876892619192c50771d44949...
2,porcupine,6.0,9.0,first_quarter,2023-03-30 20:35:58,spring,day,False,False,False,False,True,False,xs,TF_ACULEI_15294_DSCF0133.jpg,a4080d64cc86785ee8ab82468db78b14e4ddb9838faef6...
3,fox,6.0,15.0,waxing_gibbous,2022-06-09 21:29:59,summer,day,False,False,True,False,False,True,s,TF_ACULEI_11374_DSCF0064.jpg,314a99154f1709a8a6ac99069b1af4cebf0cc9df089ce4...
4,fox,1.0,19.0,first_quarter,2021-06-16 23:41:09,summer,night,False,False,True,False,False,True,s,TF_ACULEI_4106_DSCF4336.jpg,f1c6a3c629f9fb55975f6e02eeeccbd708477ddcf574e5...


In [318]:
# add and remove some columns
columns_to_drop_one_hot = ['date_time', 'image_name', 'sha256']

# Swap the numeric temperature for its range label so it can be one-hot
# encoded, then discard the timestamp and the image identifiers.
encoded_df = (
    encoded_df
    .assign(temperature=encoded_df['temperature'].apply(temperature_label))
    .drop(columns=columns_to_drop_one_hot)
)

In [319]:
# Columns to expand into indicator columns. This lists every column left in
# encoded_df, so the encoded frame will contain indicator columns only.
columns_to_encode_one_hot = ['moon_phase', 'animal', 'season', 'temperature', 'day_time', 'bird', 
                     'water_animal', 'predatory_animal', 'domestic_animal', 'herbivore', 
                     'night_animal', 'animal_size', 'hunter_camera']

# Preview the categorical frame right before encoding.
encoded_df.head()

Unnamed: 0,animal,hunter_camera,temperature,moon_phase,season,day_time,bird,water_animal,predatory_animal,domestic_animal,herbivore,night_animal,animal_size
0,wild boar,1.0,hot,full_moon,summer,night,False,False,False,False,True,False,m
1,wild boar,1.0,warm,full_moon,spring,night,False,False,False,False,True,False,m
2,porcupine,6.0,slightly_warm,first_quarter,spring,day,False,False,False,False,True,False,xs
3,fox,6.0,warm,waxing_gibbous,summer,day,False,False,True,False,False,True,s
4,fox,1.0,warm,first_quarter,summer,night,False,False,True,False,False,True,s


In [320]:
# One-hot encode every listed column; each indicator is named <column>_<value>.
# dtype is pinned to uint8 so the indicators are 0/1 integers on every pandas
# version (pandas >= 2.0 would otherwise produce bool columns).
encoded_df = pd.get_dummies(encoded_df, columns=columns_to_encode_one_hot,
                            prefix=columns_to_encode_one_hot, prefix_sep='_',
                            dtype='uint8')

encoded_df.head()

Unnamed: 0,moon_phase_first_quarter,moon_phase_full_moon,moon_phase_last_quarter,moon_phase_new_moon,moon_phase_waning_crescent,moon_phase_waning_gibbous,moon_phase_waxing_crescent,moon_phase_waxing_gibbous,animal_badger,animal_buzzard,...,animal_size_m,animal_size_s,animal_size_xs,hunter_camera_1.0,hunter_camera_2.0,hunter_camera_3.0,hunter_camera_4.0,hunter_camera_5.0,hunter_camera_6.0,hunter_camera_7.0
0,0,1,0,0,0,0,0,0,0,0,...,1,0,0,1,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,1,0,0,1,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,1,0
3,0,0,0,0,0,0,0,1,0,0,...,0,1,0,0,0,0,0,0,1,0
4,1,0,0,0,0,0,0,0,0,0,...,0,1,0,1,0,0,0,0,0,0


In [321]:
# Persist the one-hot encoded dataset as a pickle.
encoded_df.to_pickle(feed_dataset_dir + '/aculei-one-hot-encoded.pkl')

### Numerical version of the dataset

In [322]:
# Load the scores table (same metadata columns plus one score column per animal).
# Unpickling can execute arbitrary code, so only load files from a trusted source.
dd = pd.read_pickle(dataset_dir + "/animals-scores.pkl")

In [323]:
# Preview the raw scores table.
dd.head()

Unnamed: 0,image_name,camera,date_time,date,time,moon,temp,sha-256,wild boar,porcupine,...,wolf,deer,hare,squirrel,buzzard,dog,mallard,cat,horse,heron
0,TF_ACULEI_8040_DSCF0129.jpg,1.0,2021-07-22 23:04:07,2021-07-22,23:04:07,Full Moon,25.0,00917733a6aa7e574a2da861dc75b70f5e38db45b50983...,0.970243,0.011796,...,0.000991,0.000947,0.000824,0.000282,0.00025,5e-05,4.7e-05,3.9e-05,3.581982e-06,2.118289e-06
1,TF_ACULEI_900_DSCF0756.jpg,1.0,2021-05-27 22:01:44,2021-05-27,22:01:44,Full Moon,16.0,01b46bf7f72f5ecd35669d876892619192c50771d44949...,0.996869,0.002792,...,2.7e-05,4.9e-05,1.3e-05,1e-05,1.3e-05,1e-06,2e-06,1e-06,9.554066e-07,5.664022e-08
2,TF_ACULEI_15294_DSCF0133.jpg,6.0,2023-03-30 20:35:58,2023-03-30,20:35:58,First Quarter,9.0,a4080d64cc86785ee8ab82468db78b14e4ddb9838faef6...,0.084593,0.352572,...,0.003121,0.002608,0.004968,0.008771,0.000689,0.000106,0.0091,0.0006,2.867275e-05,0.00626605
3,TF_ACULEI_11374_DSCF0064.jpg,6.0,2022-06-09 21:29:59,2022-06-09,21:29:59,Waxing Gibbous,15.0,314a99154f1709a8a6ac99069b1af4cebf0cc9df089ce4...,0.181697,0.26316,...,0.011294,0.020466,0.067673,0.004126,0.005695,0.000124,0.026158,0.001735,0.0007151766,0.08043946
4,TF_ACULEI_4106_DSCF4336.jpg,1.0,2021-06-16 23:41:09,2021-06-16,23:41:09,First Quarter,19.0,f1c6a3c629f9fb55975f6e02eeeccbd708477ddcf574e5...,0.000118,0.000149,...,0.007341,3.3e-05,0.000437,0.000366,2.3e-05,1.3e-05,3e-06,6.6e-05,9.427521e-07,4.238395e-06


In [324]:
# Derive the time-based features. assign() evaluates the callables in order,
# so 'season' and 'night' already see the parsed 'date_time' column.
dd = dd.assign(
    date_time=lambda frame: pd.to_datetime(frame['date_time']),
    season=lambda frame: frame['date_time'].apply(get_season),
    night=lambda frame: frame['date_time'].apply(get_time_of_day),
    moon=lambda frame: frame['moon'].apply(change_moon),
)


In [325]:
# Switch to the final column names.
dd = dd.rename(
    columns={
        'temp': 'temperature',
        'night': 'day_time',
        'camera': 'hunter_camera',
    }
)

In [326]:
# Identifier, timestamp and moon columns are not part of the numerical dataset.
columns_to_drop_numerical = ['image_name', 'date_time', 'date', 'time', 'sha-256', 'moon']
# Categorical columns expanded into <column>_<value> indicator columns.
columns_to_encode_numerical = ['season', 'day_time', 'hunter_camera']

# dtype is pinned to uint8 so the indicators are 0/1 integers on every pandas
# version (pandas >= 2.0 would otherwise produce bool columns).
dd = pd.get_dummies(dd, columns=columns_to_encode_numerical,
                    prefix=columns_to_encode_numerical, prefix_sep='_',
                    dtype='uint8')

# Reassign instead of dropping in place, so dd is always rebound explicitly.
dd = dd.drop(columns=columns_to_drop_numerical)
dd.head()

Unnamed: 0,temperature,wild boar,porcupine,fox,badger,marten,wolf,deer,hare,squirrel,...,season_winter,day_time_day,day_time_night,hunter_camera_1.0,hunter_camera_2.0,hunter_camera_3.0,hunter_camera_4.0,hunter_camera_5.0,hunter_camera_6.0,hunter_camera_7.0
0,25.0,0.970243,0.011796,0.01146,0.001555,0.001512,0.000991,0.000947,0.000824,0.000282,...,0,0,1,1,0,0,0,0,0,0
1,16.0,0.996869,0.002792,0.000112,7.8e-05,3.3e-05,2.7e-05,4.9e-05,1.3e-05,1e-05,...,0,0,1,1,0,0,0,0,0,0
2,9.0,0.084593,0.352572,0.188861,0.205791,0.131927,0.003121,0.002608,0.004968,0.008771,...,0,1,0,0,0,0,0,0,1,0
3,15.0,0.181697,0.26316,0.280894,0.01042,0.045404,0.011294,0.020466,0.067673,0.004126,...,0,1,0,0,0,0,0,0,1,0
4,19.0,0.000118,0.000149,0.94259,0.000321,0.048537,0.007341,3.3e-05,0.000437,0.000366,...,0,0,1,1,0,0,0,0,0,0


In [327]:
# Persist the numerical dataset as a pickle.
dd.to_pickle(feed_dataset_dir + '/aculei-numerical.pkl')