# Identify Camera Trap Arrays with all sites within 5km and Full data for 5 years 

In [11]:
import pandas as pd 
import numpy as np 
import warnings 
from tabulate import tabulate
from itertools import combinations
import matplotlib.pyplot as plt

In [12]:
warnings.filterwarnings('ignore')

In [13]:
data_dir = r'/Users/neelima/Documents/ML_Projects/data'
out_dir = r'/Users/neelima/Documents/ML_Projects/output'

In [14]:
camera_trap_dis_threshold = 5

In [15]:
array_proximity_with_year_df = pd.read_csv(out_dir + r'/camera_array_5km_proximity_sites.csv')
array_proximity_with_year_df.shape

(591, 5)

In [16]:
array_proximity_with_year_df.columns

Index(['Camera_Trap_Array', 'Year', 'Mean_Latitude', 'Mean_Longitude',
       'Proximity_List'],
      dtype='object')

In [17]:
# Unique Camera trap array elements count 
array_proximity_with_year_df['Camera_Trap_Array'].nunique()

262

In [18]:
array_proximity_with_year_df.head()

Unnamed: 0,Camera_Trap_Array,Year,Mean_Latitude,Mean_Longitude,Proximity_List
0,ARNWR,2020,35.8094,-75.9057,"{'NC_Forest_Alligator_River_NWR_20_03', 'NC_Fo..."
1,ARNWR,2021,35.8236,-75.8931,"{'NC_Wetland_ARNWR_21_dep_15', 'NC_Wetland_ARN..."
2,ARNWR,2022,35.8263,-75.8911,"{'NC_Wetland_ARNWR_22_Cam07', 'NC_Wetland_ARNW..."
3,AandM,2022,26.2209,-97.4273,"{'TX_Shrubland_A&M_CCR002', 'TX_Shrubland_A&M_..."
4,Abilene,2019,32.2393,-99.8829,"{'TX_Grassland_Abilene_13B', 'TX_Grassland_Abi..."


### Find Camera Trap Arrays that have complete data 

In [19]:
#write code to extract camera trap array names that have complete data for given set of years required 
# make sure the array_proximity_with_year_df has data for all the years specified in the years list 
def array_with_years_data(array_proximity_with_year_df, years):
    
    """
    Get camera trap arrays that have data for all specified years.
    
    :param array_proximity_with_year_df: DataFrame containing camera trap array data with years
    :param years: List of years to check
    :return: List of camera trap arrays with complete data for specified years
    """
    # Filter the DataFrame for the specified years
    filtered_df = array_proximity_with_year_df[array_proximity_with_year_df['Year'].isin(years)]
    
    # Group by Camera Trap Array and count unique years
    grouped = filtered_df.groupby('Camera_Trap_Array')['Year'].nunique()
    
    # Return arrays that have data for all specified years
    return grouped[grouped == len(years)].index.tolist()

In [24]:
# call function get_arrays_with_years for period of 2 , 3, 4 and 5 years starting 2019
# extract the camera trap array names and store in separate csv file 
# each number of years will be in separate csv file
# For example, for 2 years, the file will be named camera_trap_arrays_with_2_years.csv
# use only one for loop to iterate through the years list
years_list = [2, 3, 4, 5]

start_year = 2019
for years in years_list:
    
    end_year = start_year + years - 1
    years_range = list(range(start_year, end_year + 1))
    
    # Get camera trap arrays with complete data for the specified years
    arrays_with_years = array_with_years_data(array_proximity_with_year_df, years_range)
    
    # Store the arrays in a DataFrame and save to CSV
    df = pd.DataFrame(arrays_with_years, columns=['Camera_Trap_Array'])
    print(f'Total arrays with data for {years} years: {len(arrays_with_years)}')
    df.to_csv(out_dir + f'/arrays_years/camera_arrays_{years}_years.csv', index=False)

Total arrays with data for 2 years: 61
Total arrays with data for 3 years: 50
Total arrays with data for 4 years: 45
Total arrays with data for 5 years: 43


In [None]:
years_required = set(range(2019, 2021))
#print(years_required, "Required Years List")

# collect sets of years for each camera trap array
camera_arrays_with_years = array_proximity_with_year_df.groupby('Camera_Trap_Array')['Year'].apply(set)

camera_arrays_all_years = camera_arrays_with_years[camera_arrays_with_years.apply(
    lambda years: years_required.issubset(years))
]

arrays_with_all_years_list = camera_arrays_all_years.index.tolist()
len(arrays_with_all_years_list)

{2019, 2020} Required Years List


61

In [247]:
#arrays_all_years_data = array_distances_with_year_df[array_distances_with_year_df['Camera_Trap_Array'].isin(arrays_with_all_years_list)]
arrays_all_years_data_df = array_proximity_with_year_df[array_distances_with_year_df['Camera_Trap_Array'].isin(arrays_with_all_years_list)]

In [248]:
arrays_all_years_data_df.shape

(216, 5)

In [249]:
arrays_all_years_data_df['Camera_Trap_Array'].nunique()

43

In [250]:
arrays_all_years_data_df.head(20)

Unnamed: 0,Camera_Trap_Array,Year,Mean_Latitude,Mean_Longitude,Proximity_List
4,Abilene,2019,32.2393,-99.8829,"{'TX_Grassland_Abilene_13B', 'TX_Grassland_Abi..."
5,Abilene,2020,32.2383,-99.8839,"{'TX_Grassland_Abilene_20_05', 'TX_Grassland_A..."
6,Abilene,2021,32.2374,-99.8833,"{'Abilene State Park Texas 6', 'Abilene State ..."
7,Abilene,2022,32.2364,-99.8847,"{'TX_Grassland_Abilene_22_3', 'TX_Grassland_Ab..."
8,Abilene,2023,32.2379,-99.8841,{'TX_Grassland_Abilene State Park Texas 09/02/...
20,Angelina,2019,31.5007,-94.7791,"{'TX_Forest_Pineywoods_Angelina_1', 'TX_Forest..."
21,Angelina,2020,31.4999,-94.776,"{'TX_Forest_Pineywoods_Angelina_20_04', 'TX_Fo..."
22,Angelina,2021,31.5005,-94.7783,{'TX_Forest_Pineywoods_Angelina_14 10/08/2021'...
23,Angelina,2022,31.5156,-94.7727,"{'TX_Forest_Angelina_ANF_CAM16', 'TX_Forest_An..."
24,Angelina,2023,31.5007,-94.779,"{'TX_Forest_Rural_Angelina_23_loc10_dep10', 'T..."


In [304]:
output_path = out_dir + r'/camera_array_5km_proximity_all_years.csv'
arrays_all_years_data_df[['Camera_Trap_Array','Year','Mean_Latitude','Mean_Longitude','Proximity_List'] ].to_csv(output_path, index=False)

In [302]:
arrays_all_years_data_df.head(50)

Unnamed: 0,Camera_Trap_Array,Year,Mean_Latitude,Mean_Longitude,Proximity_List
4,Abilene,2019,32.2393,-99.8829,"{'TX_Grassland_Abilene_13B', 'TX_Grassland_Abi..."
5,Abilene,2020,32.2383,-99.8839,"{'TX_Grassland_Abilene_20_05', 'TX_Grassland_A..."
6,Abilene,2021,32.2374,-99.8833,"{'Abilene State Park Texas 6', 'Abilene State ..."
7,Abilene,2022,32.2364,-99.8847,"{'TX_Grassland_Abilene_22_3', 'TX_Grassland_Ab..."
8,Abilene,2023,32.2379,-99.8841,{'TX_Grassland_Abilene State Park Texas 09/02/...
20,Angelina,2019,31.5007,-94.7791,"{'TX_Forest_Pineywoods_Angelina_1', 'TX_Forest..."
21,Angelina,2020,31.4999,-94.776,"{'TX_Forest_Pineywoods_Angelina_20_04', 'TX_Fo..."
22,Angelina,2021,31.5005,-94.7783,{'TX_Forest_Pineywoods_Angelina_14 10/08/2021'...
23,Angelina,2022,31.5156,-94.7727,"{'TX_Forest_Angelina_ANF_CAM16', 'TX_Forest_An..."
24,Angelina,2023,31.5007,-94.779,"{'TX_Forest_Rural_Angelina_23_loc10_dep10', 'T..."
