In [1]:
from datetime import date
from pathlib import Path 

import sys
sys.path.append(str(Path.cwd().parent))

from scripts.normalize_data import print_df
from scripts.search_files import (
    get_matching_taxa, search_for_taxa_in_all_files, draw_map,
    filter_samples_by_bounding_box, DownloadFileLink,
    display_search_results
)

paths = list(Path('..', 'processed_data', 'clean_data').rglob('*.csv'))

hole_path = Path('..', 'processed_data', 'Hole Summary_23_2_2021.csv')
taxa_search_path = Path('..', 'processed_data', 'taxa_list_search.csv')
nontaxa_list_path = Path('..', 'processed_data', 'normalized_nontaxa_list.csv')

## find matching taxa

In [2]:
search_terms = ['Globorotalia']


In [21]:
taxa_matches = get_matching_taxa(search_terms)

search_df = search_for_taxa_in_all_files(taxa_matches)
map_df = search_df.drop_duplicates(subset=['Exp', 'Site', 'Hole'])

print(f'{len(search_df)} samples, {len(map_df)} holes')

for taxon in taxa_matches:
    print(taxon)
    
my_map = draw_map(map_df)
my_map

40 samples, 4 holes
Globorotalia pseudomiocenica _T_ _PL6_
Globorotalia tosaensis _T and B_ _Pt1b
Globorotalia anfracta
Globorotalia tosaensis
Globorotalia truncatulinoides _B_
Globorotalia truncatulinoides _B
Globorotalia tosaensis _T and B_ _Pt1b_
Globorotalia pseudomiocenica _T_ _PL6
Globorotalia flexuosa _T and B_
Globorotalia multicamerata _T
Globorotalia truncatulinoides
Globorotalia limbata _B
Globorotalia flexuosa
Globorotalia margaritae _T and B_ _PL3
Globorotalia scitula
Globorotalia menardii
Globorotalia hessi
Globorotalia inflata
Globorotalia limbata _B_
Globorotalia (Hirsutella) scitula
Globorotalia plesiotumida _T
Globorotalia plesiotumida _B_ _M13b_
Globorotalia hessi _B_
Globorotalia plesiotumida _T_
Globorotalia crassaformis
Globorotalia crassaformis sensu lato
Globorotalia hirsuta
Globorotalia multicamerata _T_
Globorotalia limbata _T_
Globorotalia margaritae _T and B_ _PL3_
Globorotalia (Truncorotalia) truncatulinoides
Globorotalia tumida _B_ _PL1a_
Globorotalia (Glo

Map(center=[0, 0], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_text'…

In [19]:
filter_df = filter_samples_by_bounding_box(my_map, search_df, map_df)
print(f'{len(filter_df)} samples, {len(filter_df["geometry"].unique())} holes')

filter_df.head()

20 samples, 2 holes


Unnamed: 0,Top [cm],Sample comment,Zone name (short),Zone name,Additional zone name (short),Bottom [cm],Additional zone name,Bottom Depth [m],Group abundance,Top Depth [m],...,Bottom[cm] [cm],PF Preservation,Globorotalia (Truncorotalia) truncatulinoides,Globorotalia (Globorotalia) tumida tumida,Globorotalia (Truncorotalia) crossaformis,Globorotalia (Globoconella) inflata,Globorotalia (Hirsutella) hirsuta,Latitude_decimal,Longitude_decimal,geometry
0,0,,CM1 [K00],N. Pachyderma B partial range zone (Kennett et...,,5,,14.78,R [A83],14.73,...,,,,,,,,56.959998,-147.109635,POINT (-147.10963 56.96000)
1,0,,CM1 [K00],N. Pachyderma B partial range zone (Kennett et...,,5,,31.12,C [A61],31.07,...,,,,,,,,56.959998,-147.109635,POINT (-147.10963 56.96000)
2,0,,CM1 [K00],N. Pachyderma B partial range zone (Kennett et...,,5,,49.04,P [A83],48.99,...,,,,,,,,56.959998,-147.109635,POINT (-147.10963 56.96000)
3,0,,,,,5,,76.3,B [A83],76.25,...,,,,,,,,56.959998,-147.109635,POINT (-147.10963 56.96000)
4,0,,CM2 [K00],N. pachyderma B - N. pachyderma A (Kennett et ...,,10,,86.21,A [A83],86.11,...,,,,,,,,56.959998,-147.109635,POINT (-147.10963 56.96000)


In [20]:
file = 'eodp_data.csv'
filter_df.to_csv(file, index=False)

DownloadFileLink(file, f'Download {file}')
