In [1]:
import os
import pickle
import time

import fiona
import numpy as np
import pandas as pd
from shapely.geometry import shape, mapping, Point

from IPython.display import clear_output

In [2]:
# Marker name for this run. It is also used as the directory name into which the
# pickled results ('<marker>/grid.p', '<marker>/band.p', '<marker>/wallace.p') are written.
# NOTE(review): the trailing comment presumably lists the alternative value 'co1' - confirm.
marker = 'cytb' #co1

In [None]:
# Read the tab-separated table of sequence records and discard incomplete rows.
# Expected column names: 'seq', 'species', 'y', 'x'.
coords = pd.read_csv('place file name here', sep='\t').dropna()

# Distinct species names present in the cleaned table.
mammsSP = set(coords['species'])

# Display the number of distinct species.
len(mammsSP)

#### Assign sequences to grid cells

In [None]:
# Assign every sequence to the grid cell(s) whose polygon contains or touches its point.
# Result layout: cellDic[cellID][species] -> list of sequence IDs.
# Only cells that receive at least one sequence get an entry.

# Work on a copy so that `coords` stays untouched; after this cell `allAnimals`
# holds the records that were not strictly inside any grid cell.
allAnimals = coords.copy(deep=True)

# load a geojson file with the grid cells
with fiona.open('place file here.json') as landGrid:
    cellDic = {}  # dictionary to store results
    print('evaluating sequences for grid')
    start_time = time.time()

    # Build each point exactly once (instead of once per polygon) and keep the
    # candidates in a plain list, so the DataFrame is never modified while it is
    # being iterated.
    remaining = [
        (index, Point(float(row['x']), float(row['y'])), row['species'], row['seq'])
        for index, row in allAnimals.iterrows()
    ]
    assignedIdx = []  # index labels of records found strictly inside a cell

    # check each polygon if it contains the points
    totalCells = len(landGrid)
    for ci, feature in enumerate(landGrid):
        cellID = feature['properties']['cellID']
        clear_output(wait=True)
        print('cell {} of {}'.format(ci+1, totalCells) )
        polygon = shape(feature['geometry'])

        stillUnassigned = []
        for entry in remaining:
            index, p, species, seq = entry
            if p.within(polygon):
                # A point inside the cell is recorded and removed from further checks.
                cellDic.setdefault(cellID, {}).setdefault(species, []).append(seq)
                assignedIdx.append(index)
                continue
            if p.touches(polygon):
                # A point on the cell boundary is recorded but stays a candidate,
                # so it is included in the next polygon it touches as well.
                cellDic.setdefault(cellID, {}).setdefault(species, []).append(seq)
            stillUnassigned.append(entry)
        remaining = stillUnassigned

    # Remove all assigned records from the working copy in a single operation.
    allAnimals = allAnimals.drop(index=assignedIdx)

    # save file; make sure the output directory exists so the run does not fail
    # at the very end
    os.makedirs(marker, exist_ok=True)
    with open('{}/grid.p'.format(marker), 'wb') as fp:
        pickle.dump(cellDic, fp, protocol=pickle.HIGHEST_PROTOCOL)

print('--- %s seconds ---\n\n' % (time.time() - start_time))

#### Assign sequences to latitudinal bands

In [None]:
# Assign every sequence to the latitudinal band(s) whose polygon contains or touches its point.
# Result layout: bandDic[bandID][species] -> list of sequence IDs.
# Only bands that receive at least one sequence get an entry.

# Work on a copy so that `coords` stays untouched; after this cell `allAnimals`
# holds the records that were not strictly inside any band.
allAnimals = coords.copy(deep=True)

# load a geojson file with the lat bands
with fiona.open('place file here.json') as latBands:
    bandDic = {}  # dictionary to store results
    print('evaluating sequences for bands')
    start_time = time.time()

    # Build each point exactly once (instead of once per polygon) and keep the
    # candidates in a plain list, so the DataFrame is never modified while it is
    # being iterated.
    remaining = [
        (index, Point(float(row['x']), float(row['y'])), row['species'], row['seq'])
        for index, row in allAnimals.iterrows()
    ]
    assignedIdx = []  # index labels of records found strictly inside a band

    # check each polygon if it contains the points
    totalBands = len(latBands)
    for bi, feature in enumerate(latBands):
        # The band identifier is read from the feature's 'cellID' property.
        bandID = feature['properties']['cellID']
        clear_output(wait=True)
        print('band {} of {}'.format(bi+1, totalBands) )
        polygon = shape(feature['geometry'])

        stillUnassigned = []
        for entry in remaining:
            index, p, species, seq = entry
            if p.within(polygon):
                # A point inside the band is recorded and removed from further checks.
                bandDic.setdefault(bandID, {}).setdefault(species, []).append(seq)
                assignedIdx.append(index)
                continue
            if p.touches(polygon):
                # A point on the band boundary is recorded but stays a candidate,
                # so it is included in the next polygon it touches as well.
                bandDic.setdefault(bandID, {}).setdefault(species, []).append(seq)
            stillUnassigned.append(entry)
        remaining = stillUnassigned

    # Remove all assigned records from the working copy in a single operation.
    allAnimals = allAnimals.drop(index=assignedIdx)

    # save file; make sure the output directory exists so the run does not fail
    # at the very end
    os.makedirs(marker, exist_ok=True)
    with open('{}/band.p'.format(marker), 'wb') as fp:
        pickle.dump(bandDic, fp, protocol=pickle.HIGHEST_PROTOCOL)

print('--- %s seconds ---\n\n' % (time.time() - start_time))

#### Assign sequences to zoogeographic regions

In [None]:
# Assign every sequence to the zoogeographic region(s) whose polygon contains or touches its point.
# Result layout: polyDic[regionID][species] -> list of sequence IDs, where regionID is the
# feature's 'mam_upgma_' property. Only regions that receive at least one sequence get an entry.

# Work on a copy so that `coords` stays untouched; after this cell `allAnimals`
# holds the records that were not strictly inside any region polygon.
allAnimals = coords.copy(deep=True)

# load a geojson file with the zoo regions
with fiona.open('place file here.json') as wallace:
    polyDic = {}  # dictionary to store results
    start_time = time.time()

    # Build each point exactly once (instead of once per polygon) and keep the
    # candidates in a plain list, so the DataFrame is never modified while it is
    # being iterated.
    remaining = [
        (index, Point(float(row['x']), float(row['y'])), row['species'], row['seq'])
        for index, row in allAnimals.iterrows()
    ]
    assignedIdx = []  # index labels of records found strictly inside a region

    # check each polygon if it contains the points
    totalRegions = len(wallace)
    for ci, feature in enumerate(wallace):
        clear_output(wait=True)
        print('poly {} of {}'.format(ci+1, totalRegions) )
        regionID = feature['properties']['mam_upgma_']
        polygon = shape(feature['geometry'])

        stillUnassigned = []
        for entry in remaining:
            index, p, species, seq = entry
            if p.within(polygon):
                # A point inside the region is recorded and removed from further checks.
                polyDic.setdefault(regionID, {}).setdefault(species, []).append(seq)
                assignedIdx.append(index)
                continue
            if p.touches(polygon):
                # A point on the region boundary is recorded but stays a candidate,
                # so it is included in the next polygon it touches as well.
                polyDic.setdefault(regionID, {}).setdefault(species, []).append(seq)
            stillUnassigned.append(entry)
        remaining = stillUnassigned

    # Remove all assigned records from the working copy in a single operation.
    allAnimals = allAnimals.drop(index=assignedIdx)

    # save file; make sure the output directory exists so the run does not fail
    # at the very end
    os.makedirs(marker, exist_ok=True)
    with open('{}/wallace.p'.format(marker), 'wb') as fp:
        pickle.dump(polyDic, fp, protocol=pickle.HIGHEST_PROTOCOL)

print('--- %s seconds ---\n\n' % (time.time() - start_time))