# Libraries

In [1]:
!pip install zarr
!pip install pystac
!pip install xarray[complete]

Collecting zarr
  Downloading zarr-2.11.3-py3-none-any.whl (153 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m153.4/153.4 KB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Collecting asciitree
  Downloading asciitree-0.3.3.tar.gz (4.0 kB)
  Preparing metadata (setup.py) ... [?25l- done
[?25hCollecting numcodecs>=0.6.4
  Downloading numcodecs-0.9.1-cp37-cp37m-manylinux2010_x86_64.whl (6.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.2/6.2 MB[0m [31m45.0 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: asciitree
  Building wheel for asciitree (setup.py) ... [?25l- \ done
[?25h  Created wheel for asciitree: filename=asciitree-0.3.3-py3-none-any.whl size=5050 sha256=89b5956c9530a81e5f87edddb89ced25fa7cbfb3432dab4ced9f3b501881981e
  Stored in directory: /root/.cache/pip/wheels/12/1c/38/0def51e15add93bff3f4bf9c248b94db0839b980b8535e72a0
Successfully built asciitree
Installing collected packages: asciitre

In [2]:
#Importing the necessary libraries
import pandas as pd
import pystac
import fsspec
import xarray as xr
import datetime 
import numpy as np
import math
from tqdm import tqdm
from datetime import date
import pystac
import itertools

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix,classification_report,ConfusionMatrixDisplay
from sklearn import metrics
from sklearn.metrics import r2_score

from statsmodels.stats.outliers_influence import variance_inflation_factor

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
import warnings
import os
# Silence every warning for the rest of the session. This keeps cell output
# readable, but it also hides deprecation and dtype warnings.
warnings.filterwarnings(action="ignore")

# Show up to 300 columns when a DataFrame is displayed.
pd.options.display.max_columns = 300

  "Failed to load cfgrib - most likely there is a problem accessing the ecCodes library. "


In [3]:
import random


def seed_everything(seed=2022):
    """Seed the random number generators used by this notebook.

    Seeds Python's ``random`` module, NumPy's legacy global generator and, when
    TensorFlow can be imported, TensorFlow's global generator.

    Parameters
    ----------
    seed : int, default 2022
        Seed value applied to every generator.
    """
    random.seed(seed)
    # PYTHONHASHSEED is read at interpreter start-up, so this only affects
    # processes launched after this point, not hashing in the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

    # Best effort: the notebook builds Keras models, whose weight initialisation
    # and dropout draw from TensorFlow's own generator. Skip quietly when
    # TensorFlow is not installed so the helper stays usable without it.
    try:
        import tensorflow as tf
    except ImportError:
        return
    tf.random.set_seed(seed)


seed_everything(seed=2022)

# Train Data (Frog Presence Data)

In [4]:
# Columns kept from the GBIF occurrence export (plus the derived country/continent).
columns = [
    'gbifID',
    'eventDate',
    'country',
    'continent',
    'stateProvince',
    'decimalLatitude',
    'decimalLongitude',
    'species',
]

# ISO country code -> country name; a missing code maps to a placeholder label.
country_names = {
    'AU': 'Australia',
    'CR': 'Costa Rica',
    'ZA': 'South Africa',
    'MX': 'Mexico',
    'HN': 'Honduras',
    'MZ': 'Mozambique',
    'BW': 'Botswana',
    'MW': 'Malawi',
    'CO': 'Colombia',
    'PA': 'Panama',
    'NI': 'Nicaragua',
    'BZ': 'Belize',
    'ZW': 'Zimbabwe',
    'SZ': 'Eswatini',
    'ZM': 'Zambia',
    'GT': 'Guatemala',
    'LS': 'Lesotho',
    'SV': 'El Salvador',
    'AO': 'Angola',
    np.nan: 'unknown or invalid',
}

# ISO country code -> region label. Only three labels are used
# (Australia, Central America, Africa) plus the placeholder for missing codes.
continent_names = {
    'AU': 'Australia',
    'CR': 'Central America',
    'ZA': 'Africa',
    'MX': 'Central America',
    'HN': 'Central America',
    'MZ': 'Africa',
    'BW': 'Africa',
    'MW': 'Africa',
    'CO': 'Central America',
    'PA': 'Central America',
    'NI': 'Central America',
    'BZ': 'Central America',
    'ZW': 'Africa',
    'SZ': 'Africa',
    'ZM': 'Africa',
    'GT': 'Central America',
    'LS': 'Africa',
    'SV': 'Central America',
    'AO': 'Africa',
    np.nan: 'unknown or invalid',
}

In [5]:
# Read the tab-separated GBIF occurrence export, derive readable country /
# continent labels from the country code, title-case the species name, keep the
# columns of interest, drop every row with a missing value in any of them, and
# tag what is left as a presence record.
train = (
    pd.read_csv("ey-bwwdc-level-3/occurrence.txt", sep='\t', parse_dates=['eventDate'])
    .assign(
        country=lambda occ: occ.countryCode.map(country_names),
        continent=lambda occ: occ.countryCode.map(continent_names),
        species=lambda occ: occ.species.str.title(),
    )[columns]
    .dropna()
    .assign(occurrenceStatus="PRESENT")
)

# Optional filters: uncomment a line to exclude that state/province.
# train = train[train["stateProvince"] != "Puntarenas"]
# train = train[train["stateProvince"] != "Western Cape"]
# train = train[train["stateProvince"] != "South Australia"]

# Only date, position, status and province are needed downstream.
req_cols = ['eventDate', 'decimalLatitude','decimalLongitude','occurrenceStatus',"stateProvince"]
train = train[req_cols]

# Independent copy used by the grid-count step.
df_frog = train.copy()

In [6]:
# Occurrence records per observation year, the 20 most frequent years first.
df_frog["eventDate"].dt.year.value_counts().iloc[:20]

2020    42166
2019    27214
2018    25918
2010     7327
2017     6972
2001     3222
2000     3171
2009     3095
2004     2984
2011     2865
1999     2791
2008     2633
2003     2611
2016     2605
2005     2439
2002     2369
2015     2313
1997     2277
2013     2112
2007     2070
Name: eventDate, dtype: int64

In [7]:
# -

# TerraClimate Data

In [8]:
# Open the TerraClimate collection from the Planetary Computer STAC API.
# This cell needs network access.
url = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/terraclimate"
collection = pystac.read_file(url)
# The collection-level "zarr-https" asset carries the location of the Zarr store.
asset = collection.assets["zarr-https"]
store = fsspec.get_mapper(asset.href)
# The asset supplies its own keyword arguments for `xr.open_zarr` in its extra fields.
ds = xr.open_zarr(store, **asset.extra_fields["xarray:open_kwargs"])
# Show the dataset summary: each variable is listed as a chunked array of
# shape (744, 4320, 8640). `ds` is read as a global by `get_data` below.
ds

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,206.90 GiB,189.84 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 206.90 GiB 189.84 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float64 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,206.90 GiB,189.84 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,206.90 GiB,189.84 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 206.90 GiB 189.84 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float64 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,206.90 GiB,189.84 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 103.45 GiB 94.92 MiB Shape (744, 4320, 8640) (12, 1440, 1440) Count 1117 Tasks 1116 Chunks Type float32 numpy.ndarray",8640  4320  744,

Unnamed: 0,Array,Chunk
Bytes,103.45 GiB,94.92 MiB
Shape,"(744, 4320, 8640)","(12, 1440, 1440)"
Count,1117 Tasks,1116 Chunks
Type,float32,numpy.ndarray


## Grid Based Approach to Extract Frog Count

- Creates a grid of X km by Y km cells, each a bounding box of Z sq km, and counts the frog occurrences that fall inside each cell.

The maximum and minimum longitude and latitude of each region are listed below:

South Africa

- Longitude: max 32.865667, min 16.45
- Latitude: max -22.230311, min -34.773805

Costa Rica

- Longitude: max -82.564591, min -85.47945
- Latitude: max 10.991875, min 8.376218

Australia

- Longitude: max 153.896484, min 113.062499
- Latitude: max -10.521216, min -39.398856

NOTE: For Costa Rica and South Africa there are not enough frog observations in the GBIF dataset, so the search area is expanded by 5 degrees of latitude and longitude on every side.

In [9]:
# Test grid: one row per bounding box (Id, min_lon, min_lat, max_lon, max_lat)
# with an empty Predicted_frog_count column.
test_df = pd.read_csv("ey-bwwdc-level-3/level_3_test_data.csv")

In [10]:
# Preview the test grid (pandas truncates the display to the first and last rows).
test_df

Unnamed: 0,Id,min_lon,min_lat,max_lon,max_lat,Predicted_frog_count
0,1,133.9014,-27.1322,134.1738,-26.8813,
1,2,134.1738,-32.9015,134.4463,-32.6506,
2,3,134.9912,-32.9015,135.2637,-32.6506,
3,4,135.2637,-34.6574,135.5361,-34.4065,
4,5,135.2637,-34.1557,135.5361,-33.9048,
...,...,...,...,...,...,...
177,178,-83.1555,8.2442,-82.8831,8.4950,
178,179,-83.1555,8.4950,-82.8831,8.7458,
179,180,-83.1555,8.7458,-82.8831,8.9967,
180,181,-82.8831,8.4950,-82.6106,8.7458,


Australia: `aus_whole = {"min_lati":-39.398856,"max_lati":-10.521216,"min_longi":113.062499,"max_longi":153.896484}`

Costa Rica (padded by 5 degrees): `aus_whole = {"min_lati":8.376218-5,"max_lati":10.991875+5,"min_longi":-85.47945-5,"max_longi":-82.564591+5}`

South Africa (padded by 5 degrees): `aus_whole = {"min_lati":-34.773805-5,"max_lati":-22.230311+5,"min_longi":16.45-5,"max_longi":32.865667+5}`

### Grid Search Bbox Area (Lat Lon - KM Conversion)


Source : https://stackoverflow.com/questions/4000886/gps-coordinates-1km-square-around-a-point

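1 km ≈ 0.008983 degrees of latitude

1 km ≈ 0.015060 degrees of longitude

Note: the latitude factor is the same everywhere, but the longitude factor depends on latitude (1 km ≈ 0.008983 / cos(latitude) degrees). The value 0.015060 corresponds to a latitude of about 53°, so at the latitudes of the regions used here the cells are wider than the nominal width and only approximately square.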


In [11]:
def country_bbox(country, lat_km, lon_km):
    """Build a regular grid of bounding boxes covering a region.

    Parameters
    ----------
    country : str
        Region key, case-insensitive. ``"aus"``, ``"sa"`` and ``"cr"`` use
        hard-coded extents (``"sa"`` and ``"cr"`` are padded by 5 degrees on
        every side). ``"test_aus"``, ``"test_sa"`` and ``"test_cr"`` use the
        extent of rows 0-79, 80-153 and 154 onwards of the global ``test_df``.
    lat_km, lon_km : float
        Cell height and width in kilometres. They are converted to degrees with
        the fixed factors 0.008983 deg/km (latitude) and 0.015060 deg/km
        (longitude).

    Returns
    -------
    bbox_grid_whole : list of dict
        One dict per cell with keys ``min_x``, ``min_y``, ``max_x``, ``max_y``
        (longitude / latitude in degrees, rounded to 4 decimals). Cells are
        ordered by longitude first, then latitude.
    aus_whole : dict
        The region extent with keys ``min_lati``, ``max_lati``, ``min_longi``,
        ``max_longi``.

    Raises
    ------
    ValueError
        If ``country`` is not one of the supported region keys.
    """
    region = country.lower()

    def _test_extent(rows):
        # Positional columns 1-4 of test_df are min_lon, min_lat, max_lon,
        # max_lat according to the preview of the test grid; positions are kept
        # so added trailing columns do not matter.
        subset = test_df.iloc[rows]
        return {"min_lati": subset.iloc[:, 2].min(), "max_lati": subset.iloc[:, 4].max(),
                "min_longi": subset.iloc[:, 1].min(), "max_longi": subset.iloc[:, 3].max()}

    if region == "aus":
        aus_whole = {"min_lati":-39.398856,"max_lati":-10.521216,"min_longi":113.062499,"max_longi":153.896484}
    elif region == "sa":
        aus_whole = {"min_lati":-34.773805-5,"max_lati":-22.230311+5,"min_longi":16.45-5,"max_longi":32.865667+5}
    elif region == "cr":
        aus_whole = {"min_lati":8.376218-5,"max_lati":10.991875+5,"min_longi":-85.47945-5,"max_longi":-82.564591+5}
    elif region == "test_aus":
        aus_whole = _test_extent(slice(None, 80))
    elif region == "test_sa":
        aus_whole = _test_extent(slice(80, 154))
    elif region == "test_cr":
        aus_whole = _test_extent(slice(154, None))
    else:
        # Previously an unknown key fell through and failed later with an
        # UnboundLocalError on `aus_whole`.
        raise ValueError(
            f"Unknown region {country!r}; expected one of "
            "'aus', 'sa', 'cr', 'test_aus', 'test_sa', 'test_cr'"
        )

    # Cell size in degrees, computed once instead of once per cell.
    lon_step = 0.015060 * lon_km
    lat_step = 0.008983 * lat_km
    lon_starts = np.arange(aus_whole["min_longi"], aus_whole["max_longi"], lon_step)
    lat_starts = np.arange(aus_whole["min_lati"], aus_whole["max_lati"], lat_step)

    bbox_grid_whole = [
        {"min_x": np.round(x, 4), "min_y": np.round(y, 4),
         "max_x": np.round(x + lon_step, 4), "max_y": np.round(y + lat_step, 4)}
        for x, y in itertools.product(lon_starts, lat_starts)
    ]
    print(len(bbox_grid_whole))
    return bbox_grid_whole, aus_whole

def generate_frog_count(bbox_grid_whole):
    """Count frog occurrence records per grid cell and keep the non-empty cells.

    Records are read from the global ``df_frog`` and matched on
    ``decimalLongitude`` / ``decimalLatitude`` only, so records of every
    ``eventDate`` are counted. Cell bounds are inclusive on all four sides, so a
    record lying exactly on a shared edge is counted in both neighbouring cells.

    Parameters
    ----------
    bbox_grid_whole : iterable of dict
        Cells with keys ``min_x``, ``min_y``, ``max_x``, ``max_y`` as produced
        by ``country_bbox``.

    Returns
    -------
    pandas.DataFrame
        One row per cell containing at least one record, indexed from 1, with
        columns ``coord`` (the cell dict), ``frog_count``, ``min_lon``,
        ``min_lat``, ``max_lon`` and ``max_lat``. The frame is empty, with the
        same columns, when no cell contains a record.
    """
    out_columns = ["coord", "frog_count", "min_lon", "min_lat", "max_lon", "max_lat"]

    # Column lookups do not change between cells, so do them once.
    lon = df_frog['decimalLongitude']
    lat = df_frog['decimalLatitude']

    records = []
    # Iterating the grid directly lets tqdm pick up its length and show a real
    # progress bar with an ETA.
    for bbox in tqdm(bbox_grid_whole):
        inside = ((lon >= bbox["min_x"]) & (lon <= bbox["max_x"])
                  & (lat >= bbox["min_y"]) & (lat <= bbox["max_y"]))
        # Summing the boolean mask counts the matches without building a
        # filtered copy of the frame.
        frog_count = int(inside.sum())
        if frog_count > 0:
            records.append({
                "coord": bbox,
                "frog_count": frog_count,
                "min_lon": bbox["min_x"],
                "min_lat": bbox["min_y"],
                "max_lon": bbox["max_x"],
                "max_lat": bbox["max_y"],
            })

    # Explicit columns keep the schema stable when nothing matched; the
    # original failed with KeyError: 'coord' in that case.
    aus_whole_filt_cord_df = pd.DataFrame(
        records,
        index=pd.RangeIndex(1, len(records) + 1),
        columns=out_columns,
    )

    print(aus_whole_filt_cord_df.shape)

    return aus_whole_filt_cord_df

def get_data(time_range, aus_whole_filt_cord_df, country, aus_whole):
    """Average the TerraClimate variables inside each grid cell and save one CSV per time window.

    Reads the global dataset ``ds``. For every window, each row of
    ``aus_whole_filt_cord_df`` receives the mean of every climate column over the
    grid points and time steps that fall inside the row's bounding box, plus a
    ``samples_count`` column with the number of matching samples.

    Parameters
    ----------
    time_range : sequence of [start, end]
        Date strings formatted ``YYYY-MM-DD``; the parts before the first ``-``
        are used in the output file name.
    aus_whole_filt_cord_df : pandas.DataFrame
        Cells with columns ``min_lon``, ``max_lon``, ``min_lat``, ``max_lat``.
        It is modified in place: the climate columns are added on the first
        window and overwritten on each later one.
    country : str
        Label used in the output file name.
    aus_whole : dict
        Region extent with keys ``min_lati``, ``max_lati``, ``min_longi``,
        ``max_longi``.

    Returns
    -------
    None
        Results are written to
        ``{start_year}to{end_year}_Terraclimate_{country}_grid_bbox_25sqkm_v2.csv``
        in the working directory.
    """
    for period in time_range:
        period_start, period_end = period[0], period[1]
        print(f"Generating Data from {period_start} to {period_end}")

        # Restrict the dataset to the requested time window.
        # NOTE(review): label-based slices include both end points, so
        # consecutive windows appear to share their boundary month - confirm
        # that this is intended.
        ds_period = ds.sel(time = slice(period_start, period_end))

        # Keep only the grid points inside the region extent.
        inside_region = (
            (ds.lat >= aus_whole["min_lati"]) & (ds.lat <= aus_whole["max_lati"])
            & ((ds.lon >= aus_whole["min_longi"]) & (ds.lon <= aus_whole["max_longi"]))
        )
        climate = ds_period.where(inside_region, drop = True)

        # Flatten to a long table: one row per (time, lat, lon) sample.
        climate = climate.to_dataframe().reset_index()
        climate["time"] = pd.to_datetime(climate["time"])

        # Average the climate values over every sample that lies inside each cell.
        for ind, cell in tqdm(aus_whole_filt_cord_df.iterrows()):
            in_cell = climate[(climate['lon'] >= cell["min_lon"]) & (climate['lon'] <= cell["max_lon"]) &
                              (climate['lat'] >= cell["min_lat"]) & (climate['lat'] <= cell["max_lat"])]

            # Columns from position 4 onwards are averaged.
            # NOTE(review): presumably the first four are time, lat, lon and a
            # CRS column - confirm against `climate.columns`.
            for var in in_cell.columns[4:]:
                aus_whole_filt_cord_df.loc[ind, var] = in_cell[var].mean()

            aus_whole_filt_cord_df.loc[ind, "samples_count"] = in_cell.shape[0]

        start_year = period_start.split("-")[0]
        end_year = period_end.split("-")[0]

        # Optional: drop cells without climate data before saving.
        # aus_whole_filt_cord_df = aus_whole_filt_cord_df[~aus_whole_filt_cord_df["aet"].isna()]
        aus_whole_filt_cord_df.to_csv(f"{start_year}to{end_year}_Terraclimate_{country}_grid_bbox_25sqkm_v2.csv", index=False)

In [12]:
%%time
import math
lat_km = math.sqrt(25)
lon_km = math.sqrt(25)
country_list = ['aus', 'sa', 'cr']
time_range = [["2005-11-01","2007-11-01"],["2007-11-01","2009-11-01"],["2009-11-01","2011-11-01"],["2011-11-01","2013-11-01"], ["2013-11-01","2015-11-01"], ["2015-11-01","2017-11-01"], ["2017-11-01","2019-11-01"]]
for country in country_list:
    bbox_grid_whole, aus_whole = country_bbox(country, lat_km, lon_km)
    aus_whole_filt_cord_df = generate_frog_count(bbox_grid_whole)
    get_data(time_range, aus_whole_filt_cord_df, country, aus_whole)

349149


349149it [08:29, 685.15it/s]


(10342, 6)
Generating Data from 2005-11-01 to 2007-11-01


10342it [12:10, 14.15it/s]


Generating Data from 2007-11-01 to 2009-11-01


10342it [12:03, 14.30it/s]


Generating Data from 2009-11-01 to 2011-11-01


10342it [11:59, 14.37it/s]


Generating Data from 2011-11-01 to 2013-11-01


10342it [11:59, 14.38it/s]


Generating Data from 2013-11-01 to 2015-11-01


10342it [11:57, 14.41it/s]


Generating Data from 2015-11-01 to 2017-11-01


10342it [11:50, 14.56it/s]


Generating Data from 2017-11-01 to 2019-11-01


10342it [11:57, 14.41it/s]


176202


176202it [04:31, 649.17it/s]


(670, 6)
Generating Data from 2005-11-01 to 2007-11-01


670it [00:32, 20.58it/s]


Generating Data from 2007-11-01 to 2009-11-01


670it [00:28, 23.18it/s]


Generating Data from 2009-11-01 to 2011-11-01


670it [00:30, 21.64it/s]


Generating Data from 2011-11-01 to 2013-11-01


670it [00:28, 23.45it/s]


Generating Data from 2013-11-01 to 2015-11-01


670it [00:31, 21.55it/s]


Generating Data from 2015-11-01 to 2017-11-01


670it [00:28, 23.70it/s]


Generating Data from 2017-11-01 to 2019-11-01


670it [00:30, 22.08it/s]


48332


48332it [01:10, 680.98it/s]


(545, 6)
Generating Data from 2005-11-01 to 2007-11-01


545it [00:11, 48.10it/s]


Generating Data from 2007-11-01 to 2009-11-01


545it [00:12, 43.05it/s]


Generating Data from 2009-11-01 to 2011-11-01


545it [00:11, 47.33it/s]


Generating Data from 2011-11-01 to 2013-11-01


545it [00:11, 46.59it/s]


Generating Data from 2013-11-01 to 2015-11-01


545it [00:11, 46.66it/s]


Generating Data from 2015-11-01 to 2017-11-01


545it [00:11, 47.24it/s]


Generating Data from 2017-11-01 to 2019-11-01


545it [00:12, 44.49it/s]


CPU times: user 3h 27min 5s, sys: 3min 6s, total: 3h 30min 11s
Wall time: 2h 1min 10s


In [13]:
%%time
lat_km = math.sqrt(25)
lon_km = math.sqrt(25)
country_list = ['test_aus', 'test_sa', 'test_cr']
time_range = [["2011-11-01","2013-11-01"], ["2013-11-01","2015-11-01"], ["2015-11-01","2017-11-01"], ["2017-11-01","2019-11-01"]]
for country in country_list:
    bbox_grid_whole, aus_whole = country_bbox(country, lat_km, lon_km)
    aus_whole_filt_cord_df = generate_frog_count(bbox_grid_whole)
    get_data(time_range, test_df, country, aus_whole)

23940


23940it [00:35, 675.16it/s]


(909, 6)
Generating Data from 2011-11-01 to 2013-11-01


182it [00:02, 63.35it/s]


Generating Data from 2013-11-01 to 2015-11-01


182it [00:02, 61.70it/s]


Generating Data from 2015-11-01 to 2017-11-01


182it [00:02, 61.31it/s]


Generating Data from 2017-11-01 to 2019-11-01


182it [00:02, 61.15it/s]


6004


6004it [00:08, 684.10it/s]


(112, 6)
Generating Data from 2011-11-01 to 2013-11-01


182it [00:01, 106.27it/s]


Generating Data from 2013-11-01 to 2015-11-01


182it [00:01, 109.73it/s]


Generating Data from 2015-11-01 to 2017-11-01


182it [00:01, 115.93it/s]


Generating Data from 2017-11-01 to 2019-11-01


182it [00:01, 109.65it/s]


1683


1683it [00:02, 687.41it/s]


(283, 6)
Generating Data from 2011-11-01 to 2013-11-01


182it [00:01, 113.33it/s]


Generating Data from 2013-11-01 to 2015-11-01


182it [00:01, 134.93it/s]


Generating Data from 2015-11-01 to 2017-11-01


182it [00:01, 117.09it/s]


Generating Data from 2017-11-01 to 2019-11-01


182it [00:01, 116.09it/s]

CPU times: user 2min 16s, sys: 14.5 s, total: 2min 31s
Wall time: 5min 10s



