In [1]:
import json
import satsearch
import geopandas as gpd
import numpy as np
import pandas as pd
import warnings
from shapely.geometry import  Polygon
import rioxarray
import satstac
import os
warnings.filterwarnings("ignore")

In [2]:
import data.sentinel_cog as sc


In [3]:
harran={
"type": "FeatureCollection",
"name": "Harran_AOI",
"crs": { "type": "name", "properties": { "name": "urn:ogc:def:crs:OGC:1.3:CRS84" } },
"features": [
{ "type": "Feature", "properties": { "fid": 1 }, "geometry": { "type": "Polygon", "coordinates": [ [ [ 38.680367610550277, 36.71081864055018 ], [ 38.688923838082594, 37.274751818816462 ], [ 39.282414893278698, 37.277085335416182 ], [ 39.29019328194444, 36.690594830019251 ], [ 38.680367610550277, 36.71081864055018 ] ] ] } }
]
}


In [4]:
type(harran)

dict

In [5]:
datajson=json.dumps(harran)
target_area=gpd.read_file(datajson)


In [8]:
if type(target_area)==gpd.geodataframe.GeoDataFrame:
    print(5)


5


In [5]:
tiles_intersection,tile_map=sc.find_sentinel_tile(target_area,sentinel_tiles_path='../data/raw/boundries/sentinel_tr_tiles.shp')
tile_map

In [6]:
tiles_intersection

Unnamed: 0,name,folders,descriptio,altitude,alt_mode,time_begin,time_end,time_when,GID_0,NAME_0,geometry
0,37SDA,Features,TILE PROPERTIES<br><table border=0 cellpadding...,0.0,,,,,TUR,Turkey,"POLYGON Z ((37.87506 37.04125 0.00000, 39.1097..."
1,37SDB,Features,TILE PROPERTIES<br><table border=0 cellpadding...,0.0,,,,,TUR,Turkey,"POLYGON Z ((37.86147 37.94208 0.00000, 39.1110..."
2,37SEA,Features,TILE PROPERTIES<br><table border=0 cellpadding...,0.0,,,,,TUR,Turkey,"POLYGON Z ((38.99978 37.04658 0.00000, 40.2344..."
3,37SEB,Features,TILE PROPERTIES<br><table border=0 cellpadding...,0.0,,,,,TUR,Turkey,"POLYGON Z ((38.99977 37.94759 0.00000, 40.2493..."


In [7]:
def find_stac_result(target_aoi,date,max_cloud=10):
    URL='https://earth-search.aws.element84.com/v0'
    results = satsearch.Search.search(url=URL,
                                collections=['sentinel-s2-l2a-cogs'],
                                datetime=date,
                                bbox=target_aoi,
                                query={'eo:cloud_cover': {'lt':max_cloud}}, )
    return results
def show_result_df(result=None,items_list=[]):
    #this function return stac result as a pandas dataframe
    if not items_list:
        # if you want to see all result from main stac result(find_stac_result function)
        # you can use this method
        items = result.items()
        items_json=items.geojson()
        items_json=json.dumps(items_json)
        df=gpd.read_file(items_json)
        df['datetime']=pd.to_datetime(df['datetime'], infer_datetime_format=True)
        df['datetime']=pd.to_datetime(df['datetime']).dt.strftime('%Y-%m-%d')
        return df
    else:
        # list comprehension result from find_sentinel_item method,
        # you can show that as a dataframe

        #create empty df
        df=gpd.GeoDataFrame()
        for item in items_list:
            #get item properties as a json
            items_json=item.properties
            tmp=gpd.GeoDataFrame(items_json)
            geo_dict = {'geometry': [Polygon(item.geometry['coordinates'][0])]}
            gdf = gpd.GeoDataFrame(geo_dict, crs="EPSG:4326")
            #import geo info
            tmp['geometry']=gdf['geometry']
            df=df.append(tmp)
            #change datatime columt datatype
        df['datetime']=pd.to_datetime(df['datetime'], infer_datetime_format=True)
        df.reset_index(inplace=True)
        return df
def create_tiles_list(stac_result):
    items = stac_result.items()
    items_json=items.geojson()
    items_json=json.dumps(items_json)
    df=gpd.read_file(items_json)
    df['tile']=df.apply(lambda row: str(row['sentinel:utm_zone'])+row['sentinel:latitude_band']+row['sentinel:grid_square'], axis=1)
    tiles_list = sorted(df['tile'].unique().tolist())
    return tiles_list


In [8]:
# 2018-08-01 ile 2020-11-30

In [9]:
boundry=list(target_area.geometry.bounds.values[0]) #boundry from your AOI
bbox=[boundry[0],boundry[1],boundry[2],boundry[3]] #(min lon, min lat, max lon, max lat)
dates = '2018-08-01/2020-11-30'
band_list=['B02','B03','B04','B08']
cloud_percentage=10

In [10]:
stac_result=find_stac_result(bbox,dates,cloud_percentage)
items = stac_result.items()

In [11]:
tile_list=create_tiles_list(stac_result)
tile_list

['37SDA', '37SDB', '37SEA', '37SEB']

In [12]:
df=show_result_df(stac_result)
len(df)

443

In [13]:
df=df[df['sentinel:data_coverage']>95]
len(df)

410

In [14]:
frame_time=df['datetime'].unique()
print(len(frame_time))
print(frame_time[0])

123
2020-11-28


In [15]:
count=0
for f in frame_time:
    tmp_df=df[df['datetime']==f]
    if len(tmp_df)!=4:
        count+=1
count

42

In [16]:
def drop_notmosaic_date(time_list,target_df):
    for f in time_list:
        tmp_df=target_df[target_df['datetime']==f]
        if len(tmp_df)!=4:
            for i in tmp_df.id :
                target_df = target_df[target_df.id != i]
                
    return target_df

In [17]:
sentinel_df=df.copy()
sentinel_df.head(2)

Unnamed: 0,id,datetime,platform,constellation,gsd,view:off_nadir,proj:epsg,sentinel:utm_zone,sentinel:latitude_band,sentinel:grid_square,sentinel:sequence,sentinel:product_id,sentinel:data_coverage,eo:cloud_cover,sentinel:valid_cloud_cover,created,updated,data_coverage,geometry
0,S2A_37SEA_20201128_0_L2A,2020-11-28,sentinel-2a,sentinel-2,10,0,32637,37,S,EA,0,S2A_MSIL2A_20201128T081301_N0214_R078_T37SEA_2...,100.0,2.65,True,2020-12-28T11:44:32.564999+00:00,2020-12-28T11:44:32.564999+00:00,,"POLYGON ((40.21879 36.05054, 38.99979 36.05673..."
1,S2B_37SDA_20201123_0_L2A,2020-11-23,sentinel-2b,sentinel-2,10,0,32637,37,S,DA,0,S2B_MSIL2A_20201123T081249_N0214_R078_T37SDA_2...,100.0,0.0,True,2020-11-23T15:27:19.611000+00:00,2020-11-23T15:27:19.611000+00:00,,"POLYGON ((37.88933 36.05159, 37.87508 37.04124..."


In [18]:
sentinel_df=drop_notmosaic_date(time_list=frame_time,target_df=sentinel_df)

In [19]:
len(sentinel_df)

324

In [20]:
#drop_list=['37SDA', '37SDC', '37SEA', '37SEB', '37SEC']

In [21]:
#tiles_intersection=sc.drop_tile(tiles_intersection,drop_list)
#tiles_intersection

In [30]:
%%time
from dask.distributed import Client, LocalCluster
import multiprocessing as mp
import xarray as xr
import matplotlib.pyplot as plt
import rioxarray
band_list=['B01', 'B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B08', 'B8A',
           'B09', 'B11', 'B12', 'AOT', 'WVP', 'SCL', 'info', 'metadata',
           'visual', 'overview', 'thumbnail']

def download_subset_image(download_status=False,stac_result=None,item_id_list=[],
                        item_list=[],aoi=None,target_epsg='',
                        band_list=band_list[:-5],
                        download_path='./sentinel_cog',name_suffix='',auto_folder=True):
    result_list=[]
    if item_list:
        with LocalCluster(n_workers=int(0.5 * mp.cpu_count()),
                          processes=True,
                          threads_per_worker=2,
                          memory_limit='2GB',
                          #ip='tcp://localhost:9895',
                          ) as cluster, Client(cluster) as client:
            for item in item_list:
                bands_dict={}
                for band in band_list:
                    img_name=item.properties['sentinel:product_id']
                    bands_dict['image_name']=img_name
                    band_url=item.assets[band]['href']
                    rds = rioxarray.open_rasterio(band_url, masked=True, chunks=(4, "auto", -1))
                    #aoi data from http://geojson.io 
                    # get aoi as geopandas df
                    datajson=json.dumps(aoi)
                    target_area=gpd.read_file(datajson)
                    #https://geopandas.org/projections.html
                    target_area=target_area.to_crs(rds.rio.crs.to_string())
                    clipped =rds.rio.clip(target_area.geometry)
                     
                    if target_epsg:
                        # target_epsg='epsg:4326'
                        clipped = clipped.rio.reproject(target_epsg)
    
                    if download_status:
                        img_path=download_path+'/'+img_name
                        if not os.path.isdir(img_path):
                            os.mkdir(img_path)
                        img_name=band+'.tif'
                        clipped.rio.to_raster(img_path+'/'+img_name)
                       
                    bands_dict[band]=clipped.copy()
                    rds=None
                result_list.append(bands_dict)
                
        return result_list

Wall time: 0 ns


In [31]:
items[0:5]

[S2A_37SEA_20201128_0_L2A,
 S2B_37SDA_20201123_0_L2A,
 S2B_37SEA_20201123_0_L2A,
 S2B_37SDB_20201123_0_L2A,
 S2B_37SEB_20201123_0_L2A]

In [33]:
%%time
subset_dask=download_subset_image(download_status=True,item_list=items[0:5],aoi=harran,target_epsg='',
                        band_list=['B02','B08'],download_path='./harran',name_suffix='',auto_folder=True)

Wall time: 28min 49s


In [4]:
from dask.distributed import Client, LocalCluster
import multiprocessing as mp
import xarray as xr
import matplotlib.pyplot as plt
import rioxarray

def calculate_ndvi(red,nir):
    with LocalCluster(n_workers=int(0.6 * mp.cpu_count()),
        processes=False,
        threads_per_worker=1,
        memory_limit='2GB',
        #ip='tcp://localhost:9895',
        ) as cluster, Client(cluster) as client:

            red_xarray=red
            nir_xarray=nir
            red=red_xarray.persist()
            nir=nir_xarray.persist()
            red=red.values
            nir=nir.values
            ndvi = (nir.astype(float) - red.astype(float))/(nir + red)

    return ndvi

def direct_ndvi(red,nir):
    #rioxarray return
    with LocalCluster(n_workers=int(0.6 * mp.cpu_count()),
        processes=False,
        threads_per_worker=1,
        memory_limit='6GB',
        ip='tcp://localhost:9895',
        ) as cluster, Client(cluster) as client:
            red_xarray=red
            nir_xarray=nir
            ndvi = (nir_xarray.astype(float) - red_xarray.astype(float))/(nir_xarray + red_xarray)

    return ndvi


In [None]:
from rioxarray import merge
def calculate_zonal_ndvi3(time_list,target_df,stac_items,vector_gpd):
    fin=pd.DataFrame({'projectID':[],})
    ndvi_list=[]
    for f in time_list:
        tmp_df=target_df[target_df['datetime']==f]
        tmp_items=satstac.ItemCollection(stac_items)
        tmp_items.filter('sentinel:product_id',list(tmp_df['sentinel:product_id']))
        subset_dask=download_subset_image(download_status=False,item_list=list(tmp_items),aoi=harran,band_list=['B02','B08'])
        for index,img in enumerate(subset_dask):
            ndvidirecty=direct_ndvi(subset_dask[index]['B02'],subset_dask[index]['B08'])
            ndvidirecty.name='ndvi'
            ndvi_list.append(ndvidirecty)
        print('start ndvi merge')
        with LocalCluster(n_workers=int(0.6 * mp.cpu_count()),
                          processes=False,
                          threads_per_worker=1,
                          memory_limit='6GB',
                          ip='tcp://localhost:9895',
                          ) as cluster, Client(cluster) as client:
            ndvi = merge.merge_arrays(ndvi_list)
        print('finnish ndvi merge')
        with LocalCluster(n_workers=int(0.6 * mp.cpu_count()),
                          processes=False,
                          threads_per_worker=2,
                          memory_limit='6GB',
                          ip='tcp://localhost:9895',
                          ) as cluster, Client(cluster) as client:
            out_grid = make_geocube(vector_data=vector_gpd,
                                    measurements=["projectID"],
                                    like=ndvi, # ensure the data are on the same grid
                                    #fill=np.nan
                                    )
            out_grid["ndvi"] = ndvi
        print('finnish out grid')
        with LocalCluster(n_workers=int(0.6 * mp.cpu_count()),
                          processes=False,
                          threads_per_worker=2,
                          memory_limit='6GB',
                          ip='tcp://localhost:9895',
                          ) as cluster, Client(cluster) as client:
            
            grouped_ndvi = out_grid.drop("spatial_ref").groupby(out_grid.projectID)
            grid_mean = grouped_ndvi.mean().rename({"ndvi": f'ndviMean_{f}'})
            zonal_stats = xarray.merge([grid_mean,])
            last_df=zonal_stats.to_dataframe()
            last_df.reset_index(inplace=True)
            last_df.rename(columns={'ndvi_mean': f'ndviMean_{f}'}, inplace=True)
            last_df.drop(columns=['band','spatial_ref'],inplace=True)
            last_df.to_csv(f'csv/ndviMean_{f}.csv', index=False)
            
            fin=fin.merge(last_df,how='outer',on='projectID')
            print('end fin')
    fin.to_csv(f'csv/ndvi_all.csv', index=False)
    return print('end')

In [121]:
ndvi_time=sentinel_df['datetime'].unique()
print(ndvi_time[0])
print(len(ndvi_time))


2020-11-23
81


In [122]:
harran_field = gpd.read_file("../data/raw/HARRAN_Boundary_2019-2020_v04.gpkg")

In [123]:
harran_field.head(1)

Unnamed: 0,id,crop,Il_Adi,Ilce_Adi,Bolge_Adi,Bolum_Adi,area,PlaceName,projectID,P12_2019majority,P34_2019majority,P12_2020majority,P34_2020majority,geometry
0,204980733,,ŞANLIURFA,HALİLİYE,Güneydoğu Anadolu Bölgesi,Orta Fırat Bölümü,4.3844,Çanakçı,1,31.0,23.0,111.0,18.0,"MULTIPOLYGON (((39.17457 37.12100, 39.17457 37..."


In [124]:
from geocube.api.core import make_geocube
import numpy as np
import xarray

In [125]:
harran_field_utm=harran_field.to_crs('EPSG:32637')

In [31]:
from rioxarray import merge
def calculate_zonal_ndvi(time_list,target_df,stac_items,vector_gpd):
    ndvi_list=[]
    for f in time_list:
        tmp_df=target_df[target_df['datetime']==f]
        tmp_items=satstac.ItemCollection(stac_items)
        tmp_items.filter('sentinel:product_id',list(tmp_df['sentinel:product_id']))
        subset_dask=download_subset_image(download_status=False,item_list=list(tmp_items),aoi=harran,band_list=['B02','B08'])
        for index,img in enumerate(subset_dask):
            ndvidirecty=direct_ndvi(subset_dask[index]['B02'],subset_dask[index]['B08'])
            ndvidirecty.name='ndvi'
            ndvi_list.append(ndvidirecty)
    
        ndvi = merge.merge_arrays(ndvi_list)
        out_grid = make_geocube(vector_data=vector_gpd,
                                measurements=["projectID"],
                                like=ndvi, # ensure the data are on the same grid
                                #fill=np.nan
                                )
        out_grid["ndvi"] = ndvi

        grouped_ndvi = out_grid.drop("spatial_ref").groupby(out_grid.projectID)
        grid_mean = grouped_ndvi.mean().rename({"ndvi": f'ndviMean_{f}'})
        zonal_stats = xarray.merge([grid_mean,])
        last_df=zonal_stats.to_dataframe()
        last_df.reset_index(inplace=True)
        last_df.rename(columns={'ndvi_mean': f'ndviMean_{f}'}, inplace=True)
        last_df.drop(columns=['band','spatial_ref'],inplace=True)
    return last_df

In [128]:
ndvi_time_den=ndvi_time[20:21]
ndvi_time_den

array(['2020-07-21'], dtype=object)

In [39]:
%%time
lst_df=calculate_zonal_ndvi(time_list=ndvi_time_den,target_df=sentinel_df,stac_items=items,vector_gpd=harran_field_utm)

KeyboardInterrupt: 

In [35]:
len(lst_df)

34914

In [42]:
err_list=list(sentinel_df[sentinel_df['datetime']=='2020-08-05']['sentinel:product_id'])

In [44]:
tmp_items=satstac.ItemCollection(items)
tmp_items.filter('sentinel:product_id',err_list)

In [50]:
tmp_items[0].assets['B02']['href']

'https://sentinel-cogs.s3.us-west-2.amazonaws.com/sentinel-s2-l2a-cogs/37/S/DA/2020/8/S2B_37SDA_20200805_0_L2A/B02.tif'

In [None]:
out_grid = make_geocube(
    vector_data=harran_field_utm,
    measurements=["projectID"],
    like=ndvidirecty, # ensure the data are on the same grid
    #fill=np.nan
)
out_grid["ndvi"] = ndvidirecty


In [None]:
out_grid = make_geocube(
    vector_data=harran_field_utm,
    measurements=["projectID"],
    like=ndvidirecty, # ensure the data are on the same grid
    #fill=np.nan
)

In [None]:
out_grid["ndvi"] = ndvidirecty
out_grid

In [None]:
out_grid.ndvi.rio.to_raster('ndvifromgrid.tif')

In [None]:
grouped_ndvi = out_grid.drop("spatial_ref").groupby(out_grid.projectID)
grid_mean = grouped_ndvi.mean().rename({"ndvi": "ndvi_mean"})

In [None]:
zonal_stats = xarray.merge([grid_mean,])
#zonal_stats.to_dataframe()

In [None]:
last_df=zonal_stats.to_dataframe()

In [None]:
last_df.reset_index(inplace=True)
last_df.rename(columns={'ndvi_mean': f'ndviMean_{d}'}, inplace=True)
last_df.drop(columns=['band','spatial_ref'],inplace=True)
fin=pd.DataFrame({'projectID':[],})
fin=fin.merge(dene,how='outer',on='projectID')


In [None]:
dene=last_df.iloc[0:10]

In [None]:
dene.reset_index(inplace=True)

In [None]:
dene.rename(columns={'ndvi_mean': 'ndvi1'}, inplace=True)
dene

In [None]:
dene.drop(columns=['band','spatial_ref'],inplace=True)

In [None]:
dene

In [None]:
import pandas as pd

In [None]:
fin=pd.DataFrame({'projectID':[],})
fin

In [None]:
fin=fin.merge(dene,how='outer',on='projectID')
fin

In [None]:
dene2['ndvi_mean']=[1,2,3,4,5,6,7,8,9,10]
dene2.rename(columns={'ndvi_mean': 'ndvi2'}, inplace=True)
dene2

In [None]:
fin=pd.concat([fin,dene],axis=1,ignore_index=False)
fin

In [None]:
fin.reset_index(inplace=True, drop=True)

In [None]:
fin

In [None]:
fin=pd.concat([fin,dene],axis=1)
fin

In [131]:
from rioxarray import merge
def calculate_zonal_ndvi4(time_list,target_df,stac_items,vector_gpd):
    fin=pd.DataFrame({'projectID':[],})
    for f in time_list:
        tmp_df=target_df[target_df['datetime']==f]
        tmp_items=satstac.ItemCollection(stac_items)
        tmp_items.filter('sentinel:product_id',list(tmp_df['sentinel:product_id']))
        subset_dask=download_subset_image(download_status=False,item_list=list(tmp_items),aoi=harran,band_list=['B02','B08'])
        for index,img in enumerate(subset_dask):
            ndvi=direct_ndvi(subset_dask[index]['B02'],subset_dask[index]['B08'])
            ndvi.name='ndvi'
            out_grid = make_geocube(vector_data=vector_gpd,
                                    measurements=["projectID"],
                                    like=ndvi, # ensure the data are on the same grid
                                    fill=np.nan
                                    )
            out_grid["ndvi"] = ndvi
            grouped_ndvi = out_grid.drop("spatial_ref").groupby(out_grid.projectID)
            grid_mean = grouped_ndvi.mean().rename({"ndvi": f'ndviMean_{f}'})
            print('grid finish')
            zonal_stats = xarray.merge([grid_mean,])
            last_df=zonal_stats.to_dataframe()
            last_df.reset_index(inplace=True)
            last_df.rename(columns={'ndvi_mean': f'ndviMean_{f}'}, inplace=True)
            last_df.drop(columns=['band','spatial_ref'],inplace=True)
            last_df.to_csv(f'ndviMean_{f}.csv', index=False)
            fin=pd.merge(df1, df2, on="projectID",how='outer')
            fin=fin.fillna(0)
            convert_dict={f'ndviMean_{f}_x': 'float',f'ndviMean_{f}_y': 'float'}
            fin = fin.astype(convert_dict)
            fin[f'ndviMean_{f}'] = fin[['B_x', 'B_y']].mean(axis=1)
            fin.drop(columns=[f'ndviMean_{f}_x',f'ndviMean_{f}_y'],axis=1,inplace=True)
    return fin

In [132]:
%%time
lst_df=calculate_zonal_ndvi4(time_list=ndvi_time[0:2],target_df=sentinel_df,stac_items=items,vector_gpd=harran_field_utm)

KeyboardInterrupt: 

In [149]:
%%time

from rioxarray import merge

fin=pd.DataFrame({'projectID':[],'ndvi_mean':[]})
for f in ndvi_time[0:2]:
    tmp_df=sentinel_df[sentinel_df['datetime']==f]
    tmp_items=satstac.ItemCollection(items)
    tmp_items.filter('sentinel:product_id',list(tmp_df['sentinel:product_id']))
    subset_dask=download_subset_image(download_status=False,item_list=list(tmp_items),aoi=harran,band_list=['B02','B08'])
    for index,img in enumerate(subset_dask):
        ndvi=direct_ndvi(subset_dask[index]['B02'],subset_dask[index]['B08'])
        ndvi.name='ndvi'
        print('ndvi')
        out_grid = make_geocube(vector_data=harran_field_utm,
                                measurements=["projectID"],
                                like=ndvi, # ensure the data are on the same grid
                                fill=np.nan
                                )
        out_grid["ndvi"] = ndvi
        print('grid finish')
        grouped_ndvi = out_grid.drop("spatial_ref").groupby(out_grid.projectID)
        grid_mean = grouped_ndvi.mean().rename({"ndvi": 'ndvi_mean'})
        print('grid meann')
        zonal_stats = xarray.merge([grid_mean,])
        last_df=zonal_stats.to_dataframe()
        print('lastdf')
        last_df.reset_index(inplace=True)
        last_df.drop(columns=['band','spatial_ref'],inplace=True)
        last_df.to_csv(f'ndviMean_{f}_{index}.csv', index=False)
        fin=pd.merge(fin, last_df, on="projectID",how='outer')
        fin=fin.fillna(0)
        convert_dict={'ndvi_mean_x': 'float','ndvi_mean_y': 'float'}
        fin = fin.astype(convert_dict)
        fin[f'ndviMean_{f}'] = fin[['ndvi_mean_x', 'ndvi_mean_y']].mean(axis=1)
        fin.drop(columns=['ndvi_mean_x','ndvi_mean_y'],axis=1,inplace=True)

ndvi
grid finish
grid meann
lastdf
ndvi
grid finish
grid meann
lastdf


KeyError: 'Only a column name can be used for the key in a dtype mappings argument.'

In [266]:
zonal_stats

Unnamed: 0,Array,Chunk
Bytes,106.14 kB,8 B
Shape,"(13267, 1)","(1, 1)"
Count,68201 Tasks,13267 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 106.14 kB 8 B Shape (13267, 1) (1, 1) Count 68201 Tasks 13267 Chunks Type float64 numpy.ndarray",1  13267,

Unnamed: 0,Array,Chunk
Bytes,106.14 kB,8 B
Shape,"(13267, 1)","(1, 1)"
Count,68201 Tasks,13267 Chunks
Type,float64,numpy.ndarray


In [272]:
%%time
with LocalCluster(n_workers=int(0.6 * mp.cpu_count()),
                  processes=False,
                  threads_per_worker=2,
                  memory_limit='6GB',
                  ip='tcp://localhost:9895',
                  ) as cluster, Client(cluster) as client:
    zonal_stats2=zonal_stats.compute()
    dask_df=zonal_stats2.to_dataframe()
#Wall time: 7min 36s
#Wall time: 11min 30s


Wall time: 7min 50s


In [None]:
zonal_stats.compute?

In [None]:
!pip install xarray-extras
import xarray_extras

In [143]:
df1 = pd.DataFrame(
       {
           "A": ["A0", "A1", "A2", "A3"],
           "B":[]
           
     
       }
   )
df2 = pd.DataFrame(
       {
           "A": ["A2", "A3", "A6", "A7"],
           "B": ["2", "3", "4", "5"],
   
       },
       index=[4, 5, 6, 7],
   )
   

In [145]:
frames = [df1, df2]

In [146]:
result = pd.concat(frames,verify_integrity=True)
result

Unnamed: 0,A,B
4,A2,2
5,A3,3
6,A6,4
7,A7,5


In [147]:
a=pd.merge(df1, df2, on="A",how='outer')
a

Unnamed: 0,B_x,A,B_y
0,,A2,2
1,,A3,3
2,,A6,4
3,,A7,5


In [151]:
a=pd.merge(df1, df2, on="A",how='outer')
a=a.fillna(0)
convert_dict={'B_x': 'float','B_y': 'float'}
a = a.astype(convert_dict) 
print(a.dtypes)
a['ndvi'] = a[['B_x', 'B_y']].mean(axis=1)
a.drop(columns=['B_x','B_y'],axis=1,inplace=True)
print(a)

B_x    float64
A       object
B_y    float64
dtype: object
    A  ndvi
0  A2   1.0
1  A3   1.5
2  A6   2.0
3  A7   2.5


In [116]:
b=pd.merge(df1, df2, on="A",how='outer')
b=b.fillna(0)
convert_dict={'B_x': 'float','B_y': 'float'}
b = b.astype(convert_dict) 
print(b.dtypes)
b['ndvi'] = b[['B_x', 'B_y']].mean(axis=1)
b.drop(columns=['B_x','B_y'],axis=1,inplace=True)
print(a)

Unnamed: 0,A,ndvi
0,A0,0.5
1,A1,1.0
2,A2,2.5
3,A3,3.5
4,A6,2.0
5,A7,2.5


In [252]:
ilk=last_df.iloc[0:100]
ikinci=last_df.iloc[80:180]

In [253]:
m=pd.merge(ilk, ikinci, on="projectID",how='outer')
m=m.fillna(np.nan)


In [254]:
m.head()

Unnamed: 0,projectID,ndvi_mean_x,ndvi_mean_y
0,1256.0,0.672889,
1,1257.0,0.706827,
2,1258.0,0.831125,
3,1259.0,0.77216,
4,1260.0,0.771594,


In [255]:
m=pd.merge(ilk, ikinci, on="projectID",how='outer')
m=m.fillna(np.nan)
convert_dict={'ndvi_mean_x': 'float','ndvi_mean_y': 'float'}
m = m.astype(convert_dict) 
print(m.dtypes)
m['ndvi'] = m[['ndvi_mean_x', 'ndvi_mean_y']].mean(axis=1)
m.drop(columns=['ndvi_mean_x','ndvi_mean_y'],axis=1,inplace=True)
print(m.head())

projectID      float64
ndvi_mean_x    float64
ndvi_mean_y    float64
dtype: object
   projectID      ndvi
0     1256.0  0.672889
1     1257.0  0.706827
2     1258.0  0.831125
3     1259.0  0.772160
4     1260.0  0.771594


In [256]:
len(m)

180

In [198]:
type(m['ndvi_mean_y'].values[0])

numpy.float64

In [203]:
type(np.NaN)

float

In [10]:
from dask.distributed import Client, LocalCluster


In [11]:
cluster=LocalCluster(n_workers=int(0.6 * mp.cpu_count()),processes=True,threads_per_worker=2,memory_limit='2GB')

In [12]:
client=Client(cluster)
client

0,1
Client  Scheduler: tcp://127.0.0.1:51473  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 7  Cores: 14  Memory: 14.00 GB


In [7]:
client.close()

In [9]:
cluster.close()