In [1]:
# https://gis.stackexchange.com/a/104367

import gdal
from gdalconst import GA_ReadOnly

In [2]:
# Open the first chip read-only and derive its bounding box from the geotransform.
data = gdal.Open('D:/canopy_data/examples/test_2_chips/test_2_chips.0.tif', GA_ReadOnly)
geoTransform = data.GetGeoTransform()
# Elements 0 and 3 of the geotransform are taken as the x / y origin of the raster.
minx = geoTransform[0]
maxy = geoTransform[3]
# Opposite edges: origin plus the per-pixel step (elements 1 and 5) times the pixel count.
maxx = minx + geoTransform[1] * data.RasterXSize
miny = maxy + geoTransform[5] * data.RasterYSize

In [3]:
# Collect the bounding box in (minx, miny, maxx, maxy) order and display it.
coord = [minx, miny, maxx, maxy]

coord

[13.200943794, 2.641765163974585, 13.273078725025416, 2.713900095]

In [4]:
def find_extent(filename):
    """Return the bounding box of a raster as ``(minx, miny, maxx, maxy)``.

    The box is computed from the dataset's geotransform and its pixel
    dimensions. Only the origin and pixel-size terms are used; the two
    rotation terms (elements 2 and 4) are ignored, so the result describes
    the true footprint only when those terms are zero.

    Parameters
    ----------
    filename : str
        Path of a raster that GDAL can open.

    Returns
    -------
    tuple of float
        ``(minx, miny, maxx, maxy)``.

    Raises
    ------
    RuntimeError
        If GDAL cannot open ``filename``.
    """
    data = gdal.Open(filename, GA_ReadOnly)
    if data is None:
        # gdal.Open reports failure by returning None unless GDAL exceptions
        # are enabled; fail here with the offending path instead of with an
        # AttributeError on the next line.
        raise RuntimeError(f'GDAL could not open {filename!r}')

    origin_x, pixel_width, _, origin_y, _, pixel_height = data.GetGeoTransform()

    minx = origin_x
    maxy = origin_y
    maxx = minx + pixel_width * data.RasterXSize
    # pixel_height is added, not subtracted: it is expected to be negative
    # when rows run from north to south.
    miny = maxy + pixel_height * data.RasterYSize

    return (minx, miny, maxx, maxy)

In [5]:
import glob


# Extent of every chip in the folder, in the order glob returns the paths.
coords = []

# NOTE(review): recursive=True only changes matching for patterns containing
# '**'; with this pattern only files directly inside the folder are matched.
for geotiff in glob.glob('D:/canopy_data/examples/test_2_chips/*.tif', recursive=True):
    coords.append(find_extent(geotiff))

In [6]:
# Peek at the first five extents.
coords[0:5]

[(13.200943794, 2.641765163974585, 13.273078725025416, 2.713900095),
 (13.273078725025416, 2.641765163974585, 13.34521365605083, 2.713900095),
 (13.922293104254154, 2.641765163974585, 13.994428035279569, 2.713900095),
 (20.41443689654153, 2.641765163974585, 20.486571827566948, 2.713900095),
 (13.922293104254154,
  1.9925507847458472,
  13.994428035279569,
  2.0646857157712626)]

In [7]:
# Number of extents collected (one per path returned by the glob).
len(coords)

7040

In [8]:
# Number of distinct extents; equal to len(coords) when no two chips share a bounding box.
len(set(coords))

7040

In [11]:
def test_for_duplicate_extents(folder):
    if folder[-1] == '/':
        glob_input = folder
    else:
        glob_input = folder + '/'
    coords = []
    for geotiff in glob.glob(glob_input + '*.tif', recursive=True):
        coords.append(find_extent(geotiff))
        
    return len(coords) == len(set(coords))

In [11]:
import pandas as pd


# Pair every chip path with the extent computed for it in `coords`.
# NOTE(review): the folder is globbed a second time here, so the pairing
# relies on this call returning the same files in the same order as the glob
# that filled `coords`; building both columns from one list of paths would
# remove that assumption.
df = pd.DataFrame()
df['URI'] = glob.glob('D:/canopy_data/examples/test_2_chips/*.tif', recursive=True)
df['coord'] = coords

df.head()

Unnamed: 0,URI,coord
0,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.200943794, 2.641765163974585, 13.273078725..."
1,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.273078725025416, 2.641765163974585, 13.345..."
2,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.922293104254154, 2.641765163974585, 13.994..."
3,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(20.41443689654153, 2.641765163974585, 20.4865..."
4,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.922293104254154, 1.9925507847458472, 13.99..."


In [12]:
# Show one full path to see how the folder and the file name are separated.
df.loc[0, 'URI']

'D:/canopy_data/examples/test_2_chips\\test_2_chips.0.tif'

In [13]:
# Keep the piece after the first backslash as the file name.
# NOTE(review): this only works for paths shaped like the ones above (forward
# slashes in the folder, a single backslash before the file name); a path
# without a backslash would raise IndexError here.
df['name'] = df['URI'].apply(lambda x: x.split('\\')[1])

df.head()

Unnamed: 0,URI,coord,name
0,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.200943794, 2.641765163974585, 13.273078725...",test_2_chips.0.tif
1,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.273078725025416, 2.641765163974585, 13.345...",test_2_chips.1.tif
2,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.922293104254154, 2.641765163974585, 13.994...",test_2_chips.10.tif
3,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(20.41443689654153, 2.641765163974585, 20.4865...",test_2_chips.100.tif
4,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.922293104254154, 1.9925507847458472, 13.99...",test_2_chips.1000.tif


In [15]:
# minx of the first row's extent, used as the reference value.
num = df.loc[0, 'coord'][0]

# Flag every row whose minx is exactly equal (float ==) to the reference.
df['filter'] = df['coord'].apply(lambda x: x[0] == num)
df.head()

Unnamed: 0,URI,coord,name,filter
0,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.200943794, 2.641765163974585, 13.273078725...",test_2_chips.0.tif,True
1,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.273078725025416, 2.641765163974585, 13.345...",test_2_chips.1.tif,False
2,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.922293104254154, 2.641765163974585, 13.994...",test_2_chips.10.tif,False
3,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(20.41443689654153, 2.641765163974585, 20.4865...",test_2_chips.100.tif,False
4,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.922293104254154, 1.9925507847458472, 13.99...",test_2_chips.1000.tif,False


In [17]:
# Keep only the rows flagged in the boolean 'filter' column.
df2 = df.loc[df['filter']]
df2

Unnamed: 0,URI,coord,name,filter
0,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.200943794, 2.641765163974585, 13.273078725...",test_2_chips.0.tif,True
114,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.200943794, 2.56963023294917, 13.2730787250...",test_2_chips.110.tif,True
115,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.200943794, 1.9204158537204317, 13.27307872...",test_2_chips.1100.tif,True
237,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.200943794, 1.8482809226950165, 13.27307872...",test_2_chips.1210.tif,True
359,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.200943794, 1.776145991669601, 13.273078725...",test_2_chips.1320.tif,True
...,...,...,...,...
6469,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.200943794, -1.8306005596011639, 13.2730787...",test_2_chips.6820.tif,True
6591,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.200943794, -1.8975958767910182, 13.2730787...",test_2_chips.6930.tif,True
6786,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.200943794, 2.1368206467966777, 13.27307872...",test_2_chips.770.tif,True
6908,D:/canopy_data/examples/test_2_chips\test_2_ch...,"(13.200943794, 2.0646857157712626, 13.27307872...",test_2_chips.880.tif,True


In [18]:
# Extent stored in the row labelled 0.
df.loc[0, 'coord']

(13.200943794, 2.641765163974585, 13.273078725025416, 2.713900095)

In [19]:
# Extent stored in the row labelled 1.
df.loc[1, 'coord']

(13.273078725025416, 2.641765163974585, 13.34521365605083, 2.713900095)

In [20]:
# Difference between the minx values of rows 1 and 0.
df.loc[1, 'coord'][0] - df.loc[0, 'coord'][0]

0.07213493102541513

In [21]:
# Extent stored in the row labelled 13.
df.loc[13, 'coord']

(14.571507483482891,
 1.9925507847458472,
 14.643642414508307,
 2.0646857157712626)

In [23]:
# Extent stored in the row labelled 14.
df.loc[14, 'coord']

(20.486571827566948, 2.641765163974585, 20.558706758592365, 2.713900095)

In [24]:
import rasterio as rio

In [25]:
# Read the full pixel array of one chip from the filtered folder and display it.
file14_uri = 'D:/canopy_data/examples/test_2_chips_filtered/test_2_chips.14.tif'

# The file is only held open for the duration of the read.
with rio.open(file14_uri) as f:
    raster14 = f.read()
    
raster14

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]]], dtype=uint8)

In [26]:
import numpy as np


# https://stackoverflow.com/a/18395906/12685847

# Count the non-zero values in the chip; 0 means the whole array is zero.
np.count_nonzero(raster14)

0

In [27]:
import glob
import os


# Delete every chip in the filtered folder whose raster holds no non-zero value.
# This removes files from disk and cannot be undone by re-running the notebook.
chip_names = glob.glob('D:/canopy_data/examples/test_2_chips_filtered/*.tif')

for name in chip_names:
    # Read inside the with-block so the file is closed again before the
    # os.remove call below.
    with rio.open(name) as f:
        raster = f.read()
        
    if np.count_nonzero(raster) == 0:
        os.remove(name)

In [28]:
# Number of chips currently in the filtered folder.
len(glob.glob('D:/canopy_data/examples/test_2_chips_filtered/*.tif'))

812

In [29]:
# Paths of the chips currently in the filtered folder.
new_names = glob.glob('D:/canopy_data/examples/test_2_chips_filtered/*.tif')

# Take the first path as a sample for working out how to parse it.
name = new_names[0]

name

'D:/canopy_data/examples/test_2_chips_filtered\\test_2_chips.0.tif'

In [30]:
# Second dot-separated piece of the sample path.
name.split('.')[1]

'0'

In [35]:
# Everything before the first backslash of the sample path.
name.split('\\')[0]

'D:/canopy_data/examples/test_2_chips_filtered'

In [31]:
# One row per remaining chip: full path, file name, and chip number.
# Note that this rebinds `df`, replacing the extent table built earlier.
df = pd.DataFrame()

df['uri'] = new_names
# File name = the piece after the first backslash of the path.
df['name'] = df['uri'].apply(lambda x: x.split('\\')[1])
# Chip number = the second dot-separated piece of the file name, as an int.
df['num'] = df['name'].apply(lambda x: int(x.split('.')[1]))

df.head()

Unnamed: 0,uri,name,num
0,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.0.tif,0
1,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.1.tif,1
2,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.10.tif,10
3,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.1000.tif,1000
4,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.1001.tif,1001


In [33]:
# Order the chips by chip number. The rows keep their original index labels,
# so the index is no longer 0, 1, 2, ... after this step.
df2 = df.sort_values(by='num')
df2.head()

Unnamed: 0,uri,name,num
0,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.0.tif,0
1,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.1.tif,1
80,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.2.tif,2
161,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.3.tif,3
455,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.4.tif,4


In [39]:
# Renumber the remaining chips so their numbers run 0, 1, 2, ... without gaps.
#
# The loop below addresses rows with df2.loc[row, ...] where row counts
# 0..len(df2)-1, so df2 needs a fresh 0..N-1 index. After sort_values the
# labels still follow the unsorted order; without this reset the loop would
# visit the chips in that order instead of by chip number, rename chips onto
# names that are still in use, and pair `new_uris` with the wrong rows.
df2 = df2.reset_index(drop=True)

i = 0

new_uris = []

for row in range(len(df2)):
    uri = df2.loc[row, 'uri']
    num = df2.loc[row, 'num']
    if num != i:
        # Move the chip to the next free number, keeping it in its folder
        # (the part of the path before the backslash).
        base = uri.split('\\')[0]
        new_uri = base + f'\\test_2_chips.{i}.tif'
        os.rename(uri, new_uri)
        new_uris.append(new_uri)
    else:
        # Already in the right slot: the path stays as it is.
        new_uris.append(uri)

    i += 1

# new_uris was built in row order, so it lines up with df2 positionally.
df2['new_uri'] = new_uris

df2.head()

Unnamed: 0,uri,name,num,new_uri
0,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.0.tif,0,D:/canopy_data/examples/test_2_chips_filtered\...
1,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.1.tif,1,D:/canopy_data/examples/test_2_chips_filtered\...
2,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.2.tif,2,D:/canopy_data/examples/test_2_chips_filtered\...
3,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.3.tif,3,D:/canopy_data/examples/test_2_chips_filtered\...
4,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.4.tif,4,D:/canopy_data/examples/test_2_chips_filtered\...


In [42]:
# File name part of each new path (the piece after the first backslash).
df2['new_name'] = df2['new_uri'].apply(lambda x: x.split('\\')[1])
df2.head()

Unnamed: 0,uri,name,num,new_uri,new_name
0,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.0.tif,0,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.0.tif
1,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.1.tif,1,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.1.tif
2,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.2.tif,2,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.2.tif
3,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.3.tif,3,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.3.tif
4,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.4.tif,4,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.4.tif


In [43]:
# Last rows: compare the old names with the new names.
df2.tail()

Unnamed: 0,uri,name,num,new_uri,new_name
807,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.7022.tif,7022,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.807.tif
808,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.7023.tif,7023,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.808.tif
809,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.7024.tif,7024,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.809.tif
810,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.7025.tif,7025,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.810.tif
811,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.7026.tif,7026,D:/canopy_data/examples/test_2_chips_filtered\...,test_2_chips.811.tif


In [49]:
def attach_trailing_slash(uri):
    """Return ``uri`` ending in a path separator.

    Parameters
    ----------
    uri : str
        Folder path. May already end in ``/`` or ``\\``.

    Returns
    -------
    str
        ``uri`` unchanged when it already ends in ``/`` or ``\\``, or when it
        is empty; otherwise ``uri`` with ``/`` appended.
    """
    # An empty path is returned as-is: indexing uri[-1] raised IndexError on
    # it, and appending '/' would turn "current folder" into the filesystem
    # root.
    if not uri or uri.endswith(('/', '\\')):
        return uri
    return uri + '/'

In [47]:
def remove_no_data_chips(folder):
    """Delete every ``*.tif`` chip in ``folder`` whose raster is all zeros.

    Each chip is read in full; a chip is deleted from disk when it contains
    no non-zero value. Files are removed permanently.

    Parameters
    ----------
    folder : str
        Directory holding the chips, with or without a trailing separator.

    Returns
    -------
    None
    """
    # Was attach_leading_slash, which is not defined anywhere in the notebook.
    base = attach_trailing_slash(folder)

    for name in glob.glob(base + '*.tif'):
        # Read inside the with-block so the file is closed again before it
        # may be removed.
        with rio.open(name) as f:
            raster = f.read()

        if np.count_nonzero(raster) == 0:
            os.remove(name)

In [50]:
def reset_chip_names(folder):
    """Renumber the chips in ``folder`` so their numbers run 0, 1, 2, ... without gaps.

    Chips are expected to be named ``<prefix>.<number>.tif``. They are
    processed in ascending order of ``<number>``; the chip at position ``i``
    is renamed to ``<prefix>.<i>.tif`` unless it already carries that number.
    The prefix of each chip is kept, so the function is not tied to one
    particular chip series.

    Parameters
    ----------
    folder : str
        Directory holding the chips, with or without a trailing separator.

    Returns
    -------
    list of str
        The path of every chip after renumbering, in numeric order.

    Raises
    ------
    ValueError
        If a file name has no integer as its second dot-separated piece.
    FileExistsError
        If a rename would overwrite an existing file.
    """
    # Was attach_leading_slash, which is not defined anywhere in the notebook.
    base = attach_trailing_slash(folder)

    def chip_number(uri):
        # Parse the number from the file name only, so dots or separators in
        # the folder part of the path cannot interfere.
        return int(os.path.basename(uri).split('.')[1])

    # Ascending numeric order matters: each chip only ever moves down to a
    # number that earlier renames have already vacated.
    ordered = sorted(glob.glob(base + '*.tif'), key=chip_number)

    new_uris = []
    for i, uri in enumerate(ordered):
        if chip_number(uri) == i:
            new_uris.append(uri)
            continue

        directory, name = os.path.split(uri)
        prefix = name.split('.')[0]
        new_uri = os.path.join(directory, f'{prefix}.{i}.tif')
        # os.rename silently replaces the target on some platforms; refuse
        # instead of losing a chip.
        if os.path.exists(new_uri):
            raise FileExistsError(f'cannot rename {uri!r}: {new_uri!r} already exists')
        os.rename(uri, new_uri)
        new_uris.append(new_uri)

    return new_uris