In [32]:
import os
import struct

import numpy as np
from osgeo import gdal

In [33]:
# Root folder of the osgeopy sample data. Set the OSGEOPY_DATA_DIR environment
# variable to point at your own copy; when it is unset the original location
# is used, so existing setups keep working.
data_dir = os.environ.get(
    'OSGEOPY_DATA_DIR',
    r'C:\Personale\atm\reps\gis-programming\osgeopy-data')

### Reading partial datasets

In [None]:
# Move into the Washington Landsat folder, open one of the Landsat files and
# take its first raster band.
landsat_dir = os.path.join(data_dir, 'landsat', 'washington')
os.chdir(landsat_dir)
ds = gdal.Open('p047r027_7t20000730_z10_nn10.tif')
band = ds.GetRasterBand(1)

In [None]:
# Read a window of 3 rows by 6 columns whose upper-left corner sits at
# column 1400, row 6000. Offsets and sizes are always given x first, then y.
data = band.ReadAsArray(xoff=1400, yoff=6000, win_xsize=6, win_ysize=3)
print(data)

In [None]:
# Read the same window again and let numpy cast it to floating point.
window = band.ReadAsArray(1400, 6000, 6, 3)
data = window.astype(float)
print(data)

In [None]:
# Alternatively, preallocate a floating point array of the window's shape
# (rows, columns) and have GDAL read the pixels straight into it.
data = np.empty(shape=(3, 6), dtype=np.float64)
band.ReadAsArray(1400, 6000, 6, 3, buf_obj=data)
print(data)

In [None]:
# Write these few pixels to the middle of a tiny 10x10 dummy raster
# (upper-left corner of the written window at column 4, row 6).
test_ds = gdal.GetDriverByName('GTiff').Create('test.tif', 10, 10)
band2 = test_ds.GetRasterBand(1)
band2.WriteArray(data, 4, 6)
# Flush the pixels and release the band together with the dataset. A band
# that outlives its dataset is either a dangling handle or keeps the file
# open (this depends on the GDAL version), and a later cell reopens test.tif.
band2.FlushCache()
del band2, test_ds

#### Access window out of range

In [None]:
# Try reading a 5x5 window from the test image you just made, starting at
# column 8 and row 2 (the x offset comes first). This will fail because it
# asks for columns 8 through 12, but the raster only has 10 columns (0-9).
ds = gdal.Open('test.tif')
band = ds.GetRasterBand(1)
data = band.ReadAsArray(xoff=8, yoff=2, win_xsize=5, win_ysize=5)

In [None]:
# What happens if you try to write more data than there is room for? Start
# by building a 5x5 array of fake data holding the values 0 through 24.
data = np.arange(25).reshape(5, 5)
print(data)

In [None]:
# Now try to write the fake data into the same window we just failed to read
# (column 8, row 2). It does not fit in the raster either, so that fails, too.
band.WriteArray(data, xoff=8, yoff=2)

### Using real-world coordinates

In [34]:
# Open one of the Landsat bands and fetch the geotransform of its dataset.
landsat_dir = os.path.join(data_dir, 'landsat', 'washington')
os.chdir(landsat_dir)
ds = gdal.Open('p047r027_7t20000730_z10_nn10.tif')
band = ds.GetRasterBand(1)
gt = ds.GetGeoTransform()
print(gt)

(343724.25, 28.5, 0.0, 5369585.25, 0.0, -28.5)


In [36]:
# Now get the inverse geotransform. The original can be used to convert
# pixel offsets to real-world coordinates, and the inverse can be used to
# convert real-world coordinates to pixel offsets.
# NOTE(review): the recorded output shows a bare 6-tuple coming back; GDAL 1.x
# is believed to return a (success, inv_gt) pair instead - confirm if this
# notebook has to run against an old GDAL.

inv_gt = gdal.InvGeoTransform(gt)
print(inv_gt)

(-12060.5, 0.03508771929824561, 0.0, 188406.5, 0.0, -0.03508771929824561)


In [37]:
# Feed real-world UTM coordinates (the system the Landsat image uses) through
# the inverse geotransform to get pixel offsets. The offsets come back as
# floating point numbers, x offset first.
easting, northing = 465200, 5296000
offsets = gdal.ApplyGeoTransform(inv_gt, easting, northing)
print(offsets)

[4262.307017543859, 2581.9385964912362]


In [38]:
# Truncate the floating point offsets to whole pixel indices.
xoff, yoff = (int(offset) for offset in offsets)
print(xoff, yoff)

4262 2581


In [39]:
# Read the 1x1 window at those offsets and pull the single value out of the
# resulting 2D array.
pixel = band.ReadAsArray(xoff, yoff, 1, 1)
value = pixel[0, 0]
print(value)

62


In [None]:
# Reading in one pixel at a time is really inefficient if you need to read
# a lot of pixels, though, so here's how you could do it by reading in all
# of the pixel values first and then pulling out the one you need.
data = band.ReadAsArray()
x, y = map(int, gdal.ApplyGeoTransform(inv_gt, 465200, 5296000))
# Index with the offsets computed in this cell (not leftovers from an earlier
# one). numpy arrays are indexed [row, column], i.e. [y, x].
value = data[y, x]
print(value)

### Resampling data

In [40]:
# Open the natural color raster and take its first band.
landsat_dir = os.path.join(data_dir, 'landsat', 'washington')
os.chdir(landsat_dir)
ds = gdal.Open('nat_color.tif')
band = ds.GetRasterBand(1)

In [41]:
# Read a window of 2 rows by 3 columns at native resolution.
original_data = band.ReadAsArray(xoff=1400, yoff=6000, win_xsize=3, win_ysize=2)
print(original_data)

[[28 29 29]
 [28 30 29]]


In [42]:
# Resample that same 2-row, 3-column window to a smaller pixel size by asking
# for an output buffer with twice as many rows and columns (4 rows, 6 columns).
resampled_data = band.ReadAsArray(1400, 6000, 3, 2, buf_xsize=6, buf_ysize=4)
print(resampled_data)

[[28 28 29 29 29 29]
 [28 28 29 29 29 29]
 [28 28 30 30 29 29]
 [28 28 30 30 29 29]]


In [43]:
# Read a window of 4 rows by 6 columns at native resolution.
original_data2 = band.ReadAsArray(xoff=1400, yoff=6000, win_xsize=6, win_ysize=4)
print(original_data2)

[[28 29 29 27 25 25]
 [28 30 29 25 32 28]
 [27 27 28 30 25 29]
 [26 26 27 30 25 30]]


In [44]:
# Resample that same 4-row, 6-column window to a larger pixel size by reading
# it into a buffer with half as many rows and columns (2 rows, 3 columns).
resampled_data2 = np.empty(shape=(2, 3), dtype=np.int32)
band.ReadAsArray(1400, 6000, 6, 4, buf_obj=resampled_data2)
print(resampled_data2)

[[28 28 27]
 [28 28 29]]


### Byte sequences

In [45]:
# Read a 2x2 window of band 1 as raw bytes from the raster created with
# listing 8.1.
landsat_dir = os.path.join(data_dir, 'landsat', 'washington')
os.chdir(landsat_dir)
ds = gdal.Open('nat_color.tif')
data = ds.ReadRaster(1400, 6000, 2, 2, band_list=[1])
print(data)

bytearray(b'\x1c\x1d\x1c\x1e')


In [46]:
# Pull the first value out. Indexing the byte sequence gives back a number.
first_value = data[0]
print(first_value)

28


In [None]:
# Try to change the value of the first pixel in a byte string. This fails
# because byte strings (bytes) are immutable.
# The output recorded above shows ReadRaster handing back a mutable bytearray,
# for which the assignment would silently succeed and alter `data` for every
# later cell. Take an immutable bytes() copy so the demonstration behaves the
# same either way, and catch the error so the notebook keeps running.
immutable_data = bytes(data)
try:
    immutable_data[0] = 50
except TypeError as err:
    print('TypeError: {}'.format(err))

In [None]:
# Convert the byte string to a byte array and then change the first value.
# bytearray(data) builds a separate mutable object, so the assignment below
# changes bytearray_data only.
bytearray_data = bytearray(data)
bytearray_data[0] = 50
print(bytearray_data[0])

In [None]:
# Convert the byte string to a tuple of pixel values. The format asks for one
# unsigned byte ('B') per byte in the buffer, so it matches whatever window
# size was read instead of assuming exactly 4 pixels. (struct is imported
# with the other modules at the top of the notebook.)
tuple_data = struct.unpack('{}B'.format(len(data)), data)
print(tuple_data)

In [None]:
# Turn the tuple of pixel values into a numpy array.
numpy_data1 = np.asarray(tuple_data)
print(numpy_data1)

In [None]:
# Convert the byte string directly to a numpy array.
# np.frombuffer replaces the deprecated binary mode of np.fromstring. The
# values are read as unsigned bytes, as struct's 'B' format does, so pixel
# values above 127 do not turn negative the way they would with int8.
# .copy() gives an array that owns its memory, as fromstring did.
numpy_data2 = np.frombuffer(data, dtype=np.uint8).copy()
print(numpy_data2)

In [None]:
# Give one of the flat numpy arrays 2 rows and 2 columns again, matching the
# 2x2 window that was originally read.
reshaped_data = numpy_data2.reshape(2, 2)
print(reshaped_data)

In [None]:
# Write the little byte string into a 2x2 window (column 4, row 6) of a tiny
# 10x10 dummy raster, then release the dataset.
gtiff_driver = gdal.GetDriverByName('GTiff')
test_ds = gtiff_driver.Create('test2.tif', 10, 10)
test_ds.WriteRaster(4, 6, 2, 2, data, band_list=[1])
del test_ds

### Subdatasets

In [None]:
# List the subdatasets of a MODIS file. Each entry is unpacked into a name
# and a description.
modis_dir = os.path.join(data_dir, 'modis')
os.chdir(modis_dir)
ds = gdal.Open('MYD13Q1.A2014313.h20v11.005.2014330092746.hdf')
subdatasets = ds.GetSubDatasets()
print('Number of subdatasets: {}'.format(len(subdatasets)))
for name, description in subdatasets:
    print('Name: {0}\nDescription:{1}\n'.format(name, description))

In [None]:
# Open the first subdataset in the MODIS file, using the first element of
# its entry as the name to open.
ndvi_name = subdatasets[0][0]
ndvi_ds = gdal.Open(ndvi_name)

In [None]:
# Make sure that it worked by printing out the dimensions. You can use
# ndvi_ds just like any other dataset.
ds_xsize, ds_ysize = ndvi_ds.RasterXSize, ndvi_ds.RasterYSize
print('Dataset dimensions: {} {}'.format(ds_xsize, ds_ysize))

In [None]:
# For example, data is still read through a band, so fetch band 1 first and
# report its size.
ndvi_band = ndvi_ds.GetRasterBand(1)
band_dims = (ndvi_band.XSize, ndvi_band.YSize)
print('Band dimensions: {} {}'.format(*band_dims))

### WMS

In [None]:
# Open the WMS service description and, when the open produced a dataset,
# save a copy of it as a PNG in the current directory.
fn = os.path.join(data_dir, 'wms_service_description.xml')
ds = gdal.Open(fn)
# Compare against None by identity rather than with !=, which would go
# through the object's comparison operators.
if ds is not None:
    gdal.GetDriverByName('PNG').CreateCopy(r'liberty.png', ds)

### Processing a raster by block

In [47]:
# Open the input raster, take its first band and record the band's size in
# pixels.
landsat_dir = os.path.join(data_dir, 'landsat', 'washington')
os.chdir(landsat_dir)

in_ds = gdal.Open('nat_color.tif')
in_band = in_ds.GetRasterBand(1)
xsize, ysize = in_band.XSize, in_band.YSize

In [49]:
# Ask the band for its block size (x size first, then y) and its NoData
# value, then display the block size.
block_size = in_band.GetBlockSize()
block_xsize, block_ysize = block_size
nodata = in_band.GetNoDataValue()
block_xsize, block_ysize

(8849, 1)

In [50]:
# Create a single-band output file with the input's driver, dimensions and
# data type, and copy the projection and geotransform across.
driver = in_ds.GetDriver()
out_ds = driver.Create('dem_feet.tif', xsize, ysize, 1, in_band.DataType)
out_ds.SetProjection(in_ds.GetProjection())
out_ds.SetGeoTransform(in_ds.GetGeoTransform())
out_band = out_ds.GetRasterBand(1)


In [53]:
# Copy the band block by block: walk the block grid column of blocks by
# column of blocks, and within each one from top to bottom.
for x in range(0, xsize, block_xsize):

    # Blocks on the right edge may be narrower than a full block.
    cols = min(block_xsize, xsize - x)

    for y in range(0, ysize, block_ysize):

        # Blocks on the bottom edge may be shorter than a full block.
        rows = min(block_ysize, ysize - y)

        # Read one block's worth of data and write it, unchanged, to the
        # same block location in the output band.
        data = in_band.ReadAsArray(x, y, cols, rows)
        out_band.WriteArray(data, x, y)


In [58]:
# Flush pending writes, carry the NoData value over when the input band
# defines one, then compute statistics and build overviews.
out_band.FlushCache()
# GetNoDataValue() gives None when the input band has no NoData value, and
# SetNoDataValue() cannot take None, so only set it when there is one.
if nodata is not None:
    out_band.SetNoDataValue(nodata)
out_band.ComputeStatistics(False)
out_ds.BuildOverviews('average', [2, 4, 8, 16, 32])
del out_ds