In [1]:
import os
import math
import numpy as np
import matplotlib.pyplot as plt
import h5py
import pandas as pd
import seaborn as sns
from scipy import stats

from osgeo import gdal
from osgeo import gdal_array
from osgeo import osr

In [2]:
def tif2array(file_path, dtype=np.uint8):
    """
    Read a GeoTIFF and convert it to a numpy.ndarray.

    inputs:
        file_path (str) : file path of the input GeoTIFF file
        dtype (numpy dtype) : element type of the returned array
            (default np.uint8). Band values are cast to this type when they
            are copied into the array, so values that do not fit into
            `dtype` (e.g. 200 into np.int8) are not preserved.
    return:
        image (np.ndarray) : array of shape (rows, cols, bands) holding every
            band of the raster, or None when gdal.Open() returns None
            (file could not be opened).
    """
    dataset = gdal.Open(file_path, gdal.GA_ReadOnly)

    if dataset is None:
        return None

    band_count = dataset.RasterCount

    # The output array is typed by the caller-supplied `dtype`, not by the
    # datatype stored in the file.
    image = np.zeros((dataset.RasterYSize, dataset.RasterXSize, band_count),
                     dtype=dtype)

    # GDAL band indices start at 1, Python indices at 0.
    for b in range(band_count):
        # Each band becomes one slice along the third axis.
        image[:, :, b] = dataset.GetRasterBand(b + 1).ReadAsArray()

    return image

### Köppen climate

In [3]:
# Load the Köppen climate class raster as signed 8-bit class codes.
file_path = "/home/philipp/Data/modis/aligned/georgia_clipped/koeppen.tif"
koeppen = tif2array(file_path, dtype=np.int8)

# tif2array() returns None when GDAL cannot open the file; fail here with a
# clear message instead of an AttributeError in a later cell.
if koeppen is None:
    raise OSError(f"GDAL could not open raster: {file_path}")

In [25]:
# Collect the strictly positive class codes into a flat 1-D array;
# values <= 0 are left out of the statistics.
koeppen_stat = koeppen[koeppen > 0]
koeppen_stat.shape

(967620,)

In [28]:
# Distinct Köppen class codes and how many retained pixels carry each one.
unique, counts = np.unique(koeppen_stat, return_counts=True)

# Share of each class among the retained pixels, printed in whole percent.
koeppen_fraction = counts / koeppen_stat.size
print(unique)
print(np.round(koeppen_fraction * 100, decimals=0))

[ 7 14 15 25 26 27 29 30]
[ 4. 26.  4.  6. 40. 15.  6.  0.]


### Land cover

In [29]:
# Load the land-cover class raster as signed 8-bit class codes.
file_path = "/home/philipp/Data/modis/aligned/georgia_clipped/lc.tif"
lc = tif2array(file_path, dtype=np.int8)

# tif2array() returns None when GDAL cannot open the file; fail here with a
# clear message instead of an AttributeError on the next statement.
if lc is None:
    raise OSError(f"GDAL could not open raster: {file_path}")

# NOTE(review): np.int8 only holds values up to 127. Any class code above 127
# in lc.tif would not survive the cast and would then be dropped by the
# `> 0` filter below -- confirm the raster has no such codes, or that
# excluding them is intended.

# Flatten and keep only strictly positive class codes.
lc_stat = lc.flatten()
lc_stat = lc_stat[lc_stat>0]

# Distinct land-cover codes and their share of the retained pixels,
# printed in whole percent.
unique, counts = np.unique(lc_stat, return_counts=True)
print(unique)
print(np.around((counts/lc_stat.size)*100,0))

[ 20  30  40  50  60  70  80  90 100 111 114 115 116 121 124 125 126]
[ 0. 28. 16.  2.  1.  1.  0.  0.  0.  5. 33.  2.  5.  0.  1.  0.  6.]


In [30]:
# Land-cover class shares in percent, rounded to one decimal place.
# `unique` and `counts` are whatever the most recently executed np.unique
# cell left in the namespace, so run the land-cover cell right before this one.
lc_fraction = counts / lc_stat.size
print(unique)
print(np.round(lc_fraction * 100, decimals=1))

[ 20  30  40  50  60  70  80  90 100 111 114 115 116 121 124 125 126]
[ 0.5 28.  15.8  1.8  1.2  0.8  0.4  0.3  0.   4.6 32.9  1.6  5.   0.1
  0.9  0.   6.2]
