# Data Preparation

## Imports

In [1]:
from os import listdir
import os.path
from os.path import isfile, join
from osgeo import gdal
import numpy as np

In [2]:
# Directory with the labeled TIF files, located below the current working directory.
path_labels = "{}/1_labeled_data/tif/".format(os.getcwd())

# Directory with the satellite rasters, and the label file used throughout this notebook.
path_rdg = "/home/jovyan/work/satellite_data/"
label_tif = "".join((path_labels, "22-01-05.TIF"))

# Sanity check: report whether the satellite raster and the label file can be found.
print(
    os.path.isfile("".join((path_rdg, "2629BD_2018.tif"))),
    os.path.isfile(label_tif),
)
# TODO: the local copy only contains *.jp2.tif files, whereas the server only has *.tif files -- find out why.

True True


In [3]:
def print_files(path=None):
    """Print a directory path followed by the names of the regular files in it.

    Sub-directories are skipped; only entries for which ``isfile`` is true
    are listed.

    Parameters
    ----------
    path : str, optional
        Directory to list. Defaults to the working directory at *call* time.
        (A default of ``os.getcwd()`` in the signature would be frozen when the
        function is defined and go stale after a later ``os.chdir``.)
    """
    if path is None:
        path = os.getcwd()
    file_names = [entry for entry in listdir(path) if isfile(join(path, entry))]
    print(path, file_names)
# List the files that are present in the label directory.
print_files(path_labels)

/home/jovyan/work/saved_data/Semantic_Segmentation_of_Tree_Stock/1_labeled_data/tif/ ['22-01-05.TIF', 'check-labels-jp2-capizalized-22-01-05.TIF']


#### Import Image with GDAL

In [4]:
satellite_tif = path_rdg + "2629BD_2018.tif"
dataset = gdal.Open(satellite_tif)
# gdal.Open() returns None instead of raising when GDAL exceptions are disabled.
# Fail here with a clear message rather than later with an AttributeError on None.
if dataset is None:
    raise RuntimeError("GDAL could not open raster: " + satellite_tif)
# NOTE: opening the label file
# "/home/jovyan/work/saved_data/Semantic_Segmentation_of_Tree_Stock/1_labeled_data/tif/144.tif"
# instead results in an error -- probably a wrong format?

### 0.1 Data Inspection

In [5]:
# General Dataset Information
def inspect_tif(dataset):
    """Print a summary of a GDAL dataset and of its first raster band.

    The report covers metadata, projection, raster size, driver and
    geotransform of the dataset, followed by the data type, value range,
    overview count and colour table of band 1.

    Parameters
    ----------
    dataset : osgeo.gdal.Dataset
        An opened raster dataset, e.g. the result of ``gdal.Open(path)``.

    Returns
    -------
    None
        Everything is written to stdout.
    """
    print("1. General TIF Data:")
    print("Metadata:", dataset.GetMetadata())
    print("Type Image: ", type(dataset))
    print("Projection: ", dataset.GetProjection())
    print("Size is {} x {} x {}".format(dataset.RasterXSize,
                                        dataset.RasterYSize,
                                        dataset.RasterCount))
    driver = dataset.GetDriver()
    print("Driver: {}/{}".format(driver.ShortName, driver.LongName))
    geotransform = dataset.GetGeoTransform()
    if geotransform:
        # Entries 0 and 3 are reported as the origin, entries 1 and 5 as the pixel size.
        print("Origin = ({}, {})".format(geotransform[0], geotransform[3]))
        print("Pixel Size = ({}, {})".format(geotransform[1], geotransform[5]))

    # Raster Band
    band = dataset.GetRasterBand(1)
    print("2. Raster Band:")
    print("Band Type={}".format(gdal.GetDataTypeName(band.DataType)))

    band_min = band.GetMinimum()
    band_max = band.GetMaximum()
    # Compare against None explicitly: 0 is a legitimate minimum/maximum and must
    # not be mistaken for "unknown" (a plain truthiness test would recompute then).
    if band_min is None or band_max is None:
        band_min, band_max = band.ComputeRasterMinMax(True)
    print("Min={:.3f}, Max={:.3f}".format(band_min, band_max))

    overview_count = band.GetOverviewCount()
    if overview_count > 0:
        print("Band has {} overviews".format(overview_count))

    color_table = band.GetRasterColorTable()
    if color_table:
        print("Band has a color table with {} entries".format(color_table.GetCount()))
# Print the summary for the raster opened from path_rdg above.
inspect_tif(dataset)

1. General TIF Data:
Metadata: {'ALL_COMMENTS': 'Created by OpenJPEG version 2.4.0', 'AREA_OR_POINT': 'Area', 'COLORSPACE': 'RGB', 'COMPRESSION_RATE_TARGET': '1', 'TIFFTAG_MAXSAMPLEVALUE': '0', 'TIFFTAG_MINSAMPLEVALUE': '0', 'TIFFTAG_RESOLUTIONUNIT': '1 (unitless)', 'TIFFTAG_XRESOLUTION': '0', 'TIFFTAG_YRESOLUTION': '0'}
Type Image:  <class 'osgeo.gdal.Dataset'>
Projection:  GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]]
Size is 108224 x 107798 x 3
Driver: GTiff/GeoTIFF
Origin = (29.746877767434313, -26.247113163017453)
Pixel Size = (2.3728143101288307e-06, -2.3728143101288515e-06)
2. Raster Band:
Band Type=Byte
Min=0.000, Max=255.000
Band has 8 overviews


In [6]:
# optional conversion to np array
# channel = np.array(dataset.GetRasterBand(1).ReadAsArray())
# print(channel.shape)

In [7]:
# Read the raster band as separate variable
#band = dataset.GetRasterBand(1)

# Check type of the variable 'band'
#type(band)

# Data type of the values
#gdal.GetDataTypeName(band.DataType)

In [8]:
def load_tif(tif_filepath):
    """Try to open a TIF file with GDAL and report the outcome on stdout.

    Prints ``Filepath exists: False`` when the path is not a file, ``0 Success``
    followed by the ``inspect_tif`` report when GDAL can open it, and
    ``1 Failure`` otherwise.

    Parameters
    ----------
    tif_filepath : str
        Path of the file to open.

    Returns
    -------
    None
    """
    if not os.path.isfile(tif_filepath):
        # The path was just found not to be a file, so report that directly.
        print("Filepath exists:", False)
        return

    try:
        labeled_data = gdal.Open(tif_filepath)
    except RuntimeError:
        # With GDAL exceptions enabled, Open() raises instead of returning None.
        # Treat both modes alike so one unreadable file does not abort a batch.
        labeled_data = None

    if labeled_data:
        print("0 Success", tif_filepath)
        inspect_tif(labeled_data)
    else:
        print("1 Failure", tif_filepath)

In [9]:
# Path of the label file to inspect (the same file that `label_tif` points to).
labels = path_labels + "22-01-05.TIF"

In [10]:
# Try to open the label file and print its summary.
load_tif(labels)

0 Success /home/jovyan/work/saved_data/Semantic_Segmentation_of_Tree_Stock/1_labeled_data/tif/22-01-05.TIF
1. General TIF Data:
Metadata: {'TIFFTAG_RESOLUTIONUNIT': '2 (pixels/inch)', 'TIFFTAG_XRESOLUTION': '72', 'TIFFTAG_YRESOLUTION': '72'}
Type Image:  <class 'osgeo.gdal.Dataset'>
Projection:  
Size is 20335 x 20255 x 4
Driver: GTiff/GeoTIFF
Origin = (0.0, 0.0)
Pixel Size = (1.0, 1.0)
2. Raster Band:
Band Type=Byte
Min=255.000, Max=255.000


In [11]:
# NOTE(review): plt is not used in the cells visible here; this import belongs in the imports cell at the top.
import matplotlib.pyplot as plt
#print(dataset.RasterCount)
# NOTE: `labels` is a plain path string (path_labels + file name), not a GDAL dataset,
# so the next line would raise AttributeError if it were re-enabled.
#print(labels.RasterCount, labels.RasterXSize, labels.RasterYSize)
# Band count, X size and Y size of the satellite raster.
print(dataset.RasterCount, dataset.RasterXSize, dataset.RasterYSize)

3 108224 107798


In [13]:
# band1 = labels.GetRasterBand(1) # Red channel
# band2 = labels.GetRasterBand(2) # Green channel
# band3 = labels.GetRasterBand(3) # Blue channel

In [15]:
#labels.GetRasterBand()

In [19]:
def check_all_labels(directory=None):
    """Run ``load_tif`` on every TIF file found in a directory.

    Prints the heading ``Labels``, then for each matching file its name, the
    ``load_tif`` report and an empty line.

    Parameters
    ----------
    directory : str or bytes, optional
        Directory to scan. Bytes paths (e.g. from ``os.fsencode``) are
        accepted. Defaults to ``path_labels``, looked up at call time.
    """
    if directory is None:
        directory = path_labels
    # Normalise to str so file names can be joined and printed directly.
    directory = os.fsdecode(directory)

    print("Labels")
    # Sorted so the report order is reproducible across runs and file systems.
    for filename in sorted(os.listdir(directory)):
        # Case-insensitive match covers .tif/.TIF/.tiff as well as .TIFF etc.
        if not filename.lower().endswith((".tif", ".tiff")):
            continue
        print(filename)
        # Load from the directory that was actually listed, not always path_labels.
        load_tif(os.path.join(directory, filename))
        print("")
# Check every TIF file in the label directory.
check_all_labels(os.fsencode(path_labels))

Labels
22-01-05.TIF
0 Success /home/jovyan/work/saved_data/Semantic_Segmentation_of_Tree_Stock/1_labeled_data/tif/22-01-05.TIF
1. General TIF Data:
Metadata: {'TIFFTAG_RESOLUTIONUNIT': '2 (pixels/inch)', 'TIFFTAG_XRESOLUTION': '72', 'TIFFTAG_YRESOLUTION': '72'}
Type Image:  <class 'osgeo.gdal.Dataset'>
Projection:  
Size is 20335 x 20255 x 4
Driver: GTiff/GeoTIFF
Origin = (0.0, 0.0)
Pixel Size = (1.0, 1.0)
2. Raster Band:
Band Type=Byte
Min=255.000, Max=255.000

check-labels-jp2-capizalized-22-01-05.TIF
0 Success /home/jovyan/work/saved_data/Semantic_Segmentation_of_Tree_Stock/1_labeled_data/tif/check-labels-jp2-capizalized-22-01-05.TIF
1. General TIF Data:
Metadata: {'TIFFTAG_RESOLUTIONUNIT': '2 (pixels/inch)', 'TIFFTAG_XRESOLUTION': '72', 'TIFFTAG_YRESOLUTION': '72'}
Type Image:  <class 'osgeo.gdal.Dataset'>
Projection:  
Size is 20335 x 20255 x 4
Driver: GTiff/GeoTIFF
Origin = (0.0, 0.0)
Pixel Size = (1.0, 1.0)
2. Raster Band:
Band Type=Byte
Min=255.000, Max=255.000



In [None]:
# 0 Success exported.TIF                       --                                                      -- labels with hidden raster dimension
# 0 Success exported.tiff                      -- maybe can't read .tiff                               -- labels with hidden raster dimension
# 1 Failure exported.tif                       -- 
# 0 Success exported_in_caps.TIF               -- Something exported directly with TIF as file extension -- labels with hidden raster dimension
# 1 Failure labels-22-01-05.TIF                -- label exported and renamed to the TIF file extension
# 0 Success exported_tif.tif                   -- only tif file exported
# 1 Failure labels-22-01-05-only-polygon.TIF
# os.getcwd()