[![](https://colab.research.google.com/assets/colab-badge.svg "Open in Colab button")](https://colab.research.google.com/github/thinkingmachines/geowrangler/blob/feat/upgrade-nbdev2/notebooks/11_raster_to_dataframe.ipynb)

In [1]:
#| default_exp raster_to_dataframe

In [2]:
#| include: false
#| no_test
# Install geowrangler only when /content exists (presumably a Google Colab runtime —
# the notebook's Colab badge suggests so); elsewhere the `[ -e ... ]` test fails and pip is skipped.
! [ -e /content ] && pip install -Uqq geowrangler

In [3]:
#| include: false
#| no_test
# Make sure the shared data directory exists one level above the notebook.
!mkdir -p ../data
# When /content exists (presumably Colab), symlink ../data into the working directory.
![ -e /content ] && ln -s ../data .

In [4]:
#| include: false
#| no_test
!mkdir -p ../data
# NOTE(review): this cell only creates ../data; no download command follows.
# The rasters read later (../data/vector_to_raster_mask_sample/*.tif, *.tiff)
# must already be present — TODO add the download step or document where to get them.

In [5]:
#| include: false
#| no_test
# Re-import edited modules automatically before each cell runs, and render
# matplotlib figures inline in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [6]:
#| include: false
import warnings

from shapely.errors import ShapelyDeprecationWarning

In [7]:
#| include: false
# Keep the rendered notebook output clean by silencing two known noisy warnings.
# `module` is a regular expression matched against the start of the name of the
# module that triggers the warning.
# 1) UserWarning raised from geopandas modules.
warnings.filterwarnings(action="ignore", category=UserWarning, module="geopandas")
# 2) ShapelyDeprecationWarning raised from pandas modules.
warnings.filterwarnings(
    action="ignore", category=ShapelyDeprecationWarning, module="pandas"
)

In [8]:
#| export
from typing import List

import pandas as pd
import rasterio as rio
import rasterio.mask

In [9]:
#| export
def read_bands(image_list: List[str], mask: str):
    """
    Read every band of every image and flatten them into a single dataframe.

    Each band becomes one column named ``B<band>_<image index>``: band numbers
    start at 1 and image indices start at 0, following the order of
    ``image_list``. Each row is one pixel, in the flattened (``ravel``) order of
    the band array. Missing values in the band columns are replaced with 0.
    The first band of ``mask`` is flattened the same way and stored in a
    ``label`` column.

    The rasters are assumed to share the same pixel dimensions as the mask.

    Parameters
    ----------
    image_list : List[str]
        Paths of the raster images to read.
    mask : str
        Path of the raster whose first band supplies the labels.

    Returns
    -------
    pandas.DataFrame
        One column per band per image, plus the ``label`` column.

    Raises
    ------
    ValueError
        If ``image_list`` is empty, or (raised by pandas) if the number of mask
        pixels does not match the number of rows of the band columns.
    """

    # Open datasets as context managers so every file handle is closed as soon
    # as its pixels are in memory, even when a read fails part-way through.
    with rio.open(mask) as label_src:
        label = label_src.read(1).ravel()

    frames = []

    # Iterate over each image file
    for idx, image_file in enumerate(image_list):
        with rio.open(image_file) as raster:
            # Band indexes passed to read() are 1-based
            bands = {
                "B{}_{}".format(band_idx, idx): raster.read(band_idx).ravel()
                for band_idx in range(1, raster.count + 1)
            }

        # Cast to a pandas sub-dataframe, one column per band
        frames.append(pd.DataFrame(bands).fillna(0))

    if not frames:
        # pd.concat would raise ValueError("No objects to concatenate");
        # raise the same exception type with an actionable message instead.
        raise ValueError("image_list must contain at least one image path")

    data = pd.concat(frames, axis=1)
    data["label"] = label

    return data

## Test data

### Converting an image to dataframe with labels

In [10]:
# Paths of the sample inputs: the list of image rasters whose bands are read,
# and the raster whose first band supplies the `label` column.
tiff_files = ["../data/vector_to_raster_mask_sample/cabanglasan.tif"]
mask_file = "../data/vector_to_raster_mask_sample/labels_20220816.tiff"

In [11]:
data = read_bands(tiff_files, mask_file)

In [12]:
data

Unnamed: 0,B1_0,B2_0,B3_0,B4_0,B5_0,B6_0,B7_0,B8_0,B9_0,B10_0,B11_0,B12_0,label
0,0.1198,0.09635,0.09330,0.0698,0.10665,0.20250,0.2490,0.23525,0.28125,0.0377,0.19925,0.1002,0
1,0.1198,0.09580,0.09245,0.0708,0.10665,0.20250,0.2490,0.23925,0.28125,0.0377,0.19925,0.1002,0
2,0.1148,0.09420,0.09460,0.0707,0.10380,0.20395,0.2478,0.23150,0.27165,0.0385,0.18240,0.0902,0
3,0.1148,0.09190,0.08850,0.0631,0.10380,0.20395,0.2478,0.23300,0.27165,0.0385,0.18240,0.0902,0
4,0.1148,0.09350,0.09080,0.0643,0.10565,0.20830,0.2466,0.24205,0.26990,0.0385,0.18050,0.0894,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
775824,0.0000,0.00000,0.00000,0.0000,0.00000,0.00000,0.0000,0.00000,0.00000,0.0000,0.00000,0.0000,0
775825,0.0000,0.00000,0.00000,0.0000,0.00000,0.00000,0.0000,0.00000,0.00000,0.0000,0.00000,0.0000,0
775826,0.0000,0.00000,0.00000,0.0000,0.00000,0.00000,0.0000,0.00000,0.00000,0.0000,0.00000,0.0000,0
775827,0.0000,0.00000,0.00000,0.0000,0.00000,0.00000,0.0000,0.00000,0.00000,0.0000,0.00000,0.0000,0
