Skip to content

Commit

Permalink
Merge f840301 into 202b06c
Browse files Browse the repository at this point in the history
  • Loading branch information
mblackgeo committed Jul 8, 2018
2 parents 202b06c + f840301 commit b5e8662
Show file tree
Hide file tree
Showing 8 changed files with 295 additions and 132 deletions.
14 changes: 1 addition & 13 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,33 +1,21 @@
language: python
python:
# We don't actually use the Travis Python, but this keeps it organized.
- "3.5"
- "3.6"
install:
- sudo apt-get update
# We do this conditionally because it saves us some downloading if the
# version is the same.
- if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh;
else
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
fi
- wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
- bash miniconda.sh -b -p $HOME/miniconda
- export PATH="$HOME/miniconda/bin:$PATH"
- hash -r
- conda config --set always_yes yes --set changeps1 no
- conda update -q conda
# Useful for debugging any issues with conda
- conda info -a

# Replace dep1 dep2 ... with your dependencies
- conda create -q -n test-environment -c conda-forge python=$TRAVIS_PYTHON_VERSION gdal geopandas pandas pyproj pytest pytest-cov
- source activate test-environment
- pip install python-coveralls
- python setup.py install

script:
# Your test script goes here
python -m pytest -v --junitxml=result.xml --cov rastertodataframe --cov-report term-missing
after_success:
- coveralls
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: dev
name: rastertodataframe
channels:
- conda-forge
- defaults
Expand All @@ -7,6 +7,7 @@ dependencies:
- geopandas=0.3.0=py36_0
- pandas=0.23.2=py36_0
- pyproj=1.9.5.1=py36_0
- numpy=1.14.2=py36hdbf6ddf_1
- pip:
- coverage==4.5.1
- flake8==3.5.0
Expand Down
3 changes: 3 additions & 0 deletions rastertodataframe/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,6 @@
__author__ = """Martin Black"""
__email__ = 'mblack@posteo.de'
__version__ = '0.1.0'

from .rastertodataframe import *
from . import util
76 changes: 67 additions & 9 deletions rastertodataframe/rastertodataframe.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
# -*- coding: utf-8 -*-
import os
import logging
import tempfile

import numpy as np
import pandas as pd
import geopandas as gpd

from . import util
from rastertodataframe import util

log = logging.getLogger(__name__)


def raster_to_dataframe(raster_path, vector_path=None):
"""Convert a raster to a Pandas DataFrame.
.. note:: This requires enough memory to load the entire raster.
Parameters
----------
raster_path : str
Expand All @@ -25,12 +29,66 @@ def raster_to_dataframe(raster_path, vector_path=None):
-------
pandas.core.frame.DataFrame
"""
raster = util.open_raster(raster_path)
vector = util.open_vector(vector_path) if vector_path is not None else None
# Placeholders for possible temporary files.
fid1 = fid2 = tmp_fname = vector_mask_fname = None

# Get raster band names.
ras = util.open_raster(raster_path)
raster_band_names = util.get_raster_band_names(ras)

# Create a mask from the pixels touched by the vector.
if vector_path is not None:
fid1, tmp_fname = tempfile.mkstemp(suffix='.geojson')

# Add a dummy feature ID column to the vector.
# This is not always present in OGR features.
vec_gdf = util.open_vector(vector_path, with_geopandas=True)
mask_values = list(range(1, len(vec_gdf) + 1))
vec_gdf['__fid__'] = pd.Series(mask_values)
vec_gdf.to_file(tmp_fname, driver='GeoJSON')

# Mask the vector using the feature ID column.
fid2, vector_mask_fname = tempfile.mkstemp()
vector_mask = util.burn_vector_mask_into_raster(
raster_path, tmp_fname, vector_mask_fname,
vector_field='__fid__')

# Loop over mask values to extract pixels.
mask_arr = vector_mask.GetRasterBand(1).ReadAsArray()
ras_arr = ras.ReadAsArray() # TODO this is not memory efficient.

out_dfs = []
for mask_val in mask_values:
pixels = util.extract_masked_px(ras_arr, mask_arr, mask_val=mask_val)\
.transpose()
fid_px = np.ones(pixels.shape[0]) * mask_val

mask_df = pd.DataFrame(pixels, columns=raster_band_names)
mask_df['__fid__'] = fid_px
out_dfs.append(mask_df)

# Fill DataFrame with pixels.
out_df = pd.concat(out_dfs)

# Join with vector attributes.
out_df = out_df.merge(vec_gdf, how='left', on='__fid__')

else:
# No vector given, simply load the raster.
ras_arr = ras.ReadAsArray() # TODO not memory efficient.
mask_arr = np.ones((ras_arr.shape[1], ras_arr.shape[2]))
pixels = util.extract_masked_px(ras_arr, mask_arr).transpose()
out_df = pd.DataFrame(pixels, columns=raster_band_names)

# TODO screen no data values.

# If vector:
# Get vector mask (either all features or by a specific vector field).
# Loop over mask values to extract pixels.
# Remove temporary files.
if fid1 is not None and tmp_fname is not None:
os.close(fid1)
os.remove(tmp_fname)
if fid2 is not None and vector_mask_fname is not None:
os.close(fid2)
os.remove(vector_mask_fname)

# Assemble dataframe from raster pixels.
pass
# Return dropping any extra cols.
return out_df.drop(columns=['__fid__', 'geometry'], errors='ignore')
66 changes: 64 additions & 2 deletions rastertodataframe/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,8 @@ def _create_empty_raster(template, out_path, n_bands=1, no_data_value=None):
return out_dataset


def _burn_vector_mask_into_raster(raster_path, vector_path, out_path,
vector_field=None):
def burn_vector_mask_into_raster(raster_path, vector_path, out_path,
vector_field=None):
"""Create a new raster based on the input raster with vector features
burned into the raster. To be used as a mask for pixels in the vector.
Expand Down Expand Up @@ -253,3 +253,65 @@ def _burn_vector_mask_into_raster(raster_path, vector_path, out_path,
out_ds = None

return open_raster(out_path)


def get_raster_band_names(raster):
"""Obtain the names of bands from a raster. The raster metadata is queried
first, if no names a present, a 1-index list of band_N is returned.
Parameters
----------
raster : gdal.Dataset
Returns
-------
list[str]
"""
band_names = []
for i in range(1, raster.RasterCount + 1):
band = raster.GetRasterBand(i)

if band.GetDescription():
# Use the band description.
band_names.append(band.GetDescription())
else:
# Check for metedata.
this_band_name = 'Band_{}'.format(band.GetBand())
metadata = band.GetDataset().GetMetadata_Dict()

# If in metadata, return the metadata entry, else Band_N.
if this_band_name in metadata and metadata[this_band_name]:
band_names.append(metadata[this_band_name])
else:
band_names.append(this_band_name)

return band_names


def extract_masked_px(ras, mask, mask_val=None):
"""Extract raster values according to a mask.
Parameters
----------
ras : np.ndarray
2D or 3D input data in the form [bands][y][x].
mask : np.ndarray
1D or 2D with zeroes to mask data.
mask_val : int
Value of the data pixels in the mask. Default: non-zero.
Returns
-------
np.ndarray
Array of non-masked data.
"""
if mask is None:
return ras

# Use the mask to get the indices of the non-zero pixels.
if mask_val:
(i, j) = (mask == mask_val).nonzero()
else:
(i, j) = mask.nonzero()

return (ras[i, j] if ras.ndim == 2 else ras[:, i, j])
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ GDAL==2.2.4
geopandas==0.3.0
pandas==0.23.2
pyproj==1.9.5.1
numpy==1.14.2
120 changes: 13 additions & 107 deletions tests/test_rastertodataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from osgeo import gdal, ogr
import geopandas as gpd

from rastertodataframe import rastertodataframe
from rastertodataframe import raster_to_dataframe


class TestRasterToDataFrame(unittest.TestCase):
Expand All @@ -21,114 +21,20 @@ def setUp(self):
self.raster_wgs84_path = os.path.join(test_data_path,
'raster_epsg4326.tif')

def test_open_raster(self):
ras = rastertodataframe.util.open_raster(self.raster_path)
self.assertIsInstance(ras, gdal.Dataset)
def test_raster_to_dataframe_with_vector(self):
out_df = raster_to_dataframe(
self.raster_wgs84_path, vector_path=self.vector_path)

def test_open_vector_ogr(self):
# As OGR DataSource.
vec = rastertodataframe.util.open_vector(
self.vector_path,
with_geopandas=False)
self.assertIsInstance(vec, ogr.DataSource)
expected_cols = ['Band_1', 'Band_2', 'Band_3', 'Band_4',
'fid', 'value', 'value_string']

def test_open_vector_gdf(self):
# As GeoPandasDataFrame.
vec_gdf = rastertodataframe.util.open_vector(
self.vector_path,
with_geopandas=True)
self.assertIsInstance(vec_gdf, gpd.GeoDataFrame)
self.assertEqual(out_df.shape, (267, 7))
self.assertCountEqual(list(out_df.columns), expected_cols)

def test__epsg_from_projection_proj4(self):
# Proj4 string.
prj = '+proj=utm +zone=32 +datum=WGS84 +units=m +no_defs'
self.assertEqual(
rastertodataframe.util._epsg_from_projection(prj),
32632)
def test_raster_to_dataframe_without_vector(self):
out_df = raster_to_dataframe(self.raster_path)

def test__epsg_from_projection_esri(self):
# ESRI PROJCS (WKT).
prj = ('PROJCS["WGS_1984_UTM_Zone_30N", '
'GEOGCS["GCS_WGS_1984", '
'DATUM["D_WGS_1984", '
'SPHEROID["WGS_1984", 6378137.0, 298.257223563]], '
'PRIMEM["Greenwich", 0.0], '
'UNIT["Degree", 0.0174532925199433]], '
'PROJECTION["Transverse_Mercator"], '
'PARAMETER["False_Easting", 500000.0], '
'PARAMETER["False_Northing", 0.0], '
'PARAMETER["Central_Meridian", -3.0], '
'PARAMETER["Scale_Factor", 0.9996], '
'PARAMETER["Latitude_Of_Origin", 0.0], '
'UNIT["Meter", 1.0]]')
expected_cols = ['Band_1', 'Band_2', 'Band_3', 'Band_4']

self.assertEqual(
rastertodataframe.util._epsg_from_projection(prj),
32630)

def test_get_ogr_epsg(self):
vec = ogr.OpenShared(self.vector_path)
self.assertEqual(rastertodataframe.util.get_epsg(vec), 4326)

def test_get_gdal_epsg(self):
ras = gdal.OpenShared(self.raster_path)
self.assertEqual(rastertodataframe.util.get_epsg(ras), 32632)

def test_get_gpd_epsg(self):
gdf = gpd.read_file(self.vector_path)
self.assertEqual(rastertodataframe.util.get_epsg(gdf), 4326)

def test_same_epsg(self):
ras = gdal.OpenShared(self.raster_path)
gdf = gpd.read_file(self.vector_path)
self.assertFalse(rastertodataframe.util.same_epsg(ras, gdf))
self.assertTrue(rastertodataframe.util.same_epsg(ras, ras))

def test__create_empty_raster(self):
fid, tmp_fname = tempfile.mkstemp()
ras = gdal.OpenShared(self.raster_path)
out = rastertodataframe.util._create_empty_raster(
ras, tmp_fname, n_bands=1, no_data_value=0)

self.assertIsInstance(out, gdal.Dataset)
self.assertEqual(out.RasterCount, 1)
self.assertEqual(ras.RasterXSize, out.RasterXSize)
self.assertEqual(ras.RasterYSize, out.RasterYSize)

os.close(fid)
os.remove(tmp_fname)

def test__brun_vector_mask_into_raster_wrong_epsg(self):
# Error for differing projections.
with self.assertRaises(ValueError):
out = rastertodataframe.util._burn_vector_mask_into_raster(
self.raster_path, self.vector_path, '')

def test__burn_vector_mask_into_raster_vector_mask(self):
# Burn in a specified vector field.
fid, tmp_fname = tempfile.mkstemp()
out = rastertodataframe.util._burn_vector_mask_into_raster(
self.raster_wgs84_path, self.vector_path, tmp_fname)

arr = out.GetRasterBand(1).ReadAsArray()
self.assertEqual(arr.shape, (39, 58))
self.assertEqual(arr.ndim, 2)
self.assertEqual(arr.max(), 1)

os.close(fid)
os.remove(tmp_fname)

def test__burn_vector_mask_into_raster_vector_field(self):
# Burn in a specified vector field.
fid, tmp_fname = tempfile.mkstemp()
out = rastertodataframe.util._burn_vector_mask_into_raster(
self.raster_wgs84_path, self.vector_path, tmp_fname,
vector_field='value')

arr = out.GetRasterBand(1).ReadAsArray()
self.assertEqual(arr.shape, (39, 58))
self.assertEqual(arr.ndim, 2)
self.assertEqual(arr.max(), 2000)

os.close(fid)
os.remove(tmp_fname)
self.assertEqual(out_df.shape, (2204, 4))
self.assertCountEqual(list(out_df.columns), expected_cols)

0 comments on commit b5e8662

Please sign in to comment.