# `extract` Module Usage Examples

## Installation
---
To install the `extract` module, clone the repository by running:

In [None]:
# Uncomment the line below to clone the repository from within this notebook.
#!git clone https://github.com/KeiferC/extract-table.git

### Installing Dependencies
The `extract` module uses the `numpy`, `pandas`, and `geopandas` packages, which may or may not be installed on your machine.

To automatically install the dependencies run:

In [None]:
!pip install -r requirements.txt

Alternatively, you can manually install the dependencies by running:

In [None]:
!pip install numpy
!pip install pandas
!pip install geopandas

## Module Usage
---

*Note:* The example input files were downloaded and converted from the [GeoJSON file](http://d2ad6b4ur7yvpq.cloudfront.net/naturalearth-3.3.0/ne_110m_land.geojson) referenced in the [`geopandas` IO docs](https://geopandas.org/io.html).

In [None]:
from extract import ExtractTable # import module

import geopandas as gpd
import pandas as pd

### Example 1. Extract a table (returns a `geopandas GeoDataFrame`)
#### Example 1.1. Extract a table from a file
- __Example 1.1.1.__ Extract from a shapefile

In [None]:
# Ex. 1.1.1

# Path to the file containing the table to extract.
shp_path = 'example-inputs/example-shp/example.shp'

# Build the extractor from the path.
# Alternative: ExtractTable.read_file(filepath)
shp_et = ExtractTable(shp_path)

# Extract the table as a geopandas GeoDataFrame.
shp_gdf = shp_et.extract()

# Expected first rows of the resulting GeoDataFrame:
#
#    scalerank featurecla                                           geometry
# 0          1    Country  POLYGON ((-59.57209 -80.04018, -59.86585 -80.5...
# 1          1    Country  POLYGON ((-159.20818 -79.49706, -161.12760 -79...
# 2          1    Country  POLYGON ((-45.15476 -78.04707, -43.92083 -78.4...
# 3          1    Country  POLYGON ((-121.21151 -73.50099, -119.91885 -73...
# 4          1    Country  POLYGON ((-125.55957 -73.48135, -124.03188 -73...
# ...

# Display the live result so the rendered output cannot drift from the code.
shp_gdf.head()

- __Example 1.1.2.__ Extract from a CSV

In [None]:
# Ex. 1.1.2

# Path to the CSV file containing the table to extract.
csv_path = 'example-inputs/example.csv'

# Build the extractor with the read_file alternative to the constructor.
csv_et = ExtractTable.read_file(csv_path)

# Extract the table.
csv_gdf = csv_et.extract()

# Expected first rows of the resulting GeoDataFrame:
#
#    scalerank featurecla                                           geometry
# 0          1    Country  POLYGON ((-59.57209 -80.04018, -59.86585 -80.5...
# 1          1    Country  POLYGON ((-159.20818 -79.49706, -161.12760 -79...
# 2          1    Country  POLYGON ((-45.15476 -78.04707, -43.92083 -78.4...
# 3          1    Country  POLYGON ((-121.21151 -73.50099, -119.91885 -73...
# 4          1    Country  POLYGON ((-125.55957 -73.48135, -124.03188 -73...
# ...

# Display the live result so the rendered output cannot drift from the code.
csv_gdf.head()

- __Example 1.1.3.__ Extract from an Excel file

In [None]:
# Ex. 1.1.3

# Point at an Excel workbook; the original reused the CSV path from Ex. 1.1.2,
# so this example never exercised Excel input.
# NOTE(review): assumes example-inputs/example.xlsx ships with the repo — confirm.
excel_path = 'example-inputs/example.xlsx'

# Shorthand equivalent: build the extractor and extract in one expression.
excel_gdf = ExtractTable.read_file(excel_path).extract()

# Expected first rows of the resulting GeoDataFrame:
#
#    scalerank featurecla                                           geometry
# 0          1    Country  POLYGON ((-59.57209 -80.04018, -59.86585 -80.5...
# 1          1    Country  POLYGON ((-159.20818 -79.49706, -161.12760 -79...
# 2          1    Country  POLYGON ((-45.15476 -78.04707, -43.92083 -78.4...
# 3          1    Country  POLYGON ((-121.21151 -73.50099, -119.91885 -73...
# 4          1    Country  POLYGON ((-125.55957 -73.48135, -124.03188 -73...
# ...

# Display the live result so the rendered output cannot drift from the code.
excel_gdf.head()

- __Example 1.1.4.__ Extract from a ZIP file

In [None]:
# Ex. 1.1.4

# Path to the ZIP archive containing the table to extract.
zip_path = 'example-inputs/example.zip'

# Build the extractor and extract in one expression.
zip_gdf = ExtractTable.read_file(zip_path).extract()

# Expected first rows of the resulting GeoDataFrame:
#
#    scalerank featurecla                                           geometry
# 0          1    Country  POLYGON ((-59.57209 -80.04018, -59.86585 -80.5...
# 1          1    Country  POLYGON ((-159.20818 -79.49706, -161.12760 -79...
# 2          1    Country  POLYGON ((-45.15476 -78.04707, -43.92083 -78.4...
# 3          1    Country  POLYGON ((-121.21151 -73.50099, -119.91885 -73...
# 4          1    Country  POLYGON ((-125.55957 -73.48135, -124.03188 -73...
# ...

# Display the live result so the rendered output cannot drift from the code.
zip_gdf.head()

#### Example 1.2. Extract a table from a URL

In [None]:
# Ex. 1.2

# URL copied from https://geopandas.org/io.html
url = 'http://d2ad6b4ur7yvpq.cloudfront.net/naturalearth-3.3.0/ne_110m_land.geojson'

# The constructor is given the URL directly, in place of a local file path.
url_gdf = ExtractTable(url).extract()

# Expected first rows of the resulting GeoDataFrame:
#
#    scalerank featurecla                                           geometry
# 0          1    Country  POLYGON ((-59.57209 -80.04018, -59.86585 -80.5...
# 1          1    Country  POLYGON ((-159.20818 -79.49706, -161.12760 -79...
# 2          1    Country  POLYGON ((-45.15476 -78.04707, -43.92083 -78.4...
# 3          1    Country  POLYGON ((-121.21151 -73.50099, -119.91885 -73...
# 4          1    Country  POLYGON ((-125.55957 -73.48135, -124.03188 -73...
# ...

# Display the live result so the rendered output cannot drift from the code.
url_gdf.head()

#### Example 1.3. Extract a table from a `pandas DataFrame`

In [None]:
# Ex. 1.3

# Load the CSV into a plain pandas DataFrame first.
# csv_path is defined in the Ex. 1.1.2 cell, which must have been run.
pandas_df = pd.read_csv(csv_path)

# The constructor is given the in-memory DataFrame instead of a path.
pandas_gdf = ExtractTable(pandas_df).extract()

# Expected first rows of the resulting GeoDataFrame:
#
#    scalerank featurecla                                           geometry
# 0          1    Country  POLYGON ((-59.57209 -80.04018, -59.86585 -80.5...
# 1          1    Country  POLYGON ((-159.20818 -79.49706, -161.12760 -79...
# 2          1    Country  POLYGON ((-45.15476 -78.04707, -43.92083 -78.4...
# 3          1    Country  POLYGON ((-121.21151 -73.50099, -119.91885 -73...
# 4          1    Country  POLYGON ((-125.55957 -73.48135, -124.03188 -73...
# ...

# Display the live result so the rendered output cannot drift from the code.
pandas_gdf.head()

#### Example 1.4. Extract a table from a `geopandas GeoDataFrame`

In [None]:
# Ex. 1.4

# Feed an existing GeoDataFrame back into the extractor.
# csv_gdf is produced by the Ex. 1.1.2 cell, which must have been run.
geopandas_gdf = ExtractTable(csv_gdf).extract()

# Expected first rows of the resulting GeoDataFrame:
#
#    scalerank featurecla                                           geometry
# 0          1    Country  POLYGON ((-59.57209 -80.04018, -59.86585 -80.5...
# 1          1    Country  POLYGON ((-159.20818 -79.49706, -161.12760 -79...
# 2          1    Country  POLYGON ((-45.15476 -78.04707, -43.92083 -78.4...
# 3          1    Country  POLYGON ((-121.21151 -73.50099, -119.91885 -73...
# 4          1    Country  POLYGON ((-125.55957 -73.48135, -124.03188 -73...
# ...

# Display the live result so the rendered output cannot drift from the code.
geopandas_gdf.head()

### Example 2. Extract a table with a selected column as the index
#### Example 2.1. Extract a table with a known column label

In [None]:
# Ex. 2.1

# Label of the column to use as the index of the extracted table.
known_column = 'featurecla'

# Pass the label through the `column` keyword.
# Alternative: ExtractTable.read_file(shp_path, column=known_column)
# shp_path is defined in the Ex. 1.1.1 cell, which must have been run.
known_column_gdf = ExtractTable(shp_path, column=known_column).extract()

# Expected first rows of the resulting GeoDataFrame:
#
#             scalerank                                           geometry
# featurecla
# Country             1  POLYGON ((-59.57209 -80.04018, -59.86585 -80.5...
# Country             1  POLYGON ((-159.20818 -79.49706, -161.12760 -79...
# Country             1  POLYGON ((-45.15476 -78.04707, -43.92083 -78.4...
# Country             1  POLYGON ((-121.21151 -73.50099, -119.91885 -73...
# Country             1  POLYGON ((-125.55957 -73.48135, -124.03188 -73...
# ...

# Display the live result so the rendered output cannot drift from the code.
known_column_gdf.head()

#### Example 2.2. Extract a table without a known column label

In [None]:
# Ex. 2.2

# Build the extractor without choosing an index column yet.
# shp_path is defined in the Ex. 1.1.1 cell, which must have been run.
unknown_column_et = ExtractTable(shp_path)
# list_columns() returns a list of columns from which to choose.
columns_list = unknown_column_et.list_columns() # returns a list of columns from which to choose
# Print the candidates; the next cell picks one of them as the index.
print(columns_list)

In [None]:
# Select the 'scalerank' column as the index on the extractor built in the
# previous cell.
unknown_column_et.column = 'scalerank'

# Extract the table with the column selected above.
unknown_column_gdf = unknown_column_et.extract()

# Expected first rows of the resulting GeoDataFrame:
#
#           featurecla                                           geometry
# scalerank
# 1            Country  POLYGON ((-59.57209 -80.04018, -59.86585 -80.5...
# 1            Country  POLYGON ((-159.20818 -79.49706, -161.12760 -79...
# 1            Country  POLYGON ((-45.15476 -78.04707, -43.92083 -78.4...
# 1            Country  POLYGON ((-121.21151 -73.50099, -119.91885 -73...
# 1            Country  POLYGON ((-125.55957 -73.48135, -124.03188 -73...
# ...

# Display the live result so the rendered output cannot drift from the code.
unknown_column_gdf.head()

### Example 3. Extract a subtable
#### Example 3.1. Extract a subtable with a known column value

In [None]:
# Ex. 3.1
# TODO: example not yet written (subtable with a known column value).



#### Example 3.2. Extract a subtable with multiple known column values

In [None]:
# Ex. 3.2
# TODO: example not yet written (subtable with multiple known column values).



#### Example 3.3. Extract a subtable without a known column value

In [None]:
# Ex. 3.3
# TODO: example not yet written (subtable without a known column value).



### Example 4. Extract a table to a file

In [None]:
# Ex. 4.
# TODO: example not yet written (extract a table to a file).

