# The Spoken World Table Management

The purpose of this notebook is to create a new, customized dataframe from an existing dataset and export it as a GeoJSON file.

Michael McNeil | 2019-04-24

## Imports

In [12]:
import pandas as pd
import geopandas as gpd
import fiona
import os

## Variables

In [24]:
# Load the raw places layer into a GeoDataFrame; later cells refer to it as `df`.
raw_places_path = r'G:\SCHOOL\KENTUCKY\MAP698\the-spoken-world\data\raw\places.geojson'
df = gpd.read_file(raw_places_path)

In [25]:
# Preview the first five rows to check that the file loaded as expected.
df.head()

Unnamed: 0,scalerank,natscale,labelrank,featurecla,name,namepar,namealt,diffascii,nameascii,adm0cap,...,rank_max,rank_min,geonameid,meganame,ls_name,ls_match,checkme,min_zoom,ne_id,geometry
0,10,1,8,Admin-1 capital,Colonia del Sacramento,,,0,Colonia del Sacramento,0.0,...,7,7,3443013.0,,,0,0,9.0,1159112629,POINT (-57.84000247340134 -34.47999900541754)
1,10,1,8,Admin-1 capital,Trinidad,,,0,Trinidad,0.0,...,7,7,3439749.0,,,0,0,9.0,1159112647,POINT (-56.90099656015872 -33.5439989373607)
2,10,1,8,Admin-1 capital,Fray Bentos,,,0,Fray Bentos,0.0,...,7,7,3442568.0,,,0,0,9.0,1159112663,POINT (-58.3039974719095 -33.1389990288435)
3,10,1,8,Admin-1 capital,Canelones,,,0,Canelones,0.0,...,6,6,3443413.0,,,0,0,9.0,1159112679,POINT (-56.28400149324307 -34.53800400667546)
4,10,1,8,Admin-1 capital,Florida,,,0,Florida,0.0,...,7,7,3442585.0,,,0,0,7.0,1159112703,POINT (-56.21499844799416 -34.09900200521719)


#### Which column headers are in the df?

In [15]:
# Show every column name in the table as a plain Python list.
df.columns.tolist()

['scalerank',
 'natscale',
 'labelrank',
 'featurecla',
 'name',
 'namepar',
 'namealt',
 'diffascii',
 'nameascii',
 'adm0cap',
 'capalt',
 'capin',
 'worldcity',
 'megacity',
 'sov0name',
 'sov_a3',
 'adm0name',
 'adm0_a3',
 'adm1name',
 'iso_a2',
 'note',
 'latitude',
 'longitude',
 'changed',
 'namediff',
 'diffnote',
 'pop_max',
 'pop_min',
 'pop_other',
 'rank_max',
 'rank_min',
 'geonameid',
 'meganame',
 'ls_name',
 'ls_match',
 'checkme',
 'min_zoom',
 'ne_id',
 'geometry']

## Manipulate Tables

#### Remove unnecessary fields for the purpose of this assignment

In [16]:
# Columns to keep for this assignment.
# 'geometry' must stay in the list, otherwise the spatial data is dropped.
keep_columns = [
    'name', 'nameascii', 'adm0name',
    'latitude', 'longitude', 'pop_max',
    'geonameid', 'ne_id',
    'geometry',
]
df = df.filter(items=keep_columns)

In [17]:
# Preview the trimmed table: only the retained columns should be listed.
df.head()

Unnamed: 0,name,nameascii,adm0name,latitude,longitude,pop_max,geonameid,ne_id,geometry
0,Colonia del Sacramento,Colonia del Sacramento,Uruguay,-34.479999,-57.840002,21714,3443013.0,1159112629,POINT (-57.84000247340134 -34.47999900541754)
1,Trinidad,Trinidad,Uruguay,-33.543999,-56.900997,21093,3439749.0,1159112647,POINT (-56.90099656015872 -33.5439989373607)
2,Fray Bentos,Fray Bentos,Uruguay,-33.138999,-58.303997,23279,3442568.0,1159112663,POINT (-58.3039974719095 -33.1389990288435)
3,Canelones,Canelones,Uruguay,-34.538004,-56.284001,19698,3443413.0,1159112679,POINT (-56.28400149324307 -34.53800400667546)
4,Florida,Florida,Uruguay,-34.099002,-56.214998,32234,3442585.0,1159112703,POINT (-56.21499844799416 -34.09900200521719)


In [18]:
# Summarise the table: row count, column dtypes and non-null counts.
df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 7343 entries, 0 to 7342
Data columns (total 9 columns):
name         7343 non-null object
nameascii    7343 non-null object
adm0name     7343 non-null object
latitude     7343 non-null float64
longitude    7343 non-null float64
pop_max      7343 non-null int64
geonameid    7343 non-null float64
ne_id        7343 non-null int64
geometry     7343 non-null object
dtypes: float64(3), int64(2), object(4)
memory usage: 516.4+ KB


#### Remove Antarctica places for simplicity

In [19]:
# Flag the rows whose 'adm0name' is exactly 'Antarctica', then keep all the others.
is_antarctica = df['adm0name'] == 'Antarctica'
df_less_antarctica = df.loc[~is_antarctica]

#### Check to see if there are fewer features in the df now

In [20]:
# The entry count reported here is the number of rows left after the Antarctica filter.
df_less_antarctica.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 7303 entries, 0 to 7342
Data columns (total 9 columns):
name         7303 non-null object
nameascii    7303 non-null object
adm0name     7303 non-null object
latitude     7303 non-null float64
longitude    7303 non-null float64
pop_max      7303 non-null int64
geonameid    7303 non-null float64
ne_id        7303 non-null int64
geometry     7303 non-null object
dtypes: float64(3), int64(2), object(4)
memory usage: 570.5+ KB


## Export

#### First need to find out how the data can be exported via fiona ...

In [21]:
# Mapping of driver name -> supported modes ('r' read, 'a' append, 'w' write);
# the export needs a driver whose modes include 'w'.
fiona.supported_drivers

{'AeronavFAA': 'r',
 'ARCGEN': 'r',
 'BNA': 'raw',
 'DXF': 'raw',
 'CSV': 'raw',
 'OpenFileGDB': 'r',
 'ESRIJSON': 'r',
 'ESRI Shapefile': 'raw',
 'GeoJSON': 'rw',
 'GeoJSONSeq': 'rw',
 'GPKG': 'rw',
 'GML': 'raw',
 'GPX': 'raw',
 'GPSTrackMaker': 'raw',
 'Idrisi': 'r',
 'MapInfo File': 'raw',
 'DGN': 'raw',
 'PCIDSK': 'r',
 'S57': 'r',
 'SEGY': 'r',
 'SUA': 'r',
 'TopoJSON': 'r'}

#### Now delete the previous version of the export, if it exists ...

In [22]:
# Delete the previous export before writing a fresh copy.
# NOTE(review): presumably needed because the GeoJSON driver will not overwrite
# an existing file — confirm against the installed fiona/GDAL version.
# The existence check keeps this cell from raising FileNotFoundError on a first
# run (or after the file was removed by hand), so Restart & Run All still works.
old_export_path = r'G:\SCHOOL\KENTUCKY\MAP698\the-spoken-world\data\places-newtable.geojson'
if os.path.exists(old_export_path):
    os.remove(old_export_path)

#### ... and export a fresh new copy!

In [23]:
# Write the Antarctica-free table to disk as UTF-8 encoded GeoJSON.
export_path = r'G:\SCHOOL\KENTUCKY\MAP698\the-spoken-world\data\places-newtable.geojson'
df_less_antarctica.to_file(export_path, driver='GeoJSON', encoding='utf-8')