# Python for Creating CSV and Preliminary Reporting

In [3]:
import csv
#import geopandas as gpd
import json
import matplotlib.pyplot as plt
import pandas as pd
from shapely.geometry import Point

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply the 'ggplot' style sheet to every figure drawn below.
plt.style.use('ggplot')

## Load Data and Write to CSV


In [5]:
# JSON text is UTF-8 by specification; naming the encoding explicitly keeps
# the toponyms from being decoded with a platform-dependent locale default.
with open("roman-amphitheaters.geojson", encoding="utf-8") as f:
    j = json.load(f)

In [6]:
# Flatten every GeoJSON feature into one fixed-width row. JSON accommodates a
# variable data model, so optional properties are read with dict.get and
# collapse to '' whenever the key is absent.

def feature_to_row(feature):
    """Return one feature as a flat tuple, ordered like the DataFrame columns.

    Required keys (``id``, ``title``, ``label``, ``pleiades``,
    ``moderncountry`` and the first three ``coordinates`` entries) are
    indexed directly, so a feature lacking one raises ``KeyError`` or
    ``IndexError``. Optional keys fall back to the empty string.
    """
    props = feature['properties']
    dims = props.get('dimensions', {})
    coords = feature['geometry']['coordinates']

    return (
        feature['id'],
        props['title'],
        props['label'],
        props.get('latintoponym', ''),
        props['pleiades'],
        props.get('welchid', ''),
        props.get('golvinid', ''),
        props.get('buildingtype', ''),
        props.get('chronogroup', ''),
        # The mere presence of an 'exclude' key flags the row False.
        'exclude' not in props,
        # 'capacity' is a nested object; only its 'quantity' is exported.
        props['capacity']['quantity'] if 'capacity' in props else '',
        props['moderncountry'],
        # Prefer 'province'; fall back to 'region', then to ''.
        props.get('province', props.get('region', '')),
        dims.get('arenamajor', ''),
        dims.get('arenaminor', ''),
        dims.get('exteriormajor', ''),
        dims.get('exteriorminor', ''),
        dims.get('exteriorheight', ''),
        coords[0],
        coords[1],
        coords[2],
    )


d = [feature_to_row(feature) for feature in j['features']]

ramphs_df = pd.DataFrame(d, columns=[
    'id',              # short id
    'title',           # longer title
    'label',           # short label
    'latintoponym',    # latin toponym
    'pleiades',        # pleiades https uri
    'welchid',         # id in Welch
    'golvinid',        # id in Golvin
    'buildingtype',    # usually 'amphitheater'
    'chronogroup',     # label for the chronological group
    'secondcentury',   # is this an amphitheater that was in use in 2nd century
    'capacity',        # capacity ('quantity' value from the source)
    'modcountry',      # modern country
    'romanregion',     # province or augustan region of italy
    'arenamajor',      # long axis of arena in meters
    'arenaminor',      # short axis of arena in meters
    'extmajor',        # long axis of exterior
    'extminor',        # short axis of exterior
    'exteriorheight',  # height of exterior wall if known
    'longitude',       # longitude (coordinates[0])
    'latitude',        # latitude (coordinates[1])
    'elevation',       # elevation in meters (coordinates[2])
])

# Missing values were stored as '' above; pd.to_numeric turns them into NaN
# so these columns get a numeric dtype.
numeric_cols = ['capacity', 'elevation', 'arenamajor', 'arenaminor',
                'extmajor', 'extminor', 'exteriorheight']
ramphs_df[numeric_cols] = ramphs_df[numeric_cols].apply(pd.to_numeric)

In [6]:
# Export every column; all non-numeric fields are written quoted.
ramphs_df.to_csv(
    "roman-amphitheaters.csv",
    index=False,
    quoting=csv.QUOTE_NONNUMERIC,
)

In [7]:
# Export a reduced set of columns to a scratch file, same quoting rules.
(
    ramphs_df
    .loc[:, ['id', 'title', 'chronogroup', 'latintoponym', 'romanregion',
             'modcountry', 'capacity', 'extmajor', 'extminor',
             'arenamajor', 'arenaminor', 'latitude', 'longitude']]
    .to_csv('tmp.csv', index=False, quoting=csv.QUOTE_NONNUMERIC)
)

## Basic Reporting

In [7]:
# Preview the first two rows of the flattened table.
ramphs_df.head(2)

Unnamed: 0,id,title,label,latintoponym,pleiades,welchid,golvinid,buildingtype,chronogroup,secondcentury,...,modcountry,romanregion,arenamajor,arenaminor,extmajor,extminor,exteriorheight,longitude,latitude,elevation
0,duraEuroposAmphitheater,Amphitheater at Dura Europos,Dura,Dura Europus,https://pleiades.stoa.org/places/893989,,129,amphitheater,severan,False,...,Syria,syria,31.0,25.0,50.0,44.0,,40.728926,34.749855,223
1,arlesAmphitheater,Amphitheater at Arles,Arles,Arelate,https://pleiades.stoa.org/places/148217,,154,amphitheater,flavian,True,...,France,narbonensis,47.0,32.0,136.0,107.0,,4.631111,43.677778,21


In [8]:
# Summary statistics for the numeric columns of the full table.
ramphs_df.describe()

Unnamed: 0,capacity,arenamajor,arenaminor,extmajor,extminor,exteriorheight,longitude,latitude,elevation
count,124.0,150.0,149.0,180.0,166.0,3.0,260.0,260.0,260.0
mean,12095.806452,57.176667,38.089933,97.595167,77.20488,41.483333,10.66504,42.223168,191.407692
std,9200.1989,14.263028,8.49944,29.758685,24.903213,9.859048,9.008302,4.968707,210.000926
min,1000.0,25.0,19.0,39.6,34.0,32.45,-8.49333,31.608189,-121.0
25%,5112.5,47.125,33.0,76.75,59.1,36.225,5.514918,38.480299,32.0
50%,9200.0,58.0,39.0,95.0,75.0,40.0,10.954128,42.089044,117.5
75%,15662.5,67.0,43.0,117.79,94.375,46.0,14.250144,45.479995,282.25
max,50000.0,101.0,62.0,189.0,156.0,52.0,40.728926,55.6026,1170.0


In [10]:
# Same summary, restricted to rows whose 'secondcentury' flag is True.
ramphs_df.loc[ramphs_df['secondcentury']].describe()

Unnamed: 0,capacity,arenamajor,arenaminor,extmajor,extminor,exteriorheight,longitude,latitude,elevation
count,115.0,141.0,140.0,171.0,159.0,2.0,242.0,242.0,242.0
mean,11859.826087,57.281915,38.249286,96.927661,76.708868,42.225,10.179236,42.348624,193.739669
std,9071.861611,14.35844,8.561603,28.718851,24.820221,13.823938,8.824098,5.007255,211.043118
min,1200.0,25.0,19.0,39.6,34.0,32.45,-8.49333,31.608189,1.0
25%,5075.0,47.0,33.0,76.5,58.9,37.3375,4.323042,38.686097,34.0
50%,9000.0,58.0,39.0,94.0,75.0,42.225,10.606566,42.270997,119.5
75%,15095.0,67.0,43.0,115.0,93.6,47.1125,14.110261,45.692934,282.75
max,50000.0,101.0,62.0,189.0,156.0,52.0,38.273763,55.6026,1170.0


In [11]:
# Confirm that the CSV is readable.
# It would be nice if the "numeric pattern" strings (welchid, golvinid)
# survived as strings instead of being inferred as numbers on read.
# NOTE(review): passing dtype={'welchid': str, 'golvinid': str} to read_csv
# should achieve that - confirm before relying on it.
pd.read_csv("roman-amphitheaters.csv", quoting=csv.QUOTE_NONNUMERIC).describe()

Unnamed: 0,welchid,golvinid,capacity,arenamajor,arenaminor,extmajor,extminor,exteriorheight,longitude,latitude,elevation
count,18.0,82.0,124.0,150.0,149.0,180.0,166.0,3.0,260.0,260.0,260.0
mean,9.777778,109.52439,12095.806452,57.176667,38.089933,97.595167,77.20488,41.483333,10.66504,42.223168,191.407692
std,5.704029,63.329089,9200.1989,14.263028,8.49944,29.758685,24.903213,9.859048,9.008302,4.968707,210.000926
min,1.0,12.0,1000.0,25.0,19.0,39.6,34.0,32.45,-8.49333,31.608189,-121.0
25%,5.25,64.25,5112.5,47.125,33.0,76.75,59.1,36.225,5.514918,38.480299,32.0
50%,9.5,107.0,9200.0,58.0,39.0,95.0,75.0,40.0,10.954128,42.089044,117.5
75%,14.5,145.25,15662.5,67.0,43.0,117.79,94.375,46.0,14.250144,45.479995,282.25
max,19.0,298.0,50000.0,101.0,62.0,189.0,156.0,52.0,40.728926,55.6026,1170.0


In [12]:
# Rows that record a positive exterior height (NaN compares False).
ramphs_df.loc[ramphs_df['exteriorheight'].gt(0)]

Unnamed: 0,id,title,label,latintoponym,pleiades,welchid,golvinid,buildingtype,chronogroup,secondcentury,...,modcountry,romanregion,arenamajor,arenaminor,extmajor,extminor,exteriorheight,longitude,latitude,elevation
4,romeFlavianAmphitheater,Flavian Amphitheater at Rome,Colosseum,,https://pleiades.stoa.org/places/423025,,152.0,amphitheater,flavian,True,...,Italy,regio-i,83.0,48.0,189.0,156.0,52.0,12.492269,41.890169,22
90,thysdrusAmphitheater,Amphitheater at Thysdrus,Thysdrus (lg.),,https://pleiades.stoa.org/places/324835,,,amphitheater,post-severan,False,...,Tunisia,proconsularis,65.0,39.0,148.0,122.0,40.0,10.706939,35.29639,111
97,pulaAmphitheater,Amphitheater at Pula,Pula,Colonia Pietas Iulia Pola Pollentia Herculanea,https://pleiades.stoa.org/places/197448,,,amphitheater,julio-claudian,True,...,Croatia,regio-x,67.95,41.65,132.45,105.1,32.45,13.850243,44.873229,16


In [13]:
# Rows lacking an exterior major axis, sorted by ascending longitude.
(
    ramphs_df
    .loc[ramphs_df['extmajor'].isna()]
    .sort_values(by='longitude')
    .loc[:, ['id', 'modcountry', 'latintoponym', 'golvinid',
             'extmajor', 'arenamajor', 'latitude', 'longitude']]
)

Unnamed: 0,id,modcountry,latintoponym,golvinid,extmajor,arenamajor,latitude,longitude
157,bragaAmphitheater,Portugal,Bracara Augusta,,,,41.546669,-8.430075
166,bobadelaAmphitheater,Portugal,Elbocoris,,,50.0,40.361088,-7.893572
56,lixusAmphitheater,Morocco,,,,,35.199900,-6.108468
121,caparraAmphitheater,Spain,Municipium Flavium Caparense,,,30.0,40.164159,-6.100049
98,carmonaAmphitheater,Spain,Carmo,,,,37.469674,-5.650907
159,leonAmphitheater,Spain,Legio,,,,42.598889,-5.566944
122,cordobaAmphitheater,Spain,Colonia Patricia,,,,37.884183,-4.788888
193,espejoAmphitheater,Spain,Ucubi,,,,37.683989,-4.554198
158,sisapoAmphitheater,Spain,,,,,38.645833,-4.516667
107,carmarthenAmphitheater,United Kingdom,Moridunum,,,,51.862005,-4.296651


In [None]:
# Rows with no Golvin id (absent ids were stored as '').
ramphs_df.loc[
    ramphs_df['golvinid'].eq(''),
    ['id', 'latintoponym', 'golvinid', 'extmajor', 'arenamajor',
     'latitude', 'longitude'],
]

In [None]:
# Rows with no Latin toponym (absent toponyms were stored as '').
ramphs_df.loc[
    ramphs_df['latintoponym'].eq(''),
    ['id', 'latintoponym', 'golvinid', 'extmajor', 'arenamajor',
     'latitude', 'longitude'],
]

## Duplicate Checking

In [None]:
# Every row whose short label is shared with at least one other row.
dups = (
    ramphs_df
    .loc[ramphs_df['label'].duplicated(keep=False),
         ['id', 'pleiades', 'latintoponym', 'latitude', 'longitude']]
    .sort_values('pleiades')
)

# Displays True when no label is repeated.
len(dups) == 0

In [None]:
# Every row whose id is shared with at least one other row.
dups = (
    ramphs_df
    .loc[ramphs_df['id'].duplicated(keep=False),
         ['id', 'pleiades', 'latintoponym', 'latitude', 'longitude']]
    .sort_values('pleiades')
)

# Displays True when no id is repeated.
len(dups) == 0

In [None]:
# Every row whose Pleiades URI is shared with at least one other row.
dups = (
    ramphs_df
    .loc[ramphs_df['pleiades'].duplicated(keep=False),
         ['id', 'pleiades', 'latintoponym', 'latitude', 'longitude']]
    .sort_values('pleiades')
)

# Displays True while exactly 15 rows share a Pleiades URI.
len(dups) == 15

In [None]:
# Every row whose Latin toponym is shared with at least one other row.
dups = (
    ramphs_df
    .loc[ramphs_df['latintoponym'].duplicated(keep=False),
         ['id', 'pleiades', 'latintoponym', 'latitude', 'longitude']]
    .sort_values('pleiades')
)

# Empty toponyms all "duplicate" one another, so drop them before counting.
len(dups.query("latintoponym != ''")) == 11

## Basic Mapping

In [None]:
# The notebook-level geopandas import is commented out, so `gpd` would be
# undefined on a fresh kernel. Importing it here keeps the CSV and reporting
# cells usable without geopandas while making the mapping cells runnable.
import geopandas as gpd

rgdf = gpd.read_file("roman-amphitheaters.geojson")

In [None]:
# Show the coordinate reference system attached to the frame read above.
rgdf.crs

In [None]:
# Quick-look plot of every feature, drawn in black.
rgdf.plot(color = 'black')

In [None]:
# Chronological groups: extraction, export and preview fit in a single cell.
c = [
    (cgrp['id'], cgrp['startdate'], cgrp['enddate'])
    for cgrp in j['romanamphitheaterschronogroups']
]

chrono_df = pd.DataFrame(c, columns=['chronogroup', 'startdate', 'enddate'])

chrono_df.to_csv(
    "chronogrps.csv",
    index=False,
    quoting=csv.QUOTE_NONNUMERIC,
)
chrono_df.head(2)

In [None]:
# Attach start/end dates to each amphitheater. 'chronogroup' is the only
# column the two frames share, and the join is inner, so rows whose
# chronogroup has no match in chrono_df are dropped.
ramphs_df.merge(chrono_df, how='inner', on='chronogroup')