# Israel 1:100,000 Maps

Code for creating the first OpenIndexMap example for the AGSL.


### Import Libraries

In [1]:
import csv
import json
import sys
from collections import OrderedDict

print(sys.version)

3.7.1 (default, Dec 10 2018, 22:54:23) [MSC v.1915 64 bit (AMD64)]


### Input and outputh paths

In [2]:
input_file_path = r"S:\_H_GML\Departments\AGSL\GIS\Projects\GEODEX\OpenIndexMaps\israel_100k_csv_copy.csv"
geojson_file_path = r'S:\_H_GML\Departments\AGSL\GIS\Projects\GEODEX\OpenIndexMaps\israel_100k_2_Stephen.geojson'

print(input_file_path)
print(geojson_file_path)

S:\_H_GML\Departments\AGSL\GIS\Projects\GEODEX\OpenIndexMaps\israel_100k_csv_copy.csv
S:\_H_GML\Departments\AGSL\GIS\Projects\GEODEX\OpenIndexMaps\israel_100k_2_Stephen.geojson


### Set up CSV DictReader

In [3]:
try:
    input_csv_file = open(input_file_path)
    input_csv_file_reader = csv.DictReader(input_csv_file)
    input_data = list(input_csv_file_reader)
except:
    print("Check that the file path is pointing to a CSV file.")

print(input_data[1]) #prints the first row of data

OrderedDict([('RECORD', '1'), ('CDMURL', 'agdm/id/25000/'), ('AGSLSCAN', 'am002707'), ('ATLABEL', '[NAHARIYYA] (HEBR)'), ('DATE', '1969'), ('SERIES_TIT', 'Israel 1:100,000.'), ('PUBLISHER', 'Survey of Israel.'), ('SCALE', '100000'), ('MAP_TYPE', '18'), ('PRODUCTION', '31'), ('MAP_FOR', '41'), ('HOLD', '1'), ('YEAR1', '1969'), ('YEAR1_TYPE', '112'), ('YEAR2', '1967'), ('YEAR2_TYPE', '99'), ('YEAR3', ''), ('YEAR3_TYPE', ''), ('X1', '34.88861'), ('X2', '35.31667'), ('Y1', '33.29528'), ('Y2', '32.93417'), ('PRIME_MER', '131')])


In [4]:
# Test the json module:
test_json = json.dumps(input_data[9], sort_keys=False, indent=4)
print(test_json)

{
    "RECORD": "5",
    "CDMURL": "agdm/id/25016/",
    "AGSLSCAN": "am002711",
    "ATLABEL": "[NETANYA] (HEBR)",
    "DATE": "1968",
    "SERIES_TIT": "Israel 1:100,000.",
    "PUBLISHER": "Survey of Israel.",
    "SCALE": "100000",
    "MAP_TYPE": "18",
    "PRODUCTION": "31",
    "MAP_FOR": "41",
    "HOLD": "1",
    "YEAR1": "1968",
    "YEAR1_TYPE": "112",
    "YEAR2": "1967",
    "YEAR2_TYPE": "114",
    "YEAR3": "1967",
    "YEAR3_TYPE": "99",
    "X1": "34.785",
    "X2": "35.31583",
    "Y1": "32.57111",
    "Y2": "32.2125",
    "PRIME_MER": "131"
}


# Field Map

In [5]:
# Define the field map:
# Don't include date fields
# Spatial fields are inlcuded so they also show up as attributes

field_map = {
    "label" : "RECORD", # Sometimes RECORD and LOCATION are interchangable...
    "labelAlt" : "ATLABEL",
    "labelAlt2" : None,
    "west" : "X1",
    "east" : "X2",
    "north" : "Y1",
    "south" : "Y2",
    "location" : None,
    "scale" : "SCALE",
    "color" : "PRODUCTION",
    "mapType" : "MAP_TYPE",
    #"title" : "LOCATION",
    "titleAlt" : None,
    "revised": None,
    "overprint" : None,
    "edition" : None,
    "publisher" : "PUBLISHER",
    "overlays" : None,
    "projection" : "PROJECT",
    "lcCallNo" : None,
    #"contLines" : "ISO_TYPE",
    #"contInterv" : "ISO_VAL",
    #"bathLines" : "ISO_TYPE",
    #"bathInterv" : "ISO_VAL",
    "primeMer" : "PRIME_MER",
    # "inst" : "UWM Libraries", # This will be the same for every one we do.
    #"sheetId" : "CATLOC",
    "available" : "HOLD",
    "physHold" : None,
    "digHold" : "CDMURL", # https://collections.lib.uwm.edu/digital/collection/ + CDMURL
    # "instCallNo" : "CATLOC",
    "inst" : "inst",
    "recId" : "AGSLSCAN",
    "download" : None,
    "websiteUrl" : None,
    "thumbUrl" : None,
    "iiifUrl" : None,
    "fileName" : None,
    "note" : None
}

# Dictionaries 
(Might be collapsed!)

In [7]:
# Dictionaries:

map_type_dict = {
    30: "Administrative map",
    1: "Aerial photograph",
    6: "Aeronautical chart",
    7: "Bathymetric map",
    21: "Coal map",
    0 : "Not assigned",
    5 : "Geologic map",
    4 : "Hydrogeologic map",
    11 : "Land use map",
    12 : "Nautical chart",
    13 : "Orthophoto map",
    14 : "Planimetric map",
    998 : "Printed map - 2 color",
    997 : "Printed map - colored",
    996 : "Printed map - monochrome",
    995 : "Projection not indicated",
    15 : "Reference map",
    16 : "Road map",
    22 : "Satellite image map",
    24 : "Shaded relief map",
    18 : "Topo map (contours)",
    23 : "Topo map (form lines)",
    19 : "Topo map (hachures)",
    25 : "Topo map (irr interval)",
    20 : "Topo map (layer tints)"
}

production_dict = {
    38: "Blue line print",
    39: "Blueprint",
    37: "Negative microform",
    35: "Negative photocopy",
    34: "Positive photocopy",
    31: "Printed map - colored",
    33: "Printed map - monochrome",
    32: "Printed map - 2 color"
}

projection_dict = {
    0: "Not assigned",
    163 : "Azimuthal equidistant",
    185 : "Bonne",
    199 : "Cassini",
    182 : "Conic equidistant",
    183 : "Conic",
    171 : "Cylindrical",
    180 : "Gauss-Krüger",
    999 : "Gauss-Krüger",
    164 : "Gnomonic",
    186 : "Lambert conformal conic",
    175 : "Mercator",
    176 : "Miller",
    998 : "Munich PM",
    187 : "Polyconic",
    198 : "Polyhedric",
    161 : "Not indicated",
    178 : "Sinusoidal",
    168 : "Stereographic",
    179 : "Transverse Mercator"
}

primeMeridian_dict = {
    0 : "Not assigned",
    157 : "Athens PM",
    999 : "Cordoba PM",
    148 : "Copenhagen PM",
    135 : "Ferro PM",
    131 : "Greenwich PM",
    132 : "Madrid PM",
    146 : "Munich PM",
    142 : "Paris PM",
    138 : "Quito PM",
    147 : "Rome PM"
}

isoType_dict = {
    1 : "Isobars Feet",
    2 : "Isobars Fathoms",
    3 : "Isobars Meters",
    4 : "Contours Feet",
    5 : "Contours Meters",
    6 : "Multiple Isobar Types",
    7 : "No Isobar Indicated"
}

yearType_dict = { # modify this line from : to =, fix spacing
    97 : "Approximate Date", # datePub
    98 : "Publication Date", # datePub
    99 : "Compilation Date", # datePub
    100 : "Base Map Date", # date
    102 : "Field Checked", # dateSurvey
    103 : "Image Year", # datePhoto
    104 : "Photography to", # datePhoto
    105 : "Photo Inspected", # datePhoto
    106 : "Image Date", # datePhoto
    108 : "Preliminary Edition", # date
    109 : "Compiled From Map Dated", # datePSurvey
    110 : "Interim Edition", # date
    112 : "Printed", # datePub
    113 : "Printed Circa", # datePub
    114 : "Revised", # date
    115 : "Situation/Survey", # dateSurvey
    116 : "Transportation Network", # date
    118 : "Provisional Edition", # date
    120 : "Photo Revised", # datePhoto
    121 : "Edition of", # datePub
    119 : "Magnetic Declination Year" # date
} 

# Dealing with Dates:

`oim_date_dict` is a dictionary that sets up which types of Geodex dates match up to the OIM dates

`define_dates_from_gdx()` is a function that accepts a row of data as an argument and returns the different types of OIM date fields



In [8]:
# Define which Geodex date types correspond to which OIM field:
oim_date_dict = {
    "datePub" : [97, 98, 99, 113, 121],
    "date" : [100, 110, 114, 116, 118, 119],
    "dateSurvey" : [102, 109, 115],
    "datePhoto" : [103, 104, 105, 106, 120],
    "dateReprint" : []
}

# Define the date function:
# This function will get called within the gdx_to_oim_row_converter function
def define_dates_from_gdx(row):
    input_keys_dict = {
        "DATE" : "",
        "YEAR1" : "", 
        "YEAR1_TYPE": "",
        "YEAR2" : "",
        "YEAR2_TYPE":  "",
        "YEAR3" : "",
        "YEAR3_TYPE" : "",
        "YEAR4" : "",
        "YEAR4_TYPE" : ""}

    for input_key in input_keys_dict.keys():
        if input_key in row.keys():
            if input_key in ["YEAR1_TYPE", "YEAR2_TYPE", "YEAR3_TYPE", "YEAR4_TYPE"]:
                input_keys_dict[input_key] = int(row[input_key])
            else:
                input_keys_dict[input_key] = row[input_key]
        else:
            input_keys_dict[input_key] = None

    # Returns:
    datePub = None
    datePhoto = None
    dateSurvey = None
    dateReprint = None
    date = None

    # Set datePub
    if not input_keys_dict is None:
        datePub = input_keys_dict["DATE"]
    else:
        datePub = "9999" # There should ALWAYS be a "date" from Geodex. If we see this, we know something is very wrong.

        # Check for Survey dates:
    if input_keys_dict["YEAR1_TYPE"] in oim_date_dict["dateSurvey"]:
        dateSurvey = input_keys_dict["YEAR1"]
    elif input_keys_dict["YEAR2_TYPE"] in oim_date_dict["dateSurvey"]:
        dateSurvey = input_keys_dict["YEAR2"]
    elif input_keys_dict["YEAR3_TYPE"] in oim_date_dict["dateSurvey"]:
        dateSurvey = input_keys_dict["YEAR3"]
    elif input_keys_dict["YEAR4_TYPE"] in oim_date_dict["dateSurvey"]:
        dateSurvey = input_keys_dict["YEAR4"]
    else:
        dateSurvey = None

    # Check for Photo dates:
    if input_keys_dict["YEAR1_TYPE"] in oim_date_dict["datePhoto"]:
        datePhoto = input_keys_dict["YEAR1"]
    elif input_keys_dict["YEAR2_TYPE"] in oim_date_dict["datePhoto"]:
        datePhoto = input_keys_dict["YEAR2"]
    elif input_keys_dict["YEAR3_TYPE"] in oim_date_dict["datePhoto"]:
        datePhoto = input_keys_dict["YEAR3"]
    elif input_keys_dict["YEAR4_TYPE"] in oim_date_dict["datePhoto"]:
        datePhoto = input_keys_dict["YEAR4"]
    else:
        datePhoto = None

    # Check for Reprint Dates:
    # None (we probably won't use this field, if we do, just mimic the Photo or Survey dates

    # Check for Misc dates (date)
    if input_keys_dict["YEAR1_TYPE"] in oim_date_dict["date"]:
        date = input_keys_dict["YEAR1"]
    elif input_keys_dict["YEAR2_TYPE"] in oim_date_dict["date"]:
        date = input_keys_dict["YEAR2"]
    elif input_keys_dict["YEAR3_TYPE"] in oim_date_dict["date"]:
        date = input_keys_dict["YEAR3"]
    elif input_keys_dict["YEAR4_TYPE"] in oim_date_dict["date"]:
        date = input_keys_dict["YEAR4"]
    else:
        date = None
    
    return datePub, datePhoto, dateSurvey, dateReprint, date

# print an example of the output:
print(define_dates_from_gdx(input_data[9]))


('1968', None, None, None, '1967')


# Main Function

`gdx_to_oim_row_converter` is the main function and will run the date function above within it.

`output_dict` will hold the data as it's generated for each row.

`ordered_field_map` pulls the field map from above and orders it

The first `try` section runs the `define_dates_from_gdx()` dates function on the input row and grabs the values of the 5 OIM date fields, but only if there is data.

The next section grabs the rest of the fields. Fields that require special treament like X and Y fields and fields with coded domain values (Defined in the dictionaries above) are treated separately.

At the bottom of this second try block, all 1-1 fields are handled with a simple statement: `output_dict[key] = row[ordered_field_map[key]]`



In [9]:
def gdx_to_oim_row_converter(row, field_map):
    output_dict = {}
    ordered_field_map = OrderedDict(field_map)
    
    # The date fields:

    # define_dates_from_gdx(row)
    try:
        dates = define_dates_from_gdx(row)

        if not dates[0] is None:
            output_dict["datePub"] = dates[0]

        if not dates[1] is None:
            output_dict["datePhoto"] = dates[1]

        if not dates[2] is None:
            output_dict["dateSurvey"] = dates[2]

        if not dates[3] is None:
            output_dict["dateReprint"] = dates[3]

        if not dates[4] is None:
            output_dict["date"] = dates[4]
    except:
        print("There was a problem with the dates")

    # The rest of the non-date fields:
    try:
        for key in ordered_field_map:
            # print(ordered_field_map[key])
            if not ordered_field_map[key] is None:
                # location fields (geometry object not constructed here)
                if ordered_field_map[key] in ["X1", "X2", "Y1", "Y2"]:
                    output_dict[key] = float(row[ordered_field_map[key]])
                
                # available
                elif ordered_field_map[key] == "HOLD" and "HOLD" in row.keys():
                    if str(row[ordered_field_map[key]]) == "1":
                        output_dict[key] = True
                    else:
                        output_dict[key] = False
                
                ### Fields with controlled vocabularies (Defined in dictionaries above.)

                # Color
                elif ordered_field_map[key] == "PRODUCTION":
                    if "PRODUCTION" in row.keys():
                        production_type = int(row[ordered_field_map[key]]) # Get the integer of the PRODUCTION Type from GDX
                        output_dict[key] = production_dict[production_type] # Use that integer to pull the string from the dictionary
                    else:
                        exit

                    # The code above is more general than this:
                    #if str(row[ordered_field_map[key]]) == "31":
                    #    output_dict[key] = "Printed map - colored"
                    #elif str(row[ordered_field_map[key]]) == "32":
                    #    output_dict[key] = "Printed map - 2 color"
                    #elif str(row[ordered_field_map[key]]) == "33":
                    #    output_dict[key] = "Printed map - monochrome"           
                
                # Map Type (There is no OIM field for this, we will use mapType)
                elif ordered_field_map[key] == "MAP_TYPE":
                    if "MAP_TYPE" in row.keys():
                        map_type = int(row[ordered_field_map[key]]) # Get the integer of the MAP_TYPE Type from GDX
                        output_dict[key] = map_type_dict[map_type] # Use that integer to pull the string from the dictionary
                    else:
                        exit

                # Projection
                elif ordered_field_map[key] == "PROJECT":
                    if "PROJECT" in row.keys():
                        proj_type = int(row[ordered_field_map[key]]) # Get the integer of the PROJECT(ion) Type from GDX
                        output_dict[key] = projection_dict[proj_type] # Use that integer to pull the string from the dictionary
                    else:
                        exit

                # Prime Meridian
                elif ordered_field_map[key] == "PRIME_MER":
                    if "PRIME_MER" in row.keys():
                        prime_mer_type = int(row[ordered_field_map[key]])
                        output_dict[key] = primeMeridian_dict[prime_mer_type]
                    else:
                        exit

                # Locations!
                # no locations = null, 1 location = string, 2+ locations = array
                elif ordered_field_map[key] == "LOCATIONS":
                    locations_list = row[ordered_field_map[key]].split('|')       
                    if len(locations_list) == 1:
                        if locations_list[0] == "":
                            output_dict[key] = None
                        else:
                            output_dict[key] = locations_list[0]
                    else:
                        output_dict[key] = locations_list
                
                # Content DM Link in digihold field:
                elif ordered_field_map[key] == "CDMURL":
                    output_dict[key] = r'https://collections.lib.uwm.edu/digital/collection/' + row[ordered_field_map[key]]                   
                
                # Institution
                elif ordered_field_map[key] == "inst":
                    output_dict[key] = r'Univeristy of Wisconsin-Milwaukee Libraries - AGSL'

                # Scale (adds the 1: to the start of the string)
                elif ordered_field_map[key] == "SCALE":
                    output_dict[key] = '1:' + row[ordered_field_map[key]]

                # For normal string fields:
                else:    
                    output_dict[key] = row[ordered_field_map[key]]
                
    except:
        print("Something went wrong when trying to run a row through the gdx_to_oim_row_converter function!")
        raise
    
    try:
        ordered_output_dict = OrderedDict(output_dict)
    except:
        print("The script was unable to order the output dict from the gdx_to_oim_row_converter function!")
        raise
    
    try:
        output_json = json.dumps(ordered_output_dict, indent=4)
    except:
        print("error writing the json")
        raise
        
    return output_json, ordered_output_dict

# Calling the function on one record to test:
output_json = gdx_to_oim_row_converter(input_data[9], field_map)[0]


# Print to test:
print(output_json)


{
    "datePub": "1968",
    "date": "1967",
    "label": "5",
    "labelAlt": "[NETANYA] (HEBR)",
    "west": 34.785,
    "east": 35.31583,
    "north": 32.57111,
    "south": 32.2125,
    "scale": "1:100000",
    "color": "Printed map - colored",
    "mapType": "Topo map (contours)",
    "publisher": "Survey of Israel.",
    "primeMer": "Greenwich PM",
    "available": true,
    "digHold": "https://collections.lib.uwm.edu/digital/collection/agdm/id/25016/",
    "inst": "Univeristy of Wisconsin-Milwaukee Libraries - AGSL",
    "recId": "am002711"
}


# Define the Geometry

Although the north, south, east, and west OIM fields have been populated with text, we also need to define the geometry object using those values.

A simple boudning box is constructed using the GeoJSON standard.

In [10]:
# Define the geometry for each row

def define_geometry_from_gdx(row):
    # round to 6 decimal points, because there is no need for more than that! (~10 cm resolution)
    n = round(float(row["Y1"]), 6)
    s = round(float(row["Y2"]), 6)
    w = round(float(row["X1"]), 6)
    e = round(float(row["X2"]), 6)
    
    coordinates = '''[
    [
        [
            [
                {long_nw},
                {lat_nw}
            ],
            [
                {long_sw},
                {lat_sw}
            ],
            [
                {long_se},
                {lat_se}
            ],
            [
                {long_ne},
                {lat_ne}
            ],
            [
                {long_nw},
                {lat_nw}
            ]
        ]
    ]
]'''.format(long_nw = w,
           lat_nw = n,
           long_sw = w,
           lat_sw = s,
           long_se = e,
           lat_se = s,
           long_ne = e,
           lat_ne = n
    )
    
    return coordinates

# Feature Writer:

This combines all the above functions into a single function that will run for each feature.

The function takes two arguments, the row of data and a flag for if the row is the last row. This is useful for excluding a comma after the last feature.

In [11]:

def features_writer(row, last=0):
    features_string ='''{{
      "type": "Feature",
      "id": "{fid}",
      "geometry": {{
        "type": "MultiPolygon",
        "coordinates": {cor}}},
        "geometry_name": "geom",
        "properties": {prop}
        }}{com}
        '''
    print(features_string)
    
    com = ","
    if last == 1:
        com = ""
    
    feature = features_string.format(fid = row["AGSLSCAN"],
                                     com = com,
                                     cor = define_geometry_from_gdx(row), 
                                     prop = gdx_to_oim_row_converter(row, field_map)[0])
        
    return feature

output = features_writer(input_data[10])
print(output)



{{
      "type": "Feature",
      "id": "{fid}",
      "geometry": {{
        "type": "MultiPolygon",
        "coordinates": {cor}}},
        "geometry_name": "geom",
        "properties": {prop}
        }}{com}
        
{
      "type": "Feature",
      "id": "am002712",
      "geometry": {
        "type": "MultiPolygon",
        "coordinates": [
    [
        [
            [
                35.31556,
                32.57278
            ],
            [
                35.31556,
                32.21333
            ],
            [
                35.74194,
                32.21333
            ],
            [
                35.74194,
                32.57278
            ],
            [
                35.31556,
                32.57278
            ]
        ]
    ]
]},
        "geometry_name": "geom",
        "properties": {
    "datePub": "1976",
    "date": "1976",
    "label": "6",
    "labelAlt": "BET SHE'AN",
    "west": 35.31556,
    "east": 35.74194,
    "north": 32.57278,
 

# Set up the GeoJSON file

Opens the geoJSON file

Writes some header text.

Loops through the features (flags the last one) and puts them through `feature_writer()` function

writes footer text 

closes the JSON

In [12]:

geojson_file = open(geojson_file_path, 'w')

# {} are escapped {{}}
output_text_1 = """{
  "type": "FeatureCollection",
  "features": [
  """

geojson_file.write(output_text_1)

i = 0
while i < len(input_data)-1:
    feature_text = features_writer(input_data[i])
    geojson_file.write(feature_text)
    i += 1
        
geojson_file.write(features_writer(input_data[len(input_data)-1], last=1))

output_text_2 = """
  ]
}"""

geojson_file.write(output_text_2)


geojson_file.close()





{{
      "type": "Feature",
      "id": "{fid}",
      "geometry": {{
        "type": "MultiPolygon",
        "coordinates": {cor}}},
        "geometry_name": "geom",
        "properties": {prop}
        }}{com}
        
{{
      "type": "Feature",
      "id": "{fid}",
      "geometry": {{
        "type": "MultiPolygon",
        "coordinates": {cor}}},
        "geometry_name": "geom",
        "properties": {prop}
        }}{com}
        
There was a problem with the dates
{{
      "type": "Feature",
      "id": "{fid}",
      "geometry": {{
        "type": "MultiPolygon",
        "coordinates": {cor}}},
        "geometry_name": "geom",
        "properties": {prop}
        }}{com}
        
{{
      "type": "Feature",
      "id": "{fid}",
      "geometry": {{
        "type": "MultiPolygon",
        "coordinates": {cor}}},
        "geometry_name": "geom",
        "properties": {prop}
        }}{com}
        
There was a problem with the dates
{{
      "type": "Feature",
      "id": "{fid}

### Notes on CDM API:

Base URL to the server: https://server17272.contentdm.oclc.org

dmwebservices root: https://server17272.contentdm.oclc.org/dmwebservices/index.php?q=

Example that get's the alias of a collection, in this case, the map colleciton: https://server17272.contentdm.oclc.org/dmwebservices/index.php?q=dmGetCollectionParameters/agdm/json

Getting a list of items ina  compound object: https://server17272.contentdm.oclc.org/dmwebservices/index.php?q=dmGetCompoundObjectInfo/agdm/28275/json

The ID number for an object, such as 28275 for the cuba set above, is a unique ID at the collection level.  In this case, 28275 is the ID for the whole compound object and each item within the compound object has it's own unique ID.

