In [62]:
import pandas as pd
import geopandas as gpd
import requests
import zipfile
import json
import numpy as np
from shapely.geometry import Point

In [63]:
#define the urls for the API call
#one from GeoCommons using CKAN API, one from GeoCommons using ArcGis REST API, and one from NDAWN website using developer tools

MNGeo_CKAN_outdoor_heritage_zip =("https://resources.gisdata.mn.gov/pub/gdrs/data/pub/us_mn_state_lcc/env_lsohcsections2016/geojson_env_lsohcsections2016.zip")

MNGeo_ArcGIS_EAB_zip = ("https://gis.mda.state.mn.us/arcgis/rest/services/MDA_EAB/MDA_EAB/MapServer/2/query?where=1%3D1&text=&objectIds=&time=&timeRelation=esriTimeRelationOverlaps&geometry=&geometryType=esriGeometryPoint&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=&returnGeometry=true&returnTrueCurves=false&maxAllowableOffset=&geometryPrecision=&outSR=&havingClause=&returnIdsOnly=false&returnCountOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&gdbVersion=&historicMoment=&returnDistinctValues=false&resultOffset=&resultRecordCount=&returnExtentOnly=false&sqlFormat=none&datumTransformation=&parameterValues=&rangeValues=&quantizationParameters=&featureEncoding=esriDefault&f=pjson")

NDAWN_URL_csv = ("https://ndawn.ndsu.nodak.edu/table.csv?station=95&variable=ydavt&ttype=yearly")

In [64]:
#reading in the ArcGIS REST API JSON data but not joining it - that'll be the CKAN and NDAWN

#make the API call
EAB_response = requests.get(MNGeo_ArcGIS_EAB_zip) #not actually a zip, misnamed
MNGeo_Arc_EAB_JSON = EAB_response.json()

#normalize json format
features = MNGeo_Arc_EAB_JSON['features']
MNGeo_Arc_EAB_Norm = pd.json_normalize(features)

#set geom before making gdf
geometry = [Point(np.array([x,y])) for x, y in zip(MNGeo_Arc_EAB_Norm['geometry.x'], MNGeo_Arc_EAB_Norm['geometry.y'])]
MNGeo_Arc_EAB_Norm['geometry'] = geometry

#make gdf
MNGeo_EAB_gdf = gpd.GeoDataFrame(MNGeo_Arc_EAB_Norm, geometry='geometry', crs="EPSG:4326")
print(MNGeo_EAB_gdf.head())

  attributes.commonname   geometry.x    geometry.y  \
0     emerald ash borer  484429.7874  4.978984e+06   
1     emerald ash borer  484539.7286  4.979115e+06   
2     emerald ash borer  484540.0214  4.979237e+06   
3     emerald ash borer  484527.1428  4.979129e+06   
4     emerald ash borer  484771.2307  4.979297e+06   

                             geometry  
0  POINT (484429.78740 4978983.55640)  
1  POINT (484539.72860 4979115.48350)  
2  POINT (484540.02140 4979236.56800)  
3  POINT (484527.14280 4979128.84450)  
4  POINT (484771.23070 4979297.11110)  


  arr = construct_1d_object_array_from_listlike(values)


In [65]:
#the geocommons CKAN API gives a zip file, first extract it

#make the API call to get the content
CKAN_response = requests.get(MNGeo_CKAN_outdoor_heritage_zip)

#save zip locally
MNGeo_CKAN_OH = "MNGeo_CKAN_OH.zip"
with open(MNGeo_CKAN_OH, "wb") as file:
    file.write(CKAN_response.content)

print("downloaded zip: {MNGeo_CKAN_OH}")

#extract zip so its correctly utf-8 encoded
with zipfile.ZipFile(MNGeo_CKAN_OH) as zip_ref:
    zip_ref.extractall()

print("extracted CKAN zipfile to wd")

downloaded zip: {MNGeo_CKAN_OH}
extracted CKAN zipfile to wd


In [66]:
#great it worked, and the geojson is in my wd. time to create a geo data frame for it
OH_GeoJSON = gpd.read_file("lsohc_sections.geojson")
MNGeo_OH_gdf = gpd.GeoDataFrame(data=OH_GeoJSON, crs="EPSG:4326")
MNGeo_OH_gdf

Unnamed: 0,OBJECTID,name,Area,Perimeter,Acres,PerFeet,x,y,Shape_Leng,Shape_Area,id,geometry
0,1,Prairie,74227440000.0,2893120.16,18341599.75,9491864.05,320137.34,4995371.19,2893120.0,74227440000.0,1,"POLYGON ((-96.97718 48.98688, -96.97638 48.986..."
1,2,Southeast Forest,10713820000.0,763812.66,2647384.18,2505947.04,567489.58,4873503.14,763812.7,10713820000.0,2,"POLYGON ((-92.81276 44.75130, -92.81138 44.750..."
2,3,Metropolitan Urbanizing Area,13318880000.0,1044004.34,3291096.41,3425211.09,455462.07,4989220.13,1044004.0,13318880000.0,3,"POLYGON ((-93.13790 45.68564, -93.13774 45.685..."
3,4,Northern Forest,93741290000.0,2816239.42,23163471.9,9239630.64,462730.64,5247602.68,2816239.0,93741290000.0,4,"POLYGON ((-95.05840 49.35317, -95.04938 49.353..."
4,5,Forest/Prairie Transition,26548690000.0,2263843.93,6560181.91,7427309.48,314885.87,5072966.91,2263844.0,26548690000.0,5,"POLYGON ((-96.40549 48.99998, -96.40508 48.999..."


In [67]:
#pulling in the NDAWN csv
NDAWN_csv = pd.read_csv(NDAWN_URL_csv, skiprows=3)#need to skip top 3 rows of metadata to read csv
NDAWN_csv = NDAWN_csv.drop(index=0,columns=["Number Missing", "Number Estimated"])#drop headers
print(NDAWN_csv.head())

  Station Name  Latitude  Longitude Elevation    Year Avg Temp
1     Williams  48.85841  -94.98082      1093  2017.0  37.9970
2     Williams  48.85841  -94.98082      1093  2018.0  36.7390
3     Williams  48.85841  -94.98082      1093  2019.0  35.3600
4     Williams  48.85841  -94.98082      1093  2020.0  37.9580
5     Williams  48.85841  -94.98082      1093  2021.0  40.0760


In [68]:
#set point geometry (just used the same way as for the Arc REST API again)
#but first lat and long need to be float
NDAWN_csv['Latitude'] = NDAWN_csv['Latitude'].astype(float)
NDAWN_csv['Longitude'] = NDAWN_csv['Longitude'].astype(float)

#create geom column like before
NDAWNgeometry = [Point(np.array([x,y])) for x, y in zip(NDAWN_csv['Latitude'], NDAWN_csv['Longitude'])]
NDAWN_csv['geometry'] = NDAWNgeometry

                                        
#make gdf
NDAWN_gdf = gpd.GeoDataFrame(NDAWN_csv, geometry='geometry', crs="EPSG:4326")
NDAWN_gdf

  arr = construct_1d_object_array_from_listlike(values)


Unnamed: 0,Station Name,Latitude,Longitude,Elevation,Year,Avg Temp,geometry
1,Williams,48.85841,-94.98082,1093,2017.0,37.997,POINT (48.85841 -94.98082)
2,Williams,48.85841,-94.98082,1093,2018.0,36.739,POINT (48.85841 -94.98082)
3,Williams,48.85841,-94.98082,1093,2019.0,35.36,POINT (48.85841 -94.98082)
4,Williams,48.85841,-94.98082,1093,2020.0,37.958,POINT (48.85841 -94.98082)
5,Williams,48.85841,-94.98082,1093,2021.0,40.076,POINT (48.85841 -94.98082)
6,Williams,48.85841,-94.98082,1093,2022.0,35.4,POINT (48.85841 -94.98082)
7,Williams,48.85841,-94.98082,1093,2023.0,40.143,POINT (48.85841 -94.98082)


In [69]:
#now, spatial join CKAN OH geodataframe with the NDAWN geodataframe
NDAWN_and_CKAN_gdf = gpd.sjoin(NDAWN_gdf, MNGeo_OH_gdf, how="right", predicate="within")
print(NDAWN_and_CKAN_gdf.head())

   index_left Station Name  Latitude  Longitude Elevation  Year Avg Temp  \
0         NaN          NaN       NaN        NaN       NaN   NaN      NaN   
1         NaN          NaN       NaN        NaN       NaN   NaN      NaN   
2         NaN          NaN       NaN        NaN       NaN   NaN      NaN   
3         NaN          NaN       NaN        NaN       NaN   NaN      NaN   
4         NaN          NaN       NaN        NaN       NaN   NaN      NaN   

   OBJECTID                          name          Area   Perimeter  \
0         1                       Prairie  7.422744e+10  2893120.16   
1         2              Southeast Forest  1.071382e+10   763812.66   
2         3  Metropolitan Urbanizing Area  1.331888e+10  1044004.34   
3         4               Northern Forest  9.374129e+10  2816239.42   
4         5     Forest/Prairie Transition  2.654869e+10  2263843.93   

         Acres     PerFeet          x           y    Shape_Leng    Shape_Area  \
0  18341599.75  9491864.05  320137.

In [70]:
#so, it joins. but there's no data from the point NDAWN gdf
#if i try an inner join it returns an empty gdf
#i could do a full outer join?
#i tried the same with the MNGeo_EAB_gdf to the MNGeo_OH_gdf with same results: attributes will show but have NaN values
#i did print a head of the table showing the merged attributes tho...

In [71]:
#create a gpkg for the merged table
lab1_gpkg = 'lab1.gpkg'

In [75]:
#save table to gpkg
NDAWN_and_CKAN_gdf.to_file(lab1_gpkg, layer="NDAWN_and_CKAN_gdf", driver = "GPKG")