# Example of a PAGES2k query and data transformation

For integration into ClimateDisk, we need to be able to query the LinkedEarth GraphDB directly and create a pickle file in the format that cfr expects.

In [66]:
import json
import requests
import wget
import lipd as lpd
import pandas as pd
import io
import ast
import numpy as np

## Example pickle file

In [86]:
# Load the reference dataset and preview its first rows.
# NOTE: unpickling can execute arbitrary code; only open pickle files from a trusted source.
df_pickle = pd.read_pickle('data/pages2k_dataset.pkl')
df_pickle.head(n=5)

Unnamed: 0,paleoData_pages2kID,dataSetName,archiveType,geo_meanElev,geo_meanLat,geo_meanLon,year,yearUnits,paleoData_variableName,paleoData_units,paleoData_values,paleoData_proxy
0,NAm_153,NAm-Bennington.Luckman.2001,tree,1850.0,52.7,-118.3,"[1563.0, 1564.0, 1565.0, 1566.0, 1567.0, 1568....",AD,trsgi,,"[0.883, 0.942, 1.043, 0.945, 1.141, 0.715, 0.6...",TRW
1,Asi_245,Asi-SouthChina.Wang.1998,documents,2200.0,23.0,114.0,"[1500.0, 1510.0, 1520.0, 1530.0, 1540.0, 1550....",AD,temperature,index,"[0.157429639092533, -0.118646665464906, 0.3035...",historic
2,NAm_165,NAm-RedMountainPassSilverton.Graybill.1994,tree,3400.0,37.9,-107.7,"[1626.0, 1627.0, 1628.0, 1629.0, 1630.0, 1631....",AD,MXD,,"[1.071, 1.014, 1.014, 0.966, 1.005, 1.072, 1.0...",MXD
3,Asi_178,Asi-BURGPW.PAGES2k.2013,tree,2500.0,28.77,83.73,"[1303.0, 1304.0, 1305.0, 1306.0, 1307.0, 1308....",AD,trsgi,,"[0.889, 0.788, 0.836, 0.741, 0.613, 0.732, 0.7...",TRW
4,Asi_174,Asi-NEPA029.Krusic.2013,tree,4000.0,28.18,85.43,"[1559.0, 1560.0, 1561.0, 1562.0, 1563.0, 1564....",AD,trsgi,,"[1.172, 1.21, 1.145, 1.307, 1.368, 1.37, 1.152...",TRW


Let's get the names of the columns:

In [87]:
# Column labels of the reference pickle, as a plain Python list.
df_pickle.columns.tolist()

['paleoData_pages2kID',
 'dataSetName',
 'archiveType',
 'geo_meanElev',
 'geo_meanLat',
 'geo_meanLon',
 'year',
 'yearUnits',
 'paleoData_variableName',
 'paleoData_units',
 'paleoData_values',
 'paleoData_proxy']

## Using the graph database

Create the query (note some fields have been altered following legacy code from the original LinkedEarth work; we need to fix this):

In [92]:
# SPARQL endpoint of the LiPDVerse repository on the LinkedEarth GraphDB server.
url = 'https://linkedearth.graphdb.mint.isi.edu/repositories/LiPDVerse'

# SPARQL SELECT over the named graph <http://linked.earth/lipd/Pages2k2_1_2>.
# For each dataset (?ds) it matches:
#   - the collection site's latitude, longitude and altitude (?lat, ?lon, ?alt),
#   - every measurement-table variable (?var) flagged useInGlobalTemperatureAnalysis true,
#     with its values, name and units (?val, ?varname, ?varunits),
#   - the variable named "year" in the same table, with its values and units
#     (?timeval, ?timeunits),
#   - the dataset's archive type (?archive).
# NOTE: ?proxy and ?pages2kID appear in the SELECT list, but the triple patterns that
# would bind them are commented out inside the query, so nothing binds them.
# NOTE: ?url is matched (le:hasUrl) but is not part of the SELECT list.
query = """PREFIX le: <http://linked.earth/ontology#>
select ?ds ?val ?timeval ?archive ?proxy ?pages2kID ?timeunits ?varname ?varunits ?lat ?lon ?alt FROM <http://linked.earth/lipd/Pages2k2_1_2> where { 
	?ds  le:includesPaleoData ?data .
    ?ds le:hasUrl ?url .
    ?ds  le:collectedFrom ?loc .
    ?loc le:wgs84_Lat ?lat .
    ?loc le:wgs84_Long ?lon .
    ?loc le:wgs84_Alt ?alt .
	?data le:foundInMeasurementTable ?table .
	?table le:includesVariable ?var .
    ?var le:useInGlobalTemperatureAnalysis true .
    ?var le:hasValues ?val .
    ?var le:name ?varname . 
    ?var le:hasUnits ?varunits .
    ?table le:includesVariable ?timevar .
    ?timevar le:name "year" .
    ?timevar le:hasValues ?timeval .
    ?ds le:proxyArchiveType ?archive .
    ?timevar le:hasUnits ?timeunits .
    #?var le:proxy ?proxy .
    #?var le:pages2kID ?pages2kID .
}"""

In [93]:
# Submit the SPARQL query to the GraphDB repository endpoint as form data.
# - Accept: text/csv asks explicitly for CSV, the format the next cell parses.
# - timeout keeps the notebook from hanging indefinitely on an unresponsive server.
response = requests.post(
    url,
    data={'query': query},
    headers={'Accept': 'text/csv'},
    timeout=300,
)
# Stop here on an HTTP error instead of parsing an error page as CSV further down.
response.raise_for_status()

In [94]:
# Expose the response body as an in-memory text buffer so pandas can read it like a file.
data = io.StringIO(response.text)
# The comma is read_csv's default separator, so it does not need to be spelled out.
df = pd.read_csv(data)

df

Unnamed: 0,ds,val,timeval,archive,proxy,pages2kID,timeunits,varname,varunits,lat,lon,alt
0,http://linked.earth/lipd/Pages2k2_1_2#Ant-WDC0...,"[-33.32873325, -35.6732, -33.1574, -34.2854, -...","[2005, 2004, 2003, 2002, 2001, 2000, 1999, 199...",glacier ice,,,AD,d18O,permil,-79.460,-112.090,1806.0
1,http://linked.earth/lipd/Pages2k2_1_2#Ocn-Mayo...,"[-4.72, -4.95, -5.45, -5.157, -5.05, -5.065, -...","[1993.62, 1993.46, 1993.29, 1993.12, 1992.96, ...",coral,,,AD,d18O,permil,-12.650,45.100,-3.0
2,http://linked.earth/lipd/Pages2k2_1_2#Ocn-Mayo...,"[8.6, 8.701, 8.802, 8.84, 8.877, 8.804, 8.588,...","[1994.29, 1994.12, 1993.96, 1993.79, 1993.62, ...",coral,,,AD,Sr_Ca,mmol/mol,-12.650,45.100,-3.0
3,http://linked.earth/lipd/Pages2k2_1_2#Ocn-Mayo...,"[0.534, 0.535, 0.721, 0.614, 0.458, 0.033, 0.3...","[1993.62, 1993.46, 1993.29, 1993.12, 1992.96, ...",coral,,,AD,d18O,permil,-12.650,45.100,-3.0
4,http://linked.earth/lipd/Pages2k2_1_2#Ocn-LosR...,"[-3.8123, -3.73082, -3.74912, -3.6656, -3.6995...","[1917.79, 1917.87, 1917.96, 1918.04, 1918.12, ...",coral,,,AD,d18O,permil,11.770,-66.750,-2.0
...,...,...,...,...,...,...,...,...,...,...,...,...
194,http://linked.earth/lipd/Pages2k2_1_2#Ocn-Nort...,"[-4.15, -3.66, -3.69, -4.07, -3.95, -4.12, -3....","[1983.21, 1983.13, 1983.04, 1982.96, 1982.88, ...",coral,,,AD,d18O,permil,32.467,-64.700,-12.0
195,http://linked.earth/lipd/Pages2k2_1_2#Ocn-Nort...,"[9.08, 9.17, 9.14, 9.14, 9.17, 9.09, 9.08, 9.1...","[1983.21, 1983.13, 1983.04, 1982.96, 1982.88, ...",coral,,,AD,Sr_Ca,mmol/mol,32.467,-64.700,-12.0
196,http://linked.earth/lipd/Pages2k2_1_2#Afr-Cold...,"[0.459055, 0.4062401, 0.3621507, 0.3517711, 0....","[1992, 1991, 1990, 1989, 1988, 1987, 1986, 198...",speleothem,,,AD,d18O,permil,-24.000,29.180,1450.0
197,http://linked.earth/lipd/Pages2k2_1_2#Ocn-Tong...,"[0.356, -0.053, 0.639, 0.714, 0.6, 0.586, 0.58...","[1890, 1891, 1892, 1893, 1894, 1895, 1896, 189...",sclerosponge,,,AD,d18O,permil,23.504,-76.577,-143.0


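Let's fix the dataframe: parse the stringified value and time lists into Python lists, and strip the graph URI prefix from the dataset names.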

In [97]:
def _parse_list_literal(cell):
    """Turn a stringified list such as "[1, 2, 3]" into a Python list.

    Non-string cells (for example lists parsed by a previous run of this cell)
    are returned unchanged, so the cell can be re-run without
    ast.literal_eval raising on an already-parsed value.
    """
    return ast.literal_eval(cell) if isinstance(cell, str) else cell


# URI prefix of the Pages2k named graph that precedes every dataset name in `ds`.
DATASET_URI_PREFIX = 'http://linked.earth/lipd/Pages2k2_1_2#'

# The CSV export delivers the value/time arrays as strings; convert them to lists.
df['val'] = df['val'].apply(_parse_list_literal)
df['timeval'] = df['timeval'].apply(_parse_list_literal)
# Keep only the dataset name; regex=False treats the prefix as literal text.
df['ds'] = df['ds'].str.replace(DATASET_URI_PREFIX, '', regex=False)

df

Unnamed: 0,ds,val,timeval,archive,proxy,pages2kID,timeunits,varname,varunits,lat,lon,alt
0,Ant-WDC05A.Steig.2013,"[-33.32873325, -35.6732, -33.1574, -34.2854, -...","[2005, 2004, 2003, 2002, 2001, 2000, 1999, 199...",glacier ice,,,AD,d18O,permil,-79.460,-112.090,1806.0
1,Ocn-Mayotte.Zinke.2008,"[-4.72, -4.95, -5.45, -5.157, -5.05, -5.065, -...","[1993.62, 1993.46, 1993.29, 1993.12, 1992.96, ...",coral,,,AD,d18O,permil,-12.650,45.100,-3.0
2,Ocn-Mayotte.Zinke.2008,"[8.6, 8.701, 8.802, 8.84, 8.877, 8.804, 8.588,...","[1994.29, 1994.12, 1993.96, 1993.79, 1993.62, ...",coral,,,AD,Sr_Ca,mmol/mol,-12.650,45.100,-3.0
3,Ocn-Mayotte.Zinke.2008,"[0.534, 0.535, 0.721, 0.614, 0.458, 0.033, 0.3...","[1993.62, 1993.46, 1993.29, 1993.12, 1992.96, ...",coral,,,AD,d18O,permil,-12.650,45.100,-3.0
4,Ocn-LosRoques.Hetzinger.2008,"[-3.8123, -3.73082, -3.74912, -3.6656, -3.6995...","[1917.79, 1917.87, 1917.96, 1918.04, 1918.12, ...",coral,,,AD,d18O,permil,11.770,-66.750,-2.0
...,...,...,...,...,...,...,...,...,...,...,...,...
194,Ocn-NorthEastBreakers_Bermuda.Kuhnert.2005,"[-4.15, -3.66, -3.69, -4.07, -3.95, -4.12, -3....","[1983.21, 1983.13, 1983.04, 1982.96, 1982.88, ...",coral,,,AD,d18O,permil,32.467,-64.700,-12.0
195,Ocn-NorthEastBreakers_Bermuda.Kuhnert.2005,"[9.08, 9.17, 9.14, 9.14, 9.17, 9.09, 9.08, 9.1...","[1983.21, 1983.13, 1983.04, 1982.96, 1982.88, ...",coral,,,AD,Sr_Ca,mmol/mol,32.467,-64.700,-12.0
196,Afr-ColdAirCave.Sundqvist.2013,"[0.459055, 0.4062401, 0.3621507, 0.3517711, 0....","[1992, 1991, 1990, 1989, 1988, 1987, 1986, 198...",speleothem,,,AD,d18O,permil,-24.000,29.180,1450.0
197,Ocn-TongueoftheOcean.Rosenheim.2005,"[0.356, -0.053, 0.639, 0.714, 0.6, 0.586, 0.58...","[1890, 1891, 1892, 1893, 1894, 1895, 1896, 189...",sclerosponge,,,AD,d18O,permil,23.504,-76.577,-143.0
