In [1]:
from datetime import datetime
import itertools

import folium
import geopandas as gpd
import numpy as np
import pandas as pd
from pysimplesoap.client import SoapClient
import pytz
from shapely.geometry import Point
from suds.client import Client
import untangle

%matplotlib inline

In [2]:
def event_client_datetime(event, tz_server, tz_client='Europe/Amsterdam'):
    """
    Get a datetime object in client time of an XML element named event with attributes date and time
    input:
    event     : XML element named event, indexable by 'date' ('yyyy-MM-dd') and
                'time' ('HH:mm:ss') [eg: obj.TimeSeries.series.event[0]]
    tz_server : name of the server timezone [eg: 'Etc/GMT']
    tz_client : name of the client timezone [eg: 'Europe/Amsterdam']

    return
    event_client_time : a timezone-aware datetime object of the event in the client timezone

    """
    # convert the XML attribute strings to integers
    year, month, day = map(int, event['date'].split('-')[:3])      # yyyy, MM, dd
    hour, minute, second = map(int, event['time'].split(':')[:3])  # HH, mm, ss

    # Attach the server timezone with localize(). Handing a pytz zone to the
    # datetime constructor via tzinfo= uses the zone's first known offset (often
    # local mean time) for regional zones, which shifts the result by minutes.
    naive_server_time = datetime(year, month, day, hour, minute, second)
    server_time = pytz.timezone(tz_server).localize(naive_server_time)

    # express the same instant in the client timezone
    event_client_time = server_time.astimezone(pytz.timezone(tz_client))

    return event_client_time

In [3]:
# link to fewsPI Service
# WSDL address of the FEWS PI web service; both SOAP clients below are built from it
url = 'http://www.oms-waddenzee.nl:8081/FewsPiService/fewspiservice?wsdl'
# suds client: the object the later cells use to create the request (client.factory)
client = Client(url)
# a pysimplesoap client is created only to print the elements the WSDL declares
print( SoapClient(wsdl=url,trace=False) )

SOAP CLIENT
 ELEMENTS
  SOAPException {message: str}
  getFilters {filterId: str, piVersion: str}
  getFiltersResponse {return: str}
  getLocations {clientId: str, filterId: str, piVersion: str}
  getLocationsAsStream {filterId: str, piVersion: str}
  getLocationsAsStreamResponse {return: str}
  getLocationsResponse {return: str}
  getParameters {clientId: str, filterId: str, piVersion: str}
  getParametersAsStream {filterId: str, piVersion: str}
  getParametersAsStreamResponse {return: str}
  getParametersResponse {return: str}
  getSamplesAsStream {sampleQueryParams: sampleQueryParameters}
  getSamplesAsStreamResponse {return: str}
  getTaskRunStatus {taskId: str, maxWaitMillis: long}
  getTaskRunStatusResponse {return: str}
  getTimeSeries {queryParams: queryParameters}
  getTimeSeriesAsStream {queryParams: queryParameters}
  getTimeSeriesAsStreamResponse {return: str}
  getTimeSeriesForFilter {clientId: str, startTime: datetime, timeZero: datetime, endTime: datetime, filterId: str,

In [4]:
# # get TimeZone identifier from the FEWS PI Service
# timeZoneID = client.service.getTimeZoneId()
# print (timeZoneID)
# #out: 'GMT'

In [5]:
# Build the (empty) request object for the getTimeSeries operation
getTimeSeries = client.factory.create('getTimeSeries')

# Query settings, applied to queryParams one by one in the order listed
query_settings = [
    ('convertDatum', 'false'),
    ('omitMissing', 'false'),
    ('onlyHeaders', 'false'),
    ('moduleInstanceIds', 'pr.Reizen.vdLee.baggeracties'),
    ('locationIds', ['bv.1.7.2.1.1','bv.1.7.2.3.2']),
    ('parameterIds', 'k.m3.zandwinning.vdlee'),
    ('startTime', '2016-08-01T02:00:00.0+02:00'),
    ('endTime', '2017-02-01T01:00:00.0+01:00'),
    ('version', '1.22'),
]
for param_name, param_value in query_settings:
    setattr(getTimeSeries.queryParams, param_name, param_value)

# show the assembled query
print(getTimeSeries.queryParams)

# the request itself is left disabled; the data is read from a local XML export instead
#getTimeSeries_response = client.service.getTimeSeries(getTimeSeries.queryParams)

(queryParameters){
   convertDatum = "false"
   endCreationTime = None
   endForecastTime = None
   endTime = "2017-02-01T01:00:00.0+01:00"
   ensembleId = None
   externalForecastTimes[] = <empty>
   filterId = None
   forecastSearchCount = None
   importFromExternalDataSource = None
   locationIds[] = 
      "bv.1.7.2.1.1",
      "bv.1.7.2.3.2",
   moduleInstanceIds = "pr.Reizen.vdLee.baggeracties"
   omitMissing = "false"
   onlyHeaders = "false"
   parameterIds = "k.m3.zandwinning.vdlee"
   qualifierIds[] = <empty>
   showEnsembleMemberIds = None
   showStatistics = None
   showThresholds = None
   startCreationTime = None
   startForecastTime = None
   startTime = "2016-08-01T02:00:00.0+02:00"
   useDisplayUnits = None
   version = "1.22"
 }


In [None]:
getTimeSeries

In [6]:
# print (getTimeSeries_response)

In [7]:
# Server timezone identifier, hard-coded; the disabled getTimeZoneId() cell above records 'GMT' as its output
timeZoneID = 'GMT'
# NOTE(review): despite its name this is a path to a local XML file, not a service response.
# The absolute Windows path (with mixed separators) only resolves on the author's machine.
getTimeSeries_response = r'D:\Projects\Pr\3427.10\Export\GSOD//GSOD_v2.xml'
# parse the XML file into an object tree; later cells read obj.TimeSeries.series
obj = untangle.parse(getTimeSeries_response)

In [16]:
obj.TimeSeries.series

TypeError: list indices must be integers or slices, not str

In [17]:
# Gather, for every series, the station name plus the list of event values
# and the list of event flags.
series_stationNames = []
series_locationIds = []
event_datetimes = []
event_attributes = ['value','flag']

events_values = []
events_flags = []

for series in obj.TimeSeries.series:
    series_stationNames.append(series.header.stationName.cdata)

    # fresh accumulators for this series
    event_values = []
    event_flags = []
    for event in series.event:
        event_values.append(float(event['value']))
        event_flags.append(int(event['flag']))

    events_values.append(event_values)
    events_flags.append(event_flags)

# the per-series scratch lists are left empty once all series are processed
event_values = []
event_flags = []

# the timestamps are collected only once, from the series the loop above ended on
for event in series.event:
    event_datetimes.append(
        event_client_datetime(event, tz_server='Etc/'+timeZoneID, tz_client='Asia/Yangon')
    )

# [values per series, flags per series] and the (stationName, attribute) column pairs
listcombine = [events_values, events_flags]
multiColumns = list(itertools.product(series_stationNames, event_attributes))

In [18]:
listcombine

[[[1.0,
   0.7,
   0.5,
   1.2,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   0.7,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0],
  [1.6,
   1.4,
   1.0,
   1.6,
   2.2,
   -999.0,
   2.0,
   2.6,
   1.2,
   0.5,
   5.4,
   1.2,
   -999.0,
   0.5,
   0.8,
   1.2,
   1.2,
   0.6,
   1.6,
   1.4,
   -999.0,
   -999.0],
  [0.6,
   0.7,
   0.8,
   0.4,
   0.6,
   1.2,
   1.0,
   0.6,
   0.6,
   0.2,
   1.0,
   1.2,
   -999.0,
   0.6,
   0.6,
   0.6,
   -999.0,
   0.2,
   -999.0,
   0.0,
   -999.0,
   -999.0],
  [1.4,
   0.7,
   1.0,
   0.8,
   0.6,
   0.5,
   0.8,
   1.0,
   0.8,
   0.4,
   1.2,
   1.0,
   -999.0,
   0.4,
   0.8,
   0.6,
   0.7,
   0.6,
   0.6,
   0.6,
   -999.0,
   -999.0],
  [-999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999.0,
   -999

In [19]:
# Placeholder frame: one row per event timestamp, one column per station,
# with the empty cells filled with 0 instead of NaN.
df_ = pd.DataFrame(index=event_datetimes, columns=series_stationNames).fillna(0)
df_.head()

Unnamed: 0,PUTAO,HKAMTI,MYITKYINA,HOMALIN,PINLEBU,KATHA,BHAMO,MAWLAIK,KALEMYO,KALEWA,...,THATON,HPAAN,BILIN,PHYAPON,MAWLAMYINE,YE,DAWEI,COCO ISLAND,MERGUI,VICTORIA POINT
2017-04-25 06:30:00+06:30,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2017-04-26 06:30:00+06:30,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2017-04-27 06:30:00+06:30,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2017-04-28 06:30:00+06:30,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2017-04-29 06:30:00+06:30,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [115]:
# Collect all series of the parsed XML into two structures:
#   df_timeseries : one column per (moduleInstanceId, parameterId, locationId, event attribute)
#   gdf_latlon    : GeoJSON string holding one point per station
# Per-series results are gathered in a plain dict / list and converted to
# DataFrames once at the end, to avoid appending to a pandas DataFrame.
rows_ts_dict = {}
rows_latlon_list = []


def _header_cdata(header, tag):
    """Return the text of child element `tag` of `header`, or None when it is absent.

    A warning is printed for an absent element so incomplete headers stay visible.
    """
    try:
        return getattr(header, tag).cdata
    except AttributeError as e:
        print('warning:', e)
        return None


# start iteration
for series in obj.TimeSeries.series:
    header = series.header

    # Every field is read afresh for each series: a missing element becomes None
    # instead of silently keeping the value read for the previous series.
    module_instance_id = _header_cdata(header, 'moduleInstanceId')
    location_id = _header_cdata(header, 'locationId')
    lat = _header_cdata(header, 'lat')
    lon = _header_cdata(header, 'lon')
    station_name = _header_cdata(header, 'stationName')
    parameter_id = _header_cdata(header, 'parameterId')
    units = _header_cdata(header, 'units')

    # GET data values
    event_datetimes = []
    event_values = []
    event_flags = []
    for event in series.event:
        event_datetimes.append(event_client_datetime(event, tz_server='Etc/'+timeZoneID, tz_client='Asia/Yangon'))
        event_values.append(float(event['value']))
        event_flags.append(int(event['flag']))

    # PUT timeseries info into the dictionary of rows; this needs the complete column key
    if None in (module_instance_id, parameter_id, units, location_id):
        print('warning: series skipped in df_timeseries, header is incomplete')
    else:
        dataValuesFlags = [event_values, event_flags]
        multiColumns = pd.MultiIndex.from_product(
            [[module_instance_id], [parameter_id + ' (' + units + ')'], [location_id], event_attributes],
            names=['moduleInstanceIds','parameterIds','locationIds','event_attributes'])
        # transposed so that each (ids..., attribute) key maps to {datetime: number}
        df_ts_dict = pd.DataFrame(dataValuesFlags, index=multiColumns, columns=event_datetimes).T.to_dict()
        rows_ts_dict.update(df_ts_dict)

    # PUT latlon/location row in the list of rows; a point needs a name and both coordinates
    if None in (station_name, lat, lon):
        print('warning: series skipped in df_latlon, stationName/lat/lon is incomplete')
    else:
        rows_latlon_list.append({'stationName': station_name, 'Lat': float(lat), 'Lon': float(lon)})

# CREATE dataframe of timeseries rows dictionary
df_timeseries = pd.DataFrame(rows_ts_dict)

# CREATE dataframe of location rows
df_latlon = pd.DataFrame(rows_latlon_list)

# CONVERT latitude longitude to geometry points (x = Lon, y = Lat).
# df_latlon keeps its Lon/Lat columns; only the copy handed to GeoDataFrame drops
# them, so later cells can still read the coordinates.
geometry = [Point(xy) for xy in zip(df_latlon.Lon, df_latlon.Lat)]
crs = 'EPSG:4326'  # authority string; the {'init': 'epsg:4326'} dict form is deprecated
gdf_latlon = gpd.GeoDataFrame(df_latlon.drop(['Lon', 'Lat'], axis=1), crs=crs, geometry=geometry).to_json()


In [116]:
import folium
import geopandas as gpd
from shapely.geometry import Point

In [122]:
# Build the GeoJSON string that is drawn on the map.
# The location frame is rebuilt from rows_latlon_list instead of overwriting
# df_latlon: the overwrite dropped the Lon/Lat columns, so this cell failed when
# run a second time (or after another cell had already dropped them).
df_locations = pd.DataFrame(rows_latlon_list)
geometry = [Point(xy) for xy in zip(df_locations.Lon, df_locations.Lat)]  # x = Lon, y = Lat
crs = 'EPSG:4326'  # authority string; the {'init': 'epsg:4326'} dict form is deprecated
geo_df = gpd.GeoDataFrame(df_locations.drop(['Lon', 'Lat'], axis=1), crs=crs, geometry=geometry).to_json()

In [124]:
# Draw the station points (GeoJSON string in geo_df) on an interactive map.
mapa = folium.Map(zoom_start=4,
                  tiles='cartodbpositron')

points = folium.features.GeoJson(geo_df)

# add_to() is the supported way to attach a layer; add_children() is deprecated
points.add_to(mapa)

# Zoom to the extent of the points. The previous fixed centre
# ([-15.783333, -47.866667]) left the stations outside the visible area.
mapa.fit_bounds(points.get_bounds())
mapa



In [98]:
dic.update({'stationName':'y'})

In [99]:
dic

{'stationName': 'y'}

In [72]:
float(series.header.lon.cdata)

98.583

In [32]:
df.head()

Unnamed: 0_level_0,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD,ImportGSOD
Unnamed: 0_level_1,P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm),P.GSOD (mm)
Unnamed: 0_level_2,480010,480010,480040,480040,480080,480080,480100,480100,480170,480170,...,481070,481070,481080,481080,481090,481090,481100,481100,481120,481120
Unnamed: 0_level_3,flag,value,flag,value,flag,value,flag,value,flag,value,...,flag,value,flag,value,flag,value,flag,value,flag,value
2017-04-25 06:30:00+06:30,0.0,1.0,0.0,1.6,0.0,0.6,0.0,1.4,8.0,-999.0,...,0.0,1.9,0.0,3.0,0.0,1.6,0.0,2.3,0.0,2.1
2017-04-26 06:30:00+06:30,0.0,0.7,0.0,1.4,0.0,0.7,0.0,0.7,8.0,-999.0,...,0.0,1.7,0.0,3.5,0.0,3.5,0.0,3.3,0.0,2.7
2017-04-27 06:30:00+06:30,0.0,0.5,0.0,1.0,0.0,0.8,0.0,1.0,8.0,-999.0,...,0.0,2.5,8.0,-999.0,0.0,3.3,0.0,3.2,0.0,1.9
2017-04-28 06:30:00+06:30,0.0,1.2,0.0,1.6,0.0,0.4,0.0,0.8,8.0,-999.0,...,0.0,1.4,0.0,1.9,0.0,2.0,0.0,3.3,0.0,2.5
2017-04-29 06:30:00+06:30,8.0,-999.0,0.0,2.2,0.0,0.6,0.0,0.6,8.0,-999.0,...,0.0,6.0,0.0,2.2,0.0,1.4,0.0,2.1,0.0,2.1


In [None]:
df.xs(('value','480010'),axis=1,level=(3,2), drop_level=False)

In [None]:
#df.xs(('value','480010'),axis=1,level=(3,2), drop_level=False)

df.xs('480010',axis=1,level=2, drop_level=True).reset_index().to_json()

In [None]:
#ts_selected = df['ImportGSOD']['P.GSOD']['480010']['value'].where(df['ImportGSOD']['P.GSOD']['480010']['flag']!=8.0, np.nan).reset_index()
ts_selected = df['ImportGSOD']['P.GSOD']['480010']['value'].reset_index()
ts_selected.columns = ['date','value']
#ts_selected = ts_selected.stack()
ts_selected.head()

In [44]:
df['ImportGSOD'].columns.levels[0]

Index(['P.GSOD (mm)'], dtype='object')

In [None]:
#ts_selected = ts_selected.stack()##.to_json()
ts_selected.to_json(orient='records')

In [None]:
ts_selected.plot()

In [None]:
from vega import VegaLite

VegaLite({
  "mark": "line",
  "encoding": {
    "x": {"field": "date", "type": "temporal", "axis": {"format": "%Y"}},
    "y": {
      "field": "value",
      "type": "quantitative",
      "scale": {"domain":[0,10]}
    }
  }
}, ts_selected)

In [None]:
# Import only the names that are used; `from altair import *` pulls every public
# altair name into the notebook namespace and hides where names come from.
from altair import Chart, X, Y

# line chart of the selected series: date on x (day time unit), value on y
Chart(ts_selected).mark_line().encode(
    X('date:T', timeUnit='day'),
    Y('value:Q')
)

In [None]:
df.xs('value', level=[0,'480010'], axis=0)

In [None]:
ts_selected.reset_index()

In [None]:
ts_selected

In [None]:
# box plot of the selected series; `stplot` is not a DataFrame method (typo for `plot`)
ts_selected.plot(kind='box')

In [None]:
# flag series of location 480010; the parameter level label includes the unit ('P.GSOD (mm)')
df[('ImportGSOD', 'P.GSOD (mm)', '480010', 'flag')]

In [None]:
#reform = {(outerKey, innerKey): values for outerKey, innerDict in df_dict.items() for innerKey, values in innerDict.items()}
df = pd.DataFrame(rows_list)

In [None]:
df.T

In [None]:
rows_list = []
#for row in input_rows:
dataValuesFlags = [event_values,event_flags]
multiColumns = pd.MultiIndex.from_product([moduleInstanceIds, parameterIds, locationIds, event_attributes], names=['moduleInstanceIds','parameterIds','locationIds','event_attributes'])
df_dict = pd.DataFrame(dataValuesFlags,index=multiColumns, columns=event_datetimes).to_dict()
rows_list.append(df_dict)

In [None]:

#df.columns = pd.MultiIndex.from_tuples(df.columns, names=['locationId','attributes'])

In [None]:
event_datetimes

In [None]:
dataValuesFlags

In [None]:
(list(itertools.product([moduleInstanceIds, parameterIds, locationIds, event_datetimes,event_attributes])))#,dataValuesFlags,event_datetimes])))

In [None]:
df.T.to_dict()

In [None]:
df.to_dict()

In [None]:
try:
    moduleInstanceIds.append(series.header.moduleInstanceId.cdata)
except AttributeError as e:
    print ('warning:',e)


In [None]:
# print datetime of last event
test_event_time = event_client_datetime(event, tz_server='Etc/'+timeZoneID, tz_client='Europe/Amsterdam')
print (test_event_time.isoformat())

In [None]:
# Pair every value with its flag -> one [value, flag] row per event
listcombine_T = [list(value_flag) for value_flag in zip(event_values, event_flags)]

# Two-level column labels (locationId, attribute) for a single location
multiColumns = [('bv.1.7.2.1.1', 'value'), ('bv.1.7.2.1.1','flag')]
column_index = pd.MultiIndex.from_tuples(multiColumns, names=['locationId','attributes'])

df = pd.DataFrame(listcombine_T, columns=column_index)

In [None]:
df.head()

In [None]:
# Use the event timestamps as index and mask the -999 missing-value marker.
# The previous set_index(...).rename(...) call had no effect: its result was
# discarded, and df has no ('bv.1.7.2.1.1', 'datetime') column to index on.
# NOTE(review): assumes event_datetimes belongs to the same series as the
# event_values/event_flags that df was built from - confirm.
df.index = pd.Index(event_datetimes, name='datetime')
# np.nan instead of the np.NaN alias, which NumPy 2.0 removed
df = df.replace(-999., np.nan)

In [None]:
df['bv.1.7.2.1.1']['value'].plot(kind='area')