In [131]:
import mapbox_vector_tile
import requests
import datetime

import pandas as pd 

In [145]:
def get_layer(url,fields,zoom=1):
    """fetches all tiles correponding to the zoom level and extracts fields from tiles as indicated by parameter fields"""
    ret = []
    for x in range(0,2**zoom):
        for y in range(0,2**zoom):
             ret = ret + get_and_decode_tile(url,zoom,x,y,fields) 
                
    return ret

def get_and_decode_tile(url,zoom,x,y,fields):
    """fetches a tile, decodes it and extracts fields from it as per parameter fields. 
    Adds info on the tile the feature is contained in"""
    
    url = url.format(zoom=zoom,x=x,y=y)
    #print(url)
    #return []
    
    features = mapbox_vector_tile.decode(requests.get(url).content)
    
    if not features or len(features)==0:
        return []
    
    ret = [ (f | {"tile": f"{zoom}/{x}/{y}"} )  for f in extract_features(list(features.values())[0]["features"],fields) ]
    
    return ret

def extract_features(l,fields):
    """extracts and returns fields from list of features"""
    return [ extract_feature(r,fields) for r in l ]

def extract_feature(record,fields):
    """extracts features from a records as per fields parameter"""
    props = record["properties"]
    
    ret = { 
        "coordinates" : "+".join([str(g) for g in record["geometry"]["coordinates"]]),
        #"lat" : record["geometry"]["coordinates"][0], "lon" : record["geometry"]["coordinates"][1]
    }
    for f in fields + no_compare_field:
        ret[f] = props.get(f,"")
    
    return ret


In [146]:
def group_df(df,fields):
    """sorts and groups dataframe object by determinictis logic"""
    my_agg = { f:list for f in fields + no_compare_field}
    return df.sort_values(by=["coordinates"] + fields ).groupby("coordinates").agg( my_agg )

def compare_urls(url_old,url_new,fields,zoom=1):
    """main function. obtains tiles from old and new layers, groups features and compares them"""
    df_old = pd.DataFrame.from_records( get_layer(url_old,fields,zoom=zoom) )
    df_new = pd.DataFrame.from_records( get_layer(url_new,fields,zoom=zoom) )
    print("nr records old",len(df_old),"nr records new",len(df_new))
    
    grp_old = group_df(df_old,fields)
    grp_new = group_df(df_new,fields)
    
    same = grp_old[fields].equals(grp_new[fields])
    
    s = " " if same else " not "
    print("the records in the URLs are{}the same for zoom {}".format(s,zoom))
    
    if not same:
        diff_a = grp_old.index.difference(grp_new.index).values
        diff_b = grp_new.index.difference(grp_old.index).values
        print("old vs new:",diff_a)
        print(grp_old.loc[diff_a])
        print("new vs old:",diff_b)
        print(grp_new.loc[diff_b])

        
    return grp_old,grp_new       

In [157]:
# fields used to match stations 
surface_fields = ["nr_received","nr_expected","var_id","in_oscar"]
ua_fields = ["nr_received","nr_expected","availability","in_oscar","default_schedule","country_id"]
no_compare_field = ["station_id","tile"] # do not use these fields to compare stations

In [148]:
# daily synop
url_old = "https://wdqms-dev.wmo.int/wdqmsapi/vector-tiles/nwp/synop/daily/availability/110/true/{zoom}/{x}/{y}/?period_JMA=17044&period_ECMWF=17047&period_DWD=17026&period_NCEP=17045&date=2023-07-26"
url_new = "https://wdqms-test.wmo.int/wdqmstiles/tileserv.synop_observationby_daily_period_combined/{zoom}/{x}/{y}.pbf?param_var_id=110&period_jma=17044&period_ecmwf=17047&period_ncep=17045&period_dwd=17026"

b=datetime.datetime.now()
get_layer(url_old,surface_fields,zoom=2)
print("old",(datetime.datetime.now()-b).total_seconds())

b=datetime.datetime.now()
get_layer(url_new,surface_fields,zoom=2)
print("new",(datetime.datetime.now()-b).total_seconds())

old 11.698521
new 21.308967


In [149]:
# 6h synop
url_old = "https://wdqms-dev.wmo.int/wdqmsapi/vector-tiles/nwp/synop/six_hour/availability/110/true/{zoom}/{x}/{y}/?period_ECMWF=64921&period_DWD=64877&period_NCEP=64904&period_JMA=64899&date=2023-07-26"
url_new = "https://wdqms-test.wmo.int/wdqmstiles/tileserv.synop_observationby_sixhour_period_combined/{zoom}/{x}/{y}.pbf?param_var_id=110&period_jma=64900&period_dwd=64878&period_ecmwf=64922&period_ncep=64905" 

_=compare_urls(url_old,url_new,surface_fields,zoom=1)

nr records old 11893 nr records new 11893
the records in the URLs are the same for zoom 1


In [150]:
# 6h synop (zoom 2)
url_old = "https://wdqms-dev.wmo.int/wdqmsapi/vector-tiles/nwp/synop/six_hour/availability/110/true/{zoom}/{x}/{y}/?period_ECMWF=64921&period_DWD=64877&period_NCEP=64904&period_JMA=64899&date=2023-07-26"
url_new = "https://wdqms-test.wmo.int/wdqmstiles/tileserv.synop_observationby_sixhour_period_combined/{zoom}/{x}/{y}.pbf?param_var_id=110&period_jma=64900&period_dwd=64878&period_ecmwf=64922&period_ncep=64905" 

grp_old,grp_new=compare_urls(url_old,url_new,surface_fields,zoom=2)

nr records old 11893 nr records new 11893
the records in the URLs are not the same for zoom 2
old vs new: ['0+1705']
            nr_received nr_expected var_id in_oscar station_id     tile
coordinates                                                            
0+1705              [4]         [6]  [110]   [True]    [67356]  [2/1/1]
new vs old: ['4096+1705']
            nr_received nr_expected var_id in_oscar station_id     tile
coordinates                                                            
4096+1705           [4]         [6]  [110]   [True]    [66940]  [2/0/1]


In [151]:
# daily synop
url_old = "https://wdqms-dev.wmo.int/wdqmsapi/vector-tiles/nwp/synop/daily/availability/110/true/{zoom}/{x}/{y}/?period_JMA=17044&period_ECMWF=17047&period_DWD=17026&period_NCEP=17045&date=2023-07-26"
url_new = "https://wdqms-test.wmo.int/wdqmstiles/tileserv.synop_observationby_daily_period_combined/{zoom}/{x}/{y}.pbf?param_var_id=110&period_jma=17044&period_ecmwf=17047&period_ncep=17045&period_dwd=17026"

#_=compare_urls(url_old,url_new,surface_fields,zoom=1)
_=compare_urls(url_old,url_new,surface_fields,zoom=1)

nr records old 12218 nr records new 12218
the records in the URLs are the same for zoom 1


In [152]:
# daily synop (zoom 3)
url_old = "https://wdqms-dev.wmo.int/wdqmsapi/vector-tiles/nwp/synop/daily/availability/110/true/{zoom}/{x}/{y}/?period_JMA=17044&period_ECMWF=17047&period_DWD=17026&period_NCEP=17045&date=2023-07-26"
url_new = "https://wdqms-test.wmo.int/wdqmstiles/tileserv.synop_observationby_daily_period_combined/{zoom}/{x}/{y}.pbf?param_var_id=110&period_jma=17044&period_ecmwf=17047&period_ncep=17045&period_dwd=17026"

#_=compare_urls(url_old,url_new,surface_fields,zoom=1)
_=compare_urls(url_old,url_new,surface_fields,zoom=3)

nr records old 12218 nr records new 12220
the records in the URLs are not the same for zoom 3
old vs new: ['0+3410']
            nr_received nr_expected var_id in_oscar station_id     tile
coordinates                                                            
0+3410             [16]        [24]  [110]   [True]    [67356]  [3/2/3]
new vs old: ['4096+2117' '4096+2982' '4096+3410']
            nr_received nr_expected var_id in_oscar station_id     tile
coordinates                                                            
4096+2117           [3]         [2]  [110]   [True]    [56443]  [3/2/4]
4096+2982           [1]         [2]  [110]   [True]    [72809]  [3/2/4]
4096+3410          [16]        [24]  [110]   [True]    [66940]  [3/1/3]


In [155]:
# 6h temp, (zoom 1)
url_old = "https://wdqms-dev.wmo.int/wdqmsapi/vector-tiles/nwp/temp/six_hour/availability/2/true/{zoom}/{x}/{y}/?period_JMA=64887&period_DWD=64879&period_ECMWF=64922&period_NCEP=64909&date=2023-07-26"
url_new = "https://wdqms-test.wmo.int/wdqmstiles/tileserv.temp_observationby_sixhour_period_combined/{zoom}/{x}/{y}.pbf?param_var_id=2&period_dwd=64880&period_ecmwf=64923&period_jma=64888&period_ncep=64910"

_=compare_urls(url_old,url_new,ua_fields,zoom=1)

nr records old 53 nr records new 53
the records in the URLs are the same for zoom 1


In [156]:
# daily temp (zoom 1)
url_old = "https://wdqms-dev.wmo.int/wdqmsapi/vector-tiles/nwp/temp/daily/availability/2/true/{zoom}/{x}/{y}/?period_ECMWF=17048&period_JMA=17042&period_DWD=17033&period_NCEP=17046&date=2023-07-26"
url_new = "https://wdqms-test.wmo.int/wdqmstiles/tileserv.temp_observationby_daily_period_combined/{zoom}/{x}/{y}.pbf?param_var_id=2&period_dwd=17033&period_ncep=17046&period_jma=17042&period_ecmwf=17048"

=_compare_urls(url_old,url_new,ua_fields)

SyntaxError: invalid syntax (739387071.py, line 5)