In [1]:
import mapbox_vector_tile
import requests
import datetime

import pandas as pd 

In [2]:
def get_layer(url,fields,zoom=1):
    """Fetch every tile of a zoom level and collect the extracted features.

    Walks the full 2**zoom by 2**zoom tile grid and hands each tile to
    get_and_decode_tile, concatenating the per-tile results.

    Parameters
    ----------
    url : str
        URL template with ``{zoom}``, ``{x}`` and ``{y}`` placeholders
        (filled in by get_and_decode_tile).
    fields : list of str
        Property names to extract from every feature.
    zoom : int, default 1
        Zoom level whose tiles are fetched.

    Returns
    -------
    list of dict
        One dict per feature, in tile order (x outer loop, y inner loop).
    """
    tiles_per_axis = 2**zoom
    records = []
    nr_tiles = 0
    for x in range(tiles_per_axis):
        for y in range(tiles_per_axis):
            # extend in place; rebuilding the list with `+` copied all
            # previously collected records again for every tile
            records.extend(get_and_decode_tile(url,zoom,x,y,fields))
            nr_tiles += 1

    print("fetched",nr_tiles,"tiles")
    return records

def get_and_decode_tile(url,zoom,x,y,fields,timeout=60):
    """Fetch one vector tile, decode it and extract the requested fields.

    Every returned record additionally carries a "tile" entry of the form
    "zoom/x/y" naming the tile the feature was found in.

    Parameters
    ----------
    url : str
        URL template with ``{zoom}``, ``{x}`` and ``{y}`` placeholders.
    zoom, x, y : int
        Tile address substituted into the template.
    fields : list of str
        Property names to extract from every feature.
    timeout : float, default 60
        Seconds passed to ``requests.get``; without it a stalled server
        would block the whole comparison indefinitely.

    Returns
    -------
    list of dict
        One dict per feature; empty when the decoded tile has no layers.
    """
    tile_url = url.format(zoom=zoom,x=x,y=y)

    response = requests.get(tile_url,timeout=timeout)
    # NOTE(review): the HTTP status is not checked, an error body is decoded as is
    layers = mapbox_vector_tile.decode(response.content)

    if not layers:
        return []

    # only the first layer of the decoded tile is inspected
    first_layer = next(iter(layers.values()))
    tile_id = f"{zoom}/{x}/{y}"

    return [ (f | {"tile": tile_id}) for f in extract_features(first_layer["features"],fields) ]

def extract_features(l,fields):
    """Apply extract_feature to every feature in ``l``.

    Returns a list with one extracted dict per input feature, in input order.
    """
    extracted = []
    for record in l:
        extracted.append(extract_feature(record,fields))
    return extracted

def extract_feature(record,fields):
    """Flatten one decoded feature into a plain dict.

    The geometry coordinates are stringified and joined with "+" under the
    key "coordinates". Every name in ``fields`` plus the module-level
    ``no_compare_field`` is copied from the feature's properties, using ""
    for names the feature does not carry.
    """
    properties = record["properties"]
    coordinate_parts = map(str, record["geometry"]["coordinates"])

    flat = {"coordinates": "+".join(coordinate_parts)}
    flat.update({name: properties.get(name,"") for name in fields + no_compare_field})
    return flat


In [3]:
def group_df(df,fields):
    """Sort the records deterministically and group them by coordinates.

    Rows are ordered by "coordinates" followed by ``fields``; afterwards each
    column in ``fields`` plus the module-level ``no_compare_field`` is
    aggregated into a list per coordinate.
    """
    aggregated_columns = fields + no_compare_field
    collect_as_list = dict.fromkeys(aggregated_columns, list)
    ordered = df.sort_values(by=["coordinates"] + fields)
    return ordered.groupby("coordinates").agg(collect_as_list)

def compare_urls(url_old,url_new,fields,zoom=1):
    """Fetch both layers, group their features by coordinates and compare them.

    Prints the fetch time and record count of each layer and whether the
    grouped ``fields`` columns are equal. When they are not, prints the
    coordinates present in only one layer and, for coordinates present in
    both, the ones whose ``fields`` values differ.

    Parameters
    ----------
    url_old, url_new : str
        URL templates with ``{zoom}``, ``{x}`` and ``{y}`` placeholders.
    fields : list of str
        Property names that are extracted and compared.
    zoom : int, default 1
        Zoom level whose tiles are fetched from both URLs.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        The grouped old and new frames as produced by group_df.
    """
    started = datetime.datetime.now()
    df_old = pd.DataFrame.from_records( get_layer(url_old,fields,zoom=zoom) )
    print("old:","%.2f" % (datetime.datetime.now()-started).total_seconds(),"seconds")

    started = datetime.datetime.now()
    df_new = pd.DataFrame.from_records( get_layer(url_new,fields,zoom=zoom) )
    print("new:","%.2f" % (datetime.datetime.now()-started).total_seconds(),"seconds")

    print("nr records old",len(df_old),"nr records new",len(df_new))

    grp_old = group_df(df_old,fields)
    grp_new = group_df(df_new,fields)

    same = grp_old[fields].equals(grp_new[fields])

    s = " " if same else " not "
    print("the records in the URLs are{}the same for zoom {}".format(s,zoom))

    if not same:
        # coordinates that exist in only one of the two layers
        diff_a = grp_old.index.difference(grp_new.index).values
        diff_b = grp_new.index.difference(grp_old.index).values
        print("old vs new:",diff_a)
        print(grp_old.loc[diff_a])
        print("new vs old:",diff_b)
        print(grp_new.loc[diff_b])

        # coordinates that exist in both layers but carry different values;
        # without this a pure value mismatch only produced two empty diffs
        shared = grp_old.index.intersection(grp_new.index)
        changed = [
            coord for coord in shared
            if any(grp_old.at[coord,f] != grp_new.at[coord,f] for f in fields)
        ]
        if changed:
            print("same coordinates, different values:",changed)
            print(grp_old.loc[changed,fields])
            print(grp_new.loc[changed,fields])

    return grp_old,grp_new

In [4]:
# property names compared between the old and the new layer (synop cells)
surface_fields = [
    "nr_received",
    "nr_expected",
    "var_id",
    "in_oscar",
]
# property names compared between the old and the new layer (temp cells)
ua_fields = [
    "nr_received",
    "nr_expected",
    "availability",
    "in_oscar",
    "default_schedule",
    "country_id",
]
# extracted and kept per station for reference, but excluded from the comparison
no_compare_field = [
    "station_id",
    "tile",
]

In [5]:
# daily synop (zoom 2): compare old and new layer on surface_fields
url_old = "https://wdqms-dev.wmo.int/wdqmsapi/vector-tiles/nwp/synop/daily/availability/110/true/{zoom}/{x}/{y}/?period_JMA=17044&period_ECMWF=17047&period_DWD=17026&period_NCEP=17045&date=2023-07-26"
url_new = "https://wdqms-test.wmo.int/wdqmstiles/tileserv.synop_observationby_daily_period_combined/{zoom}/{x}/{y}.pbf?param_var_id=110&period_jma=17044&period_ecmwf=17047&period_ncep=17045&period_dwd=17026"

# the grouped frames are discarded; compare_urls prints the report
_=compare_urls(url_old,url_new,surface_fields,zoom=2)

fetched 16 tiles
old: 7.48 seconds
fetched 16 tiles
new: 7.41 seconds
nr records old 12218 nr records new 12218
the records in the URLs are the same for zoom 2


In [6]:
# 6h synop (zoom 1): compare old and new layer on surface_fields
# NOTE(review): every period id in url_new is one higher than its counterpart in url_old
# (e.g. ECMWF 64921 vs 64922) - confirm both URLs address the same period
url_old = "https://wdqms-dev.wmo.int/wdqmsapi/vector-tiles/nwp/synop/six_hour/availability/110/true/{zoom}/{x}/{y}/?period_ECMWF=64921&period_DWD=64877&period_NCEP=64904&period_JMA=64899&date=2023-07-26"
url_new = "https://wdqms-test.wmo.int/wdqmstiles/tileserv.synop_observationby_sixhour_period_combined/{zoom}/{x}/{y}.pbf?param_var_id=110&period_jma=64900&period_dwd=64878&period_ecmwf=64922&period_ncep=64905" 

# the grouped frames are discarded; compare_urls prints the report
_=compare_urls(url_old,url_new,surface_fields,zoom=1)

fetched 4 tiles
old: 2.45 seconds
fetched 4 tiles
new: 2.06 seconds
nr records old 11893 nr records new 11893
the records in the URLs are the same for zoom 1


In [7]:
# 6h synop (zoom 2): compare old and new layer on surface_fields
# NOTE(review): every period id in url_new is one higher than its counterpart in url_old
# (e.g. ECMWF 64921 vs 64922) - confirm both URLs address the same period
url_old = "https://wdqms-dev.wmo.int/wdqmsapi/vector-tiles/nwp/synop/six_hour/availability/110/true/{zoom}/{x}/{y}/?period_ECMWF=64921&period_DWD=64877&period_NCEP=64904&period_JMA=64899&date=2023-07-26"
url_new = "https://wdqms-test.wmo.int/wdqmstiles/tileserv.synop_observationby_sixhour_period_combined/{zoom}/{x}/{y}.pbf?param_var_id=110&period_jma=64900&period_dwd=64878&period_ecmwf=64922&period_ncep=64905" 

# the grouped frames are discarded; compare_urls prints the report
_=compare_urls(url_old,url_new,surface_fields,zoom=2)

fetched 16 tiles
old: 12.71 seconds
fetched 16 tiles
new: 7.24 seconds
nr records old 11893 nr records new 11893
the records in the URLs are the same for zoom 2


In [8]:
# daily synop (zoom 1): compare old and new layer on surface_fields
url_old = "https://wdqms-dev.wmo.int/wdqmsapi/vector-tiles/nwp/synop/daily/availability/110/true/{zoom}/{x}/{y}/?period_JMA=17044&period_ECMWF=17047&period_DWD=17026&period_NCEP=17045&date=2023-07-26"
url_new = "https://wdqms-test.wmo.int/wdqmstiles/tileserv.synop_observationby_daily_period_combined/{zoom}/{x}/{y}.pbf?param_var_id=110&period_jma=17044&period_ecmwf=17047&period_ncep=17045&period_dwd=17026"

# the grouped frames are discarded; compare_urls prints the report
_=compare_urls(url_old,url_new,surface_fields,zoom=1)

fetched 4 tiles
old: 2.80 seconds
fetched 4 tiles
new: 3.70 seconds
nr records old 12218 nr records new 12218
the records in the URLs are the same for zoom 1


In [9]:
# daily synop (zoom 3): compare old and new layer on surface_fields
url_old = "https://wdqms-dev.wmo.int/wdqmsapi/vector-tiles/nwp/synop/daily/availability/110/true/{zoom}/{x}/{y}/?period_JMA=17044&period_ECMWF=17047&period_DWD=17026&period_NCEP=17045&date=2023-07-26"
url_new = "https://wdqms-test.wmo.int/wdqmstiles/tileserv.synop_observationby_daily_period_combined/{zoom}/{x}/{y}.pbf?param_var_id=110&period_jma=17044&period_ecmwf=17047&period_ncep=17045&period_dwd=17026"

# zoom 3 means 64 tiles per layer; the grouped frames are discarded, compare_urls prints the report
_=compare_urls(url_old,url_new,surface_fields,zoom=3)

fetched 64 tiles
old: 46.75 seconds
fetched 64 tiles
new: 45.83 seconds
nr records old 12218 nr records new 12218
the records in the URLs are the same for zoom 3


In [10]:
# 6h temp (zoom 1): compare old and new layer on ua_fields
# NOTE(review): every period id in url_new is one higher than its counterpart in url_old
# (e.g. ECMWF 64922 vs 64923) - confirm both URLs address the same period
url_old = "https://wdqms-dev.wmo.int/wdqmsapi/vector-tiles/nwp/temp/six_hour/availability/2/true/{zoom}/{x}/{y}/?period_JMA=64887&period_DWD=64879&period_ECMWF=64922&period_NCEP=64909&date=2023-07-26"
url_new = "https://wdqms-test.wmo.int/wdqmstiles/tileserv.temp_observationby_sixhour_period_combined/{zoom}/{x}/{y}.pbf?param_var_id=2&period_dwd=64880&period_ecmwf=64923&period_jma=64888&period_ncep=64910"

# the grouped frames are discarded; compare_urls prints the report
_=compare_urls(url_old,url_new,ua_fields,zoom=1)

fetched 4 tiles
old: 3.20 seconds
fetched 4 tiles
new: 2.90 seconds
nr records old 53 nr records new 53
the records in the URLs are the same for zoom 1


In [11]:
# daily temp (zoom 1): compare old and new layer on ua_fields
url_old = "https://wdqms-dev.wmo.int/wdqmsapi/vector-tiles/nwp/temp/daily/availability/2/true/{zoom}/{x}/{y}/?period_ECMWF=17048&period_JMA=17042&period_DWD=17033&period_NCEP=17046&date=2023-07-26"
url_new = "https://wdqms-test.wmo.int/wdqmstiles/tileserv.temp_observationby_daily_period_combined/{zoom}/{x}/{y}.pbf?param_var_id=2&period_dwd=17033&period_ncep=17046&period_jma=17042&period_ecmwf=17048"

# zoom is not passed, so compare_urls uses its default of 1
_=compare_urls(url_old,url_new,ua_fields)

fetched 4 tiles
old: 3.14 seconds
fetched 4 tiles
new: 2.81 seconds
nr records old 985 nr records new 985
the records in the URLs are the same for zoom 1
