In [None]:
import sys
sys.path.insert(0, '../')
import geopandas
import pandas as pd
import matplotlib.pyplot as plt
from adjustText import adjust_text
from shapely.geometry import Point
from src.config import PARAM, PATH_RESOURCES, PATH_RESULTS
from src.geo_data import load_geonames

In [None]:
# Read the country-boundary shapefile stored under PATH_RESOURCES and discard
# the record whose CNTR_ID is 'AQ' (presumably Antarctica -- confirm against
# the shapefile's attribute table) so it is not drawn on the maps below.
map_ = (
    geopandas
    .read_file(PATH_RESOURCES / 'shapefiles/ref-countries-2016-01m.shp/CNTR_RG_01M_2016_3857.shp')
    .loc[lambda frame: frame.CNTR_ID != 'AQ']
)
# Trailing expression: display the coordinate reference system of the map.
map_.crs

In [None]:
# Load the place-name table through the project helper. Later cells read the
# columns country_code, longitude, latitude and alternate_name from it; the
# full schema lives in src.geo_data -- TODO confirm there.
df_geo_ = load_geonames()

In [None]:
# Country codes whose places are left out of the working copy.
countries = ['GB', 'NL']
# Keep every row whose country_code is NOT in the list above; .copy() detaches
# the result from df_geo_ so later modifications do not touch the loaded table.
# NOTE(review): this excludes GB and NL rather than selecting them -- confirm
# that this is the intended direction of the filter.
df_geo = df_geo_.loc[~df_geo_['country_code'].isin(countries)].copy()

In [None]:
# Build point geometries from the coordinate columns (x = longitude,
# y = latitude) in one vectorised call instead of one Point per row.
geometry = geopandas.points_from_xy(df_geo.longitude, df_geo.latitude)
# The CRS is given as an authority string: the legacy {'init': 'epsg:...'}
# dict form is deprecated by pyproj/geopandas and emits a FutureWarning.
crs = 'EPSG:4326'
gdf_ = geopandas.GeoDataFrame(df_geo, crs=crs, geometry=geometry)
# Reproject the points to EPSG:3857, the code that appears in the country
# shapefile's file name (CNTR_RG_01M_2016_3857), so they can be overlaid on
# that map -- TODO confirm against the map_.crs output.
gdf_ = gdf_.to_crs(epsg=3857)

In [None]:
# Overview map: every place in gdf_ drawn as a small, half-transparent black
# dot on top of the country outlines.
fig, ax = plt.subplots(figsize=(20, 20))
map_.plot(ax=ax, edgecolor='lightgray', color='lightyellow')
# `scatter` keeps whatever gdf_.plot returns; later cells reuse this name.
scatter = gdf_.plot(ax=ax, marker='o', color='black', alpha=0.5, markersize=1)
ax.set_title('Distributie plaatsnamen', fontsize=24)
fig.tight_layout()

In [None]:
# Attribute counts: take the 'places' cross-section of the first index level
# and rename a column called 'index' (if there is one) to 'naam'.
# NOTE: read_pickle can execute arbitrary code while loading -- only point it
# at files produced by this project.
df = (
    pd.read_pickle(PATH_RESULTS / 'df_attribute_counts.pkl')
    .xs('places', level=0)
    .rename(columns={'index': 'naam'})
)

# Place annotations: a phrase counts as positive when at least one of its
# annotation values equals '+'.
df_annotations = pd.read_pickle(PATH_RESULTS / 'df_annotations_places.pkl')
df_annotations['positive'] = df_annotations['annotation'].eq('+')
positives = df_annotations.groupby('phrase')['positive'].any()

# Keep only the count rows whose index label is a positively annotated phrase.
df_ = df.loc[df.index.isin(positives.loc[positives].index)]

In [None]:
# Attach the counts to the geocoded places by matching alternate_name against
# the index of df_ (inner join: rows without a match on either side are
# dropped). Sorting both keys in descending order puts, for every name, the
# row with the largest population first; drop_duplicates then keeps that row.
gdf = (
    gdf_
    .merge(df_, how='inner', left_on='alternate_name', right_index=True)
    .sort_values(by=['alternate_name', 'population'], ascending=[False, False])
    .drop_duplicates(subset=['alternate_name'], keep='first')
)

In [None]:
# Per-batch maps: one figure per batch, with a red marker per place whose size
# grows with the place's count in that batch, plus a text label for every
# place that reaches the threshold.
thrshld = 10  # minimum count within a batch for a place to be labelled
factor = 5    # multiplier that turns a count into a marker size

# The legend settings do not depend on the batch, so build them once.
# `func` maps a marker size back to the underlying count for the legend labels.
kw = dict(
    prop="sizes",
    num=[1, 10, 50, 200],
    color='red',
    alpha=0.5,
    func=lambda s: s / factor,
    )

for batch in PARAM.lexisnexis.batches:
    fig, ax = plt.subplots(figsize=(20, 20))
    map_.plot(ax=ax, color='lightyellow', edgecolor='lightgray')
    # Missing counts become 0 so those places get a zero-size marker.
    scatter = gdf.plot(
        ax=ax,
        marker='o',
        color='red',
        alpha=0.5,
        markersize=gdf[batch].fillna(0) * factor,
        )
    # The markers are the collection added to the axes last; fetch it from the
    # end instead of assuming the map contributed exactly one collection.
    legend = ax.legend(
        *scatter.collections[-1].legend_elements(**kw),
        loc="upper left",
        borderpad=2.0,
        labelspacing=4.5,
        handletextpad=2.0,
        frameon=False,
        )
    ax.set_title(batch, fontdict={'fontsize': 24})
    # NaN compares as False, so the threshold filter alone already leaves out
    # places without a count for this batch; no separate dropna is needed.
    rows = gdf.loc[gdf[batch] >= thrshld]
    # One centred label per frequent place, positioned on its point geometry.
    texts = [
        ax.text(point.x, point.y, name, ha='center', va='center', fontsize=12)
        for point, name in zip(rows['geometry'], rows['alternate_name'])
        ]
    # Let adjust_text reposition the labels to reduce overlap.
    adjust_text(texts, ax=ax)
    plt.tight_layout()

In [None]:
# 2x2 panel of zoomed-in maps, one panel per batch, all sharing the same
# viewport. The limits are expressed in map coordinates (presumably metres in
# the EPSG:3857 projection of the shapefile -- TODO confirm).
n = 1e7
limitx = [-0.2 * n, 0.5 * n]
limity = [0.35 * n, 1.15 * n]

factor = 12  # multiplier that turns a count into a marker size
thrshld = 5  # minimum count within a batch for a place to be labelled

# The legend settings do not depend on the batch, so build them once.
# `func` maps a marker size back to the underlying count for the legend labels.
kw = dict(
    prop="sizes",
    num=[1, 10, 50, 200],
    color='red',
    alpha=0.5,
    func=lambda s: s / factor,
    )

fig, axs = plt.subplots(2, 2, sharex=True, sharey=True, figsize=(20, 20))
# NOTE: zip stops at the shorter sequence -- with more than four batches the
# extra ones are not drawn, with fewer some panels stay empty.
for ax, batch in zip(axs.flat, PARAM.lexisnexis.batches):
    # Fix the viewport before plotting so every panel shows the same region.
    ax.set_xlim(limitx)
    ax.set_ylim(limity)

    map_.plot(ax=ax, color='lightyellow', edgecolor='lightgray')
    # Missing counts become 0 so those places get a zero-size marker.
    scatter = gdf.plot(
        ax=ax,
        marker='o',
        color='red',
        alpha=0.5,
        markersize=gdf[batch].fillna(0) * factor,
        )
    # The markers are the collection added to the axes last; fetch it from the
    # end instead of assuming the map contributed exactly one collection.
    legend = ax.legend(
        *scatter.collections[-1].legend_elements(**kw),
        loc="upper left",
        borderpad=2.0,
        labelspacing=4.5,
        handletextpad=2.0,
        frameon=False,
        )
    ax.set_title(batch, fontdict={'fontsize': 24})
    # NaN compares as False, so the threshold filter alone already leaves out
    # places without a count for this batch; no separate dropna is needed.
    rows = gdf.loc[gdf[batch] >= thrshld]
    # One centred label per frequent place, positioned on its point geometry.
    texts = [
        ax.text(point.x, point.y, name, ha='center', va='center', fontsize=12)
        for point, name in zip(rows['geometry'], rows['alternate_name'])
        ]
    # Let adjust_text reposition the labels to reduce overlap.
    adjust_text(texts, ax=ax)
plt.tight_layout()