In [None]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""This notebook creates the map of TAG in COUNTRY 
for all its editions, provided images have been 
properly categorized"""

import inspect, os, sys

# Import pywikibot; if it is not importable from the current sys.path, add
# this file's folder and its parent folder to sys.path and try again.
try:
    import pywikibot as pb
    from pywikibot import pagegenerators

except ImportError:
    # Only a failed import should trigger the path fallback; any other
    # exception (including KeyboardInterrupt) is allowed to propagate.
    current_folder = os.path.realpath(os.path.abspath(os.path.split(inspect.getfile(inspect.currentframe()))[0]))
    folder_parts = current_folder.split(os.sep)
    pywikibot_folder = os.sep.join(folder_parts[:-1])  # parent of current_folder

    if current_folder not in sys.path:
        sys.path.insert(0, current_folder)
    if pywikibot_folder not in sys.path:
        sys.path.insert(0, pywikibot_folder)

    import pywikibot as pb
    from pywikibot import pagegenerators

In [None]:
import pandas as pd
import numpy as np
from mako.template import Template
from io import StringIO
import random
import seaborn as sns

In [None]:
from geojson import Feature, Point, FeatureCollection
import geojson

In [None]:
from modules.wmtools import coordinate_shaker

In [None]:
# Contest editions covered by the map.
YEARS = [2015, 2016, 2017]

# Short and long names of the contest, and the country it is held in.
TAG = 'WLE'
TAG_EXT = 'Wiki Loves Earth'
COUNTRY = "Spain"

# Page title pattern for one edition: {0}=year, {1}=contest name, {2}=country.
BASE_NAME = "Commons:Wiki Loves in {2}/{1}/{0}"
# One "/Log" page title per edition.
LOG_PAGES = [
    "{0}/Log".format(BASE_NAME.format(YEAR, TAG_EXT, COUNTRY))
    for YEAR in YEARS
]

# Edition-independent pages: the sites database and the map published at the end.
BASE_SITE_DB_NAME = "Commons:Wiki Loves in {1}/{0}".format(TAG_EXT, COUNTRY)
SITE_DB_PAGE = BASE_SITE_DB_NAME + "/Sites DB"
MAP_WLE_PAGE = BASE_SITE_DB_NAME + '/Map'

# pywikibot Site handle used for every page read and write in this notebook.
commons_site = pb.Site('commons', 'commons')

In [None]:
# Maps a region code (as found in the "community" column of the sites DB) to a
# two-element list: [title of the region's "Anexo:" list page, English name].
# Only the English name (index 1) is read by the visible code in this notebook;
# it becomes the `aut_com` value and the key of the marker colour table.
# Ceuta and Melilla share the same annex page title.
annexes = {
   'ES-AN': [u'Anexo:Lugares de importancia comunitaria de Andalucía', 'Andalusia'],
   'ES-AR': [u'Anexo:Lugares de importancia comunitaria de Aragón', 'Aragon'],
   'ES-AS': [u'Anexo:Lugares de importancia comunitaria de Asturias', 'Asturias'],
   'ES-CB': [u'Anexo:Lugares de importancia comunitaria de Cantabria', 'Cantabria'],
   'ES-CM': [u'Anexo:Lugares de importancia comunitaria de Castilla-La Mancha', 'Castile-La Mancha'],
   'ES-CL': [u'Anexo:Lugares de importancia comunitaria de Castilla y León', u'Castile and León'],
   'ES-CT': [u'Anexo:Lugares de importancia comunitaria de Cataluña', 'Catalonia'],
   'ES-MD': [u'Anexo:Lugares de importancia comunitaria de la Comunidad de Madrid', 'Community of Madrid'],
   'ES-VC': [u'Anexo:Lugares de importancia comunitaria de la Comunidad Valenciana', 'Valencian Community'],
   'ES-EX': [u'Anexo:Lugares de importancia comunitaria de Extremadura', 'Extremadura'],
   'ES-IB': [u'Anexo:Lugares de importancia comunitaria de las Islas Baleares', 'Balearic Islands'],
   'ES-CN': [u'Anexo:Lugares de importancia comunitaria de las Islas Canarias', 'Canary Islands'],
   'ES-GA': [u'Anexo:Lugares de importancia comunitaria de Galicia', 'Galicia'],
   'ES-RI': [u'Anexo:Lugares de importancia comunitaria de La Rioja', 'La Rioja'],
   'ES-NC': [u'Anexo:Lugares de importancia comunitaria de Navarra', 'Navarre'],
   'ES-MC': [u'Anexo:Lugares de importancia comunitaria de la Región de Murcia', 'Region of Murcia'],
   'ES-PV': [u'Anexo:Lugares de importancia comunitaria del País Vasco', 'Basque Country'],
   'ES-CE': [u'Anexo:Lugares de importancia comunitaria de Ceuta y Melilla', 'Ceuta'],
   'ES-ML': [u'Anexo:Lugares de importancia comunitaria de Ceuta y Melilla', 'Melilla'],
   'ES-MAGRAMA': [u'Anexo:Lugares de importancia comunitaria del MAGRAMA', 'MAGRAMA']
}

In [None]:
# One colour per entry of `annexes`, taken from seaborn's 'hls' palette.
# The leading '#' of each hex string is dropped.
hls_hex_colors = sns.color_palette('hls', len(annexes)).as_hex()
autcom_palette = [hex_color[1:] for hex_color in hls_hex_colors]

# English region names, in the iteration order of `annexes`.
autcoms = [annex_entry[1] for annex_entry in annexes.values()]

# Region name -> colour; names and colours are paired by position.
autcom_colors = dict(zip(autcoms, autcom_palette))
autcom_colors

In [None]:
def to_geojson(row):
    """Build the GeoJSON Feature (map marker) for one site of community importance.

    `row` must provide 'code', 'name', 'aut_com', 'longitude' and 'latitude'.
    The popup image is picked from the module-level `images_df`:

    * candidates are the site's images that are wider than tall, or all of
      the site's images when none is;
    * among the candidates, images flagged 'qi' are preferred, then images
      flagged 'finalist', then any image;
    * one image is drawn from the preferred group with a fixed random seed.

    Returns a `geojson.Feature` whose geometry is the site's point and whose
    properties hold the popup wikitext and the marker styling.
    """
    code_mask = images_df['code'] == row['code']
    landscape_mask = images_df['width'] > images_df['height']

    candidates = images_df[code_mask & landscape_mask]
    if candidates.shape[0] == 0:
        # No landscape picture for this site: fall back to every picture of it.
        candidates = images_df[code_mask]

    # A flag column holds its own name when the flag is set for an image.
    pool = candidates
    for flag in ('qi', 'finalist'):
        flagged = candidates[candidates[flag] == flag]
        if len(flagged) > 0:
            pool = flagged
            break
    popup_image = pool.sample(1, random_state=0)['image_title'].values[0]

    title = "[[:Category:Images of a site of community importance with code {0} from {1} in {3}|{2}]]".format(
        row['code'], TAG_EXT, row['name'], COUNTRY)
    properties = {
        "description": "[[File:{0}|150px]]".format(popup_image),
        "title": title,
        "marker-size": "small",
        "marker-symbol": "circle",
        "marker-color": autcom_colors[row['aut_com']],
    }

    # GeoJSON positions are (longitude, latitude), in that order.
    location = Point((float(row['longitude']), float(row['latitude'])))
    return Feature(geometry=location, properties=properties)

In [None]:
# Download the WLE SCI (site of community importance) database page and
# parse it as a semicolon-separated table.
pb.output('Retrieving --> WLE site of community importance list')
site_list_page = pb.Page(commons_site, SITE_DB_PAGE)

# Keep only the text between the first and the last newline of the page,
# i.e. drop its first and last lines.
site_page_text = site_list_page.text
first_newline = site_page_text.find('\n')
last_newline = site_page_text.rfind('\n')
site_list_text = StringIO(site_page_text[first_newline + 1:last_newline])

site_db_columns = [
    "name", "code", "magrama_url", "community",
    "bio_region", "continent", "min_altitude",
    "max_altitude", "avg_altitude", "longitude",
    "latitude", "area", "marine_percentage",
    "marine_area", "image", "commons_cat", "wikidata_id",
]
site_df = pd.read_csv(site_list_text, sep=";", index_col=False, names=site_db_columns)

pb.output('Retrieved --> WLE site of community importance list')

In [None]:
# Translate each site's community code into the English region name from `annexes`.
site_df["aut_com"] = site_df["community"].map(lambda community_code: annexes[community_code][1])

In [None]:
# Codes of every site listed in the sites DB; used further down to discard
# images whose code does not match any known site.
valid_sites = site_df['code'].values
valid_sites

In [None]:
# Column layout of the image log pages, in file order.
image_columns = [
    'image_title',
    'code',
    'uploader',
    'uploader_registration',
    'timestamp',
    'date',
    'size',
    'height',
    'width',
    'qi',
    'finalist',
]
# Empty frame with that layout; the yearly logs are appended to it afterwards.
images_df = pd.DataFrame(columns=image_columns)

In [None]:
# Load the image log of every edition and stack them into a single frame.
pb.output('Retrieving --> {0} in {1} images list from cache'.format(TAG, COUNTRY))
yearly_frames = []
for log_page in LOG_PAGES:
    list_page = pb.Page(commons_site, log_page)
    # Keep only the text between the first and the last newline of the page.
    list_page_text = StringIO(list_page.text[list_page.text.find('\n') + 1:list_page.text.rfind('\n')])
    yearly_df = pd.read_csv(list_page_text,
                            sep=";",
                            index_col=False,
                            names=image_columns
                           ).fillna('')
    yearly_frames.append(yearly_df)

# Concatenate once, from scratch, so that re-running this cell rebuilds the
# frame instead of appending the same logs to the previous result.
if yearly_frames:
    images_df = pd.concat(yearly_frames)
else:
    images_df = pd.DataFrame(columns=image_columns)
pb.output('Retrieved --> {0} in {1} images list from cache'.format(TAG, COUNTRY))

images_df['timestamp'] = pd.to_datetime(images_df['timestamp'], format="%Y-%m-%d %H:%M:%S")

# Index by upload timestamp and leave the index unnamed.
# (`del images_df.index.name` raises AttributeError on pandas >= 1.0.)
images_df = images_df.set_index(["timestamp"])
images_df.index.name = None

total_images_length = len(images_df)
total_images_length

In [None]:
# Attach the site attributes to every image; images whose code matches no
# site are kept (left join).
images_extended_df = images_df.merge(site_df, on='code', how='left')
images_extended_df.shape[0]

In [None]:
# Number of images per site, ignoring images whose code is not in the sites DB.
has_valid_code = images_extended_df['code'].isin(valid_sites)
images_per_site = images_extended_df.loc[has_valid_code, 'code'].value_counts()
images_per_site

In [None]:
# Turn the per-site counts into a two-column frame: 'code' and 'count'.
# The columns are named explicitly rather than renamed from the defaults,
# because the default names produced by value_counts() + reset_index()
# differ between pandas versions ('index'/'code' before 2.0, 'code'/'count'
# from 2.0 on), which made the former rename map yield two 'count' columns.
images_per_site_df = images_per_site.rename_axis('code').reset_index(name='count')

In [None]:
# Enrich the counts with the site attributes needed to draw the map.
map_columns = ['count', 'code', 'name', 'aut_com', 'latitude', 'longitude', 'commons_cat']
merged_sites = images_per_site_df.merge(site_df, on='code')
images_per_site_df = merged_sites[map_columns].fillna('')

# Order by descending image count, ties broken by name
# (np.lexsort treats the LAST key as the primary one).
sort_order = np.lexsort([images_per_site_df['name'], -images_per_site_df['count']])
images_per_site_df = images_per_site_df.iloc[sort_order]

# Site names use underscores in place of spaces.
images_per_site_df['name'] = images_per_site_df['name'].map(lambda site_name: site_name.replace('_', ' '))
images_per_site_df.head()

In [None]:
# Build one GeoJSON Feature per site (row-wise application of to_geojson).
images_per_site_df['geojson'] = images_per_site_df.apply(to_geojson, axis=1)

In [None]:
# Gather every site's Feature into a FeatureCollection and serialise it as
# indented GeoJSON text, keeping non-ASCII characters unescaped.
features = images_per_site_df['geojson'].tolist()
feature_collection = FeatureCollection(features)
dump_options = {"ensure_ascii": False, "indent": 2}
dump = geojson.dumps(feature_collection, **dump_options)

In [None]:
# Wikitext of the map page, written as a Mako template: ${years[...]} gives
# the first and last editions, ${map} the GeoJSON placed inside <mapframe>.
template = """=== WLE contributions map ===
The map below includes all the contributions, by site of community importance, for all the editions of the contest 
(${years[0]}-${years[-1]}).
<mapframe text="Festivals" latitude="39" longitude="-4" zoom="5" width="800" height="600" align="center"> 
${map}
</mapframe>
"""
# NOTE: the name `vars` shadows the Python builtin; it is kept unchanged so
# that anything else referring to it keeps working.
vars = dict(map=dump, years=YEARS)
t = Template(template)
map_text = t.render(**vars)

In [None]:
# Publish the rendered wikitext to the contest's map page on Commons.
maps_page = pb.Page(commons_site, MAP_WLE_PAGE)
maps_page.text = map_text
# The country comes from COUNTRY rather than a hard-coded name, so the log
# line and the edit summary follow the notebook configuration.
pb.output('Publishing --> {0} in {1} Statistics'.format(TAG, COUNTRY))
maps_page.save("{0} in {1} statistics".format(TAG, COUNTRY))