In [1]:
#!/usr/bin/python
# -*- coding: latin-1 -*-
"""This notebook creates user galleries for all the contestants of
TAG in Spain in YEAR.

As WLE 2015 also allowed contributions from Flickr, some special
handling is needed to take them into account.
"""
import os

# Prefer an importable pywikibot; otherwise extend sys.path with this
# script's folder and its parent folder and retry the import.
try:
    import pywikibot as pb

except ImportError:
    # Only a missing module triggers the fallback. Any other failure raised
    # while importing pywikibot is left to propagate instead of being hidden
    # behind a second, misleading import attempt.
    import inspect
    import sys

    current_folder = os.path.realpath(
        os.path.abspath(os.path.split(inspect.getfile(inspect.currentframe()))[0])
    )
    folder_parts = current_folder.split(os.sep)
    pywikibot_folder = os.sep.join(folder_parts[0:-1])

    # Same insertion order as before: the parent folder ends up first.
    for candidate_folder in (current_folder, pywikibot_folder):
        if candidate_folder not in sys.path:
            sys.path.insert(0, candidate_folder)

    import pywikibot as pb
    
import pandas as pd
from mako.template import Template

from io import StringIO

In [2]:
# --- Contest being processed -------------------------------------------
YEAR = 2015
TAG = 'WLE'
TAG_EXT = 'Wiki Loves Earth'

# --- Pages hanging from the contest base page --------------------------
BASE_NAME = "Commons:Wiki Loves in Spain/{1}/{0}".format(YEAR, TAG_EXT)
LOG_PAGE = BASE_NAME + '/Log'
GALLERY_PAGE = BASE_NAME + '/User Gallery'

# --- Title patterns for the per-author galleries -----------------------
# Filled in with .format(contest name, year, author) by the gallery cells.
USER_GALLERY_PAGE = 'Commons:{0}/{1}/Images from {0} {1} in Spain by {2}'
REDUCED_GALLERY_PAGE = 'Commons:{0}/{1}/Images from {0} {1} in Spain by {2} (reduced)'

# Site object used for every page read and saved in this notebook.
commons_site = pb.Site('commons', 'commons')

## Templates

In [3]:
# Mako template for an author's complete gallery: a heading followed by a
# packed <gallery> holding one line per entry of `image_list`.
gallery_template = "\n".join([
    '== Photographs by ${author} in ${tag} ${year} ==',
    '',
    '<gallery mode="packed" heights="150px">',
    '% for item in image_list :',
    '${item}',
    '% endfor',
    '</gallery>',
])

In [4]:
# Mako template for an author's reduced gallery: at most the first 10 entries
# of `image_list`, plus (only when there are more than 10) a line linking to
# the author's complete gallery.
#
# The embedded Python block builds `person_link`:
#   * authors without 'flickr' in their name get a [[User:...]] wiki link;
#   * otherwise the ' (flickr)' suffix is removed from the displayed name and
#     the link target is looked up in the `flickr_author` mapping.
#
# Both branches used to be followed by two byte-identical "See gallery" lines
# guarded by an if/elif pair; a single condition now emits that line once.
reduced_gallery_template = """== Photographs by ${author} in ${tag} ${year} ==

<gallery mode="packed" heights="150px">
% for item in image_list[:10] :
${item}
% endfor
</gallery>
<%
if len(image_list) > 10 and 'flickr' not in context.get('author'):
    person_link = '[[User:{0}|{0}]]'.format(context.get('author'))
elif len(image_list) > 10:
    person = context.get('author').replace(' (flickr)', '')
    person_link = '[{0} {1}] (from Flickr)'.format(context.get('flickr_author')[context.get('author')], person)
%>
% if len(image_list) > 10:
; See '''gallery''' with all contributions by ${person_link} '''[[Commons:${tag}/${year}/Images from ${tag} ${year} in Spain by ${author}|here]]'''.
% endif"""

### Image log retrieval

In [5]:
# Read the cached image log from its wiki page and load it into a DataFrame
# indexed by upload timestamp.
pb.output('Retrieving --> {1} {0} in Spain images list from cache'.format(YEAR, TAG))
list_page = pb.Page(commons_site, LOG_PAGE)

# Keep only the text between the first and the last line break, i.e. drop the
# page's first and last lines (presumably wrapper markup around the CSV data
# -- TODO confirm against the page that writes the log).
log_text = list_page.text
list_page_text = StringIO(log_text[log_text.find('\n') + 1:log_text.rfind('\n')])

# The log has no header row, so column names are supplied here; missing values
# become empty strings.
images_df = pd.read_csv(list_page_text,
                        sep=";",
                        index_col=False,
                        names=['image_title', 'lic_id',
                               'uploader', 'uploader_registration',
                               'timestamp', 'date', 'size',
                               'height', 'width', 'qi',
                               'finalist']
                        ).fillna('')
pb.output('Retrieved --> {1} {0} in Spain images list from cache'.format(YEAR, TAG))

images_df['timestamp'] = pd.to_datetime(images_df['timestamp'], format="%Y-%m-%d %H:%M:%S")

# Re-bind instead of mutating in place so re-running the cell is predictable.
images_df = images_df.set_index(["timestamp"])
# `del images_df.index.name` fails with AttributeError on recent pandas
# releases; assigning None clears the index label on old and new versions.
images_df.index.name = None

Retrieving --> WLE 2015 in Spain images list from cache
Retrieved --> WLE 2015 in Spain images list from cache


In [6]:
# Preview the first rows of the parsed image log.
images_df.head()

Unnamed: 0,image_title,lic_id,uploader,uploader_registration,date,size,height,width,qi,finalist
2015-05-03 12:36:37,"""Encina andante"". Parque Natural de la Subbéti...",ES6130002,Teckömo,2015-05-03,2011-05-09,3801106,2592,3456,,
2015-05-09 12:54:58,'Grasilla' (Pinguicula grandiflora) en la Send...,ES1200001,ROSUROB,2015-05-09,2007-05-16,1593022,1932,2576,,
2015-05-13 18:00:59,(1) ABEJA LIBANDO FLOR DEL NARANJO EN EL CORRA...,ES4320037,JESUS SANCHEZ RODRIGUEZ,2015-05-13,2009-05-03,984666,3072,2304,,
2015-05-13 18:01:00,(2) ABEJA LIBANDO FLOR DEL NARANJO EN EL CORRA...,ES4320037,JESUS SANCHEZ RODRIGUEZ,2015-05-13,2009-05-03,1147630,2304,3072,,
2015-05-06 19:25:36,0004395970-original.jpg,ES7010033,Tamara k,2009-12-13,2013-03-02,3672007,2092,3000,,


### Galleries creation

In [7]:
# Creation of complete users galleries
#
# Side products kept for later cells:
#   uploaders  -- gallery author names, in group order
#   flickr_ids -- maps '<name> (flickr)' to the first space-separated token
#                 of the original uploader value
uploaders = []
flickr_ids = {}

# Compile the template once instead of once per author.
complete_gallery = Template(gallery_template)

# One alphabetically sorted list of image titles per uploader.
grouped_images = images_df.groupby('uploader')['image_title'].apply(list).apply(sorted)

# Series.items() replaces iteritems(), which was removed in pandas 2.0.
for uploader, image_titles in grouped_images.items():
    author = uploader
    if 'flickr' in uploader:
        # The first token is stored as the Flickr id; the remaining tokens
        # form the display name, tagged with ' (flickr)'.
        name_parts = uploader.split(' ')
        author = '{} (flickr)'.format(' '.join(name_parts[1:]))
        flickr_ids[author] = name_parts[0]
    uploaders.append(author)

    gallery_text = complete_gallery.render(
        author=author,
        image_list=image_titles,
        tag=TAG_EXT,
        year=YEAR,
    ).strip()

    gallery_page = pb.Page(commons_site, USER_GALLERY_PAGE.format(TAG_EXT, YEAR, author))
    # Save only when the rendered text differs from the page text.
    if gallery_page.text != gallery_text:
        gallery_page.text = gallery_text
        pb.output('Publishing --> {1} {0} in Spain user gallery'.format(YEAR, TAG))
        gallery_page.save("{1} {0} in Spain user gallery".format(YEAR, TAG))

In [8]:
# Creation of reduced users galleries
#
# Relies on `flickr_ids`, which is filled in by the cell that creates the
# complete galleries; run that cell first.

# Compile the template once instead of once per author.
reduced_gallery = Template(reduced_gallery_template)

# One alphabetically sorted list of image titles per uploader.
grouped_images = images_df.groupby('uploader')['image_title'].apply(list).apply(sorted)

# Series.items() replaces iteritems(), which was removed in pandas 2.0.
for uploader, image_titles in grouped_images.items():
    author = uploader
    if 'flickr' in uploader:
        # Drop the first token and tag the remaining name with ' (flickr)',
        # matching the keys stored in `flickr_ids`.
        author = '{} (flickr)'.format(' '.join(uploader.split(' ')[1:]))

    gallery_text = reduced_gallery.render(
        author=author,
        flickr_author=flickr_ids,
        image_list=image_titles,
        tag=TAG_EXT,
        year=YEAR,
    ).strip()

    gallery_page = pb.Page(commons_site, REDUCED_GALLERY_PAGE.format(TAG_EXT, YEAR, author))

    # Save only when the rendered text differs from the page text.
    if gallery_page.text != gallery_text:
        gallery_page.text = gallery_text
        pb.output('Publishing --> {1} {0} in Spain user gallery'.format(YEAR, TAG))
        gallery_page.save("{1} {0} in Spain user gallery".format(YEAR, TAG))

### List of reduced galleries creation and publication

In [9]:
# Mako template for the index page: a heading, the transcluded reduced
# galleries, and the contest category.
gallery_index_template = """'''Contributions to [[Commons:${tag}|${tag} ${year}]] in Spain'''

${galleries}

[[Category:${tag} ${year} in Spain]]"""

# One {{...}} transclusion per author collected in `uploaders`, one per line.
galleries = '\n'.join(
    '{{' + REDUCED_GALLERY_PAGE.format(TAG_EXT, YEAR, uploader) + '}}'
    for uploader in uploaders
)

# Keyword arguments are passed directly so the builtin `vars` is no longer
# shadowed at notebook scope.
gallery_index_text = Template(gallery_index_template).render(
    galleries=galleries,
    tag=TAG_EXT,
    year=YEAR,
)

In [10]:
# Publish the index of reduced galleries, but only when the rendered text
# differs from what the page currently holds.
gallery_index_page = pb.Page(commons_site, GALLERY_PAGE)
index_is_outdated = gallery_index_page.text != gallery_index_text

if index_is_outdated:
    gallery_index_page.text = gallery_index_text
    pb.output(
        'Publishing --> {1} {0} in Spain User Gallery'.format(YEAR, TAG)
    )
    gallery_index_page.save(
        "{1} {0} in Spain User Gallery".format(YEAR, TAG)
    )

Publishing --> WLE 2015 in Spain User Gallery
Page [[commons:Commons:Wiki Loves in Spain/Wiki Loves Earth/2015/User Gallery]] saved
