# In [1]:  (Jupyter notebook cell prompt; the cell's source follows)
import numpy as np
import pandas as pd
import re
import sys
from csv import QUOTE_NONE


# Matches a trailing "_(yyyy_film)" suffix and captures the four-digit year.
# The first digit is restricted to 1-9, so years below 1000 never match.
# \Z is used instead of $ because $ also matches just before a trailing
# newline, which would let "..._(1999_film)\n" through.
_FILM_YEAR_SUFFIX = re.compile(r'_\(([1-9][0-9]{3})_film\)\Z')


def get_year(article):
    """
    Extract the release year from an article title ending in "_(yyyy_film)".

    The year is taken from a regex capturing group, so the result does not
    depend on fixed character offsets from the end of the string.

    :param article: the article title (a string)
    :return: the four-digit year as a string, or np.nan when the title does
             not end with "_(yyyy_film)"
    """
    match = _FILM_YEAR_SUFFIX.search(article)
    if match is None:
        return np.nan
    return match.group(1)


def main():
    """
    Rank film years by their summed values and write the ranking as JSON.

    Reads the tab-separated file 'output' (no header row, first column used
    as the index), maps every index entry to a year with ``get_year``, sums
    the values per year, ranks the sums in descending order and writes the
    resulting ``{year: rank}`` mapping to standard output.

    NOTE(review): the value column presumably holds per-article view
    counts -- confirm against whatever produces 'output'.

    Relevant pandas APIs: ``Index.map``, ``Series.rank``, ``Series.astype``,
    ``Series.to_json`` and ``DataFrame.squeeze``.
    """
    table = pd.read_table('output', header=None, index_col=0,
                          quoting=QUOTE_NONE, keep_default_na=False,
                          encoding='utf-8')
    # The ``squeeze=True`` reader keyword was deprecated in pandas 1.4 and
    # removed in 2.0; squeezing the parsed frame along the column axis is
    # the documented replacement and also works on older versions.
    s = table.squeeze('columns')
    # convert each article to the year
    s.index = s.index.map(get_year)
    # group by, sum and rank the years
    # NA groups in GroupBy are automatically excluded
    totals = s.groupby(s.index).sum()
    # method='min' gives tied years the same (lowest) rank;
    # ascending=False makes rank 1 the year with the largest sum.
    ranking = totals.rank(method='min', ascending=False).astype(int)
    # convert to JSON and write to StdOut
    ranking.to_json(sys.stdout)


# Run the ranking pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


{"1895":120,"1896":114,"1897":115,"1899":117,"1900":117,"1901":120,"1903":108,"1904":120,"1905":116,"1906":120,"1907":111,"1908":111,"1910":102,"1911":110,"1912":105,"1913":109,"1914":95,"1915":101,"1916":104,"1917":107,"1918":106,"1919":99,"1920":100,"1921":96,"1922":103,"1923":96,"1924":98,"1925":89,"1926":91,"1927":69,"1928":93,"1929":94,"1930":90,"1931":70,"1932":78,"1933":78,"1934":82,"1935":74,"1936":81,"1937":67,"1938":92,"1939":42,"1940":51,"1941":73,"1942":88,"1943":86,"1944":75,"1945":87,"1946":71,"1947":84,"1948":83,"1949":80,"1950":66,"1951":63,"1952":76,"1953":59,"1954":63,"1955":76,"1956":47,"1957":62,"1958":65,"1959":49,"1960":39,"1961":72,"1962":41,"1963":61,"1964":53,"1965":85,"1966":60,"1967":57,"1968":58,"1969":68,"1970":55,"1971":54,"1972":50,"1973":40,"1974":46,"1975":56,"1976":44,"1977":52,"1978":31,"1979":43,"1980":37,"1981":38,"1982":34,"1983":36,"1984":45,"1985":35,"1986":33,"1987":32,"1988":30,"1989":26,"1990":29,"1991":28,"1992":24,"1993":27,"1994":25,"1995":