In [6]:
# Before running, make sure summer.csv is in the working directory.

import pandas as pd
import numpy as np
from bokeh.plotting import figure, show
from bokeh.io import output_notebook, output_file
from bokeh.models import ColumnDataSource
from collections import Counter

# Medal table, loaded once at import time from the current working directory.
# The functions in this file read its Athlete, Gender, Country, Discipline
# and Year columns.
summer_data = pd.read_csv('summer.csv')
def first_elem_list(r):
    """
    Return a list with the first item of every element of r.

    Used to strip the counts from ``Counter.most_common`` results, which are
    ``(value, count)`` pairs.
    """
    return [d[0] for d in r]



def most_winners(data=None):
    """
    Return the man and the woman who won the most medals.

    Every row of the medal table counts as one medal. The ranking is done
    separately for the rows whose ``Gender`` is 'Men' and 'Women', so the
    result is correct even when the two most decorated athletes overall
    share the same gender.

    Args:
        data: Medal table with ``Athlete`` and ``Gender`` columns. Defaults
            to the module-level ``summer_data``.

    Returns:
        A ``(male, female)`` tuple of names rewritten from
        "SURNAME, Given" to "Given SURNAME". An entry is ``None`` when the
        table holds no athlete for that gender.
    """
    if data is None:
        data = summer_data

    def top_athlete(gender):
        # Rows with a missing athlete name cannot be attributed to anyone.
        names = data.loc[data['Gender'] == gender, 'Athlete'].dropna()
        if names.empty:
            return None
        winner, _ = Counter(names).most_common(1)[0]
        # Names are stored surname first, separated by ", ".
        return " ".join(winner.split(", ")[::-1])

    return top_athlete('Men'), top_athlete('Women')

def topten_countries(data=None, n=10):
    """
    Return the countries that won the most medals, for men and for women.

    Every row of the medal table counts as one medal. Rows without a
    country are ignored so that a missing value can never be ranked.

    Args:
        data: Medal table with ``Gender`` and ``Country`` columns. Defaults
            to the module-level ``summer_data``.
        n: Maximum number of countries returned per gender (default 10).

    Returns:
        A ``(men_countries, women_countries)`` tuple of lists, each ordered
        from most to fewest medals. A list is shorter than ``n`` when fewer
        countries are present for that gender.
    """
    if data is None:
        data = summer_data

    def ranking(gender):
        countries = data.loc[data['Gender'] == gender, 'Country'].dropna()
        # most_common yields (country, count) pairs; keep only the country.
        return [country for country, _ in Counter(countries).most_common(n)]

    return ranking('Men'), ranking('Women')

def graph(men_countries, women_countries, year=2012):
    """
    Plot the medals won by men, per country, in one Olympic year.

    Only rows of ``summer_data`` that satisfy all of the following are
    counted:

    * the discipline is one of the ten "most popular" ones, where popular
      means having the most rows in the whole table;
    * the country is listed in ``men_countries``;
    * the ``Gender`` is 'Men' (so the data matches the series label);
    * the ``Year`` equals ``year``.

    The chart is rendered inline through ``output_notebook``.

    Args:
        men_countries: Countries to show on the x axis.
        women_countries: Currently unused; kept so existing callers keep
            working.
        year: Olympic year to plot (default 2012).
    """
    discipline_counts = Counter(summer_data['Discipline'])
    popular_disciplines = [
        name for name, _ in discipline_counts.most_common(10)
    ]

    selected = (
        summer_data['Discipline'].isin(popular_disciplines)
        & summer_data['Country'].isin(men_countries)
        & (summer_data['Gender'] == 'Men')
        & (summer_data['Year'] == year)
    )

    # One row per country, ordered from most to fewest medals.
    # rename_axis pins the column name regardless of how the installed
    # pandas version names the value_counts index.
    medals = (
        summer_data.loc[selected, 'Country']
        .value_counts()
        .rename_axis('Country')
        .reset_index(name='Quantity')
    )
    data_source_men = ColumnDataSource(medals)

    output_notebook()

    fig = figure(
        # A categorical axis needs an explicit list of factors.
        x_range=medals['Country'].tolist(),
        title='Medals won by countries in {} Olympic games'.format(year),
        x_axis_label='Countries', y_axis_label='Medals won',
    )

    fig.line(
        'Country', 'Quantity',
        color='blue', line_width=1,
        # ``legend=`` was removed from glyph methods; ``legend_label`` is
        # the supported keyword for a fixed legend entry.
        legend_label="Medals won by men",
        source=data_source_men,
    )
    show(fig)

def main():
    """
    Print the top athletes and the top countries, then draw the chart.

    The country table has one column for men and one for women. The rows
    are paired by rank, and a blank cell is printed when one ranking is
    shorter than the other.
    """
    # Local import keeps this block independent of the file's import header.
    from itertools import zip_longest

    male, female = most_winners()
    print("Most winners")
    print("-"*15)
    print(male, female, sep="\n")

    men_countries, women_countries = topten_countries()
    print("-"*15)
    print("Men", "Women", sep="\t")
    print("-"*15)
    ranked_pairs = zip_longest(men_countries, women_countries, fillvalue="")
    for men_country, women_country in ranked_pairs:
        print(men_country, women_country, sep="\t")

    graph(men_countries, women_countries)

    
# Run the report only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()



Most winners
---------------
Michael PHELPS
Larisa LATYNINA
---------------
Men	Women
---------------
USA	USA
URS	URS
GBR	CHN
FRA	AUS
ITA	GER
SWE	GDR
GER	RUS
HUN	NED
AUS	ROU
JPN	GBR
