Data Uploading

In [None]:
# Colab-only import: this cell will not run outside Google Colab.
from google.colab import files
# `uploaded` is later indexed by file name and wrapped in io.BytesIO, so it is
# presumably a mapping of uploaded file name -> raw bytes (confirm against the
# google.colab documentation).
uploaded = files.upload()

Saving athlete_events.csv to athlete_events.csv


Libraries Loading

In [None]:
# Standard library and core data-analysis imports.
import collections
import sys
import os
import io
import pandas as pd
import numpy as np

%matplotlib inline

# Plotly imports; `go`, `py`, and `iplot` are the names used by the plotting cells below.
from plotly import tools
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.figure_factory as ff
import plotly.offline as py
import plotly.graph_objs as go
import plotly.tools as tls
import plotly.express as px
# NOTE(review): this is the same plotly.offline.init_notebook_mode call already
# made above, just reached through the `py` alias; it looks redundant.
py.init_notebook_mode(connected=True)

# Silences every warning for the rest of the session, including deprecation
# warnings from pandas and plotly.
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Parse the uploaded CSV directly from memory, then print a schema summary.
csv_payload = uploaded['athlete_events.csv']
df = pd.read_csv(io.BytesIO(csv_payload))
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 271116 entries, 0 to 271115
Data columns (total 15 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   ID      271116 non-null  int64  
 1   Name    271116 non-null  object 
 2   Sex     271116 non-null  object 
 3   Age     261642 non-null  float64
 4   Height  210945 non-null  float64
 5   Weight  208241 non-null  float64
 6   Team    271116 non-null  object 
 7   NOC     271116 non-null  object 
 8   Games   271116 non-null  object 
 9   Year    271116 non-null  int64  
 10  Season  271116 non-null  object 
 11  City    271116 non-null  object 
 12  Sport   271116 non-null  object 
 13  Event   271116 non-null  object 
 14  Medal   39783 non-null   object 
dtypes: float64(3), int64(2), object(10)
memory usage: 31.0+ MB


Plotly Code Loading

In [None]:
def configure_plotly_browser_state():
  """Emit the RequireJS setup snippet into the current cell's output.

  The snippet loads require.js from the notebook's static files and maps the
  ``plotly`` module name to the Plotly CDN bundle. Every plotting cell in this
  notebook calls this helper before building its figure.
  """
  import IPython
  requirejs_setup = '''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-latest.min.js?noext',
            },
          });
        </script>
        '''
  # `display` is the notebook-provided builtin, exactly as in the original cell.
  display(IPython.core.display.HTML(requirejs_setup))

1. Pie Chart

In [None]:
def PieChart(df):
  """Draw a pie chart of the medal distribution (Gold / Silver / Bronze).

  Args:
    df: DataFrame with a ``Medal`` column. Rows without a medal (NaN) are
      excluded because ``value_counts`` skips missing values.
  """
  configure_plotly_browser_state()

  # Colour each slice by its medal label rather than by position:
  # value_counts() orders slices by frequency, so a positional colour list
  # only lines up with the right medal by coincidence of the data.
  medal_colors = {
      'Gold': '#f4cb42',
      'Silver': '#a1a8b5',
      'Bronze': '#cd7f32',
  }
  fallback_color = '#cccccc'  # used for any unexpected Medal value

  medal_counts = df['Medal'].value_counts(sort=True)
  labels = medal_counts.index
  values = medal_counts.values
  colors = [medal_colors.get(label, fallback_color) for label in labels]

  trace = go.Pie(
      labels = labels,
      values = values,
      marker = dict(colors=colors),
  )

  layout = go.Layout(
      title = {
          'text': 'Medal Distribution',
          'x': 0.5,
      },
      template = 'plotly_white',
  )

  fig = go.Figure(data=[trace], layout=layout)
  iplot(fig)

In [None]:
# Render the medal-distribution pie chart for the full dataset.
PieChart(df)

In [None]:
def PieChart2(df, numberOfCountries):
  """Draw two donut charts of medal counts per NOC, one for each sex.

  Args:
    df: DataFrame with ``Sex`` ('M' / 'F'), ``NOC`` and ``Medal`` columns.
    numberOfCountries: how many of the top medal-winning NOCs to show in each
      donut.
  """
  configure_plotly_browser_state()

  # Only the Medal column decides whether a row is a medal. A bare dropna()
  # would also discard medal rows that merely lack Age/Height/Weight.
  medalists = df.dropna(subset=['Medal'])

  # (trace name, value in the Sex column, horizontal domain of the donut)
  panels = [
      ('Male', 'M', [0, 0.46]),
      ('Female', 'F', [0.5, 1]),
  ]

  data = []
  for name, sex_code, x_domain in panels:
    counts = (
        medalists.loc[medalists['Sex'] == sex_code, 'NOC']
        .value_counts()
        .iloc[:numberOfCountries]
    )
    # Read labels and values straight from the Series: the column names that
    # reset_index() produces differ between pandas versions.
    trace = go.Pie(
        labels = counts.index,
        values = counts.values,
        name = name,
        hole = 0.4,
        domain = {'x': x_domain},
    )
    data.append(trace)

  layout = go.Layout(
      title = 'Top {} Countries With Medal By Gender'.format(numberOfCountries),
      template = 'plotly_white',
      font = dict(size=15),
      legend = dict(orientation='h'),
      # Captions placed inside the hole of each donut.
      annotations = [
          dict(x=0.215, y=0.5, text='Male', showarrow=False, font=dict(size=20)),
          dict(x=0.775, y=0.5, text='Female', showarrow=False, font=dict(size=20)),
      ]
  )

  fig = go.Figure(data=data, layout=layout)
  iplot(fig)

In [None]:
# Show the per-sex medal share for the ten NOCs with the most medals.
PieChart2(df, 10)

2. Bar Chart

In [None]:
def BarChartSeason(df, season):
  """Plot, for every Games of the given season, how many distinct sports it had.

  Args:
    df: DataFrame with ``Season``, ``Games`` and ``Sport`` columns.
    season: value to match against the ``Season`` column (e.g. 'Winter').
  """
  configure_plotly_browser_state()

  # Games labels belonging to the requested season, in ascending order.
  season_games = np.sort(df.loc[df['Season'] == season, 'Games'].unique())

  # Number of distinct sports contested at each of those Games.
  sports_per_game = np.array([
      df.loc[df['Games'] == label, 'Sport'].nunique()
      for label in season_games
  ])

  bars = go.Bar(
      x = season_games,
      y = sports_per_game,
      marker = dict(
          color = sports_per_game,
          colorscale = 'Blues',
          showscale = True,
      ),
  )

  chart_layout = go.Layout(
      title = 'Olympics Sports in {} Season of By Year'.format(season),
      template = 'plotly_white',
  )

  fig = go.Figure(data=[bars], layout=chart_layout)
  fig.show()

In [None]:
# Sports count per Games, restricted to rows whose Season is 'Winter'.
BarChartSeason(df, 'Winter')

In [None]:
def BarChartMedals(df, numberOfCountries):
  """Plot grouped bars of Gold / Silver / Bronze counts for the top NOCs.

  Args:
    df: DataFrame with ``NOC`` and ``Medal`` columns.
    numberOfCountries: how many NOCs (ranked by total medal rows) to include.
  """
  configure_plotly_browser_state()

  # Rank by medal rows only. A bare dropna() would also discard medal rows
  # that merely lack Age/Height/Weight and skew the ranking.
  medalists = df.dropna(subset=['Medal'])
  countries = medalists['NOC'].value_counts().iloc[:numberOfCountries]

  medal_colors = (
      ('Gold', '#f4cb42'),
      ('Silver', '#a1a8b5'),
      ('Bronze', '#cd7f32'),
  )

  bars = []
  for medal, color in medal_colors:
    # reindex keeps the ranking order and yields 0 (instead of a KeyError)
    # for a top country that never won this medal colour.
    per_country = (
        medalists.loc[medalists['Medal'] == medal, 'NOC']
        .value_counts()
        .reindex(countries.index, fill_value=0)
    )
    bars.append(go.Bar(
        x = per_country.index,
        y = per_country.values,
        name = medal,
        marker = dict(color=color),
    ))

  layout = go.Layout(
      title = 'Top {} Countries With Medals'.format(numberOfCountries),
      yaxis = dict(title = 'Count of medals'),
      template = 'plotly_white',
  )

  fig = go.Figure(data=bars, layout=layout)
  iplot(fig)

In [None]:
# Medal breakdown for the 25 NOCs with the most medals.
BarChartMedals(df, 25)

3. Scatter

In [None]:
configure_plotly_browser_state()

# Mean height and weight per sport. The columns are selected with a list,
# because tuple-style ['Height', 'Weight'] on a groupby is rejected by
# current pandas. Sports with no height or weight data are dropped.
sport_means = df.groupby('Sport')[['Height', 'Weight']].mean().dropna()

# Number of rows per sport (column 'ID'), used to size the bubbles.
sport_counts = df.groupby('Sport')['ID'].count()

# One row per sport with columns: Sport, Height, Weight, ID.
dataset = sport_means.join(sport_counts).reset_index()

# One trace per sport so that each sport gets its own legend entry.
scatterplots = [
    go.Scatter(
        x = [row.Height],
        y = [row.Weight],
        name = row.Sport,
        marker = dict(
            symbol = 'circle',
            sizemode = 'area',
            sizeref = 10,
            size = [row.ID],
        ),
    )
    for row in dataset.itertuples(index=False)
]

layout = go.Layout(
    title='Mean height and weight by sport',
    xaxis=dict(title='Height, cm'),
    yaxis=dict(title='Weight, kg'),
    showlegend=True,
    template = 'plotly_white',
)

fig = go.Figure(data=scatterplots, layout=layout)
py.iplot(fig)

4. Choropleth

In [None]:
!pip install pycountry

Collecting pycountry
[?25l  Downloading https://files.pythonhosted.org/packages/76/73/6f1a412f14f68c273feea29a6ea9b9f1e268177d32e0e69ad6790d306312/pycountry-20.7.3.tar.gz (10.1MB)
[K     |████████████████████████████████| 10.1MB 12.2MB/s 
[?25hBuilding wheels for collected packages: pycountry
  Building wheel for pycountry (setup.py) ... [?25l[?25hdone
  Created wheel for pycountry: filename=pycountry-20.7.3-py2.py3-none-any.whl size=10746863 sha256=214f096a25a8bbb8865619a7cea20d0ba0ea2b39b22e0f72782b1edcc81e4fb6
  Stored in directory: /root/.cache/pip/wheels/33/4e/a6/be297e6b83567e537bed9df4a93f8590ec01c1acfbcd405348
Successfully built pycountry
Installing collected packages: pycountry
Successfully installed pycountry-20.7.3


In [None]:
# pycountry is installed by the pip cell above, so it is imported here rather
# than in the top import cell. Without this import, every lookup in get_name
# failed with a NameError that the old bare `except:` silently swallowed.
import pycountry


def get_name(code):
  """Translate a three-letter country code into a country name.

  Args:
    code: three-letter code, looked up as an ISO 3166-1 alpha-3 code.

  Returns:
    The pycountry name for the code, or ``code`` unchanged when pycountry has
    no matching entry.
  """
  try:
    country = pycountry.countries.get(alpha_3=code)
  except LookupError:
    # Some pycountry versions raise KeyError for unknown codes.
    return code
  # Other pycountry versions return None for unknown codes.
  return country.name if country is not None else code


# Rows per NOC, indexed by NOC code, with a resolved country name alongside.
# The former `reset_index().drop(..., inplace=True)` line was removed: it acted
# on a temporary copy (no effect) and raises KeyError on pandas >= 2.0.
country_number = df['NOC'].value_counts().rename('number').to_frame()
country_number['country'] = [get_name(code) for code in country_number.index]
country_number.head(3)

Unnamed: 0,number,country
USA,18853,USA
FRA,12758,FRA
GBR,12256,GBR


In [None]:
configure_plotly_browser_state()

# Built from validated graph objects so that invalid properties fail loudly
# instead of being hidden by `validate=False`.
worldmap = [go.Choropleth(
    locations = country_number['country'],
    locationmode = 'country names',
    # 'number' is the count of dataset rows per NOC.
    z = country_number['number'],
    autocolorscale = True,
    reversescale = False,
    marker = dict(
        line = dict(color = 'rgb(180,180,180)', width = 0.5)
    ),
    # `autotick` is not a valid colorbar property and has been dropped.
    colorbar = dict(title = dict(text = 'Number of athletes')),
)]

layout = go.Layout(
    title = 'The Nationality of Athletes',
    geo = dict(
        showframe = False,
        showcoastlines = True,
        # Projection type names are lowercase in plotly.
        projection = dict(type = 'mercator')
    )
)

fig = go.Figure(data=worldmap, layout=layout)
py.iplot(fig)