#                                POTATO VISUALISATION SYSTEM

## Instructions:

1. Run the code
2. Click on different countries on the map to view the relevant data

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

!pip install geopandas
!pip install altair-viewer

## Visualisation Tasks and examples for each of them:
### Analyse:  

1. **Consume:** The user may want to simply get a gist of the total number of medals won by different countries just by looking at the visualisation. 

2. **Produce:** The user may want to calculate the percentage or difference between the total number of male and female participants in each country.  

### Search: 

1. The user may want to **lookup** the number of medals won by Afghanistan in the year 2016. 

2. The user may want to **browse** the year in which India won the highest number of medals.  

3. The user may want to **locate** if India won any medals at all in the year 2000. 

4. The user may want to **explore** whether the number of male and female participants has increased over the years in India. 

### Query:

1.	**Compare:** Which country has the highest number of medals?
2.	**Summarise:** Of all the years, in 2016 India had the highest female participation

### Targets:

1.	**Trends** in the number of male and female participants over the years in Australia
2.	Check if there is a year in which the number of medals won by India doesn’t match the usual pattern (**outlier**).



In [None]:
import pandas as pd
import altair as alt
import geopandas as gpd

# Turn off Altair's max-rows limit so the full tables can be passed to the charts.
alt.data_transformers.disable_max_rows()

# Raw athlete table, plus the subset of rows whose 'Medal' value is present.
# NOTE(review): absolute path into a local Downloads folder - confirm it exists
# in the environment where this notebook is run.
olympic_data = pd.read_csv("/Users/ankitanand/Downloads/archive/athlete_events.csv")
olympic_medal = olympic_data[olympic_data['Medal'].notna()]

# Number of medal rows per 'NOC' value: a Series named 'Medal', indexed by NOC.
medal_count = olympic_medal.groupby('NOC')['Medal'].count()

# Country shapes, with Antarctica removed.
world_map = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
world_map = world_map.loc[world_map['continent'] != 'Antarctica']

# Attach the medal counts to the shapes by matching 'iso_a3' against the NOC index.
# The left join keeps every shape; shapes without a match get a count of 0.
# NOTE(review): presumably NOC and ISO alpha-3 codes do not always coincide -
# verify that no medal-winning country silently ends up with 0.
world_merge = pd.merge(
    world_map,
    medal_count,
    left_on="iso_a3",
    right_index=True,
    how='left',
)
world_merge['Medal'] = world_merge['Medal'].fillna(0)

# Shorten the map's US label to 'United States'.
world_merge['name'] = world_merge['name'].replace(
    {'United States of America': 'United States'}
)

# Select the 'altair_viewer' renderer for chart display.
# NOTE(review): the 'kaggle' renderer is enabled further down, before anything is
# displayed - confirm which renderer is actually intended.
alt.renderers.enable('altair_viewer')

# Single selection keyed on the 'name' field; the map registers it and the
# other charts filter on it.
country_select = alt.selection_single(fields=['name'])

# Colour rule for the map: shapes matching the selection are coloured by their
# 'Medal' value on a square-root scale, all other shapes are light gray.
color = alt.condition(
    country_select,
    alt.Color(
        field="Medal",
        type="quantitative",
        scale=alt.Scale(type="sqrt"),
        legend=alt.Legend(
            title="Medals",
            labelFontSize=20,
            symbolSize=20,
            titleFontSize=20,
        ),
    ),
    alt.value('lightgray'),
)

# ----------- medals: per-type subsets and bar colours -----------
# Rows of the athlete table split by the value of 'Medal'.
gold_countries, silver_countries, bronze_countries = (
    olympic_data.loc[olympic_data["Medal"].eq(medal_kind)]
    for medal_kind in ('Gold', 'Silver', 'Bronze')
)

# Colour scale for the medal bar charts: domain[i] is drawn with range_[i].
domain = ['Bronze', 'Silver', 'Gold']
range_ = ['#ff531a', '#666699', '#e6b800']

# World map: one geoshape per row of `world_merge`, coloured by the `color`
# rule, with name and medal count in the tooltip. The map registers
# `country_select` via add_selection.
chart_medal = (
    alt.Chart(world_merge)
    .mark_geoshape()
    .encode(
        color=color,
        tooltip=[alt.Tooltip('name'), alt.Tooltip('Medal')],
    )
    .properties(
        title='Olympic Medals of The World',
        projection={"type": 'mercator'},
        width=1100,
        height=600,
    )
    .add_selection(country_select)
)

# Switch chart display to the 'kaggle' renderer.
alt.renderers.enable('kaggle')

# ----------- line chart: medals per year for the selected country -----------

# Medal rows counted per (Team, Year). 'Team' is renamed to 'name' so that
# `country_select`, which is keyed on 'name', can filter this table.
vis = (
    olympic_medal.groupby(['Team', 'Year'])["Medal"]
    .count()
    .reset_index()
    .rename(columns={"Team": "name"})
)

# Hover selection that snaps to the nearest point, matched on 'Year'.
nearest = alt.selection_single(nearest=True, on='mouseover', fields=['Year'])

# Base line, restricted to the country picked on the map.
line = (
    alt.Chart(vis)
    .mark_line()
    .encode(
        x=alt.X('Year:O'),
        y=alt.Y('Medal:Q'),
    )
    .properties(
        title='Number of Medals won each year',
        width=600,
        height=400,
    )
    .transform_filter(country_select)
)

# Fully transparent points across the x axis; they only exist to carry the
# `nearest` selection.
selectors = (
    alt.Chart(vis)
    .mark_point()
    .encode(
        x=alt.X('Year:O'),
        opacity=alt.value(0),
    )
    .add_selection(nearest)
)

# Points on the line, visible only where `nearest` matches.
points = line.mark_point().encode(
    opacity=alt.condition(nearest, alt.value(1), alt.value(0)),
)

# Medal count written next to the point where `nearest` matches, blank elsewhere.
text = line.mark_text(align='left', dx=5, dy=-5).encode(
    text=alt.condition(nearest, 'Medal:Q', alt.value(' ')),
)

# Gray vertical rule at the year picked by `nearest`.
rules = (
    alt.Chart(vis)
    .mark_rule(color='gray')
    .encode(x=alt.X('Year:O'))
    .transform_filter(nearest)
)

# Stack the five layers into one chart.
line_medal = alt.layer(line, selectors, points, rules, text).properties(
    width=600,
    height=300,
)


# -------------- sex: participant counts for the selected country --------------

# 'Team' becomes 'name' so `country_select` can filter this table, and the
# single-letter 'Sex' codes are spelled out.
olympic_data = olympic_data.rename(columns={"Team": "name"})
olympic_data['Sex'] = olympic_data['Sex'].replace({'F': 'Female', 'M': 'Male'})

# One bar per sex, bar height = number of rows; the 'Female' bar is pink and
# the other bar is steel blue.
sex_chart = (
    alt.Chart(olympic_data)
    .mark_bar()
    .encode(
        x=alt.X('Sex', type='nominal'),
        y=alt.Y('count(Sex)', type='quantitative'),
        color=alt.condition(
            alt.datum.Sex == 'Female',
            alt.value('pink'),
            alt.value('steelblue'),
        ),
        tooltip=['count(Sex)'],
    )
    .properties(
        title='Proportion of Male and Female Participants',
        width=600,
        height=400,
    )
    .transform_filter(country_select)
)

# Non-null 'Sex' entries per NOC as a one-column DataFrame.
# Not used by any chart below.
sex_num = olympic_data.groupby('NOC')['Sex'].count().to_frame()

# `osex` is another name for the same DataFrame object, so the constant 'num'
# column added here also appears on `olympic_data`.
osex = olympic_data
osex['num'] = 1

# Bars per year, split by sex; summing the constant 'num' column gives the
# number of rows in each group.
people_chart = (
    alt.Chart(osex)
    .mark_bar()
    .encode(
        x=alt.X('Year:O'),
        y=alt.Y('sum(num):Q', axis=alt.Axis(title='Participants')),
        color='Sex:N',
        tooltip=['Sex:N', 'count(Sex):Q'],
    )
    .properties(
        title='Participation of Men and women over Years',
        width=600,
        height=400,
    )
    .transform_filter(country_select)
)


# ----------------------------- medals by type -----------------------------

# Stack the gold, silver and bronze rows into one table and rename 'Team' to
# 'name' so `country_select` can filter it.
frames = [gold_countries, silver_countries, bronze_countries]
Medal_types = pd.concat(frames).rename(columns={"Team": "name"})

# Horizontal bars: one bar per medal type, length = number of rows of that type.
medal_chart_gsb = (
    alt.Chart(Medal_types)
    .mark_bar()
    .encode(
        x=alt.X('count(Medal)', type='quantitative'),
        y=alt.Y('Medal', type='nominal'),
        color=alt.Color('Medal', scale=alt.Scale(domain=domain, range=range_)),
        tooltip=[alt.Tooltip('count(Medal)', title='Medals won:')],
    )
    .properties(
        title='Types and Number of Medals won',
        width=600,
        height=400,
    )
    .transform_filter(country_select)
)

# The same counts broken down further: one small column of bars per 'Year'.
medal_chart_gsb_year = (
    alt.Chart(Medal_types)
    .mark_bar()
    .encode(
        column=alt.Column('Year'),
        x=alt.X('Medal:N'),
        y=alt.Y('count(Medal):Q'),
        color=alt.Color('Medal', scale=alt.Scale(domain=domain, range=range_)),
    )
    .transform_filter(country_select)
)

# Dashboard: map on top, then the two participation charts side by side,
# then the medal line chart and the two medal-type charts.
alt.vconcat(
    chart_medal,
    people_chart | sex_chart,
    line_medal,
    medal_chart_gsb,
    medal_chart_gsb_year,
)


## Visualisation Tasks:
### Targets:
1.	The user may be interested in certain **features**, such as Swimming having the highest number of records.


In [None]:
# Single selection on the x encoding; it is registered on the Sport bar chart
# below, so clicking a bar selects that bar's Sport.
pts = alt.selection(type="single", encodings=['x'])

# Binned Height x Weight grid, one rectangle per occupied bin.
# NOTE(review): colour encodes 'Sex' while the legend is titled 'Total Records'
# and uses a sequential scheme - confirm whether 'count()' was intended here.
rect = alt.Chart(olympic_data).mark_rect().encode(
    alt.X('Height:Q', bin=True),
    alt.Y('Weight:Q', bin=True),
    alt.Color('Sex',
        scale=alt.Scale(scheme='greenblue'),
        legend=alt.Legend(title='Total Records')
    )
)

# Grey points on the same bins, sized by the number of records in each bin.
circ = rect.mark_point().encode(
    alt.ColorValue('grey'),
    alt.Size('count()',
        legend=alt.Legend(title='Records in Selection')
    )
)

# Record count per Sport; bars matching the selection are steel blue, the rest grey.
bar = alt.Chart(olympic_data).mark_bar().encode(
    x='Sport',
    y='count()',
    color=alt.condition(pts, alt.ColorValue("steelblue"), alt.ColorValue("grey"))
).properties(
    width=550,
    height=200
).add_selection(pts
).interactive()

# transform_filter returns a new chart instead of modifying the chart it is
# called on, so the result must be kept; otherwise the Sport selection never
# reaches the layered chart that is displayed.
d = (rect + circ).transform_filter(
    pts
)

# Layered grid on top, Sport bars below; colour and size legends are resolved
# independently.
alt.vconcat(
    d,
    bar
).resolve_legend(
    color="independent",
    size="independent"
)