# Data visualization with Plotly

In [1]:
import numpy as np 
import pandas as pd
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Load the Gapminder sample dataset that ships with Plotly Express,
# then preview its first rows.
gapminder = px.data.gapminder()
gapminder.head(n=5)

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
0,Afghanistan,Asia,1952,28.801,8425333,779.445314,AFG,4
1,Afghanistan,Asia,1957,30.332,9240934,820.85303,AFG,4
2,Afghanistan,Asia,1962,31.997,10267083,853.10071,AFG,4
3,Afghanistan,Asia,1967,34.02,11537966,836.197138,AFG,4
4,Afghanistan,Asia,1972,36.088,13079460,739.981106,AFG,4


Because the dataset is fairly large, only the data from the most recent year will be used for the Plotly visualizations.

In [3]:
# List the distinct values of the `year` column.
gapminder["year"].unique()

array([1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, 2002,
       2007])

In [4]:
# Keep only the rows that belong to the latest year present in the data.
max_year = gapminder["year"].max()
is_latest_year = gapminder["year"] == max_year
df = gapminder.loc[is_latest_year]
df

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap,iso_alpha,iso_num
11,Afghanistan,Asia,2007,43.828,31889923,974.580338,AFG,4
23,Albania,Europe,2007,76.423,3600523,5937.029526,ALB,8
35,Algeria,Africa,2007,72.301,33333216,6223.367465,DZA,12
47,Angola,Africa,2007,42.731,12420476,4797.231267,AGO,24
59,Argentina,Americas,2007,75.320,40301927,12779.379640,ARG,32
...,...,...,...,...,...,...,...,...
1655,Vietnam,Asia,2007,74.249,85262356,2441.576404,VNM,704
1667,West Bank and Gaza,Asia,2007,73.422,4018332,3025.349798,PSE,275
1679,"Yemen, Rep.",Asia,2007,62.698,22211743,2280.769906,YEM,887
1691,Zambia,Africa,2007,42.384,11746035,1271.211593,ZMB,894


In [5]:
# Sum the numeric columns per (continent, year); the `pop` column of the
# result is the total population of each continent.
# NOTE(review): the other summed columns (lifeExp, gdpPercap, iso_num) are
# plain sums of per-country values, not continent-level statistics.
continent_groups = df.groupby(by=['continent', 'year'], as_index=False)
df_continent = continent_groups.sum(numeric_only=True)
df_continent.head()

Unnamed: 0,continent,year,lifeExp,pop,gdpPercap,iso_num
0,Africa,2007,2849.914,929539692,160629.695446,23859
1,Americas,2007,1840.203,898871184,275075.790634,9843
2,Asia,2007,2334.04,3811953827,411609.886714,13354
3,Europe,2007,2329.458,586098529,751634.449078,12829
4,Oceania,2007,161.439,24549947,59620.37655,590


In [6]:
# Draw a bar chart of each continent's total population for the selected year.
# The year shown in the title comes from `max_year` (the value used to filter
# the data) rather than a hard-coded literal, so the title always matches the
# data and agrees with the graph_objects version of this chart.
fig = px.bar(
    data_frame=df_continent,
    x='continent',
    y='pop',
    title=f'Continent population - {max_year}',
    labels={'pop': 'Population', 'continent': 'Continent'},
)
fig.show()

Using Plotly graph objects produces the same result as Plotly Express.

In [7]:
# Bar trace: one bar per continent, bar height is the summed population.
trace = go.Bar(
    x=df_continent["continent"],
    y=df_continent["pop"],
    marker={"color": "royalblue"},
)

# Layout: chart title (including the selected year) and axis captions.
layout = go.Layout(
    title=f"Continent population - {max_year}",
    xaxis={"title": "Continent"},
    yaxis={"title": "Population"},
)

# Combine the trace and the layout into a figure and render it.
fig = go.Figure(data=[trace], layout=layout)
fig.show()

## Scatter

In [8]:
# Bubble chart of GDP per capita (x) against life expectancy (y):
# one bubble per row of `df`, coloured by continent and sized by population.
fig = px.scatter(
    data_frame=df,
    title="GDP vs LifeExp",
    x="gdpPercap",
    y="lifeExp",
    size="pop",
    size_max=50,
    color="continent",
    hover_name="country",
)
fig.show()

#### Some questions can be answered from graph
1. Which country has the highest life expectancy? 
Japan
2. Which country has the highest life expectancy in Asia and the Americas?
Japan and Canada
3. Which country has the largest population in the Americas?
United States
4. Which country has the lowest life expectancy in Europe?
Turkey
5. Which country has the lowest life expectancy and GDP per capita in the world?
Mozambique
6. Countries with an average life expectancy above 80. On which continent are these countries mainly concentrated?
Europe
7. Which country has the lowest GDP per capita?
Mozambique
8. Which continent has the greatest disparity in life expectancy among its countries?
Asia

## Boxplot

In [9]:
# Box plot of life expectancy, with one box per continent.
fig_box = px.box(
    data_frame=df,
    title="Distribution of average life expectancy across continents",
    x="continent",
    y="lifeExp",
)
fig_box.show()

#### Answer questions

1. Which continent has the highest median life expectancy?
Europe
2. Which continent has the lowest median life expectancy?
Africa
3. Which continent has the largest interquartile range (IQR) in life expectancy?
Africa
4. Which continent has the smallest interquartile range (IQR) in life expectancy?
Oceania
5. Are there any continents with outliers? What is special about these countries?
Yes, the Americas and Asia have outliers; these countries have an average life expectancy above 70 and below 80.
6. Which continent has the widest range of life expectancy, from the smallest to the largest value?
7. Comment on the distribution of life expectancy between Europe and Asia.
8. How do the life expectancy values in the continents differ compared to the global average life expectancy?

## Histogram

In [10]:
# Histogram of life expectancy, with the bars coloured by continent.
fig_hist = px.histogram(
    data_frame=df,
    title="Distribution average life expectation across continents",
    x="lifeExp",
    nbins=25,
    color="continent",
)
fig_hist.show()

#### Answer questions
1. What are your opinion about the distribution of average life expectancy across continents?
The Americas have the highest average life expectancy, while Africa has the lowest.
2. What is the highest data density, meaning in which range of values is the average life expectancy most concentrated?
Concentrated on range (70,80)
3. In your opinion, what are the outlier values for average life expectancy, and in which continents are they found?
The outlier values are:
- Life expectancy below 40, found in Africa
- Life expectancy below 70, found in the Americas
- Life expectancy above 80, found in Asia
4. Observe the number of specific countries in each average life expectancy value range (bin) and indicate which bin has the most countries.
The bins in the range (70, 80)
5. Observe the number of specific countries in each average life expectancy value range (bin) and indicate which bin has the fewest countries.
The bins in the range (0, 40)

## Treemap

In [11]:
# Total population per continent, computed from the per-country rows of `df`.
population_by_continent = df.groupby("continent")["pop"].sum()
population_by_continent

continent
Africa       929539692
Americas     898871184
Asia        3811953827
Europe       586098529
Oceania       24549947
Name: pop, dtype: int64

In [12]:
# Treemap with a continent -> country hierarchy: tile area follows population
# and tile colour follows life expectancy.
fig = px.treemap(
    data_frame=df,
    title="Treemap of Population and Life Expectancy by Country and Continent",
    path=["continent", "country"],
    values="pop",
    color="lifeExp",
    hover_data=["lifeExp", "gdpPercap", "iso_alpha"],
)

# Fixed canvas size; only a top margin is kept so the title has room.
fig.update_layout(
    width=1000,
    height=500,
    margin={"t": 40, "l": 0, "r": 0, "b": 0},
)
fig.show()

#### Answer questions
1. Which continent has the largest total population?
Asia
2. Which country has the highest average life expectancy?
Japan
3. Which country has the lowest average life expectancy that can be found?
Swaziland
4. What are your observations about Asia from the chart?
- China has the largest population, with an average life expectancy of around 70
- Afghanistan is the country with the lowest average life expectancy
- Japan has the highest average life expectancy
5. What are your observations about Africa from the chart?
Africa has a lower life expectancy than the other continents
- Some countries have an average life expectancy of 65-70
- Nigeria has the largest population, with an average life expectancy in the range (45, 55)

## Pie chart

In [13]:
#Draw pie chart showing distribution of population
fig = px.pie(
    data_frame=df_continent,
    values='pop',
    names='continent',
    title = 'Distribution of Population across continent',
    color_discrete_sequence=px.colors.qualitative.D3
)
fig.update_traces(text = df_continent['continent'],
                  textposition = 'outside'
                  )

fig.update_layout(
    width = 600,
    height = 350,
    margin = dict(t = 50, l = 50, r = 25, b = 25),
    showlegend = True
)
fig.show()

#### Answer questions:
1. Which continent has the largest population? 
Asia has the largest population, accounting for 61%
2. Which continent has the least population?
Oceania has the smallest population, accounting for 0.393%

## Map visualization

In [14]:
# World choropleth: each country (located through its `iso_alpha` code) is
# shaded by its life expectancy.
fig = px.choropleth(
    data_frame=df,
    title="Average life expectation in the world",
    locations="iso_alpha",
    color="lifeExp",
    hover_name="country",
)

# Fixed canvas size with explicit margins around the map.
fig.update_layout(
    width=1000,
    height=500,
    margin={"t": 40, "l": 50, "r": 25, "b": 25},
)
fig.show()

## Scatter Geo

In [15]:
# Bubble map: one marker per country, sized by population and coloured by
# continent.
# Countries are located through their ISO-3 code (`iso_alpha`), the same key
# the choropleth cells in this notebook use. Matching on the free-text
# `country` column is less reliable: dataset spellings such as "Yemen, Rep."
# are not guaranteed to match Plotly's country-name lookup, and unmatched
# rows would not be drawn.
fig = px.scatter_geo(
    df,
    locations='iso_alpha',
    locationmode='ISO-3',
    size='pop',
    hover_name="country",
    color="continent",
    title = "Population distribution of each continent",
    projection="natural earth"
    )
fig.show()

## Animation Map

In [17]:
# Animated world choropleth over the full dataset: one frame per `year`,
# countries located through `iso_alpha` and shaded by `gdpPercap`.
# The title names GDP *per capita* because that is the column being plotted.
fig = px.choropleth(
    data_frame=gapminder,
    locations="iso_alpha",
    color="gdpPercap",
    hover_name="country",
    projection="natural earth",  # map projection used to draw the globe
    animation_frame="year",
    animation_group="country",
    title="GDP per capita of each country"
)

# Fixed canvas size with explicit margins around the map.
fig.update_layout(
    width = 1000,
    height = 500,
    margin = dict(t=40, l=50,r=25,b =25)
)
fig.show()