## Libraries

In [176]:
from gapminder import gapminder
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.figure_factory as ff

## Loading the gapminder data

In [3]:
# Bind the DataFrame exported by the gapminder package to the working name `df`.
df = gapminder

In [5]:
# Show the distinct values of the `year` column to see which years are available.
df['year'].unique()

array([1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, 2002,
       2007], dtype=int64)

In [7]:
# Restrict the working frame to the 2007 rows; the cross-sectional plots below
# all read from this filtered `df`.
df = df.loc[df["year"].eq(2007)]

In [8]:
# Preview the first rows of `df` to check its columns and values.
df.head()

Unnamed: 0,country,continent,year,lifeExp,pop,gdpPercap
11,Afghanistan,Asia,2007,43.828,31889923,974.580338
23,Albania,Europe,2007,76.423,3600523,5937.029526
35,Algeria,Africa,2007,72.301,33333216,6223.367465
47,Angola,Africa,2007,42.731,12420476,4797.231267
59,Argentina,Americas,2007,75.32,40301927,12779.37964


In [95]:
# Continents (Oceania excluded) ordered from lowest to highest median life
# expectancy. Columns are selected by name rather than by position, so the
# result does not depend on the column order of `df`.
(
    df.loc[df["continent"] != "Oceania"]
    .groupby("continent")["lifeExp"]
    .median()
    .sort_values()
    .index.to_list()
)

['Africa', 'Asia', 'Americas', 'Europe']

## Exploring color palettes

In [106]:
# Render Plotly Express's built-in sequential colour scales for visual comparison.
px.colors.sequential.swatches()

In [101]:
# Render Plotly Express's built-in qualitative palettes (G10 is the one used in the plots below).
px.colors.qualitative.swatches()

## Plot 1: Life expectancy and GDP per capita by continent (2007)

In [219]:
# All five panels use the same rows, palette and hover fields, so define them
# once. Oceania is left out of every panel.
plot_df = df[df["continent"] != "Oceania"]
palette = px.colors.qualitative.G10
# Hover fields are named explicitly (rather than picked by column position) so
# that a change in column order cannot silently change what is shown.
hover_cols = ["continent", "country", "lifeExp"]

# Panel 1: distribution of life expectancy per continent.
fig1 = px.box(
    data_frame=plot_df,
    x="continent",
    y="lifeExp",
    color="continent",
    color_discrete_sequence=palette,
    points="all",
    hover_data=hover_cols,
    labels={"continent": "Continent", "lifeExp": "Life Expectancy"},
)

# Panel 2: empirical CDF of life expectancy per continent.
fig2 = px.ecdf(
    data_frame=plot_df,
    x="lifeExp",
    color="continent",
    color_discrete_sequence=palette,
)

# Panel 3: distribution of GDP per capita per continent.
fig3 = px.box(
    data_frame=plot_df,
    x="continent",
    y="gdpPercap",
    color="continent",
    color_discrete_sequence=palette,
    points="all",
    hover_data=hover_cols,
    labels={"continent": "Continent", "gdpPercap": "GDP Per Capita"},
)

# Panel 4: empirical CDF of GDP per capita per continent.
fig4 = px.ecdf(
    data_frame=plot_df,
    x="gdpPercap",
    color="continent",
    color_discrete_sequence=palette,
)

# Panel 5: GDP per capita against life expectancy, one point per country.
fig5 = px.scatter(
    data_frame=plot_df,
    x="gdpPercap",
    y="lifeExp",
    color="continent",
    color_discrete_sequence=palette,
    hover_data=hover_cols,
    labels={"gdpPercap": "GDP Per Capita", "lifeExp": "Life Expectancy"},
)

fig_combined = make_subplots(
    rows=3,
    cols=2,
    # The scatter plot occupies the whole last row. Declaring the span in
    # `specs` (instead of overriding the x-axis domain afterwards) keeps its
    # subplot title centred over the full-width axes.
    specs=[[{}, {}], [{}, {}], [{"colspan": 2}, None]],
    # Slightly more room between rows for the axis titles added below.
    vertical_spacing=0.12,
    subplot_titles=[
        "Life Expectancy by Continent",
        "Life Expectancy by Continent - Empirical Cumulative Distribution Function",
        "GDP Per Capita by Continent",
        "GDP Per Capita - Empirical Cumulative Distribution Function",
        "GDP Per Capita vs Life Expectancy",
    ],
)

# (source figure, row, col) for each panel of the combined figure.
panels = [
    (fig1, 1, 1),
    (fig2, 1, 2),
    (fig3, 2, 1),
    (fig4, 2, 2),
    (fig5, 3, 1),
]

for panel_index, (panel, row, col) in enumerate(panels):
    for trace in panel.data:
        # One legend entry per continent: only the first panel contributes
        # entries, and the shared legendgroup makes a click on an entry toggle
        # that continent in every panel.
        trace.legendgroup = trace.name
        trace.showlegend = panel_index == 0
        fig_combined.add_trace(trace, row=row, col=col)

# add_trace copies traces only, not the layout of the source figures, so the
# axis titles have to be set on the combined figure itself.
fig_combined.update_yaxes(title_text="Life Expectancy", row=1, col=1)
fig_combined.update_xaxes(title_text="Life Expectancy", row=1, col=2)
fig_combined.update_yaxes(title_text="Cumulative Probability", row=1, col=2)
fig_combined.update_yaxes(title_text="GDP Per Capita", row=2, col=1)
fig_combined.update_xaxes(title_text="GDP Per Capita", row=2, col=2)
fig_combined.update_yaxes(title_text="Cumulative Probability", row=2, col=2)
fig_combined.update_xaxes(title_text="GDP Per Capita", row=3, col=1)
fig_combined.update_yaxes(title_text="Life Expectancy", row=3, col=1)

fig_combined.update_layout(
    height=800,
    width=1200,
    title=dict(text="Exploratory Analysis - Plots", x=0),  # x=0 left-aligns the title
)

fig_combined.show()

In [217]:
# `df` was narrowed to 2007 earlier, so bind the full multi-year gapminder
# frame to `df2` for the plots over time.
df2 = gapminder

## Plot 2: Continent-level trends over time

In [235]:
def _continent_mean_line(data, metric):
    """Build a line figure of the yearly per-continent mean of one column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with `continent`, `year` and `metric` columns.
    metric : str
        Name of the numeric column to average.

    Returns
    -------
    plotly.graph_objects.Figure
        One line (with markers) per continent, coloured from the G10 palette.
    """
    yearly_mean = data.groupby(["continent", "year"])[metric].mean().reset_index()
    return px.line(
        data_frame=yearly_mean,
        x="year",
        y=metric,
        color="continent",
        color_discrete_sequence=px.colors.qualitative.G10,
        markers=True,
    )


fig1 = _continent_mean_line(df2, "lifeExp")
fig2 = _continent_mean_line(df2, "pop")
fig3 = _continent_mean_line(df2, "gdpPercap")

fig_combined = make_subplots(
    rows=3,
    cols=1,
    subplot_titles=[
        "Mean Life Expectancy by Continent and Year",
        "Mean Population by Continent and Year",
        "Mean GDP Per Capita by Continent and Year",
    ],
)

for row, panel in enumerate((fig1, fig2, fig3), start=1):
    for trace in panel.data:
        # One legend entry per continent: only the first row contributes
        # entries, and the shared legendgroup makes a click on an entry toggle
        # that continent in all three rows.
        trace.legendgroup = trace.name
        trace.showlegend = row == 1
        fig_combined.add_trace(trace, row=row, col=1)

# add_trace copies traces only, not the layout of the source figures, so the
# axis titles are set on the combined figure itself.
fig_combined.update_yaxes(title_text="Life Expectancy", row=1, col=1)
fig_combined.update_yaxes(title_text="Population", row=2, col=1)
fig_combined.update_yaxes(title_text="GDP Per Capita", row=3, col=1)
fig_combined.update_xaxes(title_text="Year", row=3, col=1)

fig_combined.update_layout(
    height=800,
    width=1200,
    title=dict(text="Exploratory Analysis - Line Plots", x=0),  # x=0 left-aligns the title
)

fig_combined.show()