# Visualizing data for exploration and communication

![](http://style.org/images/stdp2/STDP2_095.jpg)  
Image credit: Jonathan Corum, http://style.org/stdp2/

![](http://style.org/images/stdp2/STDP2_096.jpg)  
  
Image credit: Jonathan Corum, http://style.org/stdp2/

### import modules

In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

### set defaults for plotting

In [None]:
# Global seaborn defaults for every figure below:
# the 'talk' context preset and the 'ticks' axes style
sns.set_context(context='talk')
sns.set_style(style='ticks')

### define data

In [None]:
# Load the CO2 table; the first CSV column is used as the row index
co2_df = pd.read_csv(
    '../myData/co2Wikipedia.csv',
    index_col=[0],
)
# Preview the last rows
co2_df.tail()

In [None]:
# Load the population table; the first CSV column is used as the row index
pop_df = pd.read_csv(
    '../myData/popWikipedia.csv',
    index_col=[0],
)
# Preview the last rows
pop_df.tail()

In [None]:
# Join the two tables on their shared 'Country' column
# (default inner join: only countries present in both tables are kept)
countries_df = co2_df.merge(pop_df, on='Country')
# Use the country names as row labels so rows can be looked up by name
countries_df.index = countries_df['Country']
# Preview the last rows
countries_df.tail()

In [None]:
# Keep only the two columns of interest (as an independent copy, so the
# assignments below never write into a view) and give them short names
countries_df = countries_df.loc[
    :, ['Population', 'FossilfuelCO2emissions(kt)in2015[8]']
].copy()
countries_df.columns = ['population', 'co2']

# Rescale to plot-friendly units: population in billions, CO2 in million kt.
# Plain column assignment replaces the whole column. The previous
# `.loc[:, col] = ...` form writes into the existing column on pandas 2.x,
# which emits an incompatible-dtype warning (and is slated to raise) if the
# CSV column was read as integers.
countries_df['population'] = countries_df['population'] / 1e9
countries_df['co2'] = countries_df['co2'] / 1e6

# Preview the last rows
countries_df.tail()

### simple plot

In [None]:
# Quick look: one point per country, population against CO2 emissions
sns.scatterplot(x='population', y='co2', data=countries_df)

### annotated plot

In [None]:
# Controls size and aspect ratio of the figure
plt.figure(figsize=(16, 16))

# Walk through the table one row at a time: the row label is the country
# name, the two columns are the point's coordinates
for country, x_val, y_val in zip(
        countries_df.index,
        countries_df.loc[:, 'population'],
        countries_df.loc[:, 'co2']):

    # Plot the data point; one plot call per country gives every point
    # its own color from the color cycle
    plt.plot(x_val, y_val, 'o')
    # Label the point with the country name. The text is passed
    # positionally because the keyword was renamed from `s` to `text`
    # in matplotlib 3.3, so `s=` fails on current releases.
    plt.annotate(country, xy=(x_val, y_val))

# Range of axes
plt.xlim(0, 1.5)
plt.ylim(0, 15)

# Label the axes
plt.xlabel('population (billion)')
plt.ylabel('CO2 emissions (million kt)')
# Title of plot
plt.title('carbon dioxide emissions')

sns.despine(
    offset=10,  # Gap between x and y axis at origin
    trim=True  # End axis at last 'tick'
)

# Show the figure
plt.show()

### Further explorations

In [None]:
# CO2 emissions divided by population for every country, as a one-column table
co2_per_pop = countries_df['co2'] / countries_df['population']
ratio_df = co2_per_pop.to_frame(name='co2/pop')
# Order the countries from the highest ratio to the lowest
ratio_df = ratio_df.sort_values(by='co2/pop', ascending=False)

In [None]:
# Wide figure: one vertical bar per country
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 8))
sns.barplot(
    data=ratio_df,
    x=ratio_df.index,
    y="co2/pop",
    color="b",
    ax=ax,
)
# Turn the country names sideways so they do not overlap
plt.xticks(rotation=90)
plt.show()

In [None]:
# Tall figure: one horizontal bar per country, names on the y axis
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 16))
sns.barplot(
    data=ratio_df,
    x="co2/pop",
    y=ratio_df.index,
    color="b",
    ax=ax,
)
plt.show()

In [None]:
# Remap the one-letter color codes (such as "b") to seaborn's 'muted' palette
sns.set_color_codes(palette='muted')

In [None]:
# Same horizontal bar chart as before
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 16))
sns.barplot(
    data=ratio_df,
    x="co2/pop",
    y=ratio_df.index,
    color="b",
    ax=ax,
)
# Remove the top and right spines (seaborn's despine defaults)
sns.despine()
plt.show()

In [None]:
# Same horizontal bar chart as before
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 16))
sns.barplot(
    data=ratio_df,
    x="co2/pop",
    y=ratio_df.index,
    color="b",
    ax=ax,
)
# Additionally remove the left spine, next to the country names
sns.despine(left=True)
plt.show()

In [None]:
# Average of the per-country co2/pop ratios
mean_ratio = ratio_df['co2/pop'].mean()
# Display the value
mean_ratio

In [None]:
# Horizontal bar chart of the co2/pop ratio per country
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 16))
sns.barplot(
    data=ratio_df,
    x="co2/pop",
    y=ratio_df.index,
    color="b",
    ax=ax,
)
sns.despine(left=True)
# Thin dashed black reference line at the mean ratio
ax.axvline(x=mean_ratio, color='k', linestyle='dashed', linewidth=1)
plt.show()

In [None]:
# Straight line through the origin whose slope is the mean co2/pop ratio,
# sampled every 0.1 from 0 to 2
x = np.arange(0, 2.01, 0.1)
y = list(x * mean_ratio)

plt.plot(x, y)

In [None]:
# Controls size and aspect ratio of the figure
plt.figure(figsize=(16, 16))

# Walk through the table one row at a time: the row label is the country
# name, the two columns are the point's coordinates
for country, x_val, y_val in zip(
        countries_df.index,
        countries_df.loc[:, 'population'],
        countries_df.loc[:, 'co2']):

    # Plot the data point; one plot call per country gives every point
    # its own color from the color cycle
    plt.plot(x_val, y_val, 'o')
    # Label the point with the country name. The text is passed
    # positionally because the keyword was renamed from `s` to `text`
    # in matplotlib 3.3, so `s=` fails on current releases.
    plt.annotate(country, xy=(x_val, y_val))

# Add the average: a line through the origin whose slope is the mean
# co2/pop ratio
x = np.arange(0, 2.01, 0.1)
y = x * mean_ratio

plt.plot(x, y)

# Range of axes
plt.xlim(0, 1.5)
plt.ylim(0, 15)

# Label the axes
plt.xlabel('population (billion)')
plt.ylabel('CO2 emissions (million kt)')
# Title of plot
plt.title('carbon dioxide emissions')

sns.despine(
    offset=10,  # Gap between x and y axis at origin
    trim=True  # End axis at last 'tick'
)

# Show the figure
plt.show()

In [None]:
def make_annot(i):
    """Plot and label the single country `i` on the current figure.

    Reads the module-level `countries_df`; `i` is one of its row labels
    (a country name).
    """
    # Get population value for country i
    x_val = countries_df.loc[i, 'population']
    # Get co2 value for country i
    y_val = countries_df.loc[i, 'co2']

    # Plot the data point at x and y
    plt.plot(x_val, y_val, 'o', markersize=10)
    # Label the point with the country name. The text is passed
    # positionally because the keyword was renamed from `s` to `text`
    # in matplotlib 3.3, so `s=` fails on current releases.
    plt.annotate(i, xy=(x_val, y_val))

In [None]:
from ipywidgets import interact

In [None]:
# Build a dropdown of all country names; picking one calls make_annot with it
interact(make_annot, i=countries_df.index)

In [None]:
def make_annot(i):
    """Draw all countries and highlight country `i`.

    Creates a new 16x16 figure showing every row of the module-level
    `countries_df` as a translucent blue point, country `i` as a larger
    red point with a large text label, and a line through the origin
    whose slope is the module-level `mean_ratio`.

    `i` is a row label of `countries_df` (a country name).
    """
    # Controls size and aspect ratio of the figure
    plt.figure(figsize=(16, 16))

    # Coordinates of the highlighted country
    x_val = countries_df.loc[i, 'population']
    y_val = countries_df.loc[i, 'co2']

    # Highlighted country: bigger red marker
    plt.plot(x_val, y_val, 'o', markersize=10, c='r')
    # Label it with its name. The text is passed positionally because the
    # keyword was renamed from `s` to `text` in matplotlib 3.3, so `s=`
    # fails on current releases.
    plt.annotate(i, xy=(x_val, y_val), fontsize=32)

    # All countries (rows) at once, as translucent blue points
    x_vals = countries_df.loc[:, 'population']
    y_vals = countries_df.loc[:, 'co2']
    plt.plot(x_vals, y_vals, 'o', c='b', alpha=0.5)

    # Add the average: line through the origin with slope mean_ratio
    x = np.arange(0, 2.01, 0.1)
    plt.plot(x, x * mean_ratio)

    # Range of axes
    plt.xlim(0, 1.5)
    plt.ylim(0, 15)

    # Label the axes
    plt.xlabel('population (billion)')
    plt.ylabel('CO2 emissions (million kt)')
    # Title of plot
    plt.title('carbon dioxide emissions')

    sns.despine(
        offset=10,  # Gap between x and y axis at origin
        trim=True  # End axis at last 'tick'
    )

    # Show the figure
    plt.show()

In [None]:
# Build a dropdown of all country names; picking one redraws the figure
interact(make_annot, i=countries_df.index)

In [None]:
def make_annot(i, my_xlim, my_ylim):
    """Draw all countries, highlight country `i`, with adjustable axis limits.

    Creates a new 16x16 figure showing every row of the module-level
    `countries_df` as a translucent blue point, country `i` as a larger
    red point with a large text label, and a line through the origin
    whose slope is the module-level `mean_ratio`.

    `i` is a row label of `countries_df` (a country name); `my_xlim` and
    `my_ylim` are the upper limits of the x and y axes (both start at 0).
    """
    # Controls size and aspect ratio of the figure
    plt.figure(figsize=(16, 16))

    # Coordinates of the highlighted country
    x_val = countries_df.loc[i, 'population']
    y_val = countries_df.loc[i, 'co2']

    # Highlighted country: bigger red marker
    plt.plot(x_val, y_val, 'o', markersize=10, c='r')
    # Label it with its name. The text is passed positionally because the
    # keyword was renamed from `s` to `text` in matplotlib 3.3, so `s=`
    # fails on current releases.
    plt.annotate(i, xy=(x_val, y_val), fontsize=32)

    # All countries (rows) at once, as translucent blue points
    x_vals = countries_df.loc[:, 'population']
    y_vals = countries_df.loc[:, 'co2']
    plt.plot(x_vals, y_vals, 'o', c='b', alpha=0.5)

    # Add the average: line through the origin with slope mean_ratio
    x = np.arange(0, 2.01, 0.1)
    plt.plot(x, x * mean_ratio)

    # Range of axes, upper ends chosen by the caller
    plt.xlim(0, my_xlim)
    plt.ylim(0, my_ylim)

    # Label the axes
    plt.xlabel('population (billion)')
    plt.ylabel('CO2 emissions (million kt)')
    # Title of plot
    plt.title('carbon dioxide emissions')

    sns.despine(
        offset=10,  # Gap between x and y axis at origin
        trim=True  # End axis at last 'tick'
    )

    # Show the figure
    plt.show()

In [None]:
# Country dropdown plus two sliders, given as (min, max, step),
# for the upper x and y axis limits
interact(
    make_annot,
    i=countries_df.index,
    my_xlim=(0, 1.5, 0.1),
    my_ylim=(0, 15, 1),
)

In [None]:
def make_annot(i, my_zoom):
    """Draw all countries, highlight country `i`, zoomed towards the origin.

    Creates a new 16x16 figure showing every row of the module-level
    `countries_df` as a translucent blue point, country `i` as a larger
    red point with a large text label, and a line through the origin
    whose slope is the module-level `mean_ratio`.

    `i` is a row label of `countries_df` (a country name). `my_zoom` is a
    non-zero zoom factor: both axis ranges (1.5 and 15) are divided by it.
    """
    # Controls size and aspect ratio of the figure
    plt.figure(figsize=(16, 16))

    # Coordinates of the highlighted country
    x_val = countries_df.loc[i, 'population']
    y_val = countries_df.loc[i, 'co2']

    # Highlighted country: bigger red marker
    plt.plot(x_val, y_val, 'o', markersize=10, c='r')
    # Label it with its name. The text is passed positionally because the
    # keyword was renamed from `s` to `text` in matplotlib 3.3, so `s=`
    # fails on current releases.
    plt.annotate(i, xy=(x_val, y_val), fontsize=32)

    # All countries (rows) at once, as translucent blue points
    x_vals = countries_df.loc[:, 'population']
    y_vals = countries_df.loc[:, 'co2']
    plt.plot(x_vals, y_vals, 'o', c='b', alpha=0.5)

    # Add the average: line through the origin with slope mean_ratio
    x = np.arange(0, 2.01, 0.1)
    plt.plot(x, x * mean_ratio)

    # Range of axes: both shrink by the same factor, so the aspect of the
    # data is kept while zooming in on the origin
    plt.xlim(0, 1.5 / my_zoom)
    plt.ylim(0, 15 / my_zoom)

    # Label the axes
    plt.xlabel('population (billion)')
    plt.ylabel('CO2 emissions (million kt)')
    # Title of plot
    plt.title('carbon dioxide emissions')

    sns.despine()

    # Show the figure
    plt.show()

In [None]:
# Country dropdown plus a zoom slider, given as (min, max, step)
interact(
    make_annot,
    i=countries_df.index,
    my_zoom=(1, 20, 1),
)

In [None]:
def make_annot(i, my_zoom):
    """Draw and label all countries, highlight country `i`, with zoom.

    Creates a new 16x16 figure showing every row of the module-level
    `countries_df` as a translucent blue point with a small name label,
    country `i` as a larger red point with a large label, and a line
    through the origin whose slope is the module-level `mean_ratio`.

    `i` is a row label of `countries_df` (a country name). `my_zoom` is a
    non-zero zoom factor: both axis ranges (1.5 and 15) are divided by it.
    """
    # Controls size and aspect ratio of the figure
    plt.figure(figsize=(16, 16))

    # Coordinates of the highlighted country
    x_val = countries_df.loc[i, 'population']
    y_val = countries_df.loc[i, 'co2']

    # Highlighted country: bigger red marker
    plt.plot(x_val, y_val, 'o', markersize=10, c='r')

    # Label it with its name. The text is passed positionally because the
    # keyword was renamed from `s` to `text` in matplotlib 3.3, so `s=`
    # fails on current releases.
    plt.annotate(i, xy=(x_val, y_val), fontsize=32)

    # Small label for every country in the table (this includes the
    # highlighted one, which therefore gets a second, small label)
    for name, x_j, y_j in zip(
            countries_df.index,
            countries_df.loc[:, 'population'],
            countries_df.loc[:, 'co2']):
        plt.annotate(name, xy=(x_j, y_j), alpha=0.9)

    # All countries (rows) at once, as translucent blue points
    x_vals = countries_df.loc[:, 'population']
    y_vals = countries_df.loc[:, 'co2']
    plt.plot(x_vals, y_vals, 'o', c='b', alpha=0.5)

    # Add the average: line through the origin with slope mean_ratio
    x = np.arange(0, 2.01, 0.1)
    plt.plot(x, x * mean_ratio)

    # Range of axes: both shrink by the same factor, so the aspect of the
    # data is kept while zooming in on the origin
    plt.xlim(0, 1.5 / my_zoom)
    plt.ylim(0, 15 / my_zoom)

    # Label the axes
    plt.xlabel('population (billion)')
    plt.ylabel('CO2 emissions (million kt)')
    # Title of plot
    plt.title('carbon dioxide emissions')

    sns.despine()

    # Show the figure
    plt.show()

In [None]:
# Country dropdown plus a zoom slider, given as (min, max, step)
interact(
    make_annot,
    i=countries_df.index,
    my_zoom=(1, 20, 1),
)

In [None]:
def make_annot(i, my_zoom):
    """Draw and label all countries, highlight country `i`, with zoom.

    Creates a new 16x16 figure showing every row of the module-level
    `countries_df` as a translucent blue point, country `i` as a larger
    red point with a large label, every other country with a small name
    label, and a line through the origin whose slope is the module-level
    `mean_ratio`.

    `i` is a row label of `countries_df` (a country name). `my_zoom` is a
    non-zero zoom factor: both axis ranges (1.5 and 15) are divided by it.
    """
    # Controls size and aspect ratio of the figure
    plt.figure(figsize=(16, 16))

    # Coordinates of the highlighted country
    x_val = countries_df.loc[i, 'population']
    y_val = countries_df.loc[i, 'co2']

    # Highlighted country: bigger red marker
    plt.plot(x_val, y_val, 'o', markersize=10, c='r')

    # Label it with its name. The text is passed positionally because the
    # keyword was renamed from `s` to `text` in matplotlib 3.3, so `s=`
    # fails on current releases.
    plt.annotate(i, xy=(x_val, y_val), fontsize=32)

    # Small label for every country except the highlighted one, so that
    # its large label is not overprinted by a second, small one
    others = countries_df.drop(index=i)
    for name, x_j, y_j in zip(
            others.index,
            others.loc[:, 'population'],
            others.loc[:, 'co2']):
        plt.annotate(name, xy=(x_j, y_j), alpha=0.9)

    # All countries (rows) at once, as translucent blue points
    x_vals = countries_df.loc[:, 'population']
    y_vals = countries_df.loc[:, 'co2']
    plt.plot(x_vals, y_vals, 'o', c='b', alpha=0.5)

    # Add the average: line through the origin with slope mean_ratio
    x = np.arange(0, 2.01, 0.1)
    plt.plot(x, x * mean_ratio)

    # Range of axes: both shrink by the same factor, so the aspect of the
    # data is kept while zooming in on the origin
    plt.xlim(0, 1.5 / my_zoom)
    plt.ylim(0, 15 / my_zoom)

    # Label the axes
    plt.xlabel('population (billion)')
    plt.ylabel('CO2 emissions (million kt)')
    # Title of plot
    plt.title('carbon dioxide emissions')

    sns.despine()

    # Show the figure
    plt.show()

In [None]:
# Country dropdown plus a zoom slider, given as (min, max, step)
interact(
    make_annot,
    i=countries_df.index,
    my_zoom=(1, 20, 1),
)