## This is a short tutorial about how to build a Grouped Bar Chart using Pandas and Plotly. 
This example shows the number of airports and airlines per country, side by side in a single chart.

<b>About Pandas and Plotly:</b>
+ Plotly
  + Plotly lets users easily create interactive charts and dashboards (https://plot.ly/)
+ Pandas
  + Pandas provides high-performance, easy-to-use data structures and data analysis tools for the Python programming language (http://pandas.pydata.org/)
 


In [22]:
import sys
import pandas as pd
import plotly.graph_objs as go
import plotly.plotly as py

### Using Pandas to read in the Airports and Airlines data from CSV files

In [23]:
# Read in the airports data.
# The file is read with header=None, so the column names are assigned explicitly
# right after loading; dtype=str reads every column as strings.
try:
    AIRPORTS = pd.read_csv(
        "https://raw.githubusercontent.com/marcelobp/dataviz/master/data/airports.csv",
        header=None,
        dtype=str,
    )
    AIRPORTS.columns = ["id", "name", "city", "country", "code", "icao", "latitude", "longitude", "altitude", "offset",
                        "dst", "timezone", "type", "group"]
except OSError as e:
    # Report the I/O failure, then re-raise. Swallowing it would let the cell carry on
    # to the print below with AIRPORTS undefined (NameError on a fresh kernel) or,
    # worse, silently left over from an earlier run.
    print("I/O error: {0}".format(e))
    raise
except Exception:
    # Anything else is unexpected: show the exception type and propagate it.
    print("Unexpected error:", sys.exc_info()[0])
    raise

# Quick sanity check: show the first few airport names.
print(AIRPORTS['name'].head())

0                                 Goroka Airport
1                                 Madang Airport
2                   Mount Hagen Kagamuga Airport
3                                 Nadzab Airport
4    Port Moresby Jacksons International Airport
Name: name, dtype: object


In [24]:
# Read in the airlines data.
# The file is read with header=None, so the column names are assigned explicitly
# right after loading; dtype=str reads every column as strings.
try:
    AIRLINES = pd.read_csv(
        "https://raw.githubusercontent.com/marcelobp/dataviz/master/data/airlines.csv",
        header=None,
        dtype=str,
    )
    AIRLINES.columns = ["id", "name", "alias", "iata", "icao", "callsign", "country", "active"]
except OSError as e:
    # Report the I/O failure, then re-raise. Swallowing it would let the cell carry on
    # to the print below with AIRLINES undefined (NameError on a fresh kernel) or,
    # worse, silently left over from an earlier run.
    print("I/O error: {0}".format(e))
    raise
except Exception:
    # Anything else is unexpected: show the exception type and propagate it.
    print("Unexpected error:", sys.exc_info()[0])
    raise

# Quick sanity check: show the first few airline names.
print(AIRLINES['name'].head())

0                                         Unknown
1                                  Private flight
2                                     135 Airways
3                                   1Time Airline
4    2 Sqn No 1 Elementary Flying Training School
Name: name, dtype: object


### Grouping and Counting Airports and Airlines by Country


In [25]:
# Count the airport rows per country and flatten the result into a
# two-column frame: 'country' and 'count'.
AIRPORTS_GROUPED = (
    AIRPORTS
    .groupby('country')
    .size()
    .reset_index(name='count')
)
print(AIRPORTS_GROUPED.head())

          country  count
0     Afghanistan     22
1         Albania      5
2         Algeria     47
3  American Samoa      3
4          Angola     27


In [26]:
# Count the airline rows per country and flatten the result into a
# two-column frame: 'country' and 'count'.
# NOTE(review): the first rows printed below look like free-text values with
# leading whitespace rather than country names; such rows presumably will not
# match any airport country when the two frames are joined - confirm.
AIRLINES_GROUPED = (
    AIRLINES
    .groupby('country')
    .size()
    .reset_index(name='count')
)
print(AIRLINES_GROUPED.head())

                 country  count
0   Boonville Stage Line      1
1                   S.A.      1
2                   ACOM      1
3            ACTIVE AERO      1
4             AEROCENTER      1


### Merging the Airports and Airlines datasets into one dataset

In [27]:
# Join the two per-country counts on 'country'. The clashing 'count' columns
# get the '_airports' / '_airlines' suffixes. Then keep only the countries
# with more than 50 airlines so the chart stays readable.
AIRPORTS_AIRLINES = (
    AIRPORTS_GROUPED
    .merge(AIRLINES_GROUPED, on='country', suffixes=['_airports', '_airlines'])
    .query('count_airlines > 50')
)

print(AIRPORTS_AIRLINES.head(10))

         country  count_airports  count_airlines
9      Australia             400              94
25        Brazil             254              60
34        Canada             581             323
40         China             349              72
65        France             293             123
71       Germany             475             135
90         Italy             140              93
94    Kazakhstan              38              79
115       Mexico             101             440
127  Netherlands              73              52


In [28]:
# Bar series for the airport counts, one bar per country.
# Semi-transparent blue fill with an opaque outline of the same colour.
trace1 = go.Bar(
    name='Airports',
    x=AIRPORTS_AIRLINES['country'],
    y=AIRPORTS_AIRLINES['count_airports'],
    marker={
        'color': 'rgba(55, 128, 191, 0.7)',
        'line': {
            'color': 'rgba(55, 128, 191, 1.0)',
            'width': 1.5,
        },
    },
)

In [29]:
# Bar series for the airline counts, one bar per country.
# Semi-transparent green fill with an opaque outline of the same colour.
trace2 = go.Bar(
    name='Airlines',
    x=AIRPORTS_AIRLINES['country'],
    y=AIRPORTS_AIRLINES['count_airlines'],
    marker={
        'color': 'rgba(50, 171, 96, 0.7)',
        'line': {
            'color': 'rgba(50, 171, 96, 1.0)',
            'width': 1.5,
        },
    },
)

In [30]:
# Both series go into one figure; list order is airports first, then airlines.
data = [trace1, trace2]

# Chart-level settings: title, x-axis tick label styling, and grouped bars
# (barmode='group') with a 0.1 gap between the bars of a group.
layout = go.Layout(
    title='Number of Airlines and Airports by Country',
    barmode='group',
    bargroupgap=0.1,
    xaxis={
        'tickfont': {
            'size': 14,
            'color': 'rgb(107, 107, 107)',
        },
    },
)

In [31]:
# Combine the traces and the layout into a single figure object.
fig = go.Figure(data=data, layout=layout)

# Render the figure as this cell's output; it must stay the last expression.
# NOTE(review): `py` is `plotly.plotly`, which presumably publishes the chart
# under the given filename and needs Plotly account credentials - confirm.
# NOTE(review): plotly >= 4 moved this module to the separate `chart_studio`
# package - verify against the installed plotly version.
py.iplot(fig, filename='airports-airlines-bar-chart')