# Plotting Data Packages using Pandas and Plotly Express
See the [tableschema-pandas-py](https://github.com/frictionlessdata/tableschema-pandas-py) repository for more information.

In [1]:
# for reading data packages into Pandas
from tableschema import Storage
from datapackage import Package
import pandas as pd

# for plotting data
import plotly_express as px

In [2]:
# Load the data package's resources as Pandas data frames: connect a
# pandas-backed storage, build the Package from the descriptor at `url`,
# then save the package into that storage.
url = 'https://raw.githubusercontent.com/frictionlessdata/example-data-packages/master/cpi/datapackage.json'

storage = Storage.connect('pandas')
package = Package(url)
package.save(storage=storage)

True

In [3]:
# Storage works as a container for Pandas data frames;
# `buckets` lists the names of the tables it currently holds.
# Learn more about storage here: https://github.com/frictionlessdata/tableschema-py#storage
storage.buckets

['cpi']

In [4]:
# indexing the storage by bucket name gives back the stored object; check its type
type(storage['cpi'])

pandas.core.frame.DataFrame

In [5]:
# The data package is now a regular Pandas data frame, so the usual Pandas
# functions apply; here we preview its first rows.
storage['cpi'].head()

Unnamed: 0,Country Name,Country Code,Year,CPI
0,Afghanistan,AFG,2004,63.131893
1,Afghanistan,AFG,2005,71.140974
2,Afghanistan,AFG,2006,76.302178
3,Afghanistan,AFG,2007,82.774807
4,Afghanistan,AFG,2008,108.0666


In [6]:
# The package descriptor carries the metadata (title, description, licenses,
# sources, resource schemas, ...), which we can inspect directly.
package.descriptor

{'name': 'cpi',
 'title': 'Annual Consumer Price Index (CPI)',
 'description': 'Annual Consumer Price Index (CPI) for most countries in the world. Reference year is 2005.',
 'profile': 'tabular-data-package',
 'licenses': [{'name': 'CC-BY-4.0',
   'title': 'Creative Commons Attribution 4.0',
   'path': 'https://creativecommons.org/licenses/by/4.0/'}],
 'keywords': ['CPI',
  'World',
  'Consumer Price Index',
  'Annual Data',
  'The World Bank'],
 'version': '2.0.0',
 'sources': [{'title': 'The World Bank',
   'path': 'http://data.worldbank.org/indicator/FP.CPI.TOTL'}],
 'resources': [{'path': 'data/cpi.csv',
   'name': 'cpi',
   'profile': 'tabular-data-resource',
   'schema': {'fields': [{'name': 'Country Name',
      'type': 'string',
      'format': 'default'},
     {'name': 'Country Code', 'type': 'string', 'format': 'default'},
     {'name': 'Year', 'type': 'year', 'format': 'default'},
     {'name': 'CPI',
      'description': 'CPI (where 2005=100)',
      'type': 'number',
     

In [None]:
# draw the CPI series as a line chart, one line per country
px.line(
    # keep only the rows from 2008 onwards
    storage['cpi'].loc[lambda frame: frame['Year'] >= 2008],
    # year on the x axis, CPI value on the y axis
    x='Year',
    y='CPI',
    color='Country Name',
    # title on the first line and the description in small text below it,
    # both taken from the data package descriptor
    title=(
        f"{package.descriptor['title']}<br>"
        f"<span style='font-size: small;'>{package.descriptor['description']}</span>"
    ),
    # the default label repeats the column name many times, so blank it out
    labels={'Country Name': ''},
)

![Annual Consumer Price Index (CPI)](../img/consumer-price-index-plotly.png)

**Notes:**

* Output from the plot is included as a static image so you can preview it on
  GitHub while keeping the notebook file size small. When running the
  notebook, the output will be an interactive plot.
* The data package description says the reference year is 2005, but from the
  actual data it seems to be 2010.
* The countries with hyperinflation are clearly visible outliers:
  Venezuela, Sudan, Iran, Malawi, Ethiopia and Guinea.