**Column Charts**

For this example I'm using the Iris data set from the scikit-learn library (if you're not familiar with this library or dataset, don't worry: I'm just using them to get data for a fully reproducible example).

I use `pandas.DataFrame.to_excel()` (Docs: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_excel.html) and `pandas.ExcelWriter()` (Docs: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html)

A lot of useful documentation on the xlsxwriter chart class: https://xlsxwriter.readthedocs.io/chart.html 

In [None]:
!pip install xlsxwriter
import xlsxwriter
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris

# Load the Iris measurements and append the numeric class label as a 'target' column.
iris = load_iris()
iris_df = pd.DataFrame(
    data=np.c_[iris['data'], iris['target']],
    columns=iris['feature_names'] + ['target'],
)

# Average every measurement per class; 'target' becomes the index of the result.
iris_grouped_df = iris_df.groupby('target').mean()

# Translate the numeric class index into a readable species name.
condition_list = [iris_grouped_df.index == 0, iris_grouped_df.index == 1, iris_grouped_df.index == 2]
choice_list = ['setosa', 'versicolor', 'virginica']
iris_grouped_df['target_name'] = np.select(condition_list, choice_list, default='unknown')

print(iris_grouped_df)

# add_chart() is an xlsxwriter API, so request that engine explicitly instead of
# relying on whichever engine pandas picks by default.
with pd.ExcelWriter('iris_column_chart.xlsx', engine='xlsxwriter') as writer:
    # With index=False the sheet layout is: row 1 = headers, columns A-D = the four
    # averaged measurements, column E = species name, one species per row from row 2.
    iris_grouped_df.to_excel(writer, index=False)
    wb = writer.book
    ws = writer.sheets['Sheet1']
    chart = wb.add_chart({'type': 'column'})

    # One series per species row; the header row supplies the category labels.
    for row in range(2, len(iris_grouped_df) + 2):
        chart.add_series({
            'name': '=Sheet1!$E$%d' % row,
            'values': '=Sheet1!$A$%d:$D$%d' % (row, row),
            'categories': '=Sheet1!$A$1:$D$1',
        })

    ws.insert_chart('G2', chart)
    # No explicit wb.close(): leaving the `with` block saves and closes the workbook.

Collecting xlsxwriter
[?25l  Downloading https://files.pythonhosted.org/packages/00/1f/2092a81056d36c1b6651a645aa84c1f76bcee03103072d4fe1cb58501d69/XlsxWriter-1.2.8-py2.py3-none-any.whl (141kB)
[K     |██▎                             | 10kB 20.8MB/s eta 0:00:01[K     |████▋                           | 20kB 1.8MB/s eta 0:00:01[K     |███████                         | 30kB 2.6MB/s eta 0:00:01[K     |█████████▎                      | 40kB 1.7MB/s eta 0:00:01[K     |███████████▋                    | 51kB 2.2MB/s eta 0:00:01[K     |██████████████                  | 61kB 2.6MB/s eta 0:00:01[K     |████████████████▏               | 71kB 3.0MB/s eta 0:00:01[K     |██████████████████▌             | 81kB 2.3MB/s eta 0:00:01[K     |████████████████████▉           | 92kB 2.6MB/s eta 0:00:01[K     |███████████████████████▏        | 102kB 2.9MB/s eta 0:00:01[K     |█████████████████████████▌      | 112kB 2.9MB/s eta 0:00:01[K     |███████████████████████████▉    | 122kB 2.9M

**Pie Charts**

For this example I'm using the California housing data set from the scikit-learn library.  We'll look at the percentage of blocks in each Average Rooms (per dwelling) category.

Similar to the Column Chart example, I use `pandas.DataFrame.to_excel()` (Docs: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_excel.html) and `pandas.ExcelWriter()` (Docs: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html)


In [None]:
!pip install xlsxwriter
import xlsxwriter
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing

cal = fetch_california_housing()

cal_df = pd.DataFrame(
    data=np.c_[cal['data'], cal['target']],
    columns=list(cal['feature_names']) + ['target'],
)

# Round the average number of rooms (per block) to the nearest integer, then bin it.
# Anything with 10 or more rooms goes into its own '10+' category.
cal_df['AveRoomsRounded'] = cal_df['AveRooms'].round(decimals=0).astype(int)

cal_df.loc[cal_df.AveRoomsRounded > 9, 'AveRoomsBin'] = '10+'
cal_df.loc[(cal_df.AveRoomsRounded > 7) & (cal_df.AveRoomsRounded < 10), 'AveRoomsBin'] = '8-9'
cal_df.loc[(cal_df.AveRoomsRounded > 5) & (cal_df.AveRoomsRounded < 8), 'AveRoomsBin'] = '6-7'
cal_df.loc[(cal_df.AveRoomsRounded > 3) & (cal_df.AveRoomsRounded < 6), 'AveRoomsBin'] = '4-5'
cal_df.loc[cal_df.AveRoomsRounded < 4, 'AveRoomsBin'] = '<=3'

#If you use the print functions below it gives you a sense of how we're binning these by Ave Rooms
#You'll see that one block in the Data set has an average of 142 rooms per house. Wow!
#print(cal_df['AveRoomsBin'].value_counts(normalize=True))
#print(cal_df['AveRoomsRounded'].value_counts(normalize=True))
#print(cal.DESCR)

# Share of rows per bin. normalize=True yields fractions (0-1) even though the
# column is called 'percentage'; a pie chart only needs the relative sizes.
cal_df_binned = (
    cal_df['AveRoomsBin']
    .value_counts(normalize=True)
    .rename_axis('AveRooms')
    .reset_index(name='percentage')
)

# Data rows start at row 2 (row 1 is the header), so the last one is len + 1.
last_row = len(cal_df_binned) + 1

# add_chart() is an xlsxwriter API, so request that engine explicitly.
with pd.ExcelWriter('california_housing_pie_chart.xlsx', engine='xlsxwriter') as writer:
    # Sheet layout with index=False: column A = bin label, column B = share.
    cal_df_binned.to_excel(writer, index=False)
    wb = writer.book
    ws = writer.sheets['Sheet1']
    chart = wb.add_chart({'type': 'pie'})
    chart.set_title({'name': 'Average Rooms per Dwelling'})
    chart.add_series({
        'name': 'Average Rooms per Dwelling',
        'values': '=Sheet1!$B$2:$B$%d' % last_row,
        'categories': '=Sheet1!$A$2:$A$%d' % last_row,
    })
    ws.insert_chart('D2', chart)
    # No explicit wb.close(): leaving the `with` block saves and closes the workbook.





Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to /root/scikit_learn_data


**Line Charts**

For this example I'm using the US Unemployment Rate retrieved using the pandas-datareader python library.  We'll look at producing time series charts in Excel.

Similar to other Chart examples, I use `pandas.DataFrame.to_excel()` (Docs: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_excel.html) and `pandas.ExcelWriter()` (Docs: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html)

This is the most detailed example thus far as we'll look at transforming a python date to an excel date and demonstrate how to chart time series data with a line chart.



In [None]:
!pip install xlsxwriter
import datetime

import pandas as pd  # used below for pd.ExcelWriter; this cell should not rely on earlier cells
import pandas_datareader.data as data

start = datetime.datetime(2000, 1, 1)
end = datetime.datetime(2019, 12, 1)

#Unemployment Rate
unemp_df = data.DataReader("UNRATE", "fred", start, end)

# Convert each date to an Excel serial day number. In Excel's 1900 date system
# 2000-01-01 is serial 36526, which corresponds to counting days from 1899-12-30
# (counting from 1899-12-31 gives 36525, i.e. one day early). Storing plain
# integers keeps the written value independent of how the Excel engine converts
# timedelta objects.
excel_epoch = datetime.datetime(1899, 12, 30)
unemp_df['xlsx_date'] = (unemp_df.index - excel_epoch).days

print(unemp_df.head())

# Data rows start at row 2 (row 1 is the header), so the last one is len + 1.
last_row = len(unemp_df) + 1

# add_chart()/add_format() are xlsxwriter APIs, so request that engine explicitly.
with pd.ExcelWriter('Unemployment_Rate_line_chart.xlsx', engine='xlsxwriter') as writer:
    # Sheet layout with index=False: column A = UNRATE, column B = xlsx_date.
    unemp_df.to_excel(writer, index=False)
    wb = writer.book
    ws = writer.sheets['Sheet1']
    dateFormat = wb.add_format({'num_format': 'mmm-yy'})

    # Overwrite the raw serial numbers with the same numbers plus a date format
    # so that Excel displays them as dates.
    ws.write_column('B2', unemp_df['xlsx_date'].tolist(), dateFormat)

    chart = wb.add_chart({'type': 'line'})
    chart.set_title({'name': 'US Unemployment Rate (2000-2019)'})
    chart.add_series({
        'name': 'US Unemployment Rate (2000-2019)',
        'values': '=Sheet1!$A$2:$A$%d' % last_row,
        'categories': '=Sheet1!$B$2:$B$%d' % last_row,
    })
    ws.insert_chart('D2', chart)
    # No explicit wb.close(): leaving the `with` block saves and closes the workbook.




  from pandas.util.testing import assert_frame_equal


            UNRATE  xlsx_date
DATE                         
2000-01-01     4.0 36525 days
2000-02-01     4.1 36556 days
2000-03-01     4.0 36585 days
2000-04-01     3.8 36616 days
2000-05-01     4.0 36646 days


**Scatter Charts**

For this example we will return to the Iris Data Set from sci-kit learn library. 

I use pandas.DataFrame.to_excel() and pandas.ExcelWriter() to create the workbook.

A lot of useful documentation on xlsxwriter chart class: https://xlsxwriter.readthedocs.io/chart.html. And Specifically for Scatter Plots https://xlsxwriter.readthedocs.io/example_chart_scatter.html

In [None]:
!pip install xlsxwriter
import xlsxwriter
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris

# Load the Iris measurements and append the numeric class label as a 'target' column.
iris = load_iris()
iris_df = pd.DataFrame(
    data=np.c_[iris['data'], iris['target']],
    columns=iris['feature_names'] + ['target'],
)

# Translate the numeric class label into a readable species name.
condition_list = [iris_df.target == 0, iris_df.target == 1, iris_df.target == 2]
choice_list = ['setosa', 'versicolor', 'virginica']
iris_df['target_name'] = np.select(condition_list, choice_list, default='unknown')

# Each chart series below is one contiguous block of sheet rows, so make sure
# the rows are grouped by class, in the same order as choice_list. A stable
# sort keeps the original order within each class.
iris_df = iris_df.sort_values('target', kind='mergesort')

# add_chart() is an xlsxwriter API, so request that engine explicitly.
with pd.ExcelWriter('iris_scatter_chart.xlsx', engine='xlsxwriter') as writer:
    # Sheet layout with index=False: row 1 = headers, data from row 2;
    # columns A and B hold the first two feature columns.
    iris_df.to_excel(writer, index=False)
    wb = writer.book
    ws = writer.sheets['Sheet1']
    chart = wb.add_chart({'type': 'scatter'})
    chart.set_title({'name': 'Sepal Width vs. Sepal Length (in centimeters)'})

    # Walk through the sheet block by block, using each species' actual row
    # count rather than assuming all classes are the same size.
    row_start = 2
    for species in choice_list:
        n_rows = int((iris_df['target_name'] == species).sum())
        if n_rows == 0:
            continue  # nothing to plot for this species
        row_end = row_start + n_rows - 1
        chart.add_series({
            'name': species,
            'values': '=Sheet1!$A$%d:$A$%d' % (row_start, row_end),
            'categories': '=Sheet1!$B$%d:$B$%d' % (row_start, row_end),
        })
        row_start = row_end + 1

    ws.insert_chart('I2', chart)
    # No explicit wb.close(): leaving the `with` block saves and closes the workbook.


setosa 0 2 51
versicolor 1 52 101
virginica 2 102 151


**Combined Charts**

For this example, I'll be using the US Unemployment Rate retrieved using the pandas-datareader python library. I'll essentially reuse the code we used to create the time series line chart and add a column chart to show recession periods.  So the end result will be a combination of a line and column chart. The column chart data are the publicly available recession dates for the US economy as determined by the National Bureau of Economic Research (NBER.org).

As with the other Chart examples, I use pandas.DataFrame.to_excel() (Docs: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_excel.html) and pandas.ExcelWriter() (Docs: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.ExcelWriter.html)

I make this chart rather detailed as with the time series line chart.  I also introduce axis labelling for the first time.

In [None]:
!pip install xlsxwriter
import datetime

import pandas as pd  # used below for pd.ExcelWriter; this cell should not rely on earlier cells
import pandas_datareader.data as data

start = datetime.datetime(2000, 1, 1)
end = datetime.datetime(2019, 12, 1)

#Unemployment Rate
unemp_df = data.DataReader("UNRATE", "fred", start, end)

# Convert each date to an Excel serial day number. In Excel's 1900 date system
# 2000-01-01 is serial 36526, which corresponds to counting days from 1899-12-30
# (counting from 1899-12-31 is one day early). Storing plain integers keeps the
# written value independent of how the Excel engine converts timedelta objects.
excel_epoch = datetime.datetime(1899, 12, 30)
unemp_df['xlsx_date'] = (unemp_df.index - excel_epoch).days

# Recession dates are based on NBER Recession Dates set by NBER Business Cycle Committee (NBER.org)
# Each pair is (first month, last month), both inclusive: Mar 2001 - Nov 2001
# and Dec 2007 - Jun 2009.
recession_periods = [
    (datetime.datetime(2001, 3, 1), datetime.datetime(2001, 11, 1)),
    (datetime.datetime(2007, 12, 1), datetime.datetime(2009, 6, 1)),
]

# 100 marks a recession month. The y axis is capped below, so these columns
# fill the full chart height and act as background shading.
unemp_df['recession'] = 0
for first_month, last_month in recession_periods:
    in_recession = (unemp_df.index >= first_month) & (unemp_df.index <= last_month)
    unemp_df.loc[in_recession, 'recession'] = 100

# Data rows start at row 2 (row 1 is the header), so the last one is len + 1.
last_row = len(unemp_df) + 1

# add_chart()/add_format() are xlsxwriter APIs, so request that engine explicitly.
with pd.ExcelWriter('Unemployment_Rate_combined_chart.xlsx', engine='xlsxwriter') as writer:
    # Sheet layout with index=False: A = UNRATE, B = xlsx_date, C = recession.
    unemp_df.to_excel(writer, index=False)
    wb = writer.book
    ws = writer.sheets['Sheet1']
    dateFormat = wb.add_format({'num_format': 'mmm-yy'})

    # Overwrite the raw serial numbers with the same numbers plus a date format
    # so that Excel displays them as dates.
    ws.write_column('B2', unemp_df['xlsx_date'].tolist(), dateFormat)

    chart0 = wb.add_chart({'type': 'line'})
    chart1 = wb.add_chart({'type': 'column'})
    chart0.add_series({'name': 'US Unemployment Rate (2000-2019)',
                       'values': '=Sheet1!$A$2:$A$%d' % last_row,
                       'categories': '=Sheet1!$B$2:$B$%d' % last_row})
    chart1.add_series({'name': 'Recession',
                       'values': '=Sheet1!$C$2:$C$%d' % last_row,
                       'categories': '=Sheet1!$B$2:$B$%d' % last_row,
                       'fill': {'color': 'blue', 'transparency': 80}})

    # The column chart is the primary chart; the line chart is layered onto it.
    chart1.combine(chart0)
    chart1.set_x_axis({'name': 'Date',
                       'date_axis': True,
                       'interval_unit': 6,
                       'major_tick_mark': 'inside',
                       'minor_tick_mark': 'none',
                       'num_font': {'rotation': -45, 'bold': True}})
    chart1.set_legend({'none': True})
    chart1.set_y_axis({'name': 'Unemployment Rate',
                       'max': round(unemp_df['UNRATE'].max() + 1),
                       'min': 0})
    chart1.set_title({'name': 'US Unemployment Rate (2000-2019)'})
    ws.insert_chart('E2', chart1)
    # No explicit wb.close(): leaving the `with` block saves and closes the workbook.




**Creating a Chart Sheet**

In this example we use the same code as the previous example.  This time we add a chart sheet of the same chart.

Xlsxwriter documentation and examples of chartsheets can be found at https://xlsxwriter.readthedocs.io/chartsheet.html.





In [None]:
!pip install xlsxwriter
import datetime

import pandas as pd  # used below for pd.ExcelWriter; this cell should not rely on earlier cells
import pandas_datareader.data as data

start = datetime.datetime(2000, 1, 1)
end = datetime.datetime(2019, 12, 1)

#Unemployment Rate
unemp_df = data.DataReader("UNRATE", "fred", start, end)

# Convert each date to an Excel serial day number. In Excel's 1900 date system
# 2000-01-01 is serial 36526, which corresponds to counting days from 1899-12-30
# (counting from 1899-12-31 is one day early). Storing plain integers keeps the
# written value independent of how the Excel engine converts timedelta objects.
excel_epoch = datetime.datetime(1899, 12, 30)
unemp_df['xlsx_date'] = (unemp_df.index - excel_epoch).days

# Recession dates are based on NBER Recession Dates set by NBER Business Cycle Committee (NBER.org)
# Each pair is (first month, last month), both inclusive: Mar 2001 - Nov 2001
# and Dec 2007 - Jun 2009.
recession_periods = [
    (datetime.datetime(2001, 3, 1), datetime.datetime(2001, 11, 1)),
    (datetime.datetime(2007, 12, 1), datetime.datetime(2009, 6, 1)),
]

# 100 marks a recession month. The y axis is capped below, so these columns
# fill the full chart height and act as background shading.
unemp_df['recession'] = 0
for first_month, last_month in recession_periods:
    in_recession = (unemp_df.index >= first_month) & (unemp_df.index <= last_month)
    unemp_df.loc[in_recession, 'recession'] = 100

# Data rows start at row 2 (row 1 is the header), so the last one is len + 1.
last_row = len(unemp_df) + 1

# add_chart()/add_chartsheet() are xlsxwriter APIs, so request that engine explicitly.
with pd.ExcelWriter('Unemployment_Rate_combined_chartsheet.xlsx', engine='xlsxwriter') as writer:
    # Sheet layout with index=False: A = UNRATE, B = xlsx_date, C = recession.
    unemp_df.to_excel(writer, index=False)
    wb = writer.book
    # A chartsheet is a separate sheet that holds only a chart.
    cs = wb.add_chartsheet()
    ws = writer.sheets['Sheet1']
    dateFormat = wb.add_format({'num_format': 'mmm-yy'})

    # Overwrite the raw serial numbers with the same numbers plus a date format
    # so that Excel displays them as dates.
    ws.write_column('B2', unemp_df['xlsx_date'].tolist(), dateFormat)

    chart0 = wb.add_chart({'type': 'line'})
    chart1 = wb.add_chart({'type': 'column'})
    chart0.add_series({'name': 'US Unemployment Rate (2000-2019)',
                       'values': '=Sheet1!$A$2:$A$%d' % last_row,
                       'categories': '=Sheet1!$B$2:$B$%d' % last_row})
    chart1.add_series({'name': 'Recession',
                       'values': '=Sheet1!$C$2:$C$%d' % last_row,
                       'categories': '=Sheet1!$B$2:$B$%d' % last_row,
                       'fill': {'color': 'blue', 'transparency': 80}})

    # The column chart is the primary chart; the line chart is layered onto it.
    chart1.combine(chart0)
    chart1.set_x_axis({'name': 'Date',
                       'date_axis': True,
                       'interval_unit': 6,
                       'major_tick_mark': 'inside',
                       'minor_tick_mark': 'none',
                       'num_font': {'rotation': -45, 'bold': True}})
    chart1.set_legend({'none': True})
    chart1.set_y_axis({'name': 'Unemployment Rate',
                       'max': round(unemp_df['UNRATE'].max() + 1),
                       'min': 0})
    chart1.set_title({'name': 'US Unemployment Rate (2000-2019)'})

    # Place the combined chart on the chartsheet instead of embedding it in Sheet1.
    cs.set_chart(chart1)
    # No explicit wb.close(): leaving the `with` block saves and closes the workbook.

