# Python-Excel Integration
package openpyxl
[openpyxl](https://openpyxl.readthedocs.io/en/default/)

OpenPyXL is a library used to read and write Excel 2010 xlsx/xlsm/xltx/xltm files.
The resulting files can also be opened with other spreadsheet software, such as LibreOffice Calc and OpenOffice Calc.

## Installing the package
package openpyxl.

If Python has been installed via anaconda then you do not need to do the following steps.

In [1]:
#!pip3 install openpyxl

In [None]:
import warnings

# Silence DeprecationWarning messages raised while the notebook runs.
# The "ignore" action suppresses them entirely; "default" would still print
# the first occurrence for every location that triggers the warning.
warnings.filterwarnings("ignore", category=DeprecationWarning)

NOTE (specific to my Jupyter notebook): the code above is meant to suppress the following warning: "DeprecationWarning: Interpreting naive datetime as local 2018-09-05 07:08:07.599023. Please add timezone info to timestamps. new_obj[k] = extract_dates(v)".

## Creating a worksheet
Name of the workbook file: demo.xlsx

In [None]:
# Create an empty workbook and write it to disk.

from openpyxl import Workbook

# destination of the new Excel file
filepath = "./data/demo.xlsx"

# instantiate the workbook in memory, then store it under the chosen path
wb = Workbook()
wb.save(filepath)


NOTE1:  There is no need to create a file on the filesystem to get started with openpyxl. Just import the Workbook class and start using it. 
The instruction below will create a workbook.

    from openpyxl import Workbook
    
NOTE2: A workbook is always created with at least one worksheet.  


## Adding data to a workbook

In [None]:
# Write a few values into the existing demo workbook.

from openpyxl import load_workbook

filepath = "./data/demo.xlsx"

# open the workbook from disk and take its active worksheet
wb = load_workbook(filepath)
ws = wb.active

# cells can be addressed by their coordinate ...
ws["A1"] = 56
ws["A2"] = 43

# ... or by row/column number: row 2, column 2 is cell B2
ws.cell(row=2, column=2, value=2)

# write the modified workbook back to the same file
wb.save(filepath)

NOTE: When loading a file, use the following type of import:

    from openpyxl import load_workbook
    

## Appending data 

In [None]:
# Append a group of rows at the bottom of the active sheet of a new workbook.

from openpyxl import Workbook

filepath = "./data/demo.xlsx"

# a header row followed by two records
data = [
    ("Id", "Name", "Marks"),
    (1, "ABC", 50),
    (2, "DEF", 100),
]

# start from a brand-new workbook and use its active worksheet
wb = Workbook()
ws = wb.active

# every append() call adds one tuple as a row
for record in data:
    ws.append(record)

# store the workbook under the demo path
wb.save(filepath)

## Reading cells from a worksheet


In [None]:
# Read individual cells from the demo workbook.

from openpyxl import load_workbook

filepath = "./data/demo.xlsx"

# open the workbook and take its active worksheet
wb = load_workbook(filepath)
ws = wb.active

# coordinate-style access: index the sheet, then read .value
b1 = ws["B1"].value
b2 = ws["B2"].value

# row/column access: cell() returns the cell object, .value is its content
# (row 3, column 2 is cell B3)
b3 = ws.cell(row=3, column=2).value

# show the three values, one per line
print(b1)
print(b2)
print(b3)



## Reading multiple cells

In [None]:

# Read a rectangular range of cells from a sheet.

from openpyxl import load_workbook

# workbook to read (alternative input: ./data/demo.xlsx)
filepath = "./data/items.xlsx"

# open the workbook and take its active worksheet
wb = load_workbook(filepath)
ws = wb.active

# slicing the worksheet returns the cells of A1:B6 row by row, two per row
cells = ws["A1":"B6"]

for c1, c2 in cells:
    # An empty cell has the value None, which rejects the width-8 format
    # spec with a TypeError; print such cells as an empty field instead.
    left = "" if c1.value is None else c1.value
    right = "" if c2.value is None else c2.value
    print("{0:8} {1:8}".format(left, right))


## Iterating by rows

In [None]:
# Walk through a worksheet one row at a time.

from openpyxl import Workbook

# sample data: six rows of three numbers
rows = (
    (88, 46, 57),
    (89, 38, 12),
    (23, 59, 78),
    (56, 21, 98),
    (24, 18, 43),
    (34, 15, 67),
)

wb = Workbook()
ws = wb.active

for values in rows:
    ws.append(values)

# the min/max arguments bound the region that iter_rows() walks through
for row in ws.iter_rows(min_row=1, max_row=6, min_col=1, max_col=3):
    # each value is followed by a single space; print() ends the line
    print("".join("{} ".format(cell.value) for cell in row))

wb.save("./data/iterbyrows.xlsx")


NOTE:  The ws.iter_rows() method will return rows

## Iterating by columns


In [None]:
# Walk through a worksheet one column at a time.

from openpyxl import Workbook

# sample data: six rows of three numbers
rows = (
    (88, 46, 57),
    (89, 38, 12),
    (23, 59, 78),
    (56, 21, 98),
    (24, 18, 43),
    (34, 15, 67),
)

wb = Workbook()
ws = wb.active

for values in rows:
    ws.append(values)

# the min/max arguments bound the region that iter_cols() walks through
for column in ws.iter_cols(min_row=1, max_row=6, min_col=1, max_col=3):
    # each value is followed by a single space; print() ends the line
    print("".join("{} ".format(cell.value) for cell in column))

wb.save("./data/iterbycols.xlsx")

NOTE:  The ws.iter_cols() method will return columns.

## Adding a sheet


In [None]:
# Add another worksheet to the demo workbook.

from openpyxl import load_workbook

filepath = "./data/demo.xlsx"
wb = load_workbook(filepath)

# create the additional sheet, titled "Sheet 2"
wb.create_sheet("Sheet 2")

# list all worksheets of the workbook, one per line
print(*wb.worksheets, sep="\n")

# write the workbook back to the same file
wb.save(filepath)

## Copying data from one sheet to another sheet

In [None]:
# Duplicate a worksheet inside the same workbook.

from openpyxl import load_workbook

filepath = "./data/demo.xlsx"
wb = load_workbook(filepath)

# fetch the sheet titled "Sheet" by indexing the workbook;
# wb.get_sheet_by_name('Sheet') is the deprecated way to do this
source = wb["Sheet"]

# make the copy and keep a handle on the result
target = wb.copy_worksheet(source)

# list all worksheets of the workbook, one per line
print(*wb.worksheets, sep="\n")

# write the workbook back to the same file
wb.save(filepath)


NOTE: The copy_worksheet() method cannot copy worksheets between workbooks. Worksheets can only be copied within the workbook they belong to.

## Removing a worksheet


In [None]:
# Delete a worksheet from the demo workbook.

from openpyxl import load_workbook

filepath = "./data/demo.xlsx"
wb = load_workbook(filepath)

# look up "Sheet 2" by title and remove it from the workbook;
# wb.get_sheet_by_name('Sheet 2') is the deprecated way to look it up
unwanted = wb["Sheet 2"]
wb.remove(unwanted)

# list the worksheets that are left
for sheet in wb.worksheets:
    print(sheet)

# write the workbook back to the same file
wb.save(filepath)


## Excel - Working with formulas

In [None]:
# Store an Excel formula in a cell and change that cell's font.

from copy import copy  # used to clone the cell's font
from openpyxl import Workbook

# sample data: six rows of two numbers (columns A and B)
rows = (
    (34, 26),
    (88, 36),
    (24, 29),
    (15, 22),
    (56, 13),
    (76, 18),
)

wb = Workbook()
ws = wb.active

for pair in rows:
    ws.append(pair)

# B7, directly below the data, receives the formula summing A1:B6
cell = ws.cell(row=7, column=2)
cell.value = "=SUM(A1:B6)"

# clone the current font, switch strike-through on, and assign it back;
# cell.font.copy(bold=True) is the deprecated way of deriving a font
new_font = copy(cell.font)
new_font.strike = True
cell.font = new_font

wb.save("./data/formulas.xlsx")

## Excel - Merging Cells

In [None]:
# Merge a block of cells and centre a text inside it.

from openpyxl import Workbook
from openpyxl.styles import Alignment

wb = Workbook()
ws = wb.active

# combine the 2x2 block A1:B2 into one merged cell
ws.merge_cells("A1:B2")

# A1 (row 1, column 1) is the top-left cell of the merged block
cell = ws["A1"]
cell.value = "Sunny day"

# centre the text both horizontally and vertically
cell.alignment = Alignment(horizontal="center", vertical="center")

wb.save("./data/merging.xlsx")


## Excel - Adding Images

In [None]:
# Insert a picture into a worksheet.

from openpyxl import Workbook
from openpyxl.drawing.image import Image

wb = Workbook()
ws = wb.active

# a caption in A1
ws["A1"] = "This is Python"

# load the picture from disk and anchor it at cell B2
img = Image("../img/python.png")
ws.add_image(img, "B2")

wb.save("./data/sheet_image.xlsx")

## Excel - Bar Chart
In bar charts values are plotted as either horizontal bars or vertical columns.

In [None]:
# Bar chart: number of Olympic gold medals per country (2012).

from openpyxl import Workbook
from openpyxl.chart import BarChart, Reference, Series

# one (country, gold medals) pair per row
rows = [
    ("USA", 46),
    ("China", 38),
    ("UK", 29),
    ("Russia", 22),
    ("South Korea", 13),
    ("Germany", 11),
]

# a new workbook is created and the data goes into its active worksheet
wb = Workbook()
ws = wb.active

for entry in rows:
    ws.append(entry)

# column 2, rows 1-6: the medal counts to be plotted
data = Reference(ws, min_col=2, min_row=1, max_col=2, max_row=6)

# column 1, rows 1-6: the country names, used as the text labels
# of the category axis
categs = Reference(ws, min_col=1, min_row=1, max_row=6)

# build the chart from the data and the categories
chart = BarChart()
chart.title = "Olympic Gold medals in London"
chart.add_data(data=data)
chart.set_categories(categs)

# turn off the legend and the major grid lines
chart.legend = None
chart.y_axis.majorGridlines = None

# give each bar a different colour
chart.varyColors = True

# add the finished chart to the worksheet at cell A8
ws.add_chart(chart, "A8")

wb.save("./data/bar_chart.xlsx")

File output:  ./data/bar_chart.xlsx
![title](../img/excel-barChart.png)

## Excel - Bubble Chart
Bubble charts are like scatter charts but use a third dimension to determine the size of the bubbles. Charts can include multiple series.

In [None]:
# Bubble chart with two data series.

from openpyxl import Workbook
from openpyxl.chart import BubbleChart, Reference, Series

# header row, four records for the first series, an empty spacer row,
# then four records for the second series
rows = [
    ("Number of Products", "Sales in USD", "Market share"),
    (14, 12200, 15),
    (20, 60000, 33),
    (18, 24400, 10),
    (22, 32000, 42),
    (),
    (12, 8200, 18),
    (15, 50000, 30),
    (19, 22400, 15),
    (25, 25000, 50),
]

wb = Workbook()
ws = wb.active

for record in rows:
    ws.append(record)

chart = BubbleChart()
chart.style = 18  # use a preset style

# Each series takes x from column 1, y from column 2 and the bubble size
# from column 3; only the row span and the title differ between the two.
for title, first_row, last_row in (("2013", 2, 5), ("2014", 7, 10)):
    xvalues = Reference(ws, min_col=1, min_row=first_row, max_row=last_row)
    yvalues = Reference(ws, min_col=2, min_row=first_row, max_row=last_row)
    size = Reference(ws, min_col=3, min_row=first_row, max_row=last_row)
    series = Series(values=yvalues, xvalues=xvalues, zvalues=size, title=title)
    chart.series.append(series)

# place the chart starting in cell E1
ws.add_chart(chart, "E1")
wb.save("./data/bubble.xlsx")

File output:  ./data/bubble.xlsx
![title](../img/excel-bubbleChart.png)