In [342]:
import openpyxl
import pandas as pd
import re

# Using OpenPyXL for Excel Workbooks

## Why not just use Pandas?

In [343]:
# header=None: treat every spreadsheet row as data rather than using the first row as column labels.
df = pd.read_excel('example.xlsx', header=None)
df

Unnamed: 0,0,1,2,3,4,5,6,7
0,Period,,,,,,,
1,2020 March,,,,,,,
2,Campaign,,,,,,,
3,Tutorials,,,,,,,
4,,,,,,,,
5,,,,,,,,
6,,,,,,,,
7,Blog Engagement,,,,,,,
8,,,,,,,,
9,Period: 2020 March Campaign: Tutorials Run D...,,,,,,,


In [75]:
# Default export: the DataFrame's index and column labels are written alongside the data.
df.to_excel('pandas.xlsx')

In [14]:
# Export the cell values only, leaving out the header row and the index column.
df.to_excel('pandas.xlsx', header=False, index=False)

## Opening a spreadsheet with OpenPyXL

In [344]:
from openpyxl import load_workbook

# Open the workbook and take its active worksheet.
workbook = load_workbook(filename="example.xlsx")
sheet = workbook.active

# Save under a different file name so example.xlsx itself is not overwritten.
workbook.save('openpyxl.xlsx')


## Extracting data from specific cells

We can extract a cell from a worksheet the same way we would perform a dictionary lookup in Python.

In [345]:
# Index the worksheet with a coordinate string to get the Cell object.
cell = sheet['A10']
cell

<Cell 'Sheet1'.A10>

Let's examine some of the contents of the cell.

In [346]:
# Print where the cell is and what it contains.
print(f'Coordinate: {cell.coordinate}')
print(f'Value: {cell.value}')

Coordinate: A10
Value:  Period: 2020 March Campaign: Tutorials  Run Date: 4/30/2020


Everything looks normal so far. Let's assign the cell's value to a new variable `s`.

In [347]:
# Keep the raw cell value; the bare expression below shows its repr rather than the printed form.
s = sheet['A10'].value
s

'\xa0Period: 2020 March\xa0Campaign: Tutorials\xa0\xa0Run Date: 4/30/2020'

### Clean the text

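What happened? This doesn't look like it did above. `print` rendered the string's characters, whereas displaying the bare value shows its `repr`, which escapes characters outside printable ASCII. Each `\xa0` is the escape for a single Unicode character, U+00A0 (the **no-break space**): a whitespace character that looks like an ordinary space when printed but is a different code point.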

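We can replace these with ordinary spaces using the `normalize` function from Python's `unicodedata` module, and then strip the leading and trailing whitespace.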

In [348]:
from unicodedata import normalize

In [349]:
def clean_text(text):
    """Normalize a cell's text and trim surrounding whitespace.

    NFKC normalization replaces compatibility characters with their plain
    equivalents, so no-break spaces (``\\xa0``) become ordinary spaces.

    Parameters
    ----------
    text : str
        Raw string read from a worksheet cell.

    Returns
    -------
    str
        The normalized string with leading and trailing whitespace removed.
    """
    # Use the `normalize` name the notebook imports directly; the module
    # name `unicodedata` itself is never bound, so `unicodedata.normalize`
    # raised NameError on a fresh kernel.
    normalized = normalize('NFKC', text)
    return normalized.strip()


In [350]:
# Clean the raw cell value and display the result.
text = clean_text(s)
text

'Period: 2020 March Campaign: Tutorials  Run Date: 4/30/2020'

### Extract the period, campaign, and run date

In [351]:
def get_fields(text):
    """Extract the period, campaign and run date from a report header line.

    The expected layout is ``Period: <period> Campaign: <campaign> Run Date:
    <run date>``. Any amount of whitespace may separate the labels from the
    values, and each returned value is stripped of surrounding whitespace.

    Parameters
    ----------
    text : str
        Cleaned header text, starting with ``Period:``.

    Returns
    -------
    tuple of str
        ``(period, campaign, run_date)``.

    Raises
    ------
    ValueError
        If ``text`` does not follow the expected layout.
    """
    # \s+ / \s* tolerate runs of spaces between fields; a fixed single space
    # left stray whitespace attached to the captured campaign value.
    regex = re.compile(r'Period:\s*(.*)\s+Campaign:\s*(.*)\s+Run Date:\s*(.*)')
    match = regex.match(text)
    if match is None:
        raise ValueError(f'Text does not match the expected header layout: {text!r}')
    return tuple(field.strip() for field in match.groups())
    

In [352]:
# Same assignment as when `s` was first cleaned; clean_text only reads `s`, so re-running it yields the same `text`.
text = clean_text(s)

In [353]:
# get_fields returns the captured values in order: period, campaign, run date.
period, campaign, run_date = get_fields(text)

print(period)
print(campaign)
print(run_date)

2020 March
Tutorials 
4/30/2020


## Writing into a worksheet

In [354]:
from openpyxl.utils import column_index_from_string

**Write a value to a single cell**

In [355]:
# Write a header label into row 12 of columns I, J and K.
sheet['I12'] = 'Period'
sheet['J12'] = 'Campaign'
sheet['K12'] = 'Run Date'

**Access cells by numerical coordinates**

In [356]:
row = 12
# Convert the column letter to its 1-based numeric index so the cell can be
# addressed by (row, column). The result is bound to `col`, the name the
# following lines read.
col = column_index_from_string('I')
print(f'I translates to {col}')

# Read back the value written to I12.
sheet.cell(row, col).value

I translates to 9


'Period'

### What about writing to a range of cells?

In [357]:
# Returns (min_col, min_row, max_col, max_row); for 'A1:D3' that is columns 1-4 and rows 1-3.
openpyxl.utils.range_boundaries('A1:D3')

(1, 1, 4, 3)

In [358]:
def fill_range(sheet, range_string, value):
    """Write the same value into every cell of a rectangular range.

    Parameters
    ----------
    sheet
        Worksheet whose cells are written; accessed via ``sheet.cell(row, column)``.
    range_string : str
        Range in A1 notation, e.g. ``'I13:I15'``.
    value
        Value assigned to each cell in the range.
    """
    # range_boundaries reports columns before rows: (min_col, min_row, max_col, max_row).
    min_col, min_row, max_col, max_row = openpyxl.utils.range_boundaries(range_string)

    # Enumerate the coordinates row by row, left to right within each row.
    targets = [
        (r, c)
        for r in range(min_row, max_row + 1)
        for c in range(min_col, max_col + 1)
    ]
    for r, c in targets:
        sheet.cell(r, c).value = value
            

In [359]:
# Repeat each extracted value down rows 13-15 of its column, beneath the header written in row 12.
fill_range(sheet, 'I13:I15', period)
fill_range(sheet, 'J13:J15', campaign)
fill_range(sheet, 'K13:K15', run_date)

In [360]:
# Save the workbook, with the newly written cells, to its own file.
workbook.save('example-02.xlsx')

## Formatting cells

In [361]:
from copy import copy

In [362]:
def copy_style_to_range(sheet, source_coordinate, destination_range):
    """Give every cell in a range a copy of one source cell's style.

    Parameters
    ----------
    sheet
        Worksheet holding both the source cell and the destination range.
    source_coordinate : str
        Coordinate of the cell whose style is copied, e.g. ``'H12'``.
    destination_range : str
        Range in A1 notation that receives the style, e.g. ``'I12:K12'``.
    """
    # coordinate_to_tuple yields (row, column), in that order.
    src_row, src_col = openpyxl.utils.coordinate_to_tuple(source_coordinate)
    template = sheet.cell(src_row, src_col)

    # range_boundaries reports columns before rows: (min_col, min_row, max_col, max_row).
    min_col, min_row, max_col, max_row = openpyxl.utils.range_boundaries(destination_range)

    targets = [
        (r, c)
        for r in range(min_row, max_row + 1)
        for c in range(min_col, max_col + 1)
    ]
    for r, c in targets:
        target = sheet.cell(r, c)
        # Each destination gets its own copy of the style object rather than sharing the source's.
        target._style = copy(template._style)


In [365]:
# Copy column H's style from rows 12, 13 and 16 onto the matching rows of columns I-K.
copy_style_to_range(sheet, 'H12', 'I12:K12')
copy_style_to_range(sheet, 'H13', 'I13:K15')
copy_style_to_range(sheet, 'H16', 'I16:K16')

In [366]:
# Save the styled workbook to its own file.
workbook.save('example-03.xlsx')