In [None]:
import openpyxl
import pandas as pd
import re

# Using OpenPyXL for Excel Workbooks

## Why not just use Pandas?

In [None]:
# header=None: no spreadsheet row is used for column labels, so every row is kept as data
df = pd.read_excel('example.xlsx', header=None)
df

In [None]:
# Write the frame back out with pandas' default to_excel settings
df.to_excel('pandas.xlsx')

In [None]:
# Overwrite the same file, this time without the header row and index column
df.to_excel('pandas.xlsx', header=False, index=False)

## Opening a spreadsheet with OpenPyXL

In [None]:
from openpyxl import load_workbook

# Open the spreadsheet and select its active sheet
workbook = load_workbook(filename="example.xlsx")
sheet = workbook.active

# Save the workbook under a new file name
workbook.save('openpyxl.xlsx')

# Close the workbook once we are done with it.
# NOTE: per the openpyxl documentation, close() only affects workbooks opened
# in read-only or write-only mode; for this default-mode workbook it is harmless.
workbook.close()


## Extracting data from specific cells

In [None]:
# Load example.xlsx again and rebind `workbook` / `sheet` for the cells below
workbook = load_workbook(filename="example.xlsx")
sheet = workbook.active

We can extract a cell from a worksheet the same way we would perform a dictionary lookup in Python.

In [None]:
# Index the worksheet with a coordinate string to get the cell object at A10
cell = sheet['A10']
cell

Let's examine some of the contents of the cell.

In [None]:
# Show where the cell lives and what it holds
print(f'Coordinate: {cell.coordinate}')
print(f'Value: {cell.value}')

Everything looks normal so far. Let's assign the cell's value to a new variable `s`.

In [None]:
# Keep the raw cell value; ending the cell with the bare name displays its repr
s = sheet['A10'].value
s

### Clean the text

What happened? This doesn't look like it did above. Displaying the string shows its `repr`, which reveals characters that `print` rendered as plain blanks. Each `\xa0` is Python's escape for a single Unicode character, U+00A0 (**no-break space**): a whitespace character that looks like an ordinary space but is a different code point.

We can replace these with ordinary spaces by normalizing the string with Python's `unicodedata` module.

In [None]:
from unicodedata import normalize

In [None]:
def clean_text(text):
    """Normalize a string to Unicode NFKC form and trim surrounding whitespace.

    NFKC replaces compatibility characters with their standard equivalents,
    so a no-break space (U+00A0) becomes an ordinary space.

    Parameters
    ----------
    text : str
        The raw string, e.g. a cell value read from a worksheet.

    Returns
    -------
    str
        The normalized string with leading and trailing whitespace removed.
    """
    # Call the imported `normalize` directly: this notebook does
    # `from unicodedata import normalize`, so the module name `unicodedata`
    # is never bound and `unicodedata.normalize` would raise NameError.
    normalized = normalize('NFKC', text)
    return normalized.strip()


In [None]:
# Clean the raw cell string and display the result
text = clean_text(s)
text

### Extract the period, campaign, and run date

In [None]:
def get_fields(text):
    """Pull the period, campaign and run date out of a cleaned header string.

    The string must begin with ``Period: ``, followed by `` Campaign: `` and
    `` Run Date: `` sections. The three captured values are returned as a
    tuple in that order.
    """
    pattern = r'Period: (.*) Campaign: (.*) Run Date: (.*)'
    # re.match anchors at the start of the string
    found = re.match(pattern, text)
    return found.groups()
    

In [None]:
# Same cleaning call as earlier; `text` is the input to get_fields in the next cell
text = clean_text(s)

In [None]:
# get_fields returns the three captured groups in pattern order
period, campaign, run_date = get_fields(text)

print(period)
print(campaign)
print(run_date)

## Writing into a worksheet

In [None]:
from openpyxl.utils import column_index_from_string

**Write a value to a single cell**

In [None]:
# Assigning to sheet[<coordinate>] writes to that cell; these are the labels for columns I-K
sheet['I12'] = 'Period'
sheet['J12'] = 'Campaign'
sheet['K12'] = 'Run Date'

**Access cells by numerical coordinates**

In [None]:
row = 12
# Convert the column letter to the numeric column index that sheet.cell() expects
col_i = column_index_from_string('I')
print(f'I translates to {col_i}')

# Read the cell at I12 again, this time addressed by (row, column) numbers.
# The original cell referenced an undefined name `col` here and in the print above.
sheet.cell(row, col_i).value

### What about writing to a range of cells?

In [None]:
# Show the numeric boundaries of a range; fill_range below unpacks them as
# (col_start, row_start, col_end, row_end)
openpyxl.utils.range_boundaries('A1:D3')

In [None]:
def fill_range(sheet, range_string, value):
    """Write the same value into every cell of a rectangular range.

    Parameters
    ----------
    sheet
        The worksheet to write into.
    range_string : str
        A range such as ``'I13:I15'``.
    value
        The value assigned to each cell in the range.
    """
    # Boundaries arrive as (first column, first row, last column, last row)
    first_col, first_row, last_col, last_row = openpyxl.utils.range_boundaries(range_string)

    # Walk the rectangle row by row; both end boundaries are inclusive
    targets = (
        sheet.cell(row=r, column=c)
        for r in range(first_row, last_row + 1)
        for c in range(first_col, last_col + 1)
    )
    for target in targets:
        target.value = value
            

In [None]:
# Fill rows 13-15 of columns I, J and K with the values parsed from the header text
fill_range(sheet, 'I13:I15', period)
fill_range(sheet, 'J13:J15', campaign)
fill_range(sheet, 'K13:K15', run_date)

In [None]:
# Save the workbook, including the newly written cells, to a separate file
workbook.save('example-02.xlsx')

## Formatting cells

In [None]:
from copy import copy

In [None]:
def copy_style_to_range(sheet, source_coordinate, destination_range):
    """Apply one cell's style to every cell in a rectangular range.

    Parameters
    ----------
    sheet
        The worksheet containing both the source cell and the destination range.
    source_coordinate : str
        Coordinate of the cell whose style is copied, e.g. ``'H12'``.
    destination_range : str
        Range that receives the style, e.g. ``'I12:K12'``.
    """
    # coordinate_to_tuple yields (row, column), the same order sheet.cell() takes
    src_row, src_col = openpyxl.utils.coordinate_to_tuple(source_coordinate)
    template = sheet.cell(row=src_row, column=src_col)

    # Boundaries arrive as (first column, first row, last column, last row)
    left, top, right, bottom = openpyxl.utils.range_boundaries(destination_range)

    for r in range(top, bottom + 1):
        for c in range(left, right + 1):
            # Each destination cell is given its own copy of the source style
            sheet.cell(row=r, column=c)._style = copy(template._style)


In [None]:
# Use column H as the style template for columns I-K: row 12, rows 13-15, then row 16
copy_style_to_range(sheet, 'H12', 'I12:K12')
copy_style_to_range(sheet, 'H13', 'I13:K15')
copy_style_to_range(sheet, 'H16', 'I16:K16')

In [None]:
# Save the styled workbook to its own file
workbook.save('example-03.xlsx')