# Importing an Excel File into Polars

## Load the required libraries

In [1]:
# Load libraries
import polars as pl
import polars.selectors as cs
import sys 

# Report the interpreter and polars versions this notebook was run with
print(
    f'My system version is {sys.version}; \n'
    f'polars version is {pl.__version__}'
)

My system version is 3.12.4 (main, Jul  1 2024, 00:48:18) [Clang 15.0.0 (clang-1500.3.9.4)]; 
polars version is 1.6.0
My system version is 3.12.4 (main, Jul  1 2024, 00:48:18) [Clang 15.0.0 (clang-1500.3.9.4)]; 
polars version is 1.6.0


## Import the data

In [2]:
# Location of the bank-list workbook, relative to this notebook
url = '../00-data/banklist.xlsx'

# Read the workbook with the calamine engine; header_row=4 is forwarded to the
# engine through read_options (presumably the 0-based index of the row that
# holds the column labels -- confirm against the engine's documentation)
failed_banks = pl.read_excel(
    url,
    engine='calamine',
    read_options=dict(header_row=4),
)

# Preview the first rows of the imported frame
print(failed_banks.head())

shape: (5, 7)
┌─────────────┬─────────────┬────────┬───────┬────────────┬────────────┬───────┐
│ Bank Name   ┆ City        ┆ State  ┆ Cert  ┆ Acquiring  ┆ Closing    ┆ Fund  │
│ ---         ┆ ---         ┆ ---    ┆ ---   ┆ Institutio ┆ Date       ┆ ---   │
│ str         ┆ str         ┆ str    ┆ i64   ┆ n          ┆ ---        ┆ i64   │
│             ┆             ┆        ┆       ┆ ---        ┆ str        ┆       │
│             ┆             ┆        ┆       ┆ str        ┆            ┆       │
╞═════════════╪═════════════╪════════╪═══════╪════════════╪════════════╪═══════╡
│ Republic    ┆ Philadelphi ┆ PA     ┆ 27332 ┆ Fulton     ┆ 26-Apr-24  ┆ 10546 │
│ First Bank  ┆ a           ┆        ┆       ┆ Bank,      ┆            ┆       │
│ dba Republ… ┆             ┆        ┆       ┆ National   ┆            ┆       │
│             ┆             ┆        ┆       ┆ Associat…  ┆            ┆       │
│ Citizens    ┆ Sac City    ┆ IA     ┆ 8758  ┆ Iowa Trust ┆ 3-Nov-23   ┆ 10545 │
│ Bank        

In [3]:
# Normalise the column labels (lower-case, spaces -> underscores, non-breaking
# spaces removed), then parse closing_date from text into a Date column.
# The raw values look like '26-Apr-24'; the '%d-%B-%y' pattern parses them,
# as the printed preview of this cell shows.
banks = failed_banks.rename(
    lambda label: label.lower().replace(' ', '_').replace('\xa0', '')
).with_columns(
    pl.col('closing_date').str.to_date('%d-%B-%y')
)

# Preview the cleaned frame
print(banks.head())

shape: (5, 7)
┌─────────────┬─────────────┬───────┬───────┬─────────────┬────────────┬───────┐
│ bank_name   ┆ city        ┆ state ┆ cert  ┆ acquiring_i ┆ closing_da ┆ fund  │
│ ---         ┆ ---         ┆ ---   ┆ ---   ┆ nstitution  ┆ te         ┆ ---   │
│ str         ┆ str         ┆ str   ┆ i64   ┆ ---         ┆ ---        ┆ i64   │
│             ┆             ┆       ┆       ┆ str         ┆ date       ┆       │
╞═════════════╪═════════════╪═══════╪═══════╪═════════════╪════════════╪═══════╡
│ Republic    ┆ Philadelphi ┆ PA    ┆ 27332 ┆ Fulton      ┆ 2024-04-26 ┆ 10546 │
│ First Bank  ┆ a           ┆       ┆       ┆ Bank,       ┆            ┆       │
│ dba Republ… ┆             ┆       ┆       ┆ National    ┆            ┆       │
│             ┆             ┆       ┆       ┆ Associat…   ┆            ┆       │
│ Citizens    ┆ Sac City    ┆ IA    ┆ 8758  ┆ Iowa Trust  ┆ 2023-11-03 ┆ 10545 │
│ Bank        ┆             ┆       ┆       ┆ & Savings   ┆            ┆       │
│             

In [4]:
# Column-wise overview of the raw import: one line per column showing its
# dtype and leading values
failed_banks.glimpse()

Rows: 569
Columns: 7
$ Bank Name              <str> 'Republic First Bank dba Republic Bank', 'Citizens Bank', 'Heartland Tri-State Bank', 'First Republic Bank', 'Signature Bank', 'Silicon Valley Bank', 'Almena State Bank', 'First City Bank of Florida', 'The First State Bank', 'Ericson State Bank'
$ City                   <str> 'Philadelphia', 'Sac City', 'Elkhart', 'San Francisco', 'New York', 'Santa Clara', 'Almena', 'Fort Walton Beach', 'Barboursville', 'Ericson'
$ State                  <str> 'PA', 'IA', 'KS', 'CA', 'NY', 'CA', 'KS', 'FL', 'WV', 'NE'
$ Cert                   <i64> 27332, 8758, 25851, 59017, 57053, 24735, 15426, 16748, 14361, 18265
$ Acquiring Institution  <str> 'Fulton Bank, National Association', 'Iowa Trust & Savings Bank', 'Dream First Bank, N.A.', 'JPMorgan Chase Bank, N.A.', 'Flagstar Bank, N.A.', 'First–Citizens Bank & Trust Company', 'Equity Bank', 'United Fidelity Bank, fsb', 'MVB Bank, Inc.', 'Farmers and Merchants Bank'
$ Closing Date           <str> '26-A

In [5]:
# Show the final five rows of the cleaned frame
print(banks.tail(n=5))

shape: (5, 7)
┌──────────────┬────────────┬───────┬───────┬─────────────┬─────────────┬──────┐
│ bank_name    ┆ city       ┆ state ┆ cert  ┆ acquiring_i ┆ closing_dat ┆ fund │
│ ---          ┆ ---        ┆ ---   ┆ ---   ┆ nstitution  ┆ e           ┆ ---  │
│ str          ┆ str        ┆ str   ┆ i64   ┆ ---         ┆ ---         ┆ i64  │
│              ┆            ┆       ┆       ┆ str         ┆ date        ┆      │
╞══════════════╪════════════╪═══════╪═══════╪═════════════╪═════════════╪══════╡
│ Superior     ┆ Hinsdale   ┆ IL    ┆ 32646 ┆ Superior    ┆ 2001-07-27  ┆ 6004 │
│ Bank, FSB    ┆            ┆       ┆       ┆ Federal,    ┆             ┆      │
│              ┆            ┆       ┆       ┆ FSB         ┆             ┆      │
│ Malta        ┆ Malta      ┆ OH    ┆ 6629  ┆ North       ┆ 2001-05-03  ┆ 4648 │
│ National     ┆            ┆       ┆       ┆ Valley Bank ┆             ┆      │
│ Bank         ┆            ┆       ┆       ┆             ┆             ┆      │
│ First       

In [6]:
# Count missing values per column by summing the is_null() mask of every column
banks.select(pl.all().is_null().sum())

bank_name,city,state,cert,acquiring_institution,closing_date,fund
u32,u32,u32,u32,u32,u32,u32
0,0,0,0,0,0,0


In [7]:
# Same per-column missing-value check, using the dedicated null_count() expression
banks.select(pl.all().null_count())

bank_name,city,state,cert,acquiring_institution,closing_date,fund
u32,u32,u32,u32,u32,u32,u32
0,0,0,0,0,0,0


In [16]:
# NOTE(review): this import sits outside the notebook's top imports cell;
# consider moving it there so all dependencies are visible in one place.
from glob import glob

# Collect every .xlsx workbook in the multi-file data folder.
# glob() yields matches in arbitrary, filesystem-dependent order, so the
# result is sorted to keep the file order (and anything built from it)
# reproducible across machines and re-runs.
multi_excel_files = sorted(glob('../00-data/multiple_excel_files/*.xlsx'))