# Load Azure Usage CSV

Downloads of Azure pdf invoice from the portal include a methodto download the csv detail corresponding to
the bill. This CSV is overloaded with two tables: a summary table by resource and 
a details table with daily usage by resource. 

Instructions for downloading the billing detail can be found at [Download or view your Azure billing invoice and daily usage data](https://docs.microsoft.com/en-us/azure/billing/billing-download-azure-invoice-daily-usage-date).

This workbook parses a bill CSV tables into two DataFrames, one with summary usage, the other 
with daily usage.

In [1]:
import pandas as pd
import numpy as np
import json
import csv

In [2]:
with open('201807-v2.csv', 'r') as f:
    reader = csv.reader(f)
    invoice_csv = list(reader)

In [3]:
invoice_csv[0:5]

[['\ufeffProvisioning Status'],
 ['Subscription ID',
  'Subscription Name',
  'Order Id',
  'Description',
  'Billing Date(Anniversary Date)',
  'Offer Name',
  'Service Name',
  'Subs Status',
  'Subs Extra Status',
  'Provisioning Status'],
 ['"2f0b3aa9-b433-4063-afca-9b304cc8bcc9"',
  '"Pay-As-You-Go"',
  '"b490c95c-c8de-4272-afe4-88a9c799045e"',
  '"Pay-As-You-Go"',
  '12',
  '',
  '"Azure Service Component"',
  'Enabled',
  'None',
  'Provisioned'],
 [],
 ['Statement']]

There are three blocks of data in the csv.
* The first block in the csv is the list of subscriptions for this bill. 
* The second is the summary usage by resource by day
* The third is the daily usage by resource

Each block is seprated by a blank line.

In [4]:
blank_lines = [idx for idx, x in enumerate(invoice_csv) if len(x) == 0]
blank_lines

[3, 57]

In [5]:
subs = invoice_csv[1:blank_lines[0]]
subs

[['Subscription ID',
  'Subscription Name',
  'Order Id',
  'Description',
  'Billing Date(Anniversary Date)',
  'Offer Name',
  'Service Name',
  'Subs Status',
  'Subs Extra Status',
  'Provisioning Status'],
 ['"2f0b3aa9-b433-4063-afca-9b304cc8bcc9"',
  '"Pay-As-You-Go"',
  '"b490c95c-c8de-4272-afe4-88a9c799045e"',
  '"Pay-As-You-Go"',
  '12',
  '',
  '"Azure Service Component"',
  'Enabled',
  'None',
  'Provisioned']]

In [6]:
usage_summary = invoice_csv[blank_lines[0]+1:blank_lines[1]]
usage_summary[0:3]

[['Statement'],
 ['Billing Period',
  'Meter Category',
  'Meter Sub-category',
  'Meter Name',
  'Meter Region',
  'SKU',
  'Unit',
  'Consumed Quantity',
  'Included Quantity',
  'Within Commitment',
  'Overage Quantity',
  'Currency',
  'Overage',
  'Commitment Rate',
  'Rate',
  'Value'],
 ['201807(6/12/2018 - 7/11/2018)',
  '"Networking"',
  '',
  '"Data Transfer In (GB)"',
  '"Zone 1"',
  '',
  '"GB"',
  '13.68050634',
  '0.00000000',
  '0',
  '13.68050634',
  'USD',
  '0',
  '0',
  '0',
  '$0.00 USD']]

In [7]:
usage_summary.pop(0)

['Statement']

In [23]:
df_usage_summary = pd.DataFrame(usage_summary[1:], columns=usage_summary.pop(0))
df_usage_summary

Unnamed: 0,201807(6/12/2018 - 7/11/2018),"""Networking""",Unnamed: 3,"""Data Transfer In (GB)""","""Zone 1""",Unnamed: 6,"""GB""",13.68050634,0.00000000,0,13.68050634.1,USD,0.1,0.2,0.3,$0.00 USD
0,201807(6/12/2018 - 7/11/2018),"""Networking""",,"""Data Transfer Out (GB)""","""Zone 1""","""7TD-00001""","""GB""",0.50321847,5.0,0,0.0,USD,0,0,0.0,$0.00 USD
1,201807(6/12/2018 - 7/11/2018),"""Azure App Service""",,"""Free App Service""",,,"""Apps""",0.008064,10.0,0,0.0,USD,0,0,0.0,$0.00 USD
2,201807(6/12/2018 - 7/11/2018),"""Azure App Service""",,"""Standard Small App Service Hours""",,"""7UD-00001""","""Hours""",1.66649,0.0,0,1.66649,USD,0,0,0.1020108131461934,$0.17 USD
3,201807(6/12/2018 - 7/11/2018),"""Virtual Machines""","""A1 VM""","""Compute Hours""","""US South Central""","""7UD-00001""","""Hours""",0.733348,0.0,0,0.733348,USD,0,0,0.0545443636581813,$0.04 USD
4,201807(6/12/2018 - 7/11/2018),"""Storage""","""Locally Redundant""","""Standard IO - Page Blob/Disk (GB)""",,"""7UD-00001""","""GB""",0.000212,0.0,0,0.000212,USD,0,0,0.0,$0.00 USD
5,201807(6/12/2018 - 7/11/2018),"""Storage""","""Locally Redundant""","""Premium Storage - Page Blob/P10 (Units)""","""US West""","""7UD-00001""","""Units""",1.284252,0.0,0,1.284252,USD,0,0,19.70797008686769,$25.31 USD
6,201807(6/12/2018 - 7/11/2018),"""Service Bus""","""Basic Event Hubs""","""Ingress Events (in 1,000,000s)""",,"""9YD-00001""","""1,000,000s""",0.009826,0.0,0,0.009826,USD,0,0,0.0,$0.00 USD
7,201807(6/12/2018 - 7/11/2018),"""Service Bus""","""Basic Event Hubs""","""Throughput Units (Hours)""",,"""9YD-00001""","""Hours""",196.0,0.0,0,196.0,USD,0,0,0.015,$2.94 USD
8,201807(6/12/2018 - 7/11/2018),"""Service Bus""","""Basic Messaging""","""Operations (in 1,000,000s)""",,"""9YD-00001""","""1,000,000s""",3.6e-05,0.0,0,3.6e-05,USD,0,0,0.0,$0.00 USD
9,201807(6/12/2018 - 7/11/2018),"""Networking""","""Public IP Addresses""","""IP Address Hours""",,"""7UD-00001""","""Hours""",83.1,0.0,0,83.1,USD,0,0,0.003971119133574,$0.33 USD


In [24]:
daily_usage = invoice_csv[blank_lines[1]+1:]
daily_usage[0:3]

[['Daily Usage'],
 ['Usage Date',
  'Meter Category',
  'Meter Id',
  'Meter Sub-category',
  'Meter Name',
  'Meter Region',
  'Unit',
  'Consumed Quantity',
  'Resource Location',
  'Consumed Service',
  'Resource Group',
  'Instance Id',
  'Tags',
  'Additional Info',
  'Service Info 1',
  'Service Info 2'],
 ['6/12/2018',
  '"Storage"',
  '"e9549cbe-02d9-4213-b4be-22d6dfe8a3af"',
  '"Locally Redundant"',
  '"Premium Storage - Page Blob/P10 (Units)"',
  '"US West"',
  '"Units"',
  '0.001389',
  '"uswest"',
  '"Microsoft.Compute"',
  '"EPENDYSIS"',
  '"/subscriptions/3e6b71a1-1c47-4188-a4dc-793259a87549/resourceGroups/EPENDYSIS/providers/Microsoft.Compute/disks/ependysis_OsDisk_1_2b38d7f47ec74cd8a435155b4b732392"',
  '"{}"',
  '"{}"',
  '',
  '']]

In [25]:
daily_usage.pop(0)

['Daily Usage']

In [26]:
df_daily_usage = pd.DataFrame(daily_usage[1:], columns=daily_usage.pop(0))
df_daily_usage

Unnamed: 0,Usage Date,Meter Category,Meter Id,Meter Sub-category,Meter Name,Meter Region,Unit,Consumed Quantity,Resource Location,Consumed Service,Resource Group,Instance Id,Tags,Additional Info,Service Info 1,Service Info 2
0,6/12/2018,"""Storage""","""e9549cbe-02d9-4213-b4be-22d6dfe8a3af""","""Locally Redundant""","""Premium Storage - Page Blob/P10 (Units)""","""US West""","""Units""",0.001389,"""uswest""","""Microsoft.Compute""","""EPENDYSIS""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{}""",,
1,6/12/2018,"""Data Management""","""9cb0bde8-bc0d-468c-8423-a25fe06779d3""",,"""Standard IO - Table Write Operation Units (in...",,"""10,000s""",0.0005,"""uswest""","""Microsoft.Storage""","""ependysis""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{}""",,
2,6/12/2018,"""Networking""","""9995d93a-7d35-4d3f-9c69-7a7fea447ef4""",,"""Data Transfer Out (GB)""","""Zone 1""","""GB""",0.059841,"""westus""","""Microsoft.Compute""","""EPENDYSIS""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{""ImageType"":"""",""ServiceType"":"""",""VMName"":"""",...",,
3,6/12/2018,"""Virtual Machines""","""d101de3e-ae70-48bb-8605-64fcd0a3ce8f""","""Standard_D4_v3 VM (Windows)""","""Compute Hours""","""US West""","""Hours""",0.766682,"""westus""","""Microsoft.Compute""","""EPENDYSIS""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{""ImageType"":"""",""ServiceType"":""Standard_D4s_v...",,
4,6/12/2018,"""Data Management""","""b9e5e77c-a0b3-4a2c-9b8b-57fa54f31c52""",,"""Standard IO - Table Batch Write Operation Uni...",,"""10,000s""",0.0001,"""uswest""","""Microsoft.Storage""","""ependysis""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{}""",,
5,6/12/2018,"""Networking""","""d54686f0-77ff-43f3-9e7c-2099030d32a7""","""DNS""","""DNS Queries (1M)""",,"""1M Queries""",0.000804,"""global""","""Microsoft.Network""","""dns""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{}""",,
6,6/12/2018,"""Networking""","""f114cb19-ea64-40b5-bcd7-aee474b62853""","""Public IP Addresses""","""IP Address Hours""",,"""Hours""",0.89999999999999991,"""westus""","""Microsoft.Network""","""ependysis""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{}""",,
7,6/12/2018,"""Data Management""","""c80a3636-2edb-4248-bcb1-04ef818a75ac""",,"""Standard IO - Disk Write Operation Units (in ...",,"""10,000s""",0.0093,"""uswest""","""Microsoft.Storage""","""ependysis""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{}""",,
8,6/12/2018,"""Data Management""","""923978e1-fd3f-4bd5-a798-f4b533057e46""",,"""Standard IO - Block Blob Delete Operation Uni...",,"""10,000s""",0.0045,"""uswest""","""Microsoft.Storage""","""ependysis""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{}""",,
9,6/12/2018,"""Networking""","""32c3ebec-1646-49e3-8127-2cafbd3a04d8""",,"""Data Transfer In (GB)""","""Zone 1""","""GB""",1.619603,"""westus""","""Microsoft.Compute""","""EPENDYSIS""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{""ImageType"":"""",""ServiceType"":"""",""VMName"":"""",...",,


In [27]:
for col in ['Meter Id', 'Meter Category', 'Meter Region', 'Meter Name', 
            'Meter Sub-category', 'Unit', 'Resource Location', 'Consumed Service', 'Resource Group', 'Instance Id']:
    df_daily_usage[col] = df_daily_usage[col].astype('category')

df_daily_usage['Usage Date'] = pd.to_datetime(df_daily_usage['Usage Date'])
df_daily_usage['Resource Group'] = df_daily_usage['Resource Group'].str.lower()

df_daily_usage.dtypes

Usage Date            datetime64[ns]
Meter Category              category
Meter Id                    category
Meter Sub-category          category
Meter Name                  category
Meter Region                category
Unit                        category
Consumed Quantity             object
Resource Location           category
Consumed Service            category
Resource Group                object
Instance Id                 category
Tags                          object
Additional Info               object
Service Info 1                object
Service Info 2                object
dtype: object

In [28]:
df_daily_usage

Unnamed: 0,Usage Date,Meter Category,Meter Id,Meter Sub-category,Meter Name,Meter Region,Unit,Consumed Quantity,Resource Location,Consumed Service,Resource Group,Instance Id,Tags,Additional Info,Service Info 1,Service Info 2
0,2018-06-12,"""Storage""","""e9549cbe-02d9-4213-b4be-22d6dfe8a3af""","""Locally Redundant""","""Premium Storage - Page Blob/P10 (Units)""","""US West""","""Units""",0.001389,"""uswest""","""Microsoft.Compute""","""ependysis""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{}""",,
1,2018-06-12,"""Data Management""","""9cb0bde8-bc0d-468c-8423-a25fe06779d3""",,"""Standard IO - Table Write Operation Units (in...",,"""10,000s""",0.0005,"""uswest""","""Microsoft.Storage""","""ependysis""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{}""",,
2,2018-06-12,"""Networking""","""9995d93a-7d35-4d3f-9c69-7a7fea447ef4""",,"""Data Transfer Out (GB)""","""Zone 1""","""GB""",0.059841,"""westus""","""Microsoft.Compute""","""ependysis""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{""ImageType"":"""",""ServiceType"":"""",""VMName"":"""",...",,
3,2018-06-12,"""Virtual Machines""","""d101de3e-ae70-48bb-8605-64fcd0a3ce8f""","""Standard_D4_v3 VM (Windows)""","""Compute Hours""","""US West""","""Hours""",0.766682,"""westus""","""Microsoft.Compute""","""ependysis""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{""ImageType"":"""",""ServiceType"":""Standard_D4s_v...",,
4,2018-06-12,"""Data Management""","""b9e5e77c-a0b3-4a2c-9b8b-57fa54f31c52""",,"""Standard IO - Table Batch Write Operation Uni...",,"""10,000s""",0.0001,"""uswest""","""Microsoft.Storage""","""ependysis""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{}""",,
5,2018-06-12,"""Networking""","""d54686f0-77ff-43f3-9e7c-2099030d32a7""","""DNS""","""DNS Queries (1M)""",,"""1M Queries""",0.000804,"""global""","""Microsoft.Network""","""dns""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{}""",,
6,2018-06-12,"""Networking""","""f114cb19-ea64-40b5-bcd7-aee474b62853""","""Public IP Addresses""","""IP Address Hours""",,"""Hours""",0.89999999999999991,"""westus""","""Microsoft.Network""","""ependysis""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{}""",,
7,2018-06-12,"""Data Management""","""c80a3636-2edb-4248-bcb1-04ef818a75ac""",,"""Standard IO - Disk Write Operation Units (in ...",,"""10,000s""",0.0093,"""uswest""","""Microsoft.Storage""","""ependysis""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{}""",,
8,2018-06-12,"""Data Management""","""923978e1-fd3f-4bd5-a798-f4b533057e46""",,"""Standard IO - Block Blob Delete Operation Uni...",,"""10,000s""",0.0045,"""uswest""","""Microsoft.Storage""","""ependysis""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{}""",,
9,2018-06-12,"""Networking""","""32c3ebec-1646-49e3-8127-2cafbd3a04d8""",,"""Data Transfer In (GB)""","""Zone 1""","""GB""",1.619603,"""westus""","""Microsoft.Compute""","""ependysis""","""/subscriptions/3e6b71a1-1c47-4188-a4dc-793259...","""{}""","""{""ImageType"":"""",""ServiceType"":"""",""VMName"":"""",...",,


In [30]:
# pickle everything
import pickle
pickle.dump( df_daily_usage, open( "df_daily_usage.p", "wb" ) )
pickle.dump( df_usage_summary, open( "df_usage_summary.p", "wb" ) )