# Load Azure Daily Usage for Month to Match Invoice

The API we are using reports daily or hourly usage based on a query of the date the usage was reported. 

Since there is some delay between when the usage occured and when the usage is reported, it is important to query for a day or so *before* the first day of the invoice billing period to make sure all usage is picked up. 

This notebook analyzes an invoice with a billing period from 6/12/2018 to 7/11/2018, so we will query reported usage between 6/10/2018 to 7/14/2018 and filter down the records returned to the 6/12/2018 to 7/11/2018 period based on the actual usage date. 

In [1]:
import adal
from msrestazure.azure_active_directory import AADTokenCredentials
from dotenv import load_dotenv, find_dotenv
import os
load_dotenv(find_dotenv()) 
import pandas as pd
import numpy as np

In [2]:
import requests
 
# Parameters need for API
subscription = os.getenv("SUBSCRIPTION")
tenant = os.getenv("TENANT")
client_id = os.getenv("CLIENT_ID")
client_secret = os.getenv("CLIENT_SECRET")
offer = 'MS-AZR-0003P'
currency = 'USD'
locale = 'en-US'
region = 'US'

In [3]:
# using authentication with secret
# To configure user service principal see 
# https://docs.microsoft.com/en-us/azure/role-based-access-control/role-assignments-portal
# https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-group-create-service-principal-portal

def authenticate_client_key(tenant, client_id, client_secret):
    """
    Authenticate using service principal w/ key.
    """
    authority_host_uri = 'https://login.microsoftonline.com'
    authority_uri = authority_host_uri + '/' + tenant
    resource_uri = 'https://management.core.windows.net/'
    
    context = adal.AuthenticationContext(authority_uri, api_version=None)
    mgmt_token = context.acquire_token_with_client_credentials(resource_uri, client_id, client_secret)
    credentials = AADTokenCredentials(mgmt_token, client_id)

    return credentials


In [4]:
credentials = authenticate_client_key(tenant, client_id, client_secret)
access_token = credentials.token.get('access_token')

In [5]:
azure_mgmt_uri = 'https://management.azure.com:443/subscriptions/{subscriptionId}'.format(subscriptionId = subscription)
uri_str = "{azure_mgmt_uri}/providers/Microsoft.Commerce/UsageAggregates?" + \
    "api-version=2015-06-01-preview&" + \
    "aggregationGranularity=Daily&" + \
    "reportedstartTime=2018-06-10+00%3a00%3a00Z&" + \
    "reportedEndTime=2018-07-14+00%3a00%3a00Z"
usage_url = uri_str.format(azure_mgmt_uri = azure_mgmt_uri)
usage_url

'https://management.azure.com:443/subscriptions/3e6b71a1-1c47-4188-a4dc-793259a87549/providers/Microsoft.Commerce/UsageAggregates?api-version=2015-06-01-preview&aggregationGranularity=Daily&reportedstartTime=2018-06-10+00%3a00%3a00Z&reportedEndTime=2018-07-14+00%3a00%3a00Z'

In [6]:
response = requests.get(usage_url, allow_redirects=False, headers = {'Authorization': 'Bearer %s' %access_token})
usage = response.json()

In [7]:
# pull the 'properties' key from each usage record and create a dataframe
df_daily_usage_api = pd.DataFrame([x['properties'] for x in usage['value']])

In [8]:
# the API doesn't actually return aggregates by day
# the date has to be summarized to get a result by day by resource
df_by_day_group = df_daily_usage_api.groupby(['meterId','usageStartTime'])
df_daily_usage = df_by_day_group.agg({
    'usageEndTime': np.max,
    'meterCategory': np.max, 
    'meterRegion': np.max, 
    'meterName': np.max, 
    'meterSubCategory' : np.max, 
    'subscriptionId': np.max, 
    'unit': np.max, 
    'quantity': np.sum})
df_daily_usage = df_daily_usage.reset_index()


In [9]:
# clean up column data types
for col in ['meterCategory', 'meterRegion', 'meterName', 'meterSubCategory', 'subscriptionId', 'unit']:
    df_daily_usage[col] = df_daily_usage[col].astype('category')
    
df_daily_usage['usageEndTime'] = pd.to_datetime(df_daily_usage['usageEndTime'])
df_daily_usage['usageStartTime'] = pd.to_datetime(df_daily_usage['usageStartTime'])
df_daily_usage.dtypes

meterId                     object
usageStartTime      datetime64[ns]
usageEndTime        datetime64[ns]
meterCategory             category
meterRegion               category
meterName                 category
meterSubCategory          category
subscriptionId            category
unit                      category
quantity                   float64
dtype: object

In [10]:
df_daily_usage

Unnamed: 0,meterId,usageStartTime,usageEndTime,meterCategory,meterRegion,meterName,meterSubCategory,subscriptionId,unit,quantity
0,0c4d13cb-7134-4be8-a6fa-a52fdcb87e4c,2018-06-20,2018-06-21,Networking,US West,Data transfer - Geo-Replication (GB),Geo Redundant,3e6b71a1-1c47-4188-a4dc-793259a87549,GB,0.000311
1,0c4d13cb-7134-4be8-a6fa-a52fdcb87e4c,2018-06-21,2018-06-22,Networking,US West,Data transfer - Geo-Replication (GB),Geo Redundant,3e6b71a1-1c47-4188-a4dc-793259a87549,GB,0.001250
2,0c4d13cb-7134-4be8-a6fa-a52fdcb87e4c,2018-06-22,2018-06-23,Networking,US West,Data transfer - Geo-Replication (GB),Geo Redundant,3e6b71a1-1c47-4188-a4dc-793259a87549,GB,0.001251
3,0c4d13cb-7134-4be8-a6fa-a52fdcb87e4c,2018-06-23,2018-06-24,Networking,US West,Data transfer - Geo-Replication (GB),Geo Redundant,3e6b71a1-1c47-4188-a4dc-793259a87549,GB,0.001251
4,0c4d13cb-7134-4be8-a6fa-a52fdcb87e4c,2018-06-24,2018-06-25,Networking,US West,Data transfer - Geo-Replication (GB),Geo Redundant,3e6b71a1-1c47-4188-a4dc-793259a87549,GB,0.001251
5,0c4d13cb-7134-4be8-a6fa-a52fdcb87e4c,2018-06-25,2018-06-26,Networking,US West,Data transfer - Geo-Replication (GB),Geo Redundant,3e6b71a1-1c47-4188-a4dc-793259a87549,GB,0.001251
6,0c4d13cb-7134-4be8-a6fa-a52fdcb87e4c,2018-06-26,2018-06-27,Networking,US West,Data transfer - Geo-Replication (GB),Geo Redundant,3e6b71a1-1c47-4188-a4dc-793259a87549,GB,0.001251
7,0c4d13cb-7134-4be8-a6fa-a52fdcb87e4c,2018-06-27,2018-06-28,Networking,US West,Data transfer - Geo-Replication (GB),Geo Redundant,3e6b71a1-1c47-4188-a4dc-793259a87549,GB,0.001251
8,0c4d13cb-7134-4be8-a6fa-a52fdcb87e4c,2018-06-28,2018-06-29,Networking,US West,Data transfer - Geo-Replication (GB),Geo Redundant,3e6b71a1-1c47-4188-a4dc-793259a87549,GB,0.001251
9,0c4d13cb-7134-4be8-a6fa-a52fdcb87e4c,2018-06-29,2018-06-30,Networking,US West,Data transfer - Geo-Replication (GB),Geo Redundant,3e6b71a1-1c47-4188-a4dc-793259a87549,GB,0.001251


In [11]:
len(df_daily_usage)

502

In [12]:
# filter usage to dates from 6/12/2018 to 7/11/2018
import datetime
from_date = pd.Timestamp(datetime.date(2018, 6, 12))
to_date = pd.Timestamp(datetime.date(2018, 7, 10))
df_invoice_daily_usage = df_daily_usage.loc[(df_daily_usage['usageStartTime'] >= from_date) & (df_daily_usage['usageEndTime'] <= to_date)]

In [13]:
len(df_invoice_daily_usage)

439

In [14]:
df_invoice_daily_usage['usageEndTime'].unique()

array(['2018-06-21T00:00:00.000000000', '2018-06-22T00:00:00.000000000',
       '2018-06-23T00:00:00.000000000', '2018-06-24T00:00:00.000000000',
       '2018-06-25T00:00:00.000000000', '2018-06-26T00:00:00.000000000',
       '2018-06-27T00:00:00.000000000', '2018-06-28T00:00:00.000000000',
       '2018-06-29T00:00:00.000000000', '2018-06-30T00:00:00.000000000',
       '2018-07-01T00:00:00.000000000', '2018-07-02T00:00:00.000000000',
       '2018-07-03T00:00:00.000000000', '2018-07-04T00:00:00.000000000',
       '2018-07-05T00:00:00.000000000', '2018-07-06T00:00:00.000000000',
       '2018-07-07T00:00:00.000000000', '2018-07-08T00:00:00.000000000',
       '2018-07-09T00:00:00.000000000', '2018-07-10T00:00:00.000000000',
       '2018-06-13T00:00:00.000000000', '2018-06-15T00:00:00.000000000',
       '2018-06-20T00:00:00.000000000', '2018-06-17T00:00:00.000000000',
       '2018-06-18T00:00:00.000000000', '2018-06-19T00:00:00.000000000',
       '2018-06-16T00:00:00.000000000', '2018-06-14

In [15]:
# pickle everything
import pickle
pickle.dump( df_invoice_daily_usage, open( "df_invoice_daily_usage.p", "wb" ) )
pickle.dump( df_daily_usage_api, open( "df_daily_usage_api.p", "wb" ) )

In [16]:
# 2018-06-16	65d4ded2-41ae-43a8-bb68-3c200e1ba864
import datetime
usage_date = pd.Timestamp(datetime.date(2018, 6, 16))

df_daily_usage.loc[(df_daily_usage['meterId'] == '65d4ded2-41ae-43a8-bb68-3c200e1ba864') & (df_daily_usage['usageStartTime'] == usage_date)]

Unnamed: 0,meterId,usageStartTime,usageEndTime,meterCategory,meterRegion,meterName,meterSubCategory,subscriptionId,unit,quantity
194,65d4ded2-41ae-43a8-bb68-3c200e1ba864,2018-06-16,2018-06-17,Data Services,,100 Request Units (Hours),Cosmos DB,3e6b71a1-1c47-4188-a4dc-793259a87549,Hours,76.0
