# Load Azure Daily Usage for Month to Match Invoice

The API we are using reports daily or hourly usage based on a query of the date the usage was reported. 

Since there is some delay between when the usage occurred and when the usage is reported, it is important to query for a day or so *before* the first day of the invoice billing period, and a few days *after* its last day, to make sure all usage is picked up. 

This notebook analyzes an invoice with a billing period from 6/12/2018 to 7/11/2018, so we will query usage reported between 6/10/2018 and 7/14/2018 and then filter the returned records down to the 6/12/2018 to 7/11/2018 period based on the actual usage date. 

In [None]:
import adal
from msrestazure.azure_active_directory import AADTokenCredentials
from dotenv import load_dotenv, find_dotenv
import os
load_dotenv(find_dotenv()) 
import pandas as pd
import numpy as np

In [None]:
import requests
 
# Parameters needed for the API

# Account-specific values are read from the environment
# (None when the variable is not set).
subscription = os.environ.get("SUBSCRIPTION")
tenant = os.environ.get("TENANT")
client_id = os.environ.get("CLIENT_ID")
client_secret = os.environ.get("CLIENT_SECRET")

# Fixed values for this analysis.
offer = 'MS-AZR-0003P'
currency = 'USD'
locale = 'en-US'
region = 'US'

In [None]:
# using authentication with secret
# To configure user service principal see 
# https://docs.microsoft.com/en-us/azure/role-based-access-control/role-assignments-portal
# https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-group-create-service-principal-portal

def authenticate_client_key(tenant, client_id, client_secret):
    """
    Authenticate as a service principal using its client id and key.

    Acquires a token for the Azure management resource from the tenant's
    login authority and wraps it in an ``AADTokenCredentials`` object.

    Args:
        tenant: Tenant appended to the login authority URL.
        client_id: Client (application) id of the service principal.
        client_secret: Key belonging to ``client_id``.

    Returns:
        ``AADTokenCredentials`` built from the acquired token and ``client_id``.
    """
    login_host = 'https://login.microsoftonline.com'
    management_resource = 'https://management.core.windows.net/'

    auth_context = adal.AuthenticationContext(login_host + '/' + tenant, api_version=None)
    token = auth_context.acquire_token_with_client_credentials(
        management_resource, client_id, client_secret)

    return AADTokenCredentials(token, client_id)


In [None]:
# Sign in as the service principal and keep the raw access token
# (None if the token response carries no 'access_token' entry).
credentials = authenticate_client_key(
    tenant=tenant,
    client_id=client_id,
    client_secret=client_secret,
)
access_token = credentials.token.get('access_token')

In [None]:
# Base management URI for the subscription.
azure_mgmt_uri = 'https://management.azure.com:443/subscriptions/{subscriptionId}'.format(
    subscriptionId=subscription)

# Query for daily usage *reported* between 2018-06-10 and 2018-07-14
# (times are URL-encoded: '+' is a space, '%3a' is ':').
# NOTE(review): the 'reportedstartTime' casing is kept exactly as it was — confirm
# against the API docs whether 'reportedStartTime' is required.
usage_query = "&".join([
    "api-version=2015-06-01-preview",
    "aggregationGranularity=Daily",
    "reportedstartTime=2018-06-10+00%3a00%3a00Z",
    "reportedEndTime=2018-07-14+00%3a00%3a00Z",
])
uri_str = "{azure_mgmt_uri}/providers/Microsoft.Commerce/UsageAggregates?" + usage_query
usage_url = uri_str.format(azure_mgmt_uri=azure_mgmt_uri)
usage_url

In [None]:
# Download every page of usage records.
# The response is paged: when more records are available, the JSON carries a
# 'nextLink' URL for the following page. Reading only the first response
# silently drops the remaining records, so keep following 'nextLink' until
# it is absent.
auth_headers = {'Authorization': 'Bearer %s' % access_token}
usage_records = []
next_url = usage_url
while next_url:
    response = requests.get(next_url, allow_redirects=False, headers=auth_headers)
    # Fail here on 4xx/5xx instead of later with a KeyError on 'value'.
    response.raise_for_status()
    usage_page = response.json()
    usage_records.extend(usage_page.get('value', []))
    next_url = usage_page.get('nextLink')

# Same shape as a single response: the records live under 'value'.
usage = {'value': usage_records}

In [None]:
# Each usage record keeps its fields under the 'properties' key;
# collect those dicts and build one dataframe row per record.
usage_properties = [record['properties'] for record in usage['value']]
df_daily_usage_api = pd.DataFrame(usage_properties)

In [None]:
# the API doesn't actually return aggregates by day
# the data has to be summarized to get a result by day by resource:
# one row per (meterId, usageStartTime), with the quantities summed and the
# maximum taken of each descriptive column.
#
# The reductions are given by name ('max' / 'sum') rather than as np.max / np.sum:
# recent pandas versions warn when NumPy callables are passed to groupby.agg
# and plan to change how they are applied; the string names keep the
# groupby max/sum behaviour.
df_by_day_group = df_daily_usage_api.groupby(['meterId', 'usageStartTime'])
df_daily_usage = df_by_day_group.agg({
    'usageEndTime': 'max',
    'meterCategory': 'max',
    'meterRegion': 'max',
    'meterName': 'max',
    'meterSubCategory': 'max',
    'subscriptionId': 'max',
    'unit': 'max',
    'quantity': 'sum',
})
# turn the (meterId, usageStartTime) group index back into regular columns
df_daily_usage = df_daily_usage.reset_index()


In [None]:
# clean up column data types
categorical_columns = [
    'meterCategory',
    'meterRegion',
    'meterName',
    'meterSubCategory',
    'subscriptionId',
    'unit',
]
timestamp_columns = ['usageEndTime', 'usageStartTime']

# descriptive columns become pandas categoricals
for column_name in categorical_columns:
    df_daily_usage[column_name] = df_daily_usage[column_name].astype('category')

# usage start/end are parsed into datetimes
for column_name in timestamp_columns:
    df_daily_usage[column_name] = pd.to_datetime(df_daily_usage[column_name])

# show the resulting dtypes
df_daily_usage.dtypes

In [None]:
# display the aggregated daily usage (one row per meterId and usageStartTime)
df_daily_usage

In [None]:
# number of aggregated rows before filtering to the billing period
len(df_daily_usage)

In [None]:
# filter usage to dates from 6/12/2018 to 7/11/2018 (both days included)
import datetime

# Give the bounds the same timezone as the usageStartTime column.
# Depending on the pandas version and the timestamp strings, to_datetime may
# yield a tz-aware column, and ordering comparisons between tz-aware values and
# tz-naive Timestamps raise TypeError. When the column is tz-naive its tz is
# None and tz_localize(None) leaves the bounds naive.
usage_tz = df_daily_usage['usageStartTime'].dt.tz
from_date = pd.Timestamp(datetime.date(2018, 6, 12)).tz_localize(usage_tz)
to_date = pd.Timestamp(datetime.date(2018, 7, 11)).tz_localize(usage_tz)

# between() is inclusive on both ends, i.e. from_date <= start <= to_date
in_billing_period = df_daily_usage['usageStartTime'].between(from_date, to_date)
df_invoice_daily_usage = df_daily_usage.loc[in_billing_period]

In [None]:
# number of rows left after filtering to the billing period
len(df_invoice_daily_usage)

In [None]:
# list the distinct usageEndTime values present in the filtered data
df_invoice_daily_usage['usageEndTime'].unique()

In [None]:
# pickle everything
# (files are opened in `with` blocks so the handles are flushed and closed
# as soon as each dump finishes)
import pickle

with open("df_invoice_daily_usage.p", "wb") as invoice_file:
    pickle.dump(df_invoice_daily_usage, invoice_file)

with open("df_daily_usage_api.p", "wb") as api_file:
    pickle.dump(df_daily_usage_api, api_file)

In [None]:
# look up a single meter on a single day:
# 2018-06-16	65d4ded2-41ae-43a8-bb68-3c200e1ba864
import datetime

# Localize the date to the usageStartTime column's timezone (a no-op when the
# column is tz-naive); a tz-naive Timestamp would not match tz-aware values.
usage_date = pd.Timestamp(datetime.date(2018, 6, 16)).tz_localize(
    df_daily_usage['usageStartTime'].dt.tz)

is_meter = df_daily_usage['meterId'] == '65d4ded2-41ae-43a8-bb68-3c200e1ba864'
is_usage_date = df_daily_usage['usageStartTime'] == usage_date
df_daily_usage.loc[is_meter & is_usage_date]