# PWBI Admin API Data Pull

In [1]:
import requests
import pandas as pd
from tqdm import tqdm
from urllib.parse import urljoin
from azure.identity import InteractiveBrowserCredential

In [2]:
credential = InteractiveBrowserCredential()
credential.authenticate()

<azure.identity._auth_record.AuthenticationRecord at 0x1de90128548>

In [4]:
api = 'https://analysis.windows.net/powerbi/api/.default'
access_token = credential.get_token(api)
access_token = access_token.token

In [5]:
access_token

'eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiIsIng1dCI6IlQxU3QtZExUdnlXUmd4Ql82NzZ1OGtyWFMtSSIsImtpZCI6IlQxU3QtZExUdnlXUmd4Ql82NzZ1OGtyWFMtSSJ9.eyJhdWQiOiJodHRwczovL2FuYWx5c2lzLndpbmRvd3MubmV0L3Bvd2VyYmkvYXBpIiwiaXNzIjoiaHR0cHM6Ly9zdHMud2luZG93cy5uZXQvNjkyZjM3OGEtNDdmOC00MDRmLWE5OTctODgwODdhNDczMTQ1LyIsImlhdCI6MTcwMjQ2NDk4MiwibmJmIjoxNzAyNDY0OTgyLCJleHAiOjE3MDI0Njk4MTUsImFjY3QiOjAsImFjciI6IjEiLCJhaW8iOiJBVlFBcS84VkFBQUFDTW5UMzZHU3VYQnQyTTgvcmk2K3Q3VjB0UW1jZFY1WG1lZGhmOTZlSVRxMjJIbngvSCtEMnE1Q1ArdTFCYko0YlN4TG5mbDQ4MTZIUUVwWjF4QjhQV2RxR0hFY0JETXdOSmFka2FaOHNTOD0iLCJhbXIiOlsicHdkIiwicnNhIiwibWZhIl0sImFwcGlkIjoiMDRiMDc3OTUtOGRkYi00NjFhLWJiZWUtMDJmOWUxYmY3YjQ2IiwiYXBwaWRhY3IiOiIwIiwiZGV2aWNlaWQiOiIxMDc1ZjVhNi1lM2YwLTRlMzItOGZhYS00YWJiMWQzYWM3MzEiLCJmYW1pbHlfbmFtZSI6Ikdvd2RhIiwiZ2l2ZW5fbmFtZSI6IlByYWp3YWwiLCJpcGFkZHIiOiIyMTcuMTY1LjExMC4yMzgiLCJuYW1lIjoiUHJhandhbCBHb3dkYSIsIm9pZCI6IjA4N2MxZWQ5LTQwOTAtNGE4Zi05NjM0LTA3ZDQyMjdlODVkMSIsIm9ucHJlbV9zaWQiOiJTLTEtNS0yMS01NjgzMjg1Mi0yMTYzNjM2MTg2LTMyNDcwODI0MD

## Querying the API

In [5]:
base_url = 'https://api.powerbi.com/v1.0/myorg/'
header = {'Authorization': f'Bearer {access_token}'}

### Apps

In [10]:
path = 'admin/apps'
url = urljoin(base_url, path)
params = {'$top': 100}
apps=requests.get(url, params=params,headers=header)

print(f"Calling the API: {apps.url}")

if apps.status_code == 200:
    df_apps = pd.json_normalize(apps.json(), record_path = 'value')
else:
    print(apps.status_code)

with pd.ExcelWriter("Apps.xlsx", engine='xlsxwriter') as writer:
    df_apps.to_excel(writer, sheet_name = 'Apps')
    
print(f"Shape of apps is {df_apps.shape}")
df_apps.head(2) 

Calling the API: https://api.powerbi.com/v1.0/myorg/admin/apps?%24top=100
Shape of apps is (79, 7)


Unnamed: 0,id,name,lastUpdate,description,publishedBy,workspaceId,users
0,4328a84b-79a4-4977-a9d7-70fdd5bb91c7,Grandiose,2023-12-08T04:55:16.57Z,This Grandiose Report shows a dashboard and mo...,Pooja Sachdev,3be941d8-fb28-4603-979b-3795a778c77f,[]
1,be829724-1619-469b-892a-6b369f71a7fc,Azure Backup,2021-03-07T08:35:31.477Z,Gain insights and create custom reports for pr...,Marwan Khadem Alsrouji,e4f4fbda-8d66-4825-b518-41f6f606ed5d,[]


### Datasets

In [40]:
path = 'admin/datasets'
url = urljoin(base_url, path)
params = {}
datasets=requests.get(url, params=params,headers=header)

print(f"Calling the API: {datasets.url}")

if datasets.status_code == 200:
    df_datasets = pd.json_normalize(datasets.json(), record_path = 'value')
else:
    print(datasets.status_code)
    
with pd.ExcelWriter("Datasets.xlsx", engine='xlsxwriter') as writer:
    df_datasets.to_excel(writer, sheet_name = 'Datasets')
    
print(f"Shape of datasets is {df_datasets.shape}")
df_datasets.head(2) 

Calling the API: https://api.powerbi.com/v1.0/myorg/admin/datasets
Shape of datasets is (622, 19)


Unnamed: 0,id,name,webUrl,addRowsAPIEnabled,configuredBy,isRefreshable,isEffectiveIdentityRequired,isEffectiveIdentityRolesRequired,targetStorageMode,createdDate,contentProviderType,createReportEmbedURL,qnaEmbedURL,upstreamDatasets,users,isInPlaceSharingEnabled,workspaceId,queryScaleOutSettings.autoSyncReadOnlyReplicas,queryScaleOutSettings.maxReadOnlyReplicas
0,1f6d9c95-8581-405b-ad68-e94f95b0fc77,POS Transaction Reports,https://app.powerbi.com/groups/24fd5829-f54e-4...,False,b.joseph@gagroup.net,False,False,False,Abf,2017-07-31T08:54:26.797Z,Unknown,https://app.powerbi.com/reportEmbed?config=eyJ...,https://app.powerbi.com/qnaEmbed?config=eyJjbH...,[],[],False,24fd5829-f54e-4eba-8a3a-43f84a6bc60f,True,0
1,6a25f78e-cb37-4c40-a631-100cefbc9b94,Dashboard Usage Metrics Model,https://app.powerbi.com/groups/3be941d8-fb28-4...,False,b.joseph@gagroup.net,False,False,False,Abf,2017-07-31T09:01:14.087Z,UsageMetricsUserDashboard,https://app.powerbi.com/reportEmbed?config=eyJ...,https://app.powerbi.com/qnaEmbed?config=eyJjbH...,[],[],False,3be941d8-fb28-4603-979b-3795a778c77f,True,0


### Dashboards

In [12]:
path = 'admin/dashboards'
url = urljoin(base_url, path)
params = {}
dashboards=requests.get(url, params=params,headers=header)

print(f"Calling the API: {dashboards.url}")

if dashboards.status_code == 200:
    df_dashboards = pd.json_normalize(dashboards.json(), record_path = 'value')
else:
    print(dashboards.status_code)
    
with pd.ExcelWriter("Dashboards.xlsx", engine='xlsxwriter') as writer:
    df_dashboards.to_excel(writer, sheet_name = 'Dashboards')
    
print(f"Shape of dashboards is {df_dashboards.shape}")
df_dashboards.head(2) 

Calling the API: https://api.powerbi.com/v1.0/myorg/admin/dashboards
Shape of dashboards is (30, 9)


Unnamed: 0,id,displayName,isReadOnly,webUrl,embedUrl,users,subscriptions,workspaceId,appId
0,c25a7692-e67a-43f4-a0c5-8442b13d82bc,Azure Backup,False,https://app.powerbi.com/groups/me/dashboards/c...,https://app.powerbi.com/dashboardEmbed?dashboa...,[],[],e4f4fbda-8d66-4825-b518-41f6f606ed5d,
1,45b93841-b9fa-44de-a785-8c5cf5826048,[App] Azure Backup,True,https://app.powerbi.com/groups/me/apps/be82972...,https://app.powerbi.com/dashboardEmbed?dashboa...,[],[],e4f4fbda-8d66-4825-b518-41f6f606ed5d,be829724-1619-469b-892a-6b369f71a7fc


### Refreshables

In [44]:
path = 'admin/capacities/refreshables'
url = urljoin(base_url, path)
params = {
    '$top': 200,
    '$expand': 'datasets'
}
refreshables=requests.get(url, params=params,headers=header)

print(f"Calling the API: {refreshables.url}")

if refreshables.status_code == 200:
    df_refreshables = pd.json_normalize(refreshables.json(), record_path = 'value')
else:
    print(refreshables.status_code)
    
with pd.ExcelWriter("Refreshables.xlsx", engine='xlsxwriter') as writer:
    df_refreshables.to_excel(writer, sheet_name = 'Refreshables')
    
print(f"Shape of refreshables is {df_refreshables.shape}")
df_refreshables.head(2) 

Calling the API: https://api.powerbi.com/v1.0/myorg/admin/capacities/refreshables?%24top=200&%24expand=datasets
400
Shape of refreshables is (94, 27)


Unnamed: 0,id,name,kind,configuredBy,lastRefresh.id,lastRefresh.refreshType,lastRefresh.startTime,lastRefresh.endTime,lastRefresh.serviceExceptionJson,lastRefresh.status,...,refreshSchedule.notifyOption,group.id,group.name,startTime,endTime,refreshCount,refreshFailures,averageDuration,medianDuration,refreshesPerDay
0,6a25f78e-cb37-4c40-a631-100cefbc9b94,Dashboard Usage Metrics Model,Dataset,[b.joseph@gagroup.net],191826571,Scheduled,2023-04-24T11:13:00.32Z,2023-04-24T11:13:00.32Z,,Disabled,...,MailOnFailure,3be941d8-fb28-4603-979b-3795a778c77f,Grandiose,,,,,,,
1,916afafc-4cb4-497c-961a-2b9259fd48f0,Report Usage Metrics Model,Dataset,[b.joseph@gagroup.net],289517260,Scheduled,2023-12-12T06:18:00.153Z,2023-12-12T06:18:00.497Z,,Completed,...,MailOnFailure,3be941d8-fb28-4603-979b-3795a778c77f,Grandiose,2023-12-09T07:43:07.897Z,2023-12-12T06:18:00.497Z,36.0,0.0,0.333111,0.2905,12.0


### User Access 

In [13]:
bi_team_members = ['Pooja Sachdev', 'Prajwal Gowda', 'Balu Renganathan', 'Elizabeth Abraham','B Lakshmi Narayanan','Sachin Basavanakatti Math','']
bi_apps = df_apps[df_apps['publishedBy'].isin(bi_team_members)][['id','name','publishedBy']].copy()

In [14]:
user_access_list = pd.DataFrame()
with pd.ExcelWriter("User Access.xlsx", engine='xlsxwriter') as writer:
    for each in tqdm(bi_apps['id'].unique()):
        path = f'admin/apps/{each}/users'
        url = urljoin(base_url, path)
        users=requests.get(url, headers=header)
        if users.status_code == 200:
            inner_df = pd.json_normalize(users.json(), record_path = 'value')
            inner_df['appId'] = each
            inner_df = pd.merge(left=inner_df, right=bi_apps,how='left',left_on='appId', right_on='id')
            inner_df.to_excel(writer, sheet_name = f"{inner_df['name'].unique()[0]}"[:25]+'...')
            user_access_list = pd.concat([user_access_list,inner_df], axis=0)
        else:
            print(users.status_code)

print(f"Shape of the final DF is {user_access_list.shape}")

100%|██████████████████████████████████████████████████████████████████████████████████| 64/64 [00:28<00:00,  2.23it/s]


Shape of the final DF is (2138, 11)


### Groups / Workspaces

In [15]:
path = 'groups'
url = urljoin(base_url, path)

groups=requests.get(url, headers=header)

print(f"Calling the API: {groups.url}")

if groups.status_code == 200:
    df_groups = pd.json_normalize(groups.json(), record_path = 'value')
else:
    print(groups.status_code)

Calling the API: https://api.powerbi.com/v1.0/myorg/groups


In [16]:
df_groups

Unnamed: 0,id,isReadOnly,isOnDedicatedCapacity,type,name,capacityId,defaultDatasetStorageFormat
0,3d366bdd-694d-4d57-853a-4369c7f9cffa,False,False,Workspace,HR Department,,
1,3be941d8-fb28-4603-979b-3795a778c77f,False,True,Workspace,Grandiose,6045F862-2E7B-40FC-8CF5-B74FB64B2C44,Large
2,8dd061ff-43d1-4e7e-893d-4c8b9e4e5bb7,False,False,Workspace,Olive Country Reports,,
3,d0f6e941-e243-4e67-af18-e437e8e17581,False,True,Workspace,Grandiose Report,6045F862-2E7B-40FC-8CF5-B74FB64B2C44,Small
4,d5178c0c-421e-4e47-b53d-060384376121,False,False,Workspace,GA SpareParts,,
5,603de850-d77d-411f-bd0a-7a63fb423b7d,False,False,Workspace,GAG - Unilever,,
6,e44c7104-545b-4ca7-8dc3-a27e683e2ef8,False,False,Workspace,UAT-Blueprint BI Workspace,,
7,4012438a-a523-44ca-a28b-498a965943e6,False,False,Workspace,Gallega Reports,,
8,d3c8d986-cdca-4155-bd35-bab26d1fab67,False,False,Workspace,GA SpareParts Reports,,
9,d859b502-a8e4-4f81-a7d5-286174fe9600,False,False,Workspace,GA SpareParts Procurement Reports,,


### Datasets in Group

In [17]:
groupID = 'c00504de-44db-4d0e-8d3d-e206e3efa1cd'
path = f'groups/{groupID}/datasets'
url = urljoin(base_url, path)

datasets=requests.get(url, headers=header)

print(f"Calling the API: {datasets.url}")

if datasets.status_code == 200:
    df_datasets = pd.json_normalize(datasets.json(), record_path = 'value')
else:
    print(datasets.status_code)

Calling the API: https://api.powerbi.com/v1.0/myorg/groups/c00504de-44db-4d0e-8d3d-e206e3efa1cd/datasets


In [18]:
df_datasets.head(5)

Unnamed: 0,id,name,webUrl,addRowsAPIEnabled,configuredBy,isRefreshable,isEffectiveIdentityRequired,isEffectiveIdentityRolesRequired,isOnPremGatewayRequired,targetStorageMode,createdDate,createReportEmbedURL,qnaEmbedURL,upstreamDatasets,users,queryScaleOutSettings.autoSyncReadOnlyReplicas,queryScaleOutSettings.maxReadOnlyReplicas
0,15502813-55ac-459b-8e34-623c56bbc53a,Stock Availability Report,https://app.powerbi.com/groups/c00504de-44db-4...,False,Sachin.bm@gagroup.net,True,False,False,False,Abf,2022-10-10T06:30:49.453Z,https://app.powerbi.com/reportEmbed?config=eyJ...,https://app.powerbi.com/qnaEmbed?config=eyJjbH...,[],[],True,0
1,3c6d814f-c8a5-4af2-b6a0-1e57d1b75089,Stock Availability Report 2021,https://app.powerbi.com/groups/c00504de-44db-4...,False,Sachin.bm@gagroup.net,True,False,False,False,Abf,2022-10-11T11:10:28.637Z,https://app.powerbi.com/reportEmbed?config=eyJ...,https://app.powerbi.com/qnaEmbed?config=eyJjbH...,[],[],True,0
2,9fdb3dee-5024-4604-b2c1-513329276510,Stock Availability Report 2022,https://app.powerbi.com/groups/c00504de-44db-4...,False,Sachin.bm@gagroup.net,True,False,False,False,Abf,2022-10-11T11:14:50.027Z,https://app.powerbi.com/reportEmbed?config=eyJ...,https://app.powerbi.com/qnaEmbed?config=eyJjbH...,[],[],True,0
3,b0a13820-fbb8-4544-bedb-be3bdf9f7137,Stock Availability Report LQ,https://app.powerbi.com/groups/c00504de-44db-4...,False,Sachin.bm@gagroup.net,True,False,False,False,Abf,2022-12-28T04:32:57.127Z,https://app.powerbi.com/reportEmbed?config=eyJ...,https://app.powerbi.com/qnaEmbed?config=eyJjbH...,[],[],True,0
4,e1750c16-e9f6-4cb3-80b5-d688d2c1deea,Stock Ageing Detail Report,https://app.powerbi.com/groups/c00504de-44db-4...,False,Sachin.bm@gagroup.net,True,False,False,True,Abf,2023-01-11T05:43:03.487Z,https://app.powerbi.com/reportEmbed?config=eyJ...,https://app.powerbi.com/qnaEmbed?config=eyJjbH...,[],[],True,0


### Refresh history

In [19]:
groupID = 'c00504de-44db-4d0e-8d3d-e206e3efa1cd'
datasetID = 'e1750c16-e9f6-4cb3-80b5-d688d2c1deea'
path = f'groups/{groupID}/datasets/{datasetID}/refreshes'
url = urljoin(base_url, path)

refreshes=requests.get(url, headers=header)

print(f"Calling the API: {refreshes.url}")

if refreshes.status_code == 200:
    df_refreshes = pd.json_normalize(refreshes.json(), record_path = 'value')
else:
    print(refreshes.status_code)

Calling the API: https://api.powerbi.com/v1.0/myorg/groups/c00504de-44db-4d0e-8d3d-e206e3efa1cd/datasets/e1750c16-e9f6-4cb3-80b5-d688d2c1deea/refreshes


In [20]:
df_refreshes

Unnamed: 0,requestId,id,refreshType,startTime,endTime,serviceExceptionJson,status,refreshAttempts
0,343b0826-7a56-4a9b-a486-1f516c8140a1,213497097,Scheduled,2023-06-16T05:04:14.947Z,2023-06-16T05:04:15.413Z,"{""errorCode"":""Gateway_Offline"",""errorDescripti...",Failed,[]
1,823cfa16-ae93-4460-a93b-c346a323b06d,213079666,Scheduled,2023-06-15T05:04:07.717Z,2023-06-15T05:04:08.183Z,"{""errorCode"":""Gateway_Offline"",""errorDescripti...",Failed,[]
2,ea983cd9-5c73-4b5c-8b43-95f0a526d945,212665346,Scheduled,2023-06-14T05:04:19.81Z,2023-06-14T05:04:20.357Z,"{""errorCode"":""Gateway_Offline"",""errorDescripti...",Failed,[]
3,493e994d-f67f-47f8-8fc6-8a260d7d361b,212251126,Scheduled,2023-06-13T05:04:15.757Z,2023-06-13T05:04:16.303Z,"{""errorCode"":""Gateway_Offline"",""errorDescripti...",Failed,[]
4,e7d67929-fac2-4e44-8917-085508b39fd5,211840593,Scheduled,2023-06-12T05:04:26.257Z,2023-06-12T05:29:05.353Z,,Completed,[]
5,51103524-5295-43f7-9145-4c125570e05b,211438340,Scheduled,2023-06-11T05:04:15.777Z,2023-06-11T05:29:54.673Z,,Completed,[]
6,c64304df-62eb-4283-bdcc-0db0db72a1c3,211035293,Scheduled,2023-06-10T05:04:14.283Z,2023-06-10T06:04:47.3Z,,Completed,[]
7,0c07439b-cfad-43dc-b7cf-e0d3ada332d4,210620090,Scheduled,2023-06-09T05:04:17.01Z,2023-06-09T05:28:05.783Z,,Completed,[]
8,e1e97fe0-dad4-41b7-9ce8-3cd64f816e2c,210205889,Scheduled,2023-06-08T05:04:06.937Z,2023-06-08T05:28:44.567Z,,Completed,[]
9,876b6727-4aa1-49b9-8d6d-8f4f7f89c918,209792299,Scheduled,2023-06-07T05:04:06.89Z,2023-06-07T05:29:21.787Z,,Completed,[]


## Master Table

In [47]:
path = 'admin/groups'
url = urljoin(base_url, path)
params = {
    '$top': 1000,
    '$expand': 'datasets'
}
expanded_groups=requests.get(url, params=params,headers=header)

print(f"Calling the API: {expanded_groups.url}")

if expanded_groups.status_code == 200:
    df_expanded_groups = pd.json_normalize(expanded_groups.json(), record_path = 'value')
else:
    print(expanded_groups.status_code)
    
with pd.ExcelWriter("expanded_groups.xlsx", engine='xlsxwriter') as writer:
    df_expanded_groups.to_excel(writer, sheet_name = 'Expanded_groups')
    
print(f"Shape of expanded_groups is {df_expanded_groups.shape}")
df_expanded_groups.head(2) 

Calling the API: https://api.powerbi.com/v1.0/myorg/admin/groups?%24top=1000&%24expand=datasets
Shape of expanded_groups is (645, 12)


Unnamed: 0,id,isReadOnly,isOnDedicatedCapacity,capacityMigrationStatus,type,state,hasWorkspaceLevelSettings,name,datasets,description,capacityId,defaultDatasetStorageFormat
0,094ff601-0292-49d0-8aaf-e7a334cf1102,False,False,,Workspace,Active,False,Store reports,[],,,
1,8f4bf57c-86cf-4d81-87e6-6d785e0af828,False,False,,Workspace,Active,False,Test Work,[{'id': '8ed51b15-6a8e-46ce-b72a-7d6348ab91ed'...,Test Report,,
