### Load all Dataframes

In [13]:
import pandas as pd

# Read the CSV files into dataframes
policy_assignments_df = pd.read_csv('../output/policyAssignments.csv')
export_users_df = pd.read_csv('../output/exportUsers_2025-1-23.csv')
policy_definitions_df = pd.read_csv('../output/policyDefinitions.csv')
initiave_policies_df = pd.read_csv('../output/initiativePolicies.csv')
management_groups_df = pd.read_csv('../output/managementGroups.csv')

# Display the dataframes
# print(policy_assignments_df.head())
# print(export_users_df.head())
policy_assignments_df.head()


Unnamed: 0,Assignment Name,Policy Name,Kind,Policy Type,Scope,Scope Name,Scope Type,notScopes,createdBy,createdOn,policyDefinitionId
0,AEP-Data-Platform-MG-Initiative,AEP-Data-Platform-MG-Initiative,Initiative,Custom,/providers/Microsoft.Management/managementGrou...,MGID-HEI-GIS-AEP-00,Management Group,,195eda00-b7fe-4b73-a620-c25490fcdedb,27/10/2022 12:13:35,/providers/Microsoft.Management/managementGrou...
1,AFRA - DB Compliance,AFRA Compliance,Initiative,Custom,/subscriptions/df345f01-e91c-4fdb-8894-6a2ebc2...,HEA-GIS-TSYSTEMS-NEPAAS-01,Subscription,,d0472a49-60ab-45eb-9d73-55bfc78e120e,10/09/2021 08:29:34,/subscriptions/df345f01-e91c-4fdb-8894-6a2ebc2...
2,ASC DataProtection (subscription: 01ef65f4-e06...,Configure Azure Defender to be enabled on SQL ...,Initiative,BuiltIn,/subscriptions/01ef65f4-e06f-4761-8eef-3ed7d32...,GC-AZR-0093,Subscription,,d0633c2e-5b2e-4846-ae6e-4266f7a290ca,17/10/2024 09:33:12,/providers/Microsoft.Authorization/policySetDe...
3,ASC DataProtection (subscription: 01f03d9b-11a...,Configure Azure Defender to be enabled on SQL ...,Initiative,BuiltIn,/subscriptions/01f03d9b-11ae-469e-9fdb-30412aa...,GC-AZR-0065,Subscription,,9667c887-124d-49df-b760-c3a1f23a7d2b,21/06/2024 09:03:10,/providers/Microsoft.Authorization/policySetDe...
4,ASC DataProtection (subscription: 01f47b54-f0e...,Configure Azure Defender to be enabled on SQL ...,Initiative,BuiltIn,/subscriptions/01f47b54-f0e3-43a6-a8a1-d903c98...,GC-AZR-0104,Subscription,,d0633c2e-5b2e-4846-ae6e-4266f7a290ca,26/11/2024 09:59:45,/providers/Microsoft.Authorization/policySetDe...


In [17]:
# merge with ManagementGroup DF - "Scope" column from initiave_policies_df and "id" column from management_groups_df. Only retrieve columns "properties_displayName" and "mgParent_displayName"
policy_assignments_analysis_df = pd.merge(policy_assignments_df, management_groups_df[['id', 'properties_displayName', 'parentDisplayName']], left_on='Scope', right_on='id', how='left')
policy_assignments_analysis_df.drop('id', axis=1, inplace=True)

# Merge the dataframes adding user to the Policy Assignment sheet. The column ID of the Users sheet is the same as the column CreatedBy of the Policy Assignment sheet
policy_assignments_analysis_df = pd.merge(policy_assignments_analysis_df, export_users_df[['id', 'displayName']], left_on='createdBy', right_on='id', how='left')
policy_assignments_analysis_df.drop(['id','createdBy'], axis=1, inplace=True)

# Merge with policy_definitions_df - "Policy Name" column with column "Name" from policy_definitions_df. Only retrieve the columns "Category" "AvailableEffects"	"Description"
policy_assignments_analysis_df = pd.merge(policy_assignments_analysis_df, policy_definitions_df[['Id','Category', 'AvailableEffects', 'Description']], left_on='policyDefinitionId', right_on='Id', how='left')
policy_assignments_analysis_df.drop(['Id'], axis=1, inplace=True)

# policy_assignments_analysis_df has 1:N relationship with initiave_policies_df. Merge with initiave_policies_df - "InitiativeId" column with column "Policy Name" from policy_assignments_analysis_df. Transform all the items of initiative_policies_df into a list and add it to the policy_assignments_analysis_df
# Group initiave_policies_df by 'InitiativeId' and aggregate the rows into lists
initiative_policies_grouped = initiave_policies_df[['InitiativeId', 'PolicyName']].groupby('InitiativeId').agg(lambda x: '\n'.join(x)).reset_index()

# Merge the grouped initiative_policies_df with policy_assignments_analysis_df
policy_assignments_analysis_df = pd.merge(policy_assignments_analysis_df, initiative_policies_grouped, left_on='policyDefinitionId', right_on='InitiativeId', how='left')

# Drop the 'InitiativeId' column as it is redundant after the merge
policy_assignments_analysis_df.drop(['InitiativeId','policyDefinitionId'], axis=1, inplace=True)

# Rename the "displayName" column to "createdByDisplayName"
policy_assignments_analysis_df.rename(columns={'displayName': 'createdByDisplayName', 'Category': 'Policy Category', 'Description': 'Policy Description', 'Type':'Policy Type', 'PolicyName': 'Initiative Policies', 'properties_displayName': 'Mgmt Group'}, inplace=True)


# Sort the dataframe by "Scope Type"
policy_assignments_analysis_df.sort_values(by='Scope Type', inplace=True)

# Make sure that policy_assignments_analysis_df lenght is the same 
assert len(policy_assignments_analysis_df) == len(policy_assignments_df), f"The length of policy_assignments_analysis_df ({len(policy_assignments_analysis_df)}) does not match the length of policy_assignments_df ({len(policy_assignments_df)})"

policy_assignments_analysis_df.head()

Unnamed: 0,Assignment Name,Policy Name,Kind,Policy Type,Scope,Scope Name,Scope Type,notScopes,createdOn,Mgmt Group,parentDisplayName,createdByDisplayName,Policy Category,AvailableEffects,Policy Description,Initiative Policies
0,AEP-Data-Platform-MG-Initiative,AEP-Data-Platform-MG-Initiative,Initiative,Custom,/providers/Microsoft.Management/managementGrou...,MGID-HEI-GIS-AEP-00,Management Group,,27/10/2022 12:13:35,AEP - Data Platform,Tenant Root Group,(Adm) Catalin Popa,,,,Prevent enabling SAS/key access on DLS storage...
1548,Deny Storage HTTP,Deny Storage HTTP,Policy,Custom,/providers/Microsoft.Management/managementGrou...,landingzones,Management Group,,10/03/2022 09:14:31,LandingZones,HEINEKEN,,Storage,[parameters('effect')],This policy deny storage accounts that allow H...,
1547,Deny Storage HTTP,Deny Storage HTTP,Policy,Custom,/providers/Microsoft.Management/managementGrou...,platform,Management Group,,10/03/2022 09:17:48,Platform,HEINEKEN,,Storage,[parameters('effect')],This policy deny storage accounts that allow H...,
1546,Deny Public IP,Deny Public IP,Policy,Custom,/providers/Microsoft.Management/managementGrou...,internal,Management Group,/subscriptions/17296913-4624-41af-8d8a-b7eb1b2...,10/03/2022 09:09:38,Internal,LandingZones,,Network,[parameters('effect')],Deny use of Public IP,
1545,Deny Public IP,Deny Public IP,Policy,Custom,/providers/Microsoft.Management/managementGrou...,management,Management Group,/subscriptions/17296913-4624-41af-8d8a-b7eb1b2...,10/03/2022 09:06:23,Management,Platform,,Network,[parameters('effect')],Deny use of Public IP,


### Generate the AEP final sheet

In [20]:
# Filter the rows where the "Scope Name" contains "AEP"
policy_assignments_analysis_aep_df = policy_assignments_analysis_df[policy_assignments_analysis_df['Scope Name'].str.contains('AEP', case=False)].copy()

# Column "Scope Name" = if "Scope Type" is "Management Group" then "Mgmt Group" else "Scope Name"
policy_assignments_analysis_aep_df.loc[:, 'Scope Name'] = policy_assignments_analysis_aep_df.apply(lambda x: x['Mgmt Group'] if x['Scope Type'] == 'Management Group' else x['Scope Name'], axis=1)

# Only keep columns "Scope Type", "Scope Name", "Kind", "Policy Category", "Policy Type", "Policy Name" and "Initiative Policies"
policy_assignments_analysis_aep_df = policy_assignments_analysis_aep_df[['Scope Type', 'Scope Name', 'Kind', 'Policy Category', 'Policy Type', 'Policy Name', 'Initiative Policies']]

# Sort the dataframe by "Scope Type"
policy_assignments_analysis_aep_df.sort_values(by='Scope Type', inplace=True)

policy_assignments_analysis_aep_df.head()


Unnamed: 0,Scope Type,Scope Name,Kind,Policy Category,Policy Type,Policy Name,Initiative Policies
0,Management Group,AEP - Data Platform,Initiative,,Custom,AEP-Data-Platform-MG-Initiative,Prevent enabling SAS/key access on DLS storage...
1270,Management Group,AEP - Data Platform,Policy,Monitoring,BuiltIn,Configure Azure Activity logs to stream to spe...,
934,Management Group,AEP - Data Platform,Policy,SQL,BuiltIn,An Azure Active Directory administrator should...,
1837,Management Group,AEP - Data Platform,Policy,Tags,Custom,Require AEP tags on resources,
1742,Resource Group,HEI-AEP-SHIRASE-Q-ASE-RG-01,Initiative,MCSAz custom initiative v3,Custom,MCSAz - Basic policy set assigned to Advanced-...,MCSAz - Audit any PostgreSQL databases in your...


### Write to Excel

In [22]:
with pd.ExcelWriter('../output/policyAssignmentsAnalysis.xlsx') as writer:
    policy_assignments_analysis_aep_df.to_excel(writer, sheet_name='AEP Assignments Analysis', index=False)
    policy_assignments_analysis_df.to_excel(writer, sheet_name='Policy Assignments', index=False)
    policy_definitions_df.to_excel(writer, sheet_name='Policy Definitions', index=False)
    initiave_policies_df.to_excel(writer, sheet_name='Initiative Policies', index=False)
    management_groups_df.to_excel(writer, sheet_name='Management Groups', index=False)