In [11]:
#Installing required packages

! pip install pyodbc
! pip install openpyxl
! pip install pandas



In [12]:
# Import date class from datetime module
from datetime import datetime

# Current date and time, formatted to the second. Used in the filename of the
# Excel extract at the end so that repeated runs do not overwrite each other.
now = datetime.now()

now_ns = now.strftime("%Y-%m-%d-%H-%M-%S")
print(now_ns)

# Start of the reporting month. This is the ONLY value that needs to be changed
# each reporting month; everything below is derived from it.
start_of_month = datetime(2025, 6, 1)

# yyyy-mm-dd form is interpolated into the SQL queries, yyyy-mm form goes into
# the filename of the Excel extract.
start_of_month_d = start_of_month.strftime("%Y-%m-%d")
reporting_month = start_of_month.strftime("%Y-%m")
print(start_of_month_d)
print(reporting_month)

# CSDS month ID goes up by one per calendar month (1499 = February 2025,
# 1500 = March 2025, etc.). Deriving it from start_of_month means the two can
# never get out of step when the reporting month is changed.
csds_month_id = 1499 + (start_of_month.year - 2025) * 12 + (start_of_month.month - 2)

2025-09-25-14-45-13
2025-06-01
2025-06


In [13]:
# Query to connect to SQL Server and run a SQL query for DQ metrics (PU5001-2)

import env
import pyodbc
import pandas as pd
import os


def data_connection_dq():
    """
    Using the user's credentials, retrieve the data quality (DQ) record counts
    for metrics PU5001 and PU5002 from the UDAL Warehouse.

    Connects to the SQL Server given by env.SERVER / env.DATABASE as env.UID
    (Active Directory interactive sign-in). For the month identified by the
    notebook-level variables start_of_month_d and csds_month_id it counts the
    rows of [Reporting_MESH_CSDS].[CYP202CareActivity] per provider and per
    coded value: once by [CodedFinding] (tagged PU5001) and once by
    [CodedProcedure] (tagged PU5002).

    returns:
    pandas dataframe with columns ReportingDate, ProviderCode,
    CodedFindingOrProcedure, NumberOfRecords and Metric_ID. The PU5001 rows
    come first, then the PU5002 rows, under a single 0..n-1 index.
    """

    server = env.SERVER
    database = env.DATABASE
    uid = env.UID

    conn_str = (
        f'DRIVER={{ODBC Driver 17 for SQL Server}};'
        f'SERVER={server};'
        f'DATABASE={database};'
        f'UID={uid};'
        'Authentication=ActiveDirectoryInteractive;'
    )

    # Both metrics run the same query; only the column being counted differs.
    query_template = """
    DECLARE @MonthStart1 as DATE 
    DECLARE @MonthEnd1 as DATE 
    SET @MonthStart1 = '{start_of_month_d}'
    SET @MonthEnd1 = EOMONTH(@MonthStart1)

    SELECT
	@MonthEnd1 AS [ReportingDate],
	[OrgID_Provider] as [ProviderCode],
	[{coded_column}] as [CodedFindingOrProcedure],
	count(*) as [NumberOfRecords]

	from [Reporting_MESH_CSDS].[CYP202CareActivity]

	where [Unique_MonthID] = '{csds_month_id}'

	group by
	[OrgID_Provider],
	[{coded_column}]
    """
    metric_columns = (('PU5001', 'CodedFinding'), ('PU5002', 'CodedProcedure'))

    conn = pyodbc.connect(conn_str)
    frames = []
    try:
        for metric_id, coded_column in metric_columns:
            query = query_template.format(
                start_of_month_d=start_of_month_d,
                csds_month_id=csds_month_id,
                coded_column=coded_column,
            )
            # Execute the query and load it into a pandas DataFrame
            frame = pd.read_sql(query, conn)
            frame["Metric_ID"] = metric_id
            frames.append(frame)
    finally:
        # Close the connection even when a query fails, so it is never leaked
        conn.close()

    # ignore_index avoids the same row labels appearing once per metric
    df_combined = pd.concat(frames, ignore_index=True)

    return df_combined

In [14]:
# Running function created above to pull the DQ query results (PU5001 and PU5002 rows combined into one dataframe)
df = data_connection_dq()

# Creating a real copy of the dataframe to work with in case of errors
# (plain assignment would only give the same dataframe a second name)
df_copy = df.copy()

# Checking the generated dataframe looks right by inspecting first 10 rows
df_copy.head(10)

  df = pd.read_sql(query, conn)
  df2 = pd.read_sql(query2, conn)


Unnamed: 0,ReportingDate,ProviderCode,CodedFindingOrProcedure,NumberOfRecords,Metric_ID
0,2025-06-30,NQT,Xa2u6,440,PU5001
1,2025-06-30,RXE,135A.,856,PU5001
2,2025-06-30,TAD,Xa2F9,68,PU5001
3,2025-06-30,RCB,XE0Ux,50,PU5001
4,2025-06-30,RXK,XaLMY,4,PU5001
5,2025-06-30,8A644,4695.,4,PU5001
6,2025-06-30,RTR,XaFvq,4,PU5001
7,2025-06-30,RY5,Xa3iK,256,PU5001
8,2025-06-30,TAD,Xa37i,2,PU5001
9,2025-06-30,ADN,X75rx,9,PU5001


In [15]:
#Function to transform dataframe into format which can be imported into MHS

import pandas as pd

def transform_ll_dq_data(input_df):
    """
    Turn the DQ record counts into one row per provider and metric, in the
    column layout that can be imported into MHS.

    For every (ProviderCode, Metric_ID) pair in input_df:
      - denominatorValue = sum of NumberOfRecords over all of the pair's rows
      - numeratorValue   = sum of NumberOfRecords over the rows whose
                           CodedFindingOrProcedure is not null
      - value            = numeratorValue / denominatorValue * 100

    A pair whose rows all have a null code is kept, with numeratorValue 0 and
    value 0, rather than being left out of the result.

    parameters:
    input_df: dataframe with columns ReportingDate, ProviderCode,
        CodedFindingOrProcedure, NumberOfRecords and Metric_ID. Not modified.

    returns:
    dataframe with columns metricID, providerCode, reportingDate, value,
    lowerBenchmark, upperBenchmark, numeratorValue, denominatorValue, rangeMin
    and rangeMax. The benchmark and range columns hold empty strings.
    """

    # Step 1: Define input data
    df = input_df

    # Step 2: Prepare numeratorValue and denominatorValue for each ProviderCode / Metric_ID pair.
    # Records with a code count towards the numerator; records with a null code
    # contribute 0, so a pair with no coded records still gets a row.
    coded_records = df['NumberOfRecords'].where(df['CodedFindingOrProcedure'].notna(), 0)

    counts = (
        df.assign(numeratorValue=coded_records, denominatorValue=df['NumberOfRecords'])
        .groupby(['ProviderCode', 'Metric_ID'])[['numeratorValue', 'denominatorValue']]
        .sum()
        .reset_index()
    )

    # Step 3: Calculate value = (numeratorValue / denominatorValue) * 100
    counts['value'] = (counts['numeratorValue'] / counts['denominatorValue']) * 100

    # Step 4: Associate each ProviderCode with a reporting date.
    # Assuming the ReportingDate is consistent per ProviderCode, take the first ReportingDate for each ProviderCode.
    reporting_dates = df.groupby('ProviderCode')['ReportingDate'].first().reset_index()

    # Step 5: Build the final DataFrame with the specified columns and order
    df_transformed = counts.merge(reporting_dates, on='ProviderCode')

    df_transformed = df_transformed.rename(columns={
        'ProviderCode': 'providerCode',
        'ReportingDate': 'reportingDate',
        'Metric_ID': 'metricID'
    })

    # Benchmark and range columns are exported blank
    df_transformed['lowerBenchmark'] = ''
    df_transformed['upperBenchmark'] = ''
    df_transformed['rangeMin'] = ''
    df_transformed['rangeMax'] = ''

    # Reorder columns as requested
    df_transformed = df_transformed[
        ['metricID', 'providerCode', 'reportingDate', 'value', 'lowerBenchmark', 'upperBenchmark',
        'numeratorValue', 'denominatorValue', 'rangeMin', 'rangeMax']
    ]

    return df_transformed

In [16]:
# Run transform_ll_dq_data on the DQ dataframe to get one row per provider and metric in the MHS import layout
df_copy_transformed = transform_ll_dq_data(df_copy)

# Checking the transformed dataframe looks right by inspecting first 10 rows
df_copy_transformed.head(10)

Unnamed: 0,metricID,providerCode,reportingDate,value,lowerBenchmark,upperBenchmark,numeratorValue,denominatorValue,rangeMin,rangeMax
0,PU5001,108,2025-06-30,44.663913,,,5947,13315,,
1,PU5002,108,2025-06-30,75.546376,,,10059,13315,,
2,PU5001,113,2025-06-30,52.192691,,,9426,18060,,
3,PU5002,113,2025-06-30,72.248062,,,13048,18060,,
4,PU5001,216,2025-06-30,19.010382,,,1410,7417,,
5,PU5002,216,2025-06-30,66.590266,,,4939,7417,,
6,PU5001,219,2025-06-30,34.580499,,,8540,24696,,
7,PU5002,219,2025-06-30,69.926304,,,17269,24696,,
8,PU5001,503,2025-06-30,25.021618,,,15047,60136,,
9,PU5002,503,2025-06-30,70.797858,,,42575,60136,,


In [17]:
# Query to connect to SQL Server and run a SQL query for the LL wound code metrics (PU5003-7)

import env
import pyodbc
import pandas as pd
import os


def data_connection_ll_codes():
    """
    Using the user's credentials, retrieve the leg ulcer wound code patient
    counts from the UDAL Warehouse.

    Connects to the SQL Server given by env.SERVER / env.DATABASE as env.UID
    (Active Directory interactive sign-in) and runs one query made of two
    SELECTs joined with UNION. Both count distinct Person_ID per provider over
    care contacts dated within the month starting at the notebook-level
    variable start_of_month_d, restricted to the latest submission
    (DerIsLatest = '1') and to contacts whose AttendanceStatus (or AttendOrNot
    when AttendanceStatus is null) is '5' or '6'.

    The first SELECT labels patients 'LegUlcer_Code_Present' when the activity
    carries any of the listed codes, otherwise 'Other'. The second SELECT
    labels them 'LegUlcer_Present', 'LegUlcer_Assessment', 'LegUlcer_Treatment',
    'LegUlcer_TreatmentNotSpecified', 'LegUlcer_Healed' or 'Other'.

    returns:
    pandas dataframe with columns ReportingDate, ProviderCode, Unique_Patients
    and Metric
    """

    server = env.SERVER
    database = env.DATABASE
    uid = env.UID

    conn_str = (
        f'DRIVER={{ODBC Driver 17 for SQL Server}};'
        f'SERVER={server};'
        f'DATABASE={database};'
        f'UID={uid};'
        'Authentication=ActiveDirectoryInteractive;'
    )

    # The CASE expression of each SELECT is repeated in its GROUP BY; the two
    # copies must be kept identical whenever a code list is edited.
    query = f"""
        
    DECLARE @MonthStart1 as DATE
    DECLARE @MonthEnd1 as DATE
    SET @MonthStart1 = '{start_of_month_d}'
    SET @MonthEnd1 = EOMONTH(@MonthStart1)

    SELECT
    @MonthEnd1 AS [ReportingDate],
    Contacts.OrgID_Provider AS [ProviderCode],
    COUNT (DISTINCT Contacts.Person_ID) AS Unique_Patients,

    CASE WHEN Activity.CodedFinding IN ('X50Bb','X50Bd','X50Be','X50Bf','402863005','238792006','238793001','828101000000106','Xa0lV','Y2bf7','413167008','XaJWE','Xa6nc',
    '298006005','XaQn5','8437810000000105','446841001','XaYmM','XaX0e','XaX0f','Y1258','Y1259','XB001','Y36af','Y36b0','Ua1De','X77cy','X77cz','X79mk','P5sb.','p5vW.',
    'p5vX.','p5z9.','p5zA.','Xa6nb')
    or Activity.CodedProcedure IN ('X50Bb','X50Bd','X50Be','X50Bf','402863005','238792006','238793001','828101000000106','Xa0lV','Y2bf7','413167008','XaJWE','Xa6nc',
    '298006005','XaQn5','8437810000000105','446841001','XaYmM','XaX0e','XaX0f','Y1258','Y1259','XB001','Y36af','Y36b0','Ua1De','X77cy','X77cz','X79mk','P5sb.','p5vW.',
    'p5vX.','p5z9.','p5zA.','Xa6nb') THEN 'LegUlcer_Code_Present'
    ELSE 'Other' END AS Metric


    FROM [Reporting_MESH_CSDS].[CYP201CareContact] AS [Contacts]

    LEFT JOIN [Reporting_MESH_CSDS].[CYP202CareActivity] AS [Activity]
    ON Contacts.Person_ID = Activity.Person_ID
    AND Contacts.UniqueSubmissionID = Activity.UniqueSubmissionID
    AND Contacts.Unique_CareContactID = Activity.Unique_CareContactID
    AND Contacts.AuditID = Activity.AuditID

    LEFT JOIN [Reporting_MESH_CSDS].[DerLatestFlag] AS [Submissions]
    ON Contacts.UniqueSubmissionID = Submissions.UniqueSubmissionID
    AND Contacts.OrgID_Provider = Submissions.OrgID_Provider
    AND Submissions.RP_EndDate = @MonthEnd1

    WHERE 
    Contacts.Contact_Date BETWEEN @MonthStart1 AND @MonthEnd1
    AND Submissions.DerIsLatest = '1'
    AND (CASE WHEN AttendanceStatus IS NULL THEN AttendOrNot ELSE AttendanceStatus END IN ('5','6'))

    GROUP BY
    Contacts.OrgID_Provider,
    CASE WHEN Activity.CodedFinding IN ('X50Bb','X50Bd','X50Be','X50Bf','402863005','238792006','238793001','828101000000106','Xa0lV','Y2bf7','413167008','XaJWE','Xa6nc',
    '298006005','XaQn5','8437810000000105','446841001','XaYmM','XaX0e','XaX0f','Y1258','Y1259','XB001','Y36af','Y36b0','Ua1De','X77cy','X77cz','X79mk','P5sb.','p5vW.',
    'p5vX.','p5z9.','p5zA.','Xa6nb')
    or Activity.CodedProcedure IN ('X50Bb','X50Bd','X50Be','X50Bf','402863005','238792006','238793001','828101000000106','Xa0lV','Y2bf7','413167008','XaJWE','Xa6nc',
    '298006005','XaQn5','8437810000000105','446841001','XaYmM','XaX0e','XaX0f','Y1258','Y1259','XB001','Y36af','Y36b0','Ua1De','X77cy','X77cz','X79mk','P5sb.','p5vW.',
    'p5vX.','p5z9.','p5zA.','Xa6nb') THEN 'LegUlcer_Code_Present'
    ELSE 'Other' END

    union

    SELECT
    @MonthEnd1 AS [ReportingDate],
    Contacts.OrgID_Provider AS [ProviderCode],
    COUNT (DISTINCT Contacts.Person_ID) AS Unique_Patients,

    CASE WHEN Activity.CodedFinding IN ('X50Bb','X50Bd','X50Be','X50Bf','402863005','238792006','238793001','828101000000106','Xa0lV','Y2bf7') 
    or Activity.CodedProcedure IN ('X50Bb','X50Bd','X50Be','X50Bf','402863005','238792006','238793001','828101000000106','Xa0lV','Y2bf7') THEN 'LegUlcer_Present'
    WHEN Activity.CodedProcedure IN ('8437810000000105','446841001','XaYmM','XaX0e','XaX0f','Y1258','Y1259','XB001','X77cy','X77cz','X79mk') 
    or Activity.CodedFinding IN ('8437810000000105','446841001','XaYmM','XaX0e','XaX0f','Y1258','Y1259','XB001','X77cy','X77cz','X79mk') THEN 'LegUlcer_Assessment'
    WHEN Activity.CodedProcedure IN ('Y36af','Y36b0','Ua1De','P5sb.','p5vW.','p5vX.','p5z9.','p5zA.') or Activity.CodedFinding IN ('Y36af','Y36b0','Ua1De','P5sb.',
    'p5vW.','p5vX.','p5z9.','p5zA.') THEN 'LegUlcer_Treatment'
    WHEN Activity.CodedFinding IN ('413167008','XaJWE') or Activity.CodedProcedure IN ('413167008','XaJWE') THEN 'LegUlcer_TreatmentNotSpecified'
    WHEN Activity.CodedFinding IN ('Xa6nc','298006005','XaQn5','Xa6nb') or Activity.CodedProcedure IN ('Xa6nc','298006005','XaQn5','Xa6nb') THEN 'LegUlcer_Healed'
    ELSE 'Other' END AS Metric


    FROM [Reporting_MESH_CSDS].[CYP201CareContact] AS [Contacts]

    LEFT JOIN [Reporting_MESH_CSDS].[CYP202CareActivity] AS [Activity]
    ON Contacts.Person_ID = Activity.Person_ID
    AND Contacts.UniqueSubmissionID = Activity.UniqueSubmissionID
    AND Contacts.Unique_CareContactID = Activity.Unique_CareContactID
    AND Contacts.AuditID = Activity.AuditID

    LEFT JOIN [Reporting_MESH_CSDS].[DerLatestFlag] AS [Submissions]
    ON Contacts.UniqueSubmissionID = Submissions.UniqueSubmissionID
    AND Contacts.OrgID_Provider = Submissions.OrgID_Provider
    AND Submissions.RP_EndDate = @MonthEnd1

    WHERE 
    Contacts.Contact_Date BETWEEN @MonthStart1 AND @MonthEnd1
    AND Submissions.DerIsLatest = '1'
    AND (CASE WHEN AttendanceStatus IS NULL THEN AttendOrNot ELSE AttendanceStatus END IN ('5','6'))

    GROUP BY
    Contacts.OrgID_Provider,
    CASE WHEN Activity.CodedFinding IN ('X50Bb','X50Bd','X50Be','X50Bf','402863005','238792006','238793001','828101000000106','Xa0lV','Y2bf7') 
    or Activity.CodedProcedure IN ('X50Bb','X50Bd','X50Be','X50Bf','402863005','238792006','238793001','828101000000106','Xa0lV','Y2bf7') THEN 'LegUlcer_Present'
    WHEN Activity.CodedProcedure IN ('8437810000000105','446841001','XaYmM','XaX0e','XaX0f','Y1258','Y1259','XB001','X77cy','X77cz','X79mk') 
    or Activity.CodedFinding IN ('8437810000000105','446841001','XaYmM','XaX0e','XaX0f','Y1258','Y1259','XB001','X77cy','X77cz','X79mk') THEN 'LegUlcer_Assessment'
    WHEN Activity.CodedProcedure IN ('Y36af','Y36b0','Ua1De','P5sb.','p5vW.','p5vX.','p5z9.','p5zA.') or Activity.CodedFinding IN ('Y36af','Y36b0','Ua1De','P5sb.',
    'p5vW.','p5vX.','p5z9.','p5zA.') THEN 'LegUlcer_Treatment'
    WHEN Activity.CodedFinding IN ('413167008','XaJWE') or Activity.CodedProcedure IN ('413167008','XaJWE') THEN 'LegUlcer_TreatmentNotSpecified'
    WHEN Activity.CodedFinding IN ('Xa6nc','298006005','XaQn5','Xa6nb') or Activity.CodedProcedure IN ('Xa6nc','298006005','XaQn5','Xa6nb') THEN 'LegUlcer_Healed'
    ELSE 'Other' END
    """

    conn = pyodbc.connect(conn_str)
    try:
        # Execute the query and load it into a pandas DataFrame
        df = pd.read_sql(query, conn)
    finally:
        # Close the connection even when the query fails, so it is never leaked
        conn.close()

    return df

In [18]:
# Running function created above to pull the leg ulcer wound code patient counts (PU5003-7) into a dataframe
df2 = data_connection_ll_codes()

# Creating a real copy of the dataframe to work with in case of errors
# (plain assignment would only give the same dataframe a second name)
df2_copy = df2.copy()

# Checking the generated dataframe looks right by inspecting first 10 rows
df2_copy.head(10)

  df = pd.read_sql(query, conn)


Unnamed: 0,ReportingDate,ProviderCode,Unique_Patients,Metric
0,2025-06-30,RAL,10305,Other
1,2025-06-30,216,1639,Other
2,2025-06-30,305,423,Other
3,2025-06-30,RMP,15453,Other
4,2025-06-30,C7J7A,161,Other
5,2025-06-30,114,45,Other
6,2025-06-30,819,1499,Other
7,2025-06-30,318,240,Other
8,2025-06-30,8AM97,404,Other
9,2025-06-30,RRK,7,LegUlcer_Treatment


In [19]:
def transform_ll_metric_data(input_df):
    """
    Turn the wound code patient counts into the column layout that can be
    imported into MHS.

    Each row's Metric label is recoded to its metric ID; rows whose label has
    no metric ID (for example 'Other') are dropped. Unique_Patients becomes the
    metric value.

    parameters:
    input_df: dataframe with columns ReportingDate, ProviderCode,
        Unique_Patients and Metric. Not modified.

    returns:
    dataframe with columns metricID, providerCode, reportingDate, value,
    lowerBenchmark, upperBenchmark, numeratorValue, denominatorValue, rangeMin
    and rangeMax. Every column after value holds empty strings. Rows keep the
    index labels they had in input_df.
    """

    # Mapping for recoding the Metric column
    # NOTE(review): the SQL query also emits 'LegUlcer_TreatmentNotSpecified', which has
    # no entry here and is therefore dropped — confirm this is intended.
    metric_map = {
        'LegUlcer_Code_Present': 'PU5003',
        'LegUlcer_Present': 'PU5004',
        'LegUlcer_Assessment': 'PU5005',
        'LegUlcer_Treatment': 'PU5006',
        'LegUlcer_Healed': 'PU5007'
    }

    # Recode into a separate Series instead of adding a column to input_df,
    # so the caller's dataframe is left exactly as it was passed in
    metric_ids = input_df['Metric'].map(metric_map)

    # Keep only the rows whose Metric is in the mapping
    is_mapped = metric_ids.notna()
    mapped_rows = input_df[is_mapped]

    # Rename and reorder columns, also transfer data accordingly
    df_transformed = pd.DataFrame({
        'metricID': metric_ids[is_mapped],
        'providerCode': mapped_rows['ProviderCode'],
        'reportingDate': mapped_rows['ReportingDate'],
        'value': mapped_rows['Unique_Patients'],
        'lowerBenchmark': '',  # blank column
        'upperBenchmark': '',  # blank column
        'numeratorValue': '',  # blank column
        'denominatorValue': '',# blank column
        'rangeMin': '',        # blank column
        'rangeMax': ''         # blank column
    })

    # Convert 'value' column to numeric if not already (just in case)
    df_transformed['value'] = pd.to_numeric(df_transformed['value'], errors='coerce')

    return df_transformed


In [20]:
# Run transform_ll_metric_data on the wound code dataframe to recode the metrics and put them in the MHS import layout
df2_copy_transformed = transform_ll_metric_data(df2_copy)

# Checking the transformed dataframe looks right by inspecting first 10 rows
df2_copy_transformed.head(10)

Unnamed: 0,metricID,providerCode,reportingDate,value,lowerBenchmark,upperBenchmark,numeratorValue,denominatorValue,rangeMin,rangeMax
9,PU5006,RRK,2025-06-30,7,,,,,,
10,PU5006,RXE,2025-06-30,30,,,,,,
22,PU5004,NL3,2025-06-30,80,,,,,,
32,PU5006,NR5,2025-06-30,11,,,,,,
33,PU5006,NQA,2025-06-30,67,,,,,,
37,PU5004,TAD,2025-06-30,24,,,,,,
44,PU5005,8AW20,2025-06-30,1,,,,,,
50,PU5005,8A644,2025-06-30,7,,,,,,
57,PU5004,RXF,2025-06-30,18,,,,,,
58,PU5005,RXP,2025-06-30,12,,,,,,


In [21]:
# Concatenating the two transformed dataframes (DQ metrics first, then wound code metrics).
# ignore_index gives the combined dataframe one clean 0..n-1 index instead of
# repeating row labels that appear in both inputs.
import pandas as pd

df_all_metrics = pd.concat([df_copy_transformed, df2_copy_transformed], ignore_index=True)

In [22]:
# Checking the combined dataframe looks right by inspecting first 10 rows
df_all_metrics.head(10)

Unnamed: 0,metricID,providerCode,reportingDate,value,lowerBenchmark,upperBenchmark,numeratorValue,denominatorValue,rangeMin,rangeMax
0,PU5001,108,2025-06-30,44.663913,,,5947,13315,,
1,PU5002,108,2025-06-30,75.546376,,,10059,13315,,
2,PU5001,113,2025-06-30,52.192691,,,9426,18060,,
3,PU5002,113,2025-06-30,72.248062,,,13048,18060,,
4,PU5001,216,2025-06-30,19.010382,,,1410,7417,,
5,PU5002,216,2025-06-30,66.590266,,,4939,7417,,
6,PU5001,219,2025-06-30,34.580499,,,8540,24696,,
7,PU5002,219,2025-06-30,69.926304,,,17269,24696,,
8,PU5001,503,2025-06-30,25.021618,,,15047,60136,,
9,PU5002,503,2025-06-30,70.797858,,,42575,60136,,


In [23]:
# query to connect to MHS Prod SQL Server and import MHS provider list as a dataframe (excluding inactive providers)

import env
import pyodbc
import pandas as pd
import os

def mhs_data_connection_providers():
    """
    Retrieve the list of active providers from the MHS database.

    Connects to the SQL Server given by env.SERVER2 / env.DATABASE2 with the
    login env.UID2 / env.PWD and reads every column of [dbo].[Provider] for
    the rows where [Active] = '1'.

    returns:
    pandas dataframe of the active providers
    """

    server = env.SERVER2
    database = env.DATABASE2
    uid = env.UID2
    pwd = env.PWD

    conn_str = (
        f'DRIVER={{ODBC Driver 17 for SQL Server}};'
        f'SERVER={server};'
        f'DATABASE={database};'
        f'UID={uid};'
        f'PWD={pwd}'
    )

    query = """
    select *
    from [dbo].[Provider]
    where [Active] = '1'
    """

    conn = pyodbc.connect(conn_str)
    try:
        # Execute the query and load it into a pandas DataFrame
        df = pd.read_sql(query, conn)
    finally:
        # Close the connection even when the query fails, so it is never leaked
        conn.close()

    return df

In [24]:
# Run function above to pull the list of active MHS providers
df_ref = mhs_data_connection_providers()

# Creating a real copy of the dataframe to work with in case of errors
# (plain assignment would only give the same dataframe a second name)
df_ref_copy = df_ref.copy()

# Checking the provider list looks right by inspecting first 10 rows
df_ref_copy.head(10)

  df = pd.read_sql(query, conn)


Unnamed: 0,ID,BenchmarkID,TypeID,Code,PeerSelectable,OrganisationType,ProviderName,RegionID,GirftRegionID,STPCode,TargetTrust,Active,CategoryID
0,69AD1D82-9B6E-4871-BDB6-0001080F1207,DBEF4606-38F7-401E-AF7B-0B487E42F122,B43FD2ED-4CC2-46C3-AB7F-91504912B32E,NTN63,False,Independent Provider Site,Priory Group Limited - Priory Hurstfield,5D3BB52E-BD67-4E2A-BBED-B682BF8C6030,,QWO,,True,5
1,C24AD3F6-1AB1-442D-AE35-00046A5394A1,DBEF4606-38F7-401E-AF7B-0B487E42F122,B43FD2ED-4CC2-46C3-AB7F-91504912B32E,AA475,False,Independent Provider Site,Intrahealth Ltd - Radcliffe Primary Care Centre,59C9F178-7B9F-42CB-B119-4064B0446DA5,,QOP,,True,5
2,BE772A4F-FA14-4BBC-8DB4-00049F461000,DBEF4606-38F7-401E-AF7B-0B487E42F122,B43FD2ED-4CC2-46C3-AB7F-91504912B32E,NT420,False,Independent Provider Site,Bmi Healthcare - Bmi - The Highfield Hospital,59C9F178-7B9F-42CB-B119-4064B0446DA5,,QOP,,True,5
3,4FB66DCB-0ACC-45A4-8421-0004DE92D1CA,DBEF4606-38F7-401E-AF7B-0B487E42F122,B43FD2ED-4CC2-46C3-AB7F-91504912B32E,DDV27,False,Independent Provider Site,Bestcare Diagnostics - Glodwick Primary Care C...,59C9F178-7B9F-42CB-B119-4064B0446DA5,,QOP,,True,5
4,6444BE13-46EC-4069-89EC-000564E6DCDC,DBEF4606-38F7-401E-AF7B-0B487E42F122,B43FD2ED-4CC2-46C3-AB7F-91504912B32E,NBGG4,False,Independent Provider Site,Mediscan Diagnostics Services Limited - Acorn ...,4919C458-039D-4F59-ACC5-D5A06201057D,,QT1,,True,5
5,53507CB3-7CFE-4E0E-A76A-00098812C2B6,DBEF4606-38F7-401E-AF7B-0B487E42F122,B43FD2ED-4CC2-46C3-AB7F-91504912B32E,NYNA5,False,Independent Provider Site,Specsavers Hearcare Group Ltd - Specsavers Hea...,105ED8D1-04C2-40A4-AEF8-706588C70CA2,,QJK,,True,5
6,E29BEE52-C385-4F01-B801-0009D131A84F,13993EB4-6FA7-45C1-BB61-E39FBFCE99E8,75140E1E-DFEE-4AAE-BEF9-DE15F42D4FF5,R1H,True,NHS Trust,Barts Health,DA9F3A9D-EDFB-4328-92C7-FA42AF61FE07,90FD1233-7698-4F86-BBB5-2E8CDD5EF7FE,QMF,0.0,True,0
7,623E523B-0984-44BA-A57D-000B4B211AE8,DBEF4606-38F7-401E-AF7B-0B487E42F122,B43FD2ED-4CC2-46C3-AB7F-91504912B32E,DDV2B,False,Independent Provider Site,Bestcare Diagnostics - Dean Medical Centre,59C9F178-7B9F-42CB-B119-4064B0446DA5,,QOP,,True,5
8,B65E923F-1D63-47BE-8A49-0011F1F826AB,DBEF4606-38F7-401E-AF7B-0B487E42F122,B43FD2ED-4CC2-46C3-AB7F-91504912B32E,AV004,False,Independent Provider Site,Bluesky Orthopaedic Ltd - Willows Medical Prac...,4919C458-039D-4F59-ACC5-D5A06201057D,,QK1,,True,5
9,5C805621-B360-4BBB-8260-0014DA04AB73,DBEF4606-38F7-401E-AF7B-0B487E42F122,B43FD2ED-4CC2-46C3-AB7F-91504912B32E,NGP0G,False,Independent Provider Site,Excell Ultrasound Ltd - Excell Ultrasound Ltd ...,FE980BEC-392D-42D3-BE4D-A8635C58F0CA,,QH8,,True,5


In [25]:
# Function to drop rows for providers not in MHS provider list and export concatenated dataframe to Excel

def drop_providers_and_export(df, ref_df, output_file):
    """
    Keep only the rows for providers in the MHS provider list and write them to Excel.

    parameters:
    df: dataframe of metrics with a 'providerCode' column. Not modified.
    ref_df: dataframe of providers with a 'Code' column.
    output_file: path of the Excel file to write; the rows go to a sheet
        named 'Insert' without the index.

    returns:
    None. A confirmation message naming output_file is printed.
    """
    # Work from a boolean mask rather than adding a helper column to df, so the
    # caller's dataframe does not gain an 'inMHSProviderList' column
    in_mhs_provider_list = df['providerCode'].isin(ref_df['Code'])

    # Keep only rows whose providerCode matches a Code in ref_df
    df_in_list = df[in_mhs_provider_list]

    # Export to Excel
    df_in_list.to_excel(output_file, sheet_name='Insert', index=False)

    print(f"Transformation complete. Data saved to {output_file}")
    

In [26]:
# Drop rows for providers not in the MHS provider list and export the combined metrics to Excel.
# The filename carries the reporting month and the timestamp of this run.
# NOTE(review): the output folder is an absolute path under one user's profile — anyone else running this notebook needs to change it.
drop_providers_and_export(df_all_metrics, df_ref_copy, f'C:/Users/owen.kenn (DDCC7590)/NHS/Quality and Improvement Analytics - Productivity/Wound Care/Wound Care - LL 2025/Excel outputs/all_metrics_{reporting_month}_{now_ns}.xlsx')

Transformation complete. Data saved to C:/Users/owen.kenn (DDCC7590)/NHS/Quality and Improvement Analytics - Productivity/Wound Care/Wound Care - LL 2025/Excel outputs/all_metrics_2025-06_2025-09-25-14-45-13.xlsx
