# Medications Compliance workbook

This workbook uses medication order and statements tables from the NEL primary care data to analyse medications compliance by expolring methods of proportion of days covered (PDC)

See paper https://joppp.biomedcentral.com/articles/10.1186/s40545-021-00385-w

Please check and install requirements.txt before proceeding


## Import required packages/modules

In [21]:
import matplotlib.pyplot as plt
import pandas as pd
from dotenv import load_dotenv

from phmlondon.snow_utils import SnowflakeConnection

pd.set_option("display.max_rows", 200)
pd.set_option("display.max_columns", 100)

## CONFIG and FUNCTIONS

In [22]:
def get_top_n_classes(df, column_name, n=10):
    """
    Get the top n most frequent values in a specified column of the dataframe.

    Args:
        df (pd.DataFrame): The dataframe containing the data.
        column_name (str): The name of the column to analyze.
        n (int): The number of top values to return. Default is 10.

    Returns:
        pd.Series: A series containing the top n most frequent values and their counts.
    """
    return df[column_name].value_counts().head(n)

def plot_top_classes(class_counts):
    """
    Plot the top classes in a bar graph.

    Args:
        class_counts (pd.Series): A series containing the class counts.
    """
    plt.figure(figsize=(10, 6))
    class_counts.plot(kind='bar', color='skyblue')

    # Set the title and labels
    plt.title('Top 10 Most Frequent Classes', fontsize=16)
    plt.xlabel('Class', fontsize=12)
    plt.ylabel('Frequency', fontsize=12)

    # Rotate x-axis labels for readability
    plt.xticks(rotation=45, ha='right')

    # Adjust layout to fit everything
    plt.tight_layout()

    # Display the plot
    plt.show()

## DATA

In [23]:
def get_data(snowsesh):
    """
    Retrieves dataset

    Args:
        snowsesh:
            Snowflake session


    Returns:
        DataFrame containing
    """
    query = """
    SELECT * FROM intelligence_dev.ai_centre_dev.comp_pdc_duration
    LIMIT 10000
    """
    try:
        df = snowsesh.execute_query_to_df(query)
        print(f"Retreived columns: {df.columns}")
        return df
    except Exception as e:
        print(f"Error retrieving modeling data: {e}")
        raise e

def generate_query_with_class_filter(top_classes):
    """
    Generate SQL query with a WHERE condition filtering by the top 3 classes.

    Args:
        top_classes (pd.Series): The top 3 classes to filter the query by.

    Returns:
        str: The SQL query with the appropriate WHERE condition.
    """
    # Create the WHERE condition with the top classes
    classes_condition = ", ".join(f"'{cls}'" for cls in top_classes.index)
    query = f"""
    SELECT * FROM intelligence_dev.ai_centre_dev.comp_pdc_duration
    WHERE CLASS IN ({classes_condition})
    """
    return query

## SUMMARY STATS

## REGRESSION and ANALYSIS

## PIPELINE

In [24]:
load_dotenv()

snowsesh = SnowflakeConnection()
snowsesh.use_database("INTELLIGENCE_DEV")
snowsesh.use_schema("AI_CENTRE_FEATURE_STORE")

Initiating login request with your identity provider. A browser window should have opened for you to complete the login. If you can't see it, check existing browser windows, or your OS settings. Press CTRL+C to abort and try again...
Going to open: https://login.microsoftonline.com/8076439c-5b1f-4e15-91ea-a0bff0b3bf16/saml2?SAMLRequest=nZJbb6MwEIX%2FCvI%2BAzYhNytJlUurZbfZooRWq31zYEisgE1sU9L%2B%2BjUkkboP7cO%2BWfaZ%2BY7nzOTuXBbOKyjNpZgi4mHkgEhlxsV%2Bip6TB3eEHG2YyFghBUzRG2h0N5toVhYVndfmIDZwqkEbxzYSmrYPU1QrQSXTXFPBStDUpHQ7Xz%2FSwMOUaQ3KWBy6lmSaW9bBmIr6ftM0XtPzpNr7AcbYx2PfqlrJN%2FQBUX3NqJQ0MpXFreRs%2F%2FQJgvg4bBFWYQnxtXDBxWUEX1F2F5Gm35MkduOnbYKc%2Be13Syl0XYLagnrlKTxvHi8GtHWwve8PyGjg1UdXSztDj73XCjwtZJMX7AipLKva2NaePfk5ZH4h99wOLFpNUXXkWbCK1D1sgvCUvahzXa3x7%2Brnj3UaJQs4yPfqaR7GEC1PC5GmyHm5xRu08UZa1xCJNlRjr3DQd3HPxcOEhLQ3pEHfGw3IH%2BSsbKhcMNNV3px3PrySp0pqmRspCi6gcznCw0HYG6duf0dyNwTSd8cEmMvwLs%2FxrrfLycBvowvQZX1oZ0TN%2FmsoE%2F9ji%2Bs6%2FrIJRatYFjx9cx6kKpn5PEDike6GZ27eSSmUjBfzLFOgtQ2yKGSzVMCM3Xqj

### Table with estimated PD per period. 

Period is defined by duration threshold.
One row per period


In [25]:
df = get_data(snowsesh)
df.head()

Retreived columns: Index(['PERSON_ID', 'PERIOD_ID', 'DRUG', 'CLASS', 'MEDICATION_COMPLIANCE',
       'COMPLIANCE_DATE', 'PERIOD_START_DATE', 'PERIOD_END_DATE',
       'DURATION_PERIOD', 'ORDER_GAPS', 'DURATION_ORDERS', 'EST_PDC',
       'PERIOD_RANK'],
      dtype='object')


Unnamed: 0,PERSON_ID,PERIOD_ID,DRUG,CLASS,MEDICATION_COMPLIANCE,COMPLIANCE_DATE,PERIOD_START_DATE,PERIOD_END_DATE,DURATION_PERIOD,ORDER_GAPS,DURATION_ORDERS,EST_PDC,PERIOD_RANK
0,2403967,1,Emollient bath and shower preparations,Emollient bath and shower preparations,good,2022-11-07,2012-05-16,2012-05-16,0,0,0,,489
1,2403967,1,Emollient bath and shower preparations,Emollient bath and shower preparations,good,2024-08-16,2012-05-16,2012-05-16,0,0,0,,489
2,2403967,2,Emollient bath and shower preparations,Emollient bath and shower preparations,good,2024-08-16,2013-04-22,2013-06-28,67,11,56,0.835821,525
3,2403967,2,Emollient bath and shower preparations,Emollient bath and shower preparations,good,2022-11-07,2013-04-22,2013-06-28,67,0,67,1.0,525
4,2403967,1,Estradiol with progestogen,Oestrogens and Hormone Replacement Therapy,good,2024-08-16,1999-01-21,1999-01-21,0,0,0,,11


In [26]:
top_3_classes = get_top_n_classes(df, 'CLASS', n=3)

    # Generate the SQL query with the top 3 classes as a filter
query = generate_query_with_class_filter(top_3_classes)
print("Generated SQL Query:\n", query)

result_df = execute_query(snowsesh, query)

    
print(result_df.head())

plot_top_classes(result_df)

Generated SQL Query:
 
    SELECT * FROM intelligence_dev.ai_centre_dev.comp_pdc_duration
    WHERE CLASS IN ('Non-opioid analgesics and compound preparations', 'Lipid-regulating drugs', 'Selective beta(2)-agonists')
    


NameError: name 'execute_query' is not defined