Imports packages and connection to internal Actuary Team database

In [1]:
import pandas as pd
import pyodbc
connection_string = ("DRIVER={SQL Server};"
"SERVER=USDF11V0954;"
"DATABASE=Actuarial_AH;"
"Trust_Connection=yes;")

conn = pyodbc.connect(connection_string)
cursor = conn.cursor()

Runs SQL query that creates a cohort of individuals that had a script for Wegovy, Saxenda, or Zepbound between service dates 2022 and 2025. Using that cohort, returns medical claims dataset that were paid between 2021 and 2025 and were actively enrolled since 2021.

In [70]:
med = """
WITH WEIGHTLOSSCOHORT1 AS (
    SELECT DISTINCT MEMBER_ID,
           YEAR(SERVICE_DATE) AS PRESCRIPTION_YEAR
    FROM Actuarial_AH.DBO.SN_Rx
    WHERE DRUG_NAME_PREFERRED IN ('Wegovy', 'Saxenda', 'Zepbound')
    AND SERVICE_DATE >= '2022-01-01'
    AND SERVICE_DATE < '2025-01-01'
)
SELECT M.MEDICAL_CLAIM_ID, 
       M.MEMBER_ID, 
       M.PAID_DATE, 
	   YEAR(M.PAID_DATE) AS PAID_YEAR,
       M.SERVICE_DATE,
YEAR(M.SERVICE_DATE) AS SERVICE_YEAR,
       M.MEDICAL_PAID_AMOUNT, 
       M.IS_TELEMEDICINE, 
       M.IS_ER_AVOIDABLE, 
M.DIAGNOSIS_DESC_ICD10_1 AS PRIMARY_DX,
       M.PROCEDURE_DESC, 
       M.ARTTOS_V2_L1, 
       M.ARTTOS_V2_L3, 
M.DX_IS_CHRONIC,
       M.ICD10_CHAPTER, 
       M.ICD10_CATEGORY, 
       M.ICD10_SECTION, 
       M.MEG_EPISODE_DESCRIPTION, 
       M.OP_SURG_INC,
       M.IS_PCP_VISIT, 
       M.INCLUDED_SPECIALIST, 
       M.ER_VISIT_FLAG, 
       M.IS_URGENT_CARE_VISIT, 
       M.IS_PREVENTIVE_VISIT, 
       M.IP_ADMIT_INC
FROM [Actuarial_AH].[dbo].[SN_Medical] M
JOIN 
Actuarial_AH.dbo.SN_Member MB ON M.MEMBER_ID = MB.MEMBER_ID
WHERE M.PAID_DATE BETWEEN '2021-01-01' AND '2024-12-31'
AND MB.MONTH_KEY = '2024-12-01' 
AND MB.MEDICAL_ENROLLMENT_STATUS = 3
AND MB.MEDICAL_CONT_ENRLMNT_START <= '2021-01-01'
AND MB.MEMBER_ID IN (
    SELECT MEMBER_ID
    FROM WEIGHTLOSSCOHORT1
    GROUP BY MEMBER_ID
	)
"""
med = pd.read_sql(med, conn)

  med = pd.read_sql(med, conn)


Runs SQL query that creates a cohort of individuals that had a script for Wegovy, Saxenda, or Zepbound between service dates 2022 and 2025. Using that cohort, returns pharmacy claims dataset that were paid between 2021 and 2025 and were actively enrolled since 2021.

In [71]:
rx = """
WITH WEIGHTLOSSCOHORT1 AS (
    SELECT DISTINCT MEMBER_ID,
           YEAR(SERVICE_DATE) AS PRESCRIPTION_YEAR
    FROM Actuarial_AH.DBO.SN_Rx
    WHERE DRUG_NAME_PREFERRED IN ('Wegovy', 'Saxenda', 'Zepbound')
    AND SERVICE_DATE BETWEEN '2022-01-01' AND '2024-12-31'
)
SELECT R.RX_CLAIM_ID, 
       R.MEMBER_ID, 
       R.PAID_DATE,
       R.DAYS,
       R.SERVICE_DATE, 
       R.DRUG_NAME_GENERIC,
	   R.DRUG_NAME_PREFERRED,
	   R.RX_PAID_AMOUNT,
	   R.RX_SCRIPT_COUNT,
	   R.ART_DRUGGRPS_L1,
	   R.ART_DRUGGRPS_L2,
	   YEAR(R.PAID_DATE) AS PAID_YEAR,
YEAR(R.SERVICE_DATE) AS SERVICE_YEAR
FROM [Actuarial_AH].[dbo].[SN_Rx] R
JOIN 
Actuarial_AH.dbo.SN_Member M ON R.MEMBER_ID = M.MEMBER_ID
WHERE R.PAID_DATE BETWEEN '2021-01-01' AND '2024-12-31'
AND M.MONTH_KEY = '2024-12-01' 
AND M.MEDICAL_ENROLLMENT_STATUS = 3
AND M.MEDICAL_CONT_ENRLMNT_START <= '2021-01-01'
AND M.MEMBER_ID IN (
    SELECT MEMBER_ID
    FROM WEIGHTLOSSCOHORT1
    GROUP BY MEMBER_ID
	)
"""
rx = pd.read_sql(rx, conn)

  rx = pd.read_sql(rx, conn)


Runs SQL query that creates a cohort of individuals that had a script for Wegovy, Saxenda, or Zepbound between service dates 2022 and 2025. Using that cohort, returns unique members dataset that were actively enrolled since 2021.

In [72]:
member = """
WITH WEIGHTLOSSCOHORT1 AS (
    SELECT DISTINCT MEMBER_ID,
           YEAR(SERVICE_DATE) AS PRESCRIPTION_YEAR
    FROM Actuarial_AH.DBO.SN_Rx
    WHERE DRUG_NAME_PREFERRED IN ('Wegovy', 'Saxenda', 'Zepbound')
    AND SERVICE_DATE >= '2022-01-01'
    AND SERVICE_DATE < '2025-01-01'
)
SELECT M.MEMBER_ID,
       M.MEMBER_STATUS,
       M.HOME_STATE,
	   CASE 
    WHEN M.MEMBER_RELATIONSHIP = 0 THEN 'Subscriber'
    WHEN M.MEMBER_RELATIONSHIP = 1 THEN 'Spouse'
    WHEN M.MEMBER_RELATIONSHIP = 2 THEN 'Dependent'
    WHEN M.MEMBER_RELATIONSHIP = 3 THEN 'Domestic Partner'
    WHEN M.MEMBER_RELATIONSHIP = 4 THEN 'Unknown'
    ELSE 'Unknown' -- Optional: Handle unexpected values
END AS MEMBER_RELATIONSHIP,
CASE 
        WHEN M.GENDER = 0 THEN 'Female'
        WHEN M.GENDER = 1 THEN 'Male'
        WHEN M.GENDER = 2 THEN 'Unknown'
        ELSE 'Unknown' 
    END AS GENDER,
    DATEDIFF(YEAR, CONVERT(DATE, M.BIRTH_YEAR_MO + '-01'), GETDATE()) - 
    CASE 
        WHEN MONTH(CONVERT(DATE, M.BIRTH_YEAR_MO + '-01')) > MONTH(GETDATE()) 
             OR (MONTH(CONVERT(DATE, M.BIRTH_YEAR_MO + '-01')) = MONTH(GETDATE()) 
                 AND DAY(CONVERT(DATE, M.BIRTH_YEAR_MO + '-01')) > DAY(GETDATE())) 
        THEN 1 
        ELSE 0 
    END AS Age,
	CASE 
    WHEN M.BIRTH_YEAR >= 1946 AND M.BIRTH_YEAR < 1965 THEN 'Baby Boomers'
	WHEN M.BIRTH_YEAR >= 1965 AND M.BIRTH_YEAR < 1981 THEN 'Generation X'
	WHEN M.BIRTH_YEAR >= 1981 AND M.BIRTH_YEAR < 1997 THEN 'Millenials'
	WHEN M.BIRTH_YEAR >= 1997 AND M.BIRTH_YEAR < 2012 THEN 'Generation Z'
    ELSE 'Unknown' -- Optional: Handle unexpected values
END AS GENERATIONS
FROM [Actuarial_AH].[dbo].[SN_MEMBER] M
WHERE M.MEMBER_ID IN (
    SELECT MEMBER_ID
    FROM WEIGHTLOSSCOHORT1
    GROUP BY MEMBER_ID
	)
AND M.MONTH_KEY = '2024-12-01' 
AND M.MEDICAL_ENROLLMENT_STATUS = 3
AND M.MEDICAL_CONT_ENRLMNT_START <= '2021-01-01'
"""
member = pd.read_sql(member, conn)

  member = pd.read_sql(member, conn)


Reviewing medical, pharmacy and member tables

In [43]:
med

Unnamed: 0,MEDICAL_CLAIM_ID,MEMBER_ID,PAID_DATE,PAID_YEAR,SERVICE_DATE,SERVICE_YEAR,MEDICAL_PAID_AMOUNT,IS_TELEMEDICINE,IS_ER_AVOIDABLE,PRIMARY_DX,...,ICD10_CATEGORY,ICD10_SECTION,MEG_EPISODE_DESCRIPTION,OP_SURG_INC,IS_PCP_VISIT,INCLUDED_SPECIALIST,ER_VISIT_FLAG,IS_URGENT_CARE_VISIT,IS_PREVENTIVE_VISIT,IP_ADMIT_INC
0,41246.0,mma-f158ef6210ec494e9748aa83f77523e2,2021-01-01,2021,2020-12-22,2020,51.88,0.0,1.0,Epigastric pain,...,Symptoms and signs involving the digestive sys...,Abdominal and pelvic pain,Other Disorders of Female Genital System,0,0,0,0,0,0,0
1,2371204.0,mma-2ffc5633dbd64b3994af70240afd12e2,2021-01-01,2021,2020-11-20,2020,16.31,0.0,1.0,Pain in right knee,...,Other joint disorders,"Other joint disorder, not elsewhere classified","Osteoarthritis, Except Spine",0,0,0,0,0,0,0
2,11951714.0,mma-a874673b46fa438eac9559ff9074aab1,2021-01-01,2021,2020-12-02,2020,317.70,0.0,0.0,Unspecified benign mammary dysplasia of left b...,...,Disorders of breast,Benign mammary dysplasia,Other Breast Disorders,1,0,0,0,0,0,0
3,2636075.0,mma-f60bfa7f91a6405983d7667e9ec40c9e,2021-01-01,2021,2020-12-02,2020,99.44,0.0,0.0,"Other vitreous opacities, bilateral",...,Disorders of vitreous body and globe,Disorders of vitreous body,,0,0,0,0,0,0,0
4,7937659.0,mma-b41dfa59348142389ddbd03c45615a7d,2021-01-01,2021,2020-12-30,2020,85.12,1.0,1.0,Essential (primary) hypertension,...,Hypertensive diseases,Essential (primary) hypertension,"Essential Hypertension, Chronic Maintenance",0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2233165,9503985.0,mma-aa72f3b2a8a449d2b28f84ed5b0a4cf0,2024-12-31,2024,2024-05-08,2024,1.04,0.0,1.0,"Allergic contact dermatitis due to plants, exc...",...,Dermatitis and eczema,Allergic contact dermatitis,Other Inflammations and Infections of Skin and...,0,0,0,0,0,0,0
2233166,12475592.0,mma-5effc21c01d74261a9b0291e0ce19903,2024-12-31,2024,2024-12-14,2024,11.90,0.0,0.0,Calculus of gallbladder with acute cholecystit...,...,"Disorders of gallbladder, biliary tract and pa...",Cholelithiasis,Cholecystitis and Cholelithiasis,0,0,0,0,0,0,1
2233167,12475592.0,mma-5effc21c01d74261a9b0291e0ce19903,2024-12-31,2024,2024-12-14,2024,6.03,0.0,0.0,Calculus of gallbladder with acute cholecystit...,...,"Disorders of gallbladder, biliary tract and pa...",Cholelithiasis,Cholecystitis and Cholelithiasis,0,0,0,0,0,0,1
2233168,577096.0,mma-834cb589b7d44aecbd57cf0b4598d7d3,2024-12-31,2024,2024-12-23,2024,7.34,0.0,1.0,Other specified noninflammatory disorders of v...,...,Noninflammatory disorders of female genital tract,Other noninflammatory disorders of vagina,Other Disorders of Female Genital System,0,0,0,0,0,0,0


In [44]:
rx

Unnamed: 0,RX_CLAIM_ID,MEMBER_ID,PAID_DATE,DAYS,SERVICE_DATE,DRUG_NAME_GENERIC,DRUG_NAME_PREFERRED,RX_PAID_AMOUNT,RX_SCRIPT_COUNT,ART_DRUGGRPS_L1,ART_DRUGGRPS_L2,PAID_YEAR,SERVICE_YEAR
0,7621558.0,mma-09c4c713e7db4d60b6bbdbf318398a6c,2021-01-01,30.0,2020-12-31,Cyclobenzaprine HCl,Cyclobenzaprine HCl,6.12,60.0,Musculoskeletal Agents,Muscle Relaxers,2021,2020
1,13387845.0,mma-0b8b8629f6684a73b244ca13cebfe8c3,2021-01-01,30.0,2020-12-31,hydroCHLOROthiazide,hydroCHLOROthiazide,0.00,30.0,Cardiac,Diuretics,2021,2020
2,4663706.0,mma-4fbfc3b14e454699b4aab1557459bcdb,2021-01-01,-30.0,2020-12-17,Tacrolimus,Tacrolimus,-217.75,0.0,Immune Suppressants,Immune Suppressants,2021,2020
3,15234759.0,mma-36af576882ea4d8da6dda1ff126ad1ac,2021-01-01,6.0,2020-12-28,methylPREDNISolone,methylPREDNISolone,0.00,21.0,Steroids,Adrenal Corticosteroids,2021,2020
4,12332888.0,mma-86f72e7b20e049bb87e8f623a15eb87c,2021-01-01,30.0,2021-01-01,HYDROcodone-Acetaminophen,HYDROcodone-Acetaminophen,2.18,60.0,CNS Agents,Opioid Analgesics,2021,2021
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1181321,2273554.0,mma-c24dc57b82274b52aa4230385949600a,2024-12-31,30.0,2024-12-31,Sacubitril-Valsartan,Entresto,668.42,60.0,Cardiac,Misc. Cardiac Combinations,2024,2024
1181322,10817811.0,mma-e14f63b86eed481aa059968d561b6af7,2024-12-31,6.0,2024-12-31,Promethazine-DM,Promethazine-DM,0.00,120.0,Respiratory,Cough/Cold Combos,2024,2024
1181323,10347182.0,mma-bb7125f267df412fa3517023186305a3,2024-12-31,5.0,2024-12-02,LORazepam,LORazepam,0.00,20.0,CNS Agents,Benzodiazepines,2024,2024
1181324,269429.0,mma-c6514fcbd5bc4b36a8a239664580147d,2024-12-31,28.0,2024-12-31,Tirzepatide-Weight Management,Zepbound,981.45,2.0,CNS Agents,Weight Loss Agents,2024,2024


In [45]:
member

Unnamed: 0,MEMBER_ID,MEMBER_STATUS,HOME_STATE,MEMBER_RELATIONSHIP,GENDER,Age,GENERATIONS
0,mma-eef7d2e1c6f44ab7bae950f252b61c37,0.0,MN,Subscriber,Female,58,Generation X
1,mma-32626a9cf0da42cebd3771d7dcb9ec58,0.0,NC,Subscriber,Female,46,Generation X
2,mma-2e25ed887e0b4990ad2fa1c2d91e0966,0.0,NC,Subscriber,Female,53,Generation X
3,mma-e052097e817c4be38ec3aa34d2c1f71a,0.0,WI,Subscriber,Female,45,Generation X
4,mma-d8c04108c3304149ae25a89d20f68dd1,0.0,NC,Spouse,Female,40,Millenials
...,...,...,...,...,...,...,...
9681,mma-6ba38328f62841149222d3ed238c5e7f,0.0,NC,Subscriber,Female,31,Millenials
9682,mma-5d9ec55368bf42929f019054c36447da,0.0,TX,Subscriber,Female,59,Generation X
9683,mma-1e7b0ce3555b43ac81aadf5c2c4173fe,0.0,TX,Subscriber,Female,45,Generation X
9684,mma-5ec877c20c8944e8ac1d6efeac0ed5af,0.0,OK,Spouse,Female,40,Millenials


Exporting and reading these tables to CSV so they could be read easily from saved files as opposing to running the SQL Query each time using the notebook.

In [73]:
member.to_csv('glp-1-member-claims_pdc.csv', index=False)
med.to_csv('med-glp-claims_pdc.csv', index=False)
rx.to_csv('rx-glp-claims_pdc.csv', index=False)

In [83]:
med = pd.read_csv('med-glp-claims_pdc.csv')
rx = pd.read_csv('rx-glp-claims_pdc.csv')
member = pd.read_csv('glp-1-member-claims_pdc.csv')

Find members that had taken a weight-loss GLP-1 in 2020 and 2021 and remove from the tables. This study looks at individuals who initiated in 2022.

In [6]:
# Define the drugs to filter out
drugs_to_remove = ['Wegovy', 'Saxenda', 'Zepbound']

# Filter to find members with the specified drugs in 2021
members_to_remove = rx[((rx['SERVICE_YEAR'] == 2021) | (rx['SERVICE_YEAR'] == 2020)) & (rx['DRUG_NAME_PREFERRED'].isin(drugs_to_remove))]['MEMBER_ID'].unique()

# Remove those members from the original DataFrame
rx_members_glp_22_24 = rx[~rx['MEMBER_ID'].isin(members_to_remove)]
med_members_glp_22_24 = med[~med['MEMBER_ID'].isin(members_to_remove)]
members_glp_22_24 = member[~member['MEMBER_ID'].isin(members_to_remove)]

Create a view that will set up the Percentage of Days Covered (PDC) metric. Locate GLP-1s in the rx dataset and determine days filled, start and end days per script.

In [7]:
# Define NDCs or names of GLP-1 drugs
glp1_drugs = ['Wegovy', 'Saxenda', 'Zepbound'] 

# Step 1: Filter for GLP-1 claims only
glp1_drugs_rx = rx_members_glp_22_24[rx_members_glp_22_24['DRUG_NAME_PREFERRED'].isin(glp1_drugs)].copy()

# Step 2: Convert fill_date to datetime
glp1_drugs_rx['SERVICE_DATE'] = pd.to_datetime(glp1_drugs_rx['SERVICE_DATE'])

# Step 3: Create a column for days covered per fill
glp1_drugs_rx['END_DATE'] = glp1_drugs_rx['SERVICE_DATE'] + pd.to_timedelta(glp1_drugs_rx['DAYS'] - 1, unit='D')
glp1_drugs_rx

Unnamed: 0,RX_CLAIM_ID,MEMBER_ID,PAID_DATE,DAYS,SERVICE_DATE,DRUG_NAME_GENERIC,DRUG_NAME_PREFERRED,RX_PAID_AMOUNT,RX_SCRIPT_COUNT,ART_DRUGGRPS_L1,ART_DRUGGRPS_L2,PAID_YEAR,SERVICE_YEAR,END_DATE
172648,7788285.0,mma-3b394f381f364a2ca68b80a5d878fe81,2022-01-04,28.0,2022-01-04,Semaglutide-Weight Management,Wegovy,1329.80,2.0,CNS Agents,Weight Loss Agents,2022,2022,2022-01-31
174244,2944008.0,mma-95534c7006664d13a2a3866fb336f638,2022-01-04,28.0,2022-01-04,Semaglutide-Weight Management,Wegovy,-1225.13,-2.0,CNS Agents,Weight Loss Agents,2022,2022,2022-01-31
174247,607211.0,mma-10a115cc67a8423abe8ab5cfcb1a0741,2022-01-04,28.0,2022-01-04,Semaglutide-Weight Management,Wegovy,-1225.13,-3.0,CNS Agents,Weight Loss Agents,2022,2022,2022-01-31
174457,2835746.0,mma-95534c7006664d13a2a3866fb336f638,2022-01-04,28.0,2022-01-04,Semaglutide-Weight Management,Wegovy,-1225.13,-2.0,CNS Agents,Weight Loss Agents,2022,2022,2022-01-31
174463,607211.0,mma-10a115cc67a8423abe8ab5cfcb1a0741,2022-01-04,28.0,2022-01-04,Semaglutide-Weight Management,Wegovy,-1225.13,-3.0,CNS Agents,Weight Loss Agents,2022,2022,2022-01-31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1181313,12246162.0,mma-ef9d3e5d443f4bf3b9cb20f4b586eacd,2024-12-31,-28.0,2024-12-31,Tirzepatide-Weight Management,Zepbound,-938.23,-2.0,CNS Agents,Weight Loss Agents,2024,2024,2024-12-02
1181316,1972946.0,mma-fecfdf6ac107456c8c3ec64188a0501c,2024-12-31,28.0,2024-12-31,Tirzepatide-Weight Management,Zepbound,956.56,0.0,CNS Agents,Weight Loss Agents,2024,2024,2025-01-27
1181319,10373715.0,mma-e5345135cb1044f7a2e7acf4f060df90,2024-12-31,28.0,2024-12-05,Tirzepatide-Weight Management,Zepbound,1028.91,2.0,CNS Agents,Weight Loss Agents,2024,2024,2025-01-01
1181323,1746183.0,mma-ddde596e98db46fe8fcd48c74e55d600,2024-12-27,28.0,2024-12-27,Tirzepatide-Weight Management,Zepbound,981.45,0.0,CNS Agents,Weight Loss Agents,2024,2024,2025-01-23


Aggregate sum of days filled by member ID and earliest and latest service/fill dates. Determine 3 respective years from earliest service date to create a 365-day timelines to calculate PDC.

In [30]:
member_pdc = glp1_drugs_rx.groupby('MEMBER_ID').agg(
    min_service_date=('SERVICE_DATE', 'min'),
    max_end_date=('END_DATE', 'max'),
    covered_days = ('DAYS', 'sum')
).reset_index()

# Add 12 months for each year
for year in range(1, 4):
    member_pdc[f'min_service_date_year_{year}'] = member_pdc['min_service_date'] + pd.DateOffset(months=12 * year)

member_pdc

Unnamed: 0,MEMBER_ID,min_service_date,max_end_date,covered_days,min_service_date_year_1,min_service_date_year_2,min_service_date_year_3
0,mma-0000ac7529034a358d619eeb9610b3d9,2023-06-07,2025-01-26,1126.0,2024-06-07,2025-06-07,2026-06-07
1,mma-000be31d0c0246f188ffb9af76efb618,2023-02-03,2024-10-16,644.0,2024-02-03,2025-02-03,2026-02-03
2,mma-000e6577cfab4be2a0f616eec00f4b8c,2023-11-01,2024-12-11,336.0,2024-11-01,2025-11-01,2026-11-01
3,mma-001eb7a23bfc4e849ee06c18369ccae9,2024-01-09,2025-01-05,378.0,2025-01-09,2026-01-09,2027-01-09
4,mma-001ec84c24864164b3a6ed7a5d2337fd,2023-01-13,2024-09-09,476.0,2024-01-13,2025-01-13,2026-01-13
...,...,...,...,...,...,...,...
9130,mma-ffdd6f70ec77478a94ecca9c48e37839,2024-09-20,2025-01-20,84.0,2025-09-20,2026-09-20,2027-09-20
9131,mma-ffeda52085934fe2928488be93d4ef20,2022-12-27,2023-07-07,222.0,2023-12-27,2024-12-27,2025-12-27
9132,mma-ffeeeae5bc51483aa0575c9bf3dd66bc,2023-08-24,2024-02-23,142.0,2024-08-24,2025-08-24,2026-08-24
9133,mma-fff0404bfc224db0837ae05a5c84e564,2024-11-25,2024-12-22,0.0,2025-11-25,2026-11-25,2027-11-25


Only keep members that had earliest script dates in 2022.

In [39]:
members_2022 = member_pdc[member_pdc['min_service_date'] < '2023']
members_2022

Unnamed: 0,MEMBER_ID,min_service_date,max_end_date,covered_days,min_service_date_year_1,min_service_date_year_2,min_service_date_year_3
7,mma-003e9a4f4a4c476ea476a8f7658294dd,2022-12-15,2023-03-23,60.0,2023-12-15,2024-12-15,2025-12-15
12,mma-0049f2985b9f413a8cf5ad2f79c95d4b,2022-07-01,2025-01-15,874.0,2023-07-01,2024-07-01,2025-07-01
14,mma-0054034112c64cd2924c4f85de9a46ca,2022-02-23,2025-03-02,1048.0,2023-02-23,2024-02-23,2025-02-23
19,mma-0067cc7e5aab4a908cb83e9922c13875,2022-05-10,2025-01-08,592.0,2023-05-10,2024-05-10,2025-05-10
30,mma-00e492d23e4240e7a10e9d47762d38fb,2022-09-15,2023-01-16,90.0,2023-09-15,2024-09-15,2025-09-15
...,...,...,...,...,...,...,...
9087,mma-fe94f5237c194a4da7b7edd2b0805e1a,2022-04-11,2025-03-23,700.0,2023-04-11,2024-04-11,2025-04-11
9111,mma-ff82ab56bc48478b91ad9760b62d56e1,2022-09-26,2024-07-24,284.0,2023-09-26,2024-09-26,2025-09-26
9116,mma-ff9bea0a67004e3698f185a7e36a1d5d,2022-05-26,2025-01-10,1071.0,2023-05-26,2024-05-26,2025-05-26
9131,mma-ffeda52085934fe2928488be93d4ef20,2022-12-27,2023-07-07,222.0,2023-12-27,2024-12-27,2025-12-27


In [40]:
# Initialize a dictionary to store results
results = []

# Loop through each member and calculate covered days
for index, member in members_2022.iterrows():
    member_id = member['MEMBER_ID']
    
    # Filter rx_df for claims before year 1 and year 2
    covered_days_year_1 = glp1_drugs_rx[(glp1_drugs_rx['MEMBER_ID'] == member_id) & 
                                 (glp1_drugs_rx['SERVICE_DATE'] < member['min_service_date_year_1'])]['DAYS'].sum()
    
    covered_days_year_2 = glp1_drugs_rx[(glp1_drugs_rx['MEMBER_ID'] == member_id) & 
                                 ((glp1_drugs_rx['SERVICE_DATE'] > member['min_service_date_year_1']) & (glp1_drugs_rx['SERVICE_DATE'] < member['min_service_date_year_2']))]['DAYS'].sum()
    
    # Append results
    results.append({
        'MEMBER_ID': member_id,
        'covered_days_year_1': covered_days_year_1,
        'covered_days_year_2': covered_days_year_2
    })

In [41]:
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,MEMBER_ID,covered_days_year_1,covered_days_year_2
0,mma-003e9a4f4a4c476ea476a8f7658294dd,60.0,0.0
1,mma-0049f2985b9f413a8cf5ad2f79c95d4b,396.0,310.0
2,mma-0054034112c64cd2924c4f85de9a46ca,348.0,364.0
3,mma-0067cc7e5aab4a908cb83e9922c13875,144.0,140.0
4,mma-00e492d23e4240e7a10e9d47762d38fb,90.0,0.0
...,...,...,...
876,mma-fe94f5237c194a4da7b7edd2b0805e1a,112.0,0.0
877,mma-ff82ab56bc48478b91ad9760b62d56e1,88.0,196.0
878,mma-ff9bea0a67004e3698f185a7e36a1d5d,399.0,448.0
879,mma-ffeda52085934fe2928488be93d4ef20,222.0,0.0


In [44]:
members_2022_cvd = members_2022.merge(results_df, on='MEMBER_ID')
members_2022_cvd.drop(columns=['covered_days', 'min_service_date_year_3'], inplace=True)
members_2022_cvd['PDC1'] = members_2022_cvd['covered_days_year_1']/365
members_2022_cvd['PDC2'] = members_2022_cvd['covered_days_year_2']/365
members_2022_cvd

Unnamed: 0,MEMBER_ID,min_service_date,max_end_date,min_service_date_year_1,min_service_date_year_2,covered_days_year_1,covered_days_year_2,PDC1,PDC2
0,mma-003e9a4f4a4c476ea476a8f7658294dd,2022-12-15,2023-03-23,2023-12-15,2024-12-15,60.0,0.0,0.164384,0.000000
1,mma-0049f2985b9f413a8cf5ad2f79c95d4b,2022-07-01,2025-01-15,2023-07-01,2024-07-01,396.0,310.0,1.084932,0.849315
2,mma-0054034112c64cd2924c4f85de9a46ca,2022-02-23,2025-03-02,2023-02-23,2024-02-23,348.0,364.0,0.953425,0.997260
3,mma-0067cc7e5aab4a908cb83e9922c13875,2022-05-10,2025-01-08,2023-05-10,2024-05-10,144.0,140.0,0.394521,0.383562
4,mma-00e492d23e4240e7a10e9d47762d38fb,2022-09-15,2023-01-16,2023-09-15,2024-09-15,90.0,0.0,0.246575,0.000000
...,...,...,...,...,...,...,...,...,...
876,mma-fe94f5237c194a4da7b7edd2b0805e1a,2022-04-11,2025-03-23,2023-04-11,2024-04-11,112.0,0.0,0.306849,0.000000
877,mma-ff82ab56bc48478b91ad9760b62d56e1,2022-09-26,2024-07-24,2023-09-26,2024-09-26,88.0,196.0,0.241096,0.536986
878,mma-ff9bea0a67004e3698f185a7e36a1d5d,2022-05-26,2025-01-10,2023-05-26,2024-05-26,399.0,448.0,1.093151,1.227397
879,mma-ffeda52085934fe2928488be93d4ef20,2022-12-27,2023-07-07,2023-12-27,2024-12-27,222.0,0.0,0.608219,0.000000


In [90]:
members_2022_cvd.describe()

Unnamed: 0,min_service_date,max_end_date,min_service_date_year_1,min_service_date_year_2,covered_days_year_1,covered_days_year_2,PDC1,PDC2
count,881,871,881,881,881.0,881.0,881.0,881.0
mean,2022-06-27 05:41:36.708285952,2024-03-12 05:17:25.694603776,2023-06-27 05:41:36.708285952,2024-06-27 01:31:31.940975872,269.78,166.66,0.74,0.46
min,2022-01-03 00:00:00,2022-02-06 00:00:00,2023-01-03 00:00:00,2024-01-03 00:00:00,0.0,0.0,0.0,0.0
25%,2022-03-17 00:00:00,2023-04-12 00:00:00,2023-03-17 00:00:00,2024-03-17 00:00:00,86.0,0.0,0.24,0.0
50%,2022-06-13 00:00:00,2024-11-11 00:00:00,2023-06-13 00:00:00,2024-06-13 00:00:00,224.0,84.0,0.61,0.23
75%,2022-10-15 00:00:00,2025-01-12 00:00:00,2023-10-15 00:00:00,2024-10-15 00:00:00,366.0,336.0,1.0,0.92
max,2022-12-31 00:00:00,2025-03-23 00:00:00,2023-12-31 00:00:00,2024-12-31 00:00:00,2878.0,1232.0,7.88,3.38
std,,,,,264.63,196.45,0.73,0.54


In [46]:
members_65 = members_2022_cvd[(members_2022_cvd['PDC1'] > 0.65) & (members_2022_cvd['PDC2'] > 0.65)]
members_65

Unnamed: 0,MEMBER_ID,min_service_date,max_end_date,min_service_date_year_1,min_service_date_year_2,covered_days_year_1,covered_days_year_2,PDC1,PDC2
1,mma-0049f2985b9f413a8cf5ad2f79c95d4b,2022-07-01,2025-01-15,2023-07-01,2024-07-01,396.0,310.0,1.084932,0.849315
2,mma-0054034112c64cd2924c4f85de9a46ca,2022-02-23,2025-03-02,2023-02-23,2024-02-23,348.0,364.0,0.953425,0.997260
7,mma-01c846aa858d43e98167eb0669db3a4e,2022-03-15,2024-05-12,2023-03-15,2024-03-15,282.0,252.0,0.772603,0.690411
9,mma-0281be178d4042ab83dd30b3190a3c6c,2022-08-24,2025-01-22,2023-08-24,2024-08-24,682.0,588.0,1.868493,1.610959
14,mma-038ef0da0fd54c2c81bfd92b13f81a3e,2022-11-10,2025-01-08,2023-11-10,2024-11-10,284.0,329.0,0.778082,0.901370
...,...,...,...,...,...,...,...,...,...
860,mma-fb5675af09a14732bb702c1feb30e1b6,2022-07-15,2025-01-05,2023-07-15,2024-07-15,1116.0,364.0,3.057534,0.997260
865,mma-fbecd51fc39c48898eb4f31188700674,2022-01-14,2024-01-22,2023-01-14,2024-01-14,336.0,308.0,0.920548,0.843836
872,mma-fd3a666933464a569791e983fbdb4867,2022-11-29,2025-01-06,2023-11-29,2024-11-29,858.0,672.0,2.350685,1.841096
875,mma-fe87330ad1d34f5bb4ee0bba91cf61c7,2022-10-17,2025-01-07,2023-10-17,2024-10-17,386.0,392.0,1.057534,1.073973


In [91]:
members_65.describe()

Unnamed: 0,min_service_date,max_end_date,min_service_date_year_1,min_service_date_year_2,covered_days_year_1,covered_days_year_2,PDC1,PDC2
count,261,261,261,261,261.0,261.0,261.0,261.0
mean,2022-07-03 10:17:55.862068992,2024-12-15 12:19:18.620689664,2023-07-03 10:17:55.862068992,2024-07-03 05:53:06.206896640,492.15,409.95,1.35,1.12
min,2022-01-04 00:00:00,2023-07-02 00:00:00,2023-01-04 00:00:00,2024-01-04 00:00:00,240.0,238.0,0.66,0.65
25%,2022-03-11 00:00:00,2024-12-29 00:00:00,2023-03-11 00:00:00,2024-03-11 00:00:00,336.0,336.0,0.92,0.92
50%,2022-06-27 00:00:00,2025-01-09 00:00:00,2023-06-27 00:00:00,2024-06-27 00:00:00,390.0,364.0,1.07,1.0
75%,2022-11-02 00:00:00,2025-01-21 00:00:00,2023-11-02 00:00:00,2024-11-02 00:00:00,504.0,448.0,1.38,1.23
max,2022-12-31 00:00:00,2025-03-23 00:00:00,2023-12-31 00:00:00,2024-12-31 00:00:00,2878.0,1232.0,7.88,3.38
std,,,,,296.4,147.79,0.81,0.4


In [49]:
med_agg = med_members_glp_22_24.pivot_table(index='MEMBER_ID', columns='PAID_YEAR', values='MEDICAL_PAID_AMOUNT', aggfunc='sum')
med_agg.reset_index(inplace=True)
med_agg.columns.name = None
med_agg.columns = [f"{col}" if col != 'PAID_YEAR' else 'PAID_YEAR' for col in med_agg.columns]
med_agg.rename(columns={'2021': 'Med_2021', '2022': 'Med_2022', '2023': 'Med_2023', '2024': 'Med_2024'}, inplace=True)
med_agg1 = med_agg
med_agg1

Unnamed: 0,MEMBER_ID,Med_2021,Med_2022,Med_2023,Med_2024
0,mma-0000ac7529034a358d619eeb9610b3d9,1181.52,1726.81,1052.81,211.87
1,mma-000be31d0c0246f188ffb9af76efb618,481.59,1523.11,1322.48,2535.04
2,mma-000e6577cfab4be2a0f616eec00f4b8c,1223.80,699.72,133.40,55.74
3,mma-001eb7a23bfc4e849ee06c18369ccae9,8668.13,4282.21,2578.05,5243.39
4,mma-001ec84c24864164b3a6ed7a5d2337fd,247.48,4077.50,763.21,176.67
...,...,...,...,...,...
9133,mma-ffdd6f70ec77478a94ecca9c48e37839,1989.24,31083.88,11112.18,3649.50
9134,mma-ffeda52085934fe2928488be93d4ef20,1517.58,1283.26,2120.49,1794.84
9135,mma-ffeeeae5bc51483aa0575c9bf3dd66bc,4614.41,615.48,1031.24,520.65
9136,mma-fff0404bfc224db0837ae05a5c84e564,1336.34,1415.09,7982.37,1335.22


In [50]:
# Define inflation rates (as factors)
inflation_factors = {
    2021: 1.175, 
    2022: 1.117,   
    2023: 1.069, 
    2024: 1
}

# Normalize employer paid amounts for inflation
for year, factor in inflation_factors.items():
    column_name = f'Med_{year}'
    med_agg[column_name] = med_agg[column_name] * factor
med_agg

Unnamed: 0,MEMBER_ID,Med_2021,Med_2022,Med_2023,Med_2024
0,mma-0000ac7529034a358d619eeb9610b3d9,1388.28600,1928.84677,1125.45389,211.87
1,mma-000be31d0c0246f188ffb9af76efb618,565.86825,1701.31387,1413.73112,2535.04
2,mma-000e6577cfab4be2a0f616eec00f4b8c,1437.96500,781.58724,142.60460,55.74
3,mma-001eb7a23bfc4e849ee06c18369ccae9,10185.05275,4783.22857,2755.93545,5243.39
4,mma-001ec84c24864164b3a6ed7a5d2337fd,290.78900,4554.56750,815.87149,176.67
...,...,...,...,...,...
9133,mma-ffdd6f70ec77478a94ecca9c48e37839,2337.35700,34720.69396,11878.92042,3649.50
9134,mma-ffeda52085934fe2928488be93d4ef20,1783.15650,1433.40142,2266.80381,1794.84
9135,mma-ffeeeae5bc51483aa0575c9bf3dd66bc,5421.93175,687.49116,1102.39556,520.65
9136,mma-fff0404bfc224db0837ae05a5c84e564,1570.19950,1580.65553,8533.15353,1335.22


In [51]:
med_agg['Med_Delta'] = med_agg['Med_2024'] - med_agg['Med_2021']
med_agg['Med_%_Change'] =(( med_agg['Med_2024'] - med_agg['Med_2021'])/ med_agg['Med_2021']) * 100
med_agg

Unnamed: 0,MEMBER_ID,Med_2021,Med_2022,Med_2023,Med_2024,Med_Delta,Med_%_Change
0,mma-0000ac7529034a358d619eeb9610b3d9,1388.28600,1928.84677,1125.45389,211.87,-1176.41600,-84.738735
1,mma-000be31d0c0246f188ffb9af76efb618,565.86825,1701.31387,1413.73112,2535.04,1969.17175,347.991206
2,mma-000e6577cfab4be2a0f616eec00f4b8c,1437.96500,781.58724,142.60460,55.74,-1382.22500,-96.123689
3,mma-001eb7a23bfc4e849ee06c18369ccae9,10185.05275,4783.22857,2755.93545,5243.39,-4941.66275,-48.518774
4,mma-001ec84c24864164b3a6ed7a5d2337fd,290.78900,4554.56750,815.87149,176.67,-114.11900,-39.244607
...,...,...,...,...,...,...,...
9133,mma-ffdd6f70ec77478a94ecca9c48e37839,2337.35700,34720.69396,11878.92042,3649.50,1312.14300,56.137894
9134,mma-ffeda52085934fe2928488be93d4ef20,1783.15650,1433.40142,2266.80381,1794.84,11.68350,0.655215
9135,mma-ffeeeae5bc51483aa0575c9bf3dd66bc,5421.93175,687.49116,1102.39556,520.65,-4901.28175,-90.397334
9136,mma-fff0404bfc224db0837ae05a5c84e564,1570.19950,1580.65553,8533.15353,1335.22,-234.97950,-14.964946


In [52]:
med_agg.fillna(0, inplace=True)

In [53]:
med_agg.isnull().sum()

MEMBER_ID       0
Med_2021        0
Med_2022        0
Med_2023        0
Med_2024        0
Med_Delta       0
Med_%_Change    0
dtype: int64

In [54]:
import numpy as np

# Prepare the years and medical costs
years = np.array([2021, 2022, 2023, 2024])
slope_list = []

# Calculate the slope for each member
for index, row in med_agg.iterrows():
    costs = np.array([row['Med_2021'], row['Med_2022'], row['Med_2023'], row['Med_2024']])
    # Perform linear regression to get the slope
    slope, intercept = np.polyfit(years, costs, 1)  # 1 indicates linear fit
    slope_list.append(slope)

# Add the slope to the DataFrame
med_agg['Med_Slope'] = slope_list
med_agg

Unnamed: 0,MEMBER_ID,Med_2021,Med_2022,Med_2023,Med_2024,Med_Delta,Med_%_Change,Med_Slope
0,mma-0000ac7529034a358d619eeb9610b3d9,1388.28600,1928.84677,1125.45389,211.87,-1176.41600,-84.738735,-433.264088
1,mma-000be31d0c0246f188ffb9af76efb618,565.86825,1701.31387,1413.73112,2535.04,1969.17175,347.991206,561.993250
2,mma-000e6577cfab4be2a0f616eec00f4b8c,1437.96500,781.58724,142.60460,55.74,-1382.22500,-96.123689,-478.565764
3,mma-001eb7a23bfc4e849ee06c18369ccae9,10185.05275,4783.22857,2755.93545,5243.39,-4941.66275,-48.518774,-1685.228137
4,mma-001ec84c24864164b3a6ed7a5d2337fd,290.78900,4554.56750,815.87149,176.67,-114.11900,-39.244607,-408.105301
...,...,...,...,...,...,...,...,...
9133,mma-ffdd6f70ec77478a94ecca9c48e37839,2337.35700,34720.69396,11878.92042,3649.50,1312.14300,56.137894,-1890.534454
9134,mma-ffeda52085934fe2928488be93d4ef20,1783.15650,1433.40142,2266.80381,1794.84,11.68350,0.655215,86.845289
9135,mma-ffeeeae5bc51483aa0575c9bf3dd66bc,5421.93175,687.49116,1102.39556,520.65,-4901.28175,-90.397334,-1428.894085
9136,mma-fff0404bfc224db0837ae05a5c84e564,1570.19950,1580.65553,8533.15353,1335.22,-234.97950,-14.964946,624.755950


In [55]:
rx_agg = rx_members_glp_22_24.pivot_table(index='MEMBER_ID', columns='PAID_YEAR', values='RX_PAID_AMOUNT', aggfunc='sum')
rx_agg.reset_index(inplace=True)
rx_agg.columns.name = None
rx_agg.columns = [f"{col}" if col != 'PAID_YEAR' else 'PAID_YEAR' for col in rx_agg.columns]
rx_agg.rename(columns={'2021': 'Rx_2021', '2022': 'Rx_2022', '2023': 'Rx_2023', '2024': 'Rx_2024'}, inplace=True)
rx_agg1 = rx_agg
rx_agg1

Unnamed: 0,MEMBER_ID,Rx_2021,Rx_2022,Rx_2023,Rx_2024
0,mma-0000ac7529034a358d619eeb9610b3d9,67.82,137.83,9890.72,14901.62
1,mma-000be31d0c0246f188ffb9af76efb618,1891.25,330.46,17324.18,13448.49
2,mma-000e6577cfab4be2a0f616eec00f4b8c,401.78,912.06,10040.67,11717.40
3,mma-001eb7a23bfc4e849ee06c18369ccae9,4184.57,1005.46,1926.09,13510.81
4,mma-001ec84c24864164b3a6ed7a5d2337fd,5.05,383.03,15657.34,7529.09
...,...,...,...,...,...
9136,mma-ffdd6f70ec77478a94ecca9c48e37839,7225.26,14272.54,17248.09,14203.36
9137,mma-ffeda52085934fe2928488be93d4ef20,286.80,1493.01,6413.48,205.82
9138,mma-ffeeeae5bc51483aa0575c9bf3dd66bc,2291.66,7877.43,9336.61,1285.02
9139,mma-fff0404bfc224db0837ae05a5c84e564,496.33,34.02,126.02,349.32


In [56]:
# Define inflation rates (as factors)
rx_inflation_factors = {
    2021: 1.31, 
    2022: 1.20,   
    2023: 1.103, 
    2024: 1
}

# Normalize employer paid amounts for inflation
for year, factor in rx_inflation_factors.items():
    column_name = f'Rx_{year}'
    rx_agg[column_name] = rx_agg[column_name] * factor
rx_agg

Unnamed: 0,MEMBER_ID,Rx_2021,Rx_2022,Rx_2023,Rx_2024
0,mma-0000ac7529034a358d619eeb9610b3d9,88.8442,165.396,10909.46416,14901.62
1,mma-000be31d0c0246f188ffb9af76efb618,2477.5375,396.552,19108.57054,13448.49
2,mma-000e6577cfab4be2a0f616eec00f4b8c,526.3318,1094.472,11074.85901,11717.40
3,mma-001eb7a23bfc4e849ee06c18369ccae9,5481.7867,1206.552,2124.47727,13510.81
4,mma-001ec84c24864164b3a6ed7a5d2337fd,6.6155,459.636,17270.04602,7529.09
...,...,...,...,...,...
9136,mma-ffdd6f70ec77478a94ecca9c48e37839,9465.0906,17127.048,19024.64327,14203.36
9137,mma-ffeda52085934fe2928488be93d4ef20,375.7080,1791.612,7074.06844,205.82
9138,mma-ffeeeae5bc51483aa0575c9bf3dd66bc,3002.0746,9452.916,10298.28083,1285.02
9139,mma-fff0404bfc224db0837ae05a5c84e564,650.1923,40.824,139.00006,349.32


In [57]:
rx_agg.fillna(0, inplace=True)
rx_agg.isnull().sum()

MEMBER_ID    0
Rx_2021      0
Rx_2022      0
Rx_2023      0
Rx_2024      0
dtype: int64

In [58]:
pd.set_option('display.float_format', '{:.2f}'.format)

In [59]:
rx_agg['Rx_Delta'] = rx_agg['Rx_2024'] - rx_agg['Rx_2021']
rx_agg['Rx_%_Change'] =((rx_agg['Rx_2024'] - rx_agg['Rx_2021'])/ rx_agg['Rx_2021']) * 100
# Prepare the years and medical costs
years = np.array([2021, 2022, 2023, 2024])
slope_list = []

# Calculate the slope for each member
for index, row in rx_agg.iterrows():
    costs = np.array([row['Rx_2021'], row['Rx_2022'], row['Rx_2023'], row['Rx_2024']])
    # Perform linear regression to get the slope
    slope, intercept = np.polyfit(years, costs, 1)  # 1 indicates linear fit
    slope_list.append(slope)

# Add the slope to the DataFrame
rx_agg['Rx_Slope'] = slope_list
rx_agg

Unnamed: 0,MEMBER_ID,Rx_2021,Rx_2022,Rx_2023,Rx_2024,Rx_Delta,Rx_%_Change,Rx_Slope
0,mma-0000ac7529034a358d619eeb9610b3d9,88.84,165.40,10909.46,14901.62,14812.78,16672.76,5518.24
1,mma-000be31d0c0246f188ffb9af76efb618,2477.54,396.55,19108.57,13448.49,10970.95,442.82,5162.49
2,mma-000e6577cfab4be2a0f616eec00f4b8c,526.33,1094.47,11074.86,11717.40,11191.07,2126.24,4355.36
3,mma-001eb7a23bfc4e849ee06c18369ccae9,5481.79,1206.55,2124.48,13510.81,8029.02,146.47,2500.50
4,mma-001ec84c24864164b3a6ed7a5d2337fd,6.62,459.64,17270.05,7529.09,7522.47,113709.84,3937.78
...,...,...,...,...,...,...,...,...
9136,mma-ffdd6f70ec77478a94ecca9c48e37839,9465.09,17127.05,19024.64,14203.36,4738.27,50.06,1611.24
9137,mma-ffeda52085934fe2928488be93d4ef20,375.71,1791.61,7074.07,205.82,-169.89,-45.22,477.28
9138,mma-ffeeeae5bc51483aa0575c9bf3dd66bc,3002.07,9452.92,10298.28,1285.02,-1717.05,-57.20,-430.58
9139,mma-fff0404bfc224db0837ae05a5c84e564,650.19,40.82,139.00,349.32,-300.87,-46.27,-80.44


In [60]:
rx_agg.replace(np.inf, 0, inplace=True)
rx_agg

Unnamed: 0,MEMBER_ID,Rx_2021,Rx_2022,Rx_2023,Rx_2024,Rx_Delta,Rx_%_Change,Rx_Slope
0,mma-0000ac7529034a358d619eeb9610b3d9,88.84,165.40,10909.46,14901.62,14812.78,16672.76,5518.24
1,mma-000be31d0c0246f188ffb9af76efb618,2477.54,396.55,19108.57,13448.49,10970.95,442.82,5162.49
2,mma-000e6577cfab4be2a0f616eec00f4b8c,526.33,1094.47,11074.86,11717.40,11191.07,2126.24,4355.36
3,mma-001eb7a23bfc4e849ee06c18369ccae9,5481.79,1206.55,2124.48,13510.81,8029.02,146.47,2500.50
4,mma-001ec84c24864164b3a6ed7a5d2337fd,6.62,459.64,17270.05,7529.09,7522.47,113709.84,3937.78
...,...,...,...,...,...,...,...,...
9136,mma-ffdd6f70ec77478a94ecca9c48e37839,9465.09,17127.05,19024.64,14203.36,4738.27,50.06,1611.24
9137,mma-ffeda52085934fe2928488be93d4ef20,375.71,1791.61,7074.07,205.82,-169.89,-45.22,477.28
9138,mma-ffeeeae5bc51483aa0575c9bf3dd66bc,3002.07,9452.92,10298.28,1285.02,-1717.05,-57.20,-430.58
9139,mma-fff0404bfc224db0837ae05a5c84e564,650.19,40.82,139.00,349.32,-300.87,-46.27,-80.44


In [61]:
member_mrg = members_glp_22_24.merge(med_agg, on='MEMBER_ID')
member_mrg = member_mrg.merge(rx_agg, on='MEMBER_ID')                                     
member_mrg                                    

Unnamed: 0,MEMBER_ID,MEMBER_STATUS,HOME_STATE,MEMBER_RELATIONSHIP,GENDER,Age,GENERATIONS,Med_2021,Med_2022,Med_2023,...,Med_Delta,Med_%_Change,Med_Slope,Rx_2021,Rx_2022,Rx_2023,Rx_2024,Rx_Delta,Rx_%_Change,Rx_Slope
0,mma-51b01a9d804c402d9ec98ba8903a744e,0.00,PA,Subscriber,Female,67,Baby Boomers,1086.91,8919.32,32404.09,...,2096.55,192.89,2977.44,411.86,431.62,3040.35,11615.28,11203.42,2720.17,3621.90
1,mma-49c9cfb0577c496d820130fb1600dcf3,0.00,IN,Subscriber,Female,47,Generation X,205.99,0.00,274.52,...,562832.96,273234.14,168877.34,1196.13,0.00,7517.25,0.00,-1196.13,-100.00,392.88
2,mma-b027fbc0d58348338e0a12b5520743a1,0.00,NC,Subscriber,Female,48,Generation X,743.06,1762.83,1853.81,...,-271.74,-36.57,-72.42,0.00,2192.41,7835.17,12179.12,12179.12,0.00,4218.01
3,mma-5602b7f2d1244ea4b2c013d527667e08,0.00,NC,Subscriber,Female,41,Millenials,8122.82,527.76,570.25,...,-7623.48,-93.85,-2282.80,7135.10,590.54,675.16,8406.11,1271.01,17.81,389.76
4,mma-507116f346844b3fbc40d88fd77cba19,0.00,NC,Dependent,Female,23,Generation Z,1652.27,2008.96,591.23,...,-1289.40,-78.04,-528.59,501.51,0.00,2767.49,2973.41,2471.90,492.89,1018.32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9132,mma-cc398ee771f14529a7f334661bb803b3,0.00,TX,Subscriber,Female,32,Millenials,4679.28,13830.34,6909.84,...,-1049.23,-22.42,-1006.82,283.16,266.93,386.63,6042.19,5759.03,2033.87,1739.68
9133,mma-bca36e20b51743e287a674303dd59efb,0.00,PA,Subscriber,Female,47,Generation X,1970.13,497.05,2689.34,...,2124.88,107.85,856.69,12.25,0.00,0.00,7113.85,7101.60,57979.36,2130.48
9134,mma-44fca14ad9034f288693c540cfccf57b,0.00,MS,Subscriber,Female,51,Generation X,114170.77,135232.80,116329.85,...,-1386.86,-1.21,-2306.35,182.42,115.20,1693.60,124.05,-58.37,-32.00,140.33
9135,mma-f8a65f0a0ef6493db2e71b86a97c90d6,0.00,MS,Spouse,Male,56,Generation X,480.36,724.81,667.65,...,-159.10,-33.12,-53.45,212.55,35.54,10597.61,26.40,-186.15,-87.58,1000.36


In [62]:
member1 = member_mrg.drop(columns=['MEMBER_STATUS'])
member1

Unnamed: 0,MEMBER_ID,HOME_STATE,MEMBER_RELATIONSHIP,GENDER,Age,GENERATIONS,Med_2021,Med_2022,Med_2023,Med_2024,Med_Delta,Med_%_Change,Med_Slope,Rx_2021,Rx_2022,Rx_2023,Rx_2024,Rx_Delta,Rx_%_Change,Rx_Slope
0,mma-51b01a9d804c402d9ec98ba8903a744e,PA,Subscriber,Female,67,Baby Boomers,1086.91,8919.32,32404.09,3183.46,2096.55,192.89,2977.44,411.86,431.62,3040.35,11615.28,11203.42,2720.17,3621.90
1,mma-49c9cfb0577c496d820130fb1600dcf3,IN,Subscriber,Female,47,Generation X,205.99,0.00,274.52,563038.95,562832.96,273234.14,168877.34,1196.13,0.00,7517.25,0.00,-1196.13,-100.00,392.88
2,mma-b027fbc0d58348338e0a12b5520743a1,NC,Subscriber,Female,48,Generation X,743.06,1762.83,1853.81,471.32,-271.74,-36.57,-72.42,0.00,2192.41,7835.17,12179.12,12179.12,0.00,4218.01
3,mma-5602b7f2d1244ea4b2c013d527667e08,NC,Subscriber,Female,41,Millenials,8122.82,527.76,570.25,499.34,-7623.48,-93.85,-2282.80,7135.10,590.54,675.16,8406.11,1271.01,17.81,389.76
4,mma-507116f346844b3fbc40d88fd77cba19,NC,Dependent,Female,23,Generation Z,1652.27,2008.96,591.23,362.87,-1289.40,-78.04,-528.59,501.51,0.00,2767.49,2973.41,2471.90,492.89,1018.32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9132,mma-cc398ee771f14529a7f334661bb803b3,TX,Subscriber,Female,32,Millenials,4679.28,13830.34,6909.84,3630.05,-1049.23,-22.42,-1006.82,283.16,266.93,386.63,6042.19,5759.03,2033.87,1739.68
9133,mma-bca36e20b51743e287a674303dd59efb,PA,Subscriber,Female,47,Generation X,1970.13,497.05,2689.34,4095.01,2124.88,107.85,856.69,12.25,0.00,0.00,7113.85,7101.60,57979.36,2130.48
9134,mma-44fca14ad9034f288693c540cfccf57b,MS,Subscriber,Female,51,Generation X,114170.77,135232.80,116329.85,112783.91,-1386.86,-1.21,-2306.35,182.42,115.20,1693.60,124.05,-58.37,-32.00,140.33
9135,mma-f8a65f0a0ef6493db2e71b86a97c90d6,MS,Spouse,Male,56,Generation X,480.36,724.81,667.65,321.26,-159.10,-33.12,-53.45,212.55,35.54,10597.61,26.40,-186.15,-87.58,1000.36


In [63]:
member1.HOME_STATE.value_counts()

HOME_STATE
NC    2021
TX    1456
PA    1443
NJ     456
VA     352
OH     281
MS     260
MI     253
GA     240
NY     222
FL     217
SC     201
MN     200
CA     195
IL     144
KY     142
AZ     114
MO     111
IN      92
TN      82
MA      71
DE      67
WI      60
NH      50
CO      46
AR      42
AL      40
OK      35
KS      29
MD      27
LA      25
CT      25
WA      23
NV      20
NE      14
IA      12
ND      10
ME       9
NM       7
WV       7
RI       5
AK       5
SD       4
UT       4
ID       4
DC       3
OR       3
MT       2
VT       2
WY       2
PR       1
HI       1
Name: count, dtype: int64

In [64]:
# Define the states to keep
states_to_keep = ['NC', 'PA', 'TX', 'NJ', 'MS', 'OH', 'MI']
member1['HOME_STATE'] = member1['HOME_STATE'].apply(lambda x: x if x in states_to_keep else 'OTHER')
member1

Unnamed: 0,MEMBER_ID,HOME_STATE,MEMBER_RELATIONSHIP,GENDER,Age,GENERATIONS,Med_2021,Med_2022,Med_2023,Med_2024,Med_Delta,Med_%_Change,Med_Slope,Rx_2021,Rx_2022,Rx_2023,Rx_2024,Rx_Delta,Rx_%_Change,Rx_Slope
0,mma-51b01a9d804c402d9ec98ba8903a744e,PA,Subscriber,Female,67,Baby Boomers,1086.91,8919.32,32404.09,3183.46,2096.55,192.89,2977.44,411.86,431.62,3040.35,11615.28,11203.42,2720.17,3621.90
1,mma-49c9cfb0577c496d820130fb1600dcf3,OTHER,Subscriber,Female,47,Generation X,205.99,0.00,274.52,563038.95,562832.96,273234.14,168877.34,1196.13,0.00,7517.25,0.00,-1196.13,-100.00,392.88
2,mma-b027fbc0d58348338e0a12b5520743a1,NC,Subscriber,Female,48,Generation X,743.06,1762.83,1853.81,471.32,-271.74,-36.57,-72.42,0.00,2192.41,7835.17,12179.12,12179.12,0.00,4218.01
3,mma-5602b7f2d1244ea4b2c013d527667e08,NC,Subscriber,Female,41,Millenials,8122.82,527.76,570.25,499.34,-7623.48,-93.85,-2282.80,7135.10,590.54,675.16,8406.11,1271.01,17.81,389.76
4,mma-507116f346844b3fbc40d88fd77cba19,NC,Dependent,Female,23,Generation Z,1652.27,2008.96,591.23,362.87,-1289.40,-78.04,-528.59,501.51,0.00,2767.49,2973.41,2471.90,492.89,1018.32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9132,mma-cc398ee771f14529a7f334661bb803b3,TX,Subscriber,Female,32,Millenials,4679.28,13830.34,6909.84,3630.05,-1049.23,-22.42,-1006.82,283.16,266.93,386.63,6042.19,5759.03,2033.87,1739.68
9133,mma-bca36e20b51743e287a674303dd59efb,PA,Subscriber,Female,47,Generation X,1970.13,497.05,2689.34,4095.01,2124.88,107.85,856.69,12.25,0.00,0.00,7113.85,7101.60,57979.36,2130.48
9134,mma-44fca14ad9034f288693c540cfccf57b,MS,Subscriber,Female,51,Generation X,114170.77,135232.80,116329.85,112783.91,-1386.86,-1.21,-2306.35,182.42,115.20,1693.60,124.05,-58.37,-32.00,140.33
9135,mma-f8a65f0a0ef6493db2e71b86a97c90d6,MS,Spouse,Male,56,Generation X,480.36,724.81,667.65,321.26,-159.10,-33.12,-53.45,212.55,35.54,10597.61,26.40,-186.15,-87.58,1000.36


In [65]:
med.columns

Index(['MEDICAL_CLAIM_ID', 'MEMBER_ID', 'PAID_DATE', 'PAID_YEAR',
       'SERVICE_DATE', 'SERVICE_YEAR', 'MEDICAL_PAID_AMOUNT',
       'IS_TELEMEDICINE', 'IS_ER_AVOIDABLE', 'PRIMARY_DX', 'PROCEDURE_DESC',
       'ARTTOS_V2_L1', 'ARTTOS_V2_L3', 'DX_IS_CHRONIC', 'ICD10_CHAPTER',
       'ICD10_CATEGORY', 'ICD10_SECTION', 'MEG_EPISODE_DESCRIPTION',
       'OP_SURG_INC', 'IS_PCP_VISIT', 'INCLUDED_SPECIALIST', 'ER_VISIT_FLAG',
       'IS_URGENT_CARE_VISIT', 'IS_PREVENTIVE_VISIT', 'IP_ADMIT_INC'],
      dtype='object')

In [66]:
med_util = med.groupby('MEMBER_ID').agg({
    'DX_IS_CHRONIC': 'max',
    'ER_VISIT_FLAG': 'sum',
    'IS_PCP_VISIT': 'sum',
    'IS_URGENT_CARE_VISIT': 'sum',
    'IS_PREVENTIVE_VISIT': 'sum',
    'IP_ADMIT_INC': 'sum'
}).reset_index()
med_util

Unnamed: 0,MEMBER_ID,DX_IS_CHRONIC,ER_VISIT_FLAG,IS_PCP_VISIT,IS_URGENT_CARE_VISIT,IS_PREVENTIVE_VISIT,IP_ADMIT_INC
0,mma-0000ac7529034a358d619eeb9610b3d9,1.00,0,6,0,3,0
1,mma-000be31d0c0246f188ffb9af76efb618,1.00,0,11,0,6,0
2,mma-000e6577cfab4be2a0f616eec00f4b8c,1.00,0,16,1,1,0
3,mma-001eb7a23bfc4e849ee06c18369ccae9,1.00,0,19,0,5,0
4,mma-001ec84c24864164b3a6ed7a5d2337fd,1.00,0,6,1,2,0
...,...,...,...,...,...,...,...
9677,mma-ffdd6f70ec77478a94ecca9c48e37839,1.00,0,18,0,5,20
9678,mma-ffeda52085934fe2928488be93d4ef20,1.00,3,13,0,0,0
9679,mma-ffeeeae5bc51483aa0575c9bf3dd66bc,1.00,0,0,0,1,0
9680,mma-fff0404bfc224db0837ae05a5c84e564,1.00,0,1,1,4,0


In [67]:
episodes = med.ICD10_CHAPTER.value_counts()
episodes.head(10)

ICD10_CHAPTER
Factors influencing health status and contact with health services                         459582
Diseases of the musculoskeletal system and connective tissue                               375828
Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified    268959
Endocrine, nutritional and metabolic diseases                                              188730
Diseases of the genitourinary system                                                       120561
Mental, Behavioral and Neurodevelopmental disorders                                        114796
Diseases of the circulatory system                                                         111459
Diseases of the nervous system                                                              95022
Diseases of the digestive system                                                            88262
Diseases of the respiratory system                                                          74223
Name: 

In [68]:
episodes_cost = med.groupby('ICD10_CHAPTER')['MEDICAL_PAID_AMOUNT'].sum().reset_index()
episodes_cost.sort_values(by='MEDICAL_PAID_AMOUNT', ascending=False)

Unnamed: 0,ICD10_CHAPTER,MEDICAL_PAID_AMOUNT
10,Diseases of the musculoskeletal system and con...,48874395.8
16,Factors influencing health status and contact ...,44591973.21
21,"Symptoms, signs and abnormal clinical and labo...",26710135.64
19,Neoplasms,23481553.77
6,Diseases of the digestive system,23349409.05
5,Diseases of the circulatory system,23047278.35
9,Diseases of the genitourinary system,19028397.52
11,Diseases of the nervous system,16712217.14
14,"Endocrine, nutritional and metabolic diseases",16637001.83
17,"Injury, poisoning and certain other consequenc...",14846598.06


In [69]:
episodes_to_keep = ['Diseases of the musculoskeletal system and connective tissue', 'Neoplasms', 'Diseases of the digestive system', 
                    'Diseases of the circulatory system', 'Diseases of the circulatory system', 'Diseases of the genitourinary system',
                   'Diseases of the nervous system', 'Mental, Behavioral and Neurodevelopmental disorders']
med1 = med
med1['ICD10_CHAPTER'] = med['ICD10_CHAPTER'].apply(lambda x: x if x in episodes_to_keep else 'OTHER')

In [70]:
med_meg = pd.pivot_table(
    med1,
    values='MEDICAL_PAID_AMOUNT',
    index='MEMBER_ID',
    columns='ICD10_CHAPTER',
    aggfunc='sum',
    fill_value=0
)
med_meg.reset_index()
med_meg.columns.name = None
med_meg.drop(columns='OTHER', inplace=True)
med_meg

Unnamed: 0_level_0,Diseases of the circulatory system,Diseases of the digestive system,Diseases of the genitourinary system,Diseases of the musculoskeletal system and connective tissue,Diseases of the nervous system,"Mental, Behavioral and Neurodevelopmental disorders",Neoplasms
MEMBER_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
mma-0000ac7529034a358d619eeb9610b3d9,994.02,0.00,0.00,0.00,0.00,0.00,278.64
mma-000be31d0c0246f188ffb9af76efb618,2661.53,0.00,0.00,0.00,114.60,165.32,89.91
mma-000e6577cfab4be2a0f616eec00f4b8c,0.00,0.00,67.00,336.19,330.42,305.93,0.00
mma-001eb7a23bfc4e849ee06c18369ccae9,1350.03,0.00,0.00,6183.41,1943.50,0.00,0.00
mma-001ec84c24864164b3a6ed7a5d2337fd,369.03,0.00,114.03,0.00,4.33,0.00,0.00
...,...,...,...,...,...,...,...
mma-ffdd6f70ec77478a94ecca9c48e37839,0.00,171.35,4697.15,4051.02,493.56,2457.19,14.37
mma-ffeda52085934fe2928488be93d4ef20,299.20,69.17,0.00,165.09,141.21,0.00,0.00
mma-ffeeeae5bc51483aa0575c9bf3dd66bc,0.00,0.00,0.00,525.55,5365.06,0.00,0.00
mma-fff0404bfc224db0837ae05a5c84e564,0.00,0.00,0.00,0.00,0.00,0.00,0.00


In [71]:
med_var = member1.merge(med_util, on='MEMBER_ID')
med_var = med_var.merge(med_meg, on='MEMBER_ID')
med_var.columns= med_var.columns.str.replace(' ', '_')
med_var

Unnamed: 0,MEMBER_ID,HOME_STATE,MEMBER_RELATIONSHIP,GENDER,Age,GENERATIONS,Med_2021,Med_2022,Med_2023,Med_2024,...,IS_URGENT_CARE_VISIT,IS_PREVENTIVE_VISIT,IP_ADMIT_INC,Diseases_of_the_circulatory_system,Diseases_of_the_digestive_system,Diseases_of_the_genitourinary_system,Diseases_of_the_musculoskeletal_system_and_connective_tissue,Diseases_of_the_nervous_system,"Mental,_Behavioral_and_Neurodevelopmental_disorders",Neoplasms
0,mma-51b01a9d804c402d9ec98ba8903a744e,PA,Subscriber,Female,67,Baby Boomers,1086.91,8919.32,32404.09,3183.46,...,4,1,0,504.67,68.31,149.38,31309.68,113.74,0.00,0.00
1,mma-49c9cfb0577c496d820130fb1600dcf3,OTHER,Subscriber,Female,47,Generation X,205.99,0.00,274.52,563038.95,...,0,0,26,550848.22,8.73,0.00,362.03,203.34,2186.19,117.56
2,mma-b027fbc0d58348338e0a12b5520743a1,NC,Subscriber,Female,48,Generation X,743.06,1762.83,1853.81,471.32,...,2,1,0,0.00,0.00,0.00,0.00,0.00,0.00,0.00
3,mma-5602b7f2d1244ea4b2c013d527667e08,NC,Subscriber,Female,41,Millenials,8122.82,527.76,570.25,499.34,...,1,4,0,0.00,0.00,5506.05,-90.23,0.00,201.32,0.00
4,mma-507116f346844b3fbc40d88fd77cba19,NC,Dependent,Female,23,Generation Z,1652.27,2008.96,591.23,362.87,...,2,0,0,0.00,0.00,0.00,0.00,0.00,319.83,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9132,mma-cc398ee771f14529a7f334661bb803b3,TX,Subscriber,Female,32,Millenials,4679.28,13830.34,6909.84,3630.05,...,0,6,0,145.50,0.00,911.74,486.69,1111.54,0.00,2021.94
9133,mma-bca36e20b51743e287a674303dd59efb,PA,Subscriber,Female,47,Generation X,1970.13,497.05,2689.34,4095.01,...,6,3,0,0.00,2751.45,0.00,219.64,0.00,383.35,0.00
9134,mma-44fca14ad9034f288693c540cfccf57b,MS,Subscriber,Female,51,Generation X,114170.77,135232.80,116329.85,112783.91,...,4,4,0,4075.28,0.00,519.38,253.67,431884.15,70.73,0.00
9135,mma-f8a65f0a0ef6493db2e71b86a97c90d6,MS,Spouse,Male,56,Generation X,480.36,724.81,667.65,321.26,...,0,5,0,377.11,0.00,0.00,0.00,0.00,0.00,0.00


In [75]:
mbr = members_65['MEMBER_ID'].unique()
members65_df = med_var[med_var['MEMBER_ID'].isin(mbr)]
members65_df

Unnamed: 0,MEMBER_ID,HOME_STATE,MEMBER_RELATIONSHIP,GENDER,Age,GENERATIONS,Med_2021,Med_2022,Med_2023,Med_2024,...,IS_URGENT_CARE_VISIT,IS_PREVENTIVE_VISIT,IP_ADMIT_INC,Diseases_of_the_circulatory_system,Diseases_of_the_digestive_system,Diseases_of_the_genitourinary_system,Diseases_of_the_musculoskeletal_system_and_connective_tissue,Diseases_of_the_nervous_system,"Mental,_Behavioral_and_Neurodevelopmental_disorders",Neoplasms
2,mma-b027fbc0d58348338e0a12b5520743a1,NC,Subscriber,Female,48,Generation X,743.06,1762.83,1853.81,471.32,...,2,1,0,0.00,0.00,0.00,0.00,0.00,0.00,0.00
20,mma-bfb67842a597473e947b613fac97ad55,PA,Subscriber,Female,54,Generation X,743.35,1664.68,2031.73,757.67,...,0,2,0,0.00,0.00,45.80,0.00,767.07,0.00,0.00
51,mma-1a161f1e3c50438c9922817772f5ef00,OTHER,Subscriber,Female,39,Millenials,505.03,4521.08,624.54,4908.83,...,0,5,0,129.43,0.00,3199.01,0.00,0.00,26.00,30.32
134,mma-d54971b76cc24da98e91aa4c6dc988f2,NC,Subscriber,Female,50,Generation X,813.82,2434.57,442.10,1935.36,...,2,4,0,0.00,0.00,131.77,912.09,0.00,0.00,0.00
175,mma-89dd2150959c48f2a8b272b1ea570a03,PA,Subscriber,Female,47,Generation X,16542.53,2606.70,2676.18,4336.96,...,0,7,0,0.00,1739.82,4694.96,107.00,0.00,0.00,175.64
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8990,mma-129485d79a4348c6803a6cd4575becc5,NC,Subscriber,Female,45,Generation X,6768.60,10115.82,19119.30,10681.36,...,0,5,0,461.15,0.00,1903.78,162.55,0.00,0.00,21290.32
8991,mma-3889d08c09eb4436b6550eeaeba10110,NC,Subscriber,Female,39,Millenials,3208.29,7543.59,1757.00,8378.28,...,0,10,0,1976.36,0.00,12459.03,15.63,0.00,0.00,2326.88
9006,mma-26ab442e19584de59574806d5f5f58e3,NC,Subscriber,Female,51,Generation X,5813.01,2670.26,1443.83,10602.83,...,3,0,0,0.00,213.93,9223.50,0.00,0.00,113.72,0.00
9039,mma-5bec529f37d0492f865267a9bb9da698,NC,Subscriber,Female,58,Generation X,73498.42,6870.35,10776.05,14343.07,...,2,3,0,817.28,267.19,118.32,59699.12,8403.11,204.49,5445.65


In [73]:
med_var.columns

Index(['MEMBER_ID', 'HOME_STATE', 'MEMBER_RELATIONSHIP', 'GENDER', 'Age',
       'GENERATIONS', 'Med_2021', 'Med_2022', 'Med_2023', 'Med_2024',
       'Med_Delta', 'Med_%_Change', 'Med_Slope', 'Rx_2021', 'Rx_2022',
       'Rx_2023', 'Rx_2024', 'Rx_Delta', 'Rx_%_Change', 'Rx_Slope',
       'DX_IS_CHRONIC', 'ER_VISIT_FLAG', 'IS_PCP_VISIT',
       'IS_URGENT_CARE_VISIT', 'IS_PREVENTIVE_VISIT', 'IP_ADMIT_INC',
       'Diseases_of_the_circulatory_system',
       'Diseases_of_the_digestive_system',
       'Diseases_of_the_genitourinary_system',
       'Diseases_of_the_musculoskeletal_system_and_connective_tissue',
       'Diseases_of_the_nervous_system',
       'Mental,_Behavioral_and_Neurodevelopmental_disorders', 'Neoplasms'],
      dtype='object')

In [76]:
members65_df.describe()

Unnamed: 0,Age,Med_2021,Med_2022,Med_2023,Med_2024,Med_Delta,Med_%_Change,Med_Slope,Rx_2021,Rx_2022,...,IS_URGENT_CARE_VISIT,IS_PREVENTIVE_VISIT,IP_ADMIT_INC,Diseases_of_the_circulatory_system,Diseases_of_the_digestive_system,Diseases_of_the_genitourinary_system,Diseases_of_the_musculoskeletal_system_and_connective_tissue,Diseases_of_the_nervous_system,"Mental,_Behavioral_and_Neurodevelopmental_disorders",Neoplasms
count,261.0,261.0,261.0,261.0,261.0,261.0,261.0,261.0,261.0,261.0,...,261.0,261.0,261.0,261.0,261.0,261.0,261.0,261.0,261.0,261.0
mean,49.66,13092.26,9865.85,8478.42,7739.75,-5433.63,248.57,-1744.5,3212.59,12567.84,...,1.8,4.44,3.19,1909.75,3752.68,2123.38,6803.59,2377.36,1010.08,1546.18
std,9.61,33416.19,20634.9,16532.75,11420.09,32086.99,1134.58,9973.44,14499.12,11453.16,...,2.68,3.89,9.59,9609.58,14545.97,5289.85,24097.29,17961.73,3139.31,5244.54
min,19.0,-1946.15,-656.09,0.0,23.98,-351945.16,-131.05,-106061.39,-35797.51,71.93,...,0.0,0.0,0.0,-1252.66,0.0,-19.68,-242.78,0.0,-190.0,0.0
25%,44.0,1021.08,1532.18,1258.45,1239.44,-3562.55,-63.26,-1670.41,106.37,4534.19,...,0.0,2.0,0.0,0.0,0.0,0.0,10.34,0.0,0.0,0.0
50%,50.0,2745.56,3281.5,3428.02,3056.58,25.01,0.71,-100.03,513.32,11317.63,...,1.0,4.0,0.0,94.52,50.52,189.43,524.59,0.0,0.0,0.0
75%,56.0,8219.49,9135.91,7931.98,8999.74,2368.67,234.71,1075.11,1438.93,16840.08,...,2.0,5.0,0.0,594.78,1416.0,1038.23,2777.23,446.94,460.21,280.03
max,77.0,357120.96,222048.77,186327.16,67999.32,53325.85,15919.73,18775.04,201096.74,82843.09,...,17.0,32.0,65.0,99842.85,199730.5,35391.03,281741.57,273354.75,31947.72,39219.05


In [77]:
costs = members65_df[['MEMBER_ID', 'HOME_STATE', 'MEMBER_RELATIONSHIP', 'GENDER', 'Age',
       'GENERATIONS', 'Med_2021', 'Med_2022', 'Med_2023', 'Med_2024',
       'Med_Delta', 'Med_%_Change', 'Med_Slope', 'Rx_2021', 'Rx_2022',
       'Rx_2023', 'Rx_2024', 'Rx_Delta', 'Rx_%_Change', 'Rx_Slope']]
costs

Unnamed: 0,MEMBER_ID,HOME_STATE,MEMBER_RELATIONSHIP,GENDER,Age,GENERATIONS,Med_2021,Med_2022,Med_2023,Med_2024,Med_Delta,Med_%_Change,Med_Slope,Rx_2021,Rx_2022,Rx_2023,Rx_2024,Rx_Delta,Rx_%_Change,Rx_Slope
2,mma-b027fbc0d58348338e0a12b5520743a1,NC,Subscriber,Female,48,Generation X,743.06,1762.83,1853.81,471.32,-271.74,-36.57,-72.42,0.00,2192.41,7835.17,12179.12,12179.12,0.00,4218.01
20,mma-bfb67842a597473e947b613fac97ad55,PA,Subscriber,Female,54,Generation X,743.35,1664.68,2031.73,757.67,14.32,1.93,41.00,291.99,1624.19,16636.66,15725.32,15433.33,5285.64,6131.25
51,mma-1a161f1e3c50438c9922817772f5ef00,OTHER,Subscriber,Female,39,Millenials,505.03,4521.08,624.54,4908.83,4403.80,871.99,931.49,294.29,10788.01,16017.81,17538.58,17244.29,5859.59,5696.27
134,mma-d54971b76cc24da98e91aa4c6dc988f2,NC,Subscriber,Female,50,Generation X,813.82,2434.57,442.10,1935.36,1121.54,137.81,137.22,358.46,9233.84,15513.14,11084.60,10726.14,2992.32,3845.77
175,mma-89dd2150959c48f2a8b272b1ea570a03,PA,Subscriber,Female,47,Generation X,16542.53,2606.70,2676.18,4336.96,-12205.57,-73.78,-3654.72,895.36,1807.68,15280.25,14833.82,13938.46,1556.75,5528.79
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8990,mma-129485d79a4348c6803a6cd4575becc5,NC,Subscriber,Female,45,Generation X,6768.60,10115.82,19119.30,10681.36,3912.76,57.81,2074.18,552.56,15487.79,19657.75,15142.14,14589.58,2640.37,4793.87
8991,mma-3889d08c09eb4436b6550eeaeba10110,NC,Subscriber,Female,39,Millenials,3208.29,7543.59,1757.00,8378.28,5169.99,161.14,972.34,398.78,1288.19,17164.08,17891.42,17492.64,4386.57,6835.38
9006,mma-26ab442e19584de59574806d5f5f58e3,NC,Subscriber,Female,51,Generation X,5813.01,2670.26,1443.83,10602.83,4789.82,82.40,1314.30,73.56,9052.21,21631.61,14936.48,14862.92,20206.13,5716.82
9039,mma-5bec529f37d0492f865267a9bb9da698,NC,Subscriber,Female,58,Generation X,73498.42,6870.35,10776.05,14343.07,-59155.35,-80.49,-17356.04,5843.24,11317.63,16117.43,17726.38,11883.14,203.37,4044.92


In [78]:
costs.describe()

Unnamed: 0,Age,Med_2021,Med_2022,Med_2023,Med_2024,Med_Delta,Med_%_Change,Med_Slope,Rx_2021,Rx_2022,Rx_2023,Rx_2024,Rx_Delta,Rx_%_Change,Rx_Slope
count,261.0,261.0,261.0,261.0,261.0,261.0,261.0,261.0,261.0,261.0,261.0,261.0,261.0,261.0,261.0
mean,49.66,13092.26,9865.85,8478.42,7739.75,-5433.63,248.57,-1744.5,3212.59,12567.84,19594.84,17276.08,14063.49,16328.17,4921.75
std,9.61,33416.19,20634.9,16532.75,11420.09,32086.99,1134.58,9973.44,14499.12,11453.16,14909.61,17269.03,21182.46,111045.54,6768.68
min,19.0,-1946.15,-656.09,0.0,23.98,-351945.16,-131.05,-106061.39,-35797.51,71.93,736.65,4.57,-178299.6,-146.54,-48086.39
25%,44.0,1021.08,1532.18,1258.45,1239.44,-3562.55,-63.26,-1670.41,106.37,4534.19,14387.39,12248.53,10573.23,238.29,3597.48
50%,50.0,2745.56,3281.5,3428.02,3056.58,25.01,0.71,-100.03,513.32,11317.63,17164.08,14919.01,13878.96,1849.07,4768.18
75%,56.0,8219.49,9135.91,7931.98,8999.74,2368.67,234.71,1075.11,1438.93,16840.08,19700.76,17215.45,15892.21,5772.58,5729.26
max,77.0,357120.96,222048.77,186327.16,67999.32,53325.85,15919.73,18775.04,201096.74,82843.09,148780.24,206102.47,203204.41,1732629.77,70188.75


average med+rx net per member, normalized for 2024 = $11,834-1,554 = $10,280. Since 2021, employers have spent $10,280 average per member on those that have prescriptions for Wegovy, Saxenda or Zepbound for 3 years

In [79]:
members65_df.to_csv('glp1_members_pdc65.csv', index=False)

In [41]:
drugs = rx.groupby(['ART_DRUGGRPS_L1', 'ART_DRUGGRPS_L2', 'DRUG_NAME_PREFERRED'])['RX_PAID_AMOUNT'].sum().reset_index()
drugs.sort_values(by='RX_PAID_AMOUNT', ascending=False, inplace=True)
drugs

Unnamed: 0,ART_DRUGGRPS_L1,ART_DRUGGRPS_L2,DRUG_NAME_PREFERRED,RX_PAID_AMOUNT
619,CNS Agents,Weight Loss Agents,Wegovy,69992636.68
621,CNS Agents,Weight Loss Agents,Zepbound,19217585.89
202,CNS Agents,Anti-Rheumatics/Immune Modulators,Humira (2 Pen),10644836.01
618,CNS Agents,Weight Loss Agents,Saxenda,8197240.42
2346,Topical Agents,Psoriasis Agents,Stelara,5033676.44
...,...,...,...,...
55,Antibiotics,Fluoroquinolone,Ofloxacin,-195.22
365,CNS Agents,Migraine Agents - Ergotamines,Dihydroergotamine Mesylate,-2099.64
438,CNS Agents,NSAID Analgesics,Ketoprofen,-5256.80
1098,Gastrointestinal,Proton Pum Inhibitors,Omeprazole-Sodium Bicarbonate,-5403.49


In [42]:
med.to_csv('GLP-1_Medical_PDC.csv', index=False)

In [82]:
med

Unnamed: 0,MEDICAL_CLAIM_ID,MEMBER_ID,PAID_DATE,PAID_YEAR,SERVICE_DATE,SERVICE_YEAR,MEDICAL_PAID_AMOUNT,IS_TELEMEDICINE,IS_ER_AVOIDABLE,PRIMARY_DX,...,ICD10_CATEGORY,ICD10_SECTION,MEG_EPISODE_DESCRIPTION,OP_SURG_INC,IS_PCP_VISIT,INCLUDED_SPECIALIST,ER_VISIT_FLAG,IS_URGENT_CARE_VISIT,IS_PREVENTIVE_VISIT,IP_ADMIT_INC
0,8417530.00,mma-9c9661e5ace04b78a553426df5113b0b,2021-01-01,2021,2020-12-30,2020,29.45,0.00,0.00,Encounter for immunization,...,Persons with potential health hazards related ...,Encounter for immunization,Encounter for Preventive Health Services,0,0,0,0,0,0,0
1,3596847.00,mma-b11a672d93e644908436e7ade7e7f413,2021-01-01,2021,2020-05-22,2020,-95.53,0.00,0.00,Unspecified dyspareunia,...,Noninflammatory disorders of female genital tract,Pain and other conditions associated with fema...,Other Disorders of Female Genital System,0,0,1,0,0,0,0
2,8018579.00,mma-f1d6d6479e6947329810b7135af714d8,2021-01-01,2021,2020-11-29,2020,0.00,0.00,0.00,"Myalgia, unspecified site",...,Other soft tissue disorders,"Other and unspecified soft tissue disorders, n...","Other Arthropathies, Bone and Joint Disorders",0,0,0,0,0,0,0
3,1096954.00,mma-b539a11885624b10ade320f93f3a2ed8,2021-01-01,2021,2020-08-31,2020,6.18,1.00,0.00,"Tachycardia, unspecified",...,Symptoms and signs involving the circulatory a...,Abnormalities of heart beat,Other Cardiovascular Symptoms,0,1,0,0,0,0,0
4,1096954.00,mma-b539a11885624b10ade320f93f3a2ed8,2021-01-01,2021,2020-08-31,2020,-6.18,1.00,0.00,"Tachycardia, unspecified",...,Symptoms and signs involving the circulatory a...,Abnormalities of heart beat,Other Cardiovascular Symptoms,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2233165,5756534.00,mma-b1100d5941c94c668851ce50fd99ca78,2024-12-31,2024,2024-12-19,2024,3.00,0.00,1.00,Essential (primary) hypertension,...,Hypertensive diseases,Essential (primary) hypertension,"Essential Hypertension, Chronic Maintenance",0,0,0,0,0,0,0
2233166,3577909.00,mma-dc25bc8870ba46168a83917047fed40d,2024-12-31,2024,2024-11-12,2024,0.00,0.00,1.00,Encounter for other preprocedural examination,...,Persons encountering health services for exami...,Encounter for other special examination withou...,Bursitis,0,0,0,0,0,0,0
2233167,5926044.00,mma-1ae47808202142bfbc0b84cc39ff881c,2024-12-31,2024,2024-12-23,2024,3.22,0.00,0.00,Mixed incontinence,...,Other diseases of the urinary system,Other disorders of urinary system,Other Urinary Symptoms,0,0,0,0,0,0,0
2233168,957354.00,mma-c0f587cf169d49a3863fd74bf829d525,2024-12-31,2024,2024-12-03,2024,359.88,1.00,0.00,"Attention-deficit hyperactivity disorder, unsp...",...,Behavioral and emotional disorders with onset ...,Attention-deficit hyperactivity disorders,Depression,0,0,0,0,0,0,0


In [84]:
glp_pdc65_members = member[member['MEMBER_ID'].isin(mbr)]
glp_pdc65_med = med[med['MEMBER_ID'].isin(mbr)]
glp_pdc65_rx = rx[rx['MEMBER_ID'].isin(mbr)]

In [85]:
glp_pdc65_members

Unnamed: 0,MEMBER_ID,MEMBER_STATUS,HOME_STATE,MEMBER_RELATIONSHIP,GENDER,Age,GENERATIONS
2,mma-b027fbc0d58348338e0a12b5520743a1,0.00,NC,Subscriber,Female,48,Generation X
21,mma-bfb67842a597473e947b613fac97ad55,0.00,PA,Subscriber,Female,54,Generation X
56,mma-1a161f1e3c50438c9922817772f5ef00,0.00,KY,Subscriber,Female,39,Millenials
142,mma-d54971b76cc24da98e91aa4c6dc988f2,0.00,NC,Subscriber,Female,50,Generation X
186,mma-89dd2150959c48f2a8b272b1ea570a03,0.00,PA,Subscriber,Female,47,Generation X
...,...,...,...,...,...,...,...
9532,mma-129485d79a4348c6803a6cd4575becc5,0.00,NC,Subscriber,Female,45,Generation X
9533,mma-3889d08c09eb4436b6550eeaeba10110,0.00,NC,Subscriber,Female,39,Millenials
9549,mma-26ab442e19584de59574806d5f5f58e3,0.00,NC,Subscriber,Female,51,Generation X
9584,mma-5bec529f37d0492f865267a9bb9da698,0.00,NC,Subscriber,Female,58,Generation X


In [86]:
import openpyxl

In [88]:
glp_pdc65_rx.to_excel('glp_pdc65_rx.xlsx', index=False, engine='openpyxl')
glp_pdc65_med.to_excel('glp_pdc65_med.xlsx', index=False, engine='openpyxl')
glp_pdc65_members.to_excel('glp_pdc65_members.xlsx', index=False, engine='openpyxl')

In [89]:
glp_pdc65_rx.to_csv('glp_pdc_rx65.csv', index=False)
glp_pdc65_med.to_csv('glp_pdc_med65.csv', index=False)
glp_pdc65_members.to_csv('glp_pdc_members65.csv', index=False)