# SCRIPT TO OBTAIN PHENOTYPIC QUANTITATIVE TRAITS FOR OPIOIDS (LOPERAMIDE, FENTANYL AND MORPHINE)

Quantitative traits:
- Number of drug eras 
- Total drug exposures
- Treatment duration
- Cumulative gap days
- Adherence Score: Obtained from Total drug exposures, treatment duration and cumulative gap days

Participants with no data for these fields are assigned NA.


## This script should only be run once

#### Initialization
##### Load packages

In [29]:
import pandas as pd

from pathlib import Path

##### Dataset configuration

In [30]:
# Load the OMOP drug summary CSV files (one per opioid; the file names suggest
# one row per eid) into pandas DataFrames.
# The paths are plain string literals: nothing is interpolated into them, so
# no f-string prefix is needed.
file_omop_loperamide = "/mnt/project/WGS_Lucia/Data/phenotypes/omop_loperamide_groupbyeid.csv"
file_omop_morphine = "/mnt/project/WGS_Lucia/Data/phenotypes/omop_morphine_groupbyeid.csv"
file_omop_fentanyl = "/mnt/project/WGS_Lucia/Data/phenotypes/omop_fentanyl_groupbyeid.csv"

morphine_df = pd.read_csv(file_omop_morphine)
fentanyl_df = pd.read_csv(file_omop_fentanyl)
loperamide_df = pd.read_csv(file_omop_loperamide)


##### DataFrame formatting

In [50]:
# Read the tab-separated opioid phenotype table that lists the participants.
# The path is a plain string literal (nothing is interpolated into it).
file_eids = "/mnt/project/WGS_Lucia/Data/phenotypes/phenotypes.opioids.tsv"
phenotypes_df = pd.read_csv(file_eids, sep="\t")
# Rename 'FID' to 'eid', the key column used when merging with the drug tables.
phenotypes_df = phenotypes_df.rename(columns={"FID": "eid"})
# Keep only the identifier column; .copy() yields an independent frame so
# later column assignments do not act on a slice of phenotypes_df.
participant_df = phenotypes_df[["eid"]].copy()

Now run the following cells once for each drug (the cells below use loperamide as the example).

In [51]:
# Bring the 'eid' key to the same integer dtype on BOTH sides of the merge so
# that identifiers are compared as integers (astype raises if an eid is
# missing or cannot be converted).
participant_df["eid"] = participant_df["eid"].astype(int)

# Key plus the quantitative-trait columns taken from the loperamide table;
# astype returns a new frame, so loperamide_df itself is left unmodified.
loperamide_traits = loperamide_df[
    [
        "eid",
        "num_drug_era_id",
        "total_exposure_counts",
        "total_duration",
        "cumulative_gap_days",
        "adherence_score_omop",
    ]
].astype({"eid": int})

# Left merge: every eid of participant_df is kept; participants without a row
# in the drug table get NaN in the trait columns.
# validate="many_to_one" makes pandas raise a MergeError if an eid occurs more
# than once in the drug table, instead of silently duplicating participant rows.
participant_df = participant_df.merge(
    loperamide_traits,
    on="eid",
    how="left",
    validate="many_to_one",
)


In [52]:
# Preview the merged trait columns; the participant identifier is left out of
# the displayed table.
display(participant_df.drop(columns="eid"))
# Number of non-missing entries in each column.
print(participant_df.count())

Unnamed: 0,num_drug_era_id,total_exposure_counts,total_duration,cumulative_gap_days,adherence_score_omop
0,,,,,
1,,,,,
2,,,,,
3,,,,,
4,,,,,
...,...,...,...,...,...
502231,,,,,
502232,,,,,
502233,,,,,
502234,,,,,


eid                      502236
num_drug_era_id           15660
total_exposure_counts     15660
total_duration            15660
cumulative_gap_days       15660
adherence_score_omop      15660
dtype: int64


In [53]:
# Blank the adherence score for participants with fewer than two exposures
# (presumably because the score is not meaningful for a single exposure).
# Rows whose exposure count is NaN compare as False and are left untouched.
single_exposure = participant_df["total_exposure_counts"].lt(2)
participant_df.loc[single_exposure, "adherence_score_omop"] = None

In [54]:
# Show the table again (identifier column omitted) after the adherence update.
display(participant_df.drop(columns="eid"))

Unnamed: 0,num_drug_era_id,total_exposure_counts,total_duration,cumulative_gap_days,adherence_score_omop
0,,,,,
1,,,,,
2,,,,,
3,,,,,
4,,,,,
...,...,...,...,...,...
502231,,,,,
502232,,,,,
502233,,,,,
502234,,,,,


In [55]:
# Keep only the rows that have a value in num_drug_era_id, i.e. drop every
# participant whose drug-era count is NaN.
filtered_df = participant_df.dropna(subset=["num_drug_era_id"])

# Show that subset without the identifier column.
print("Rows where num_drug_era_id is not NaN:")
display(filtered_df.drop(columns="eid"))

Rows where num_drug_era_id is not NaN:


Unnamed: 0,num_drug_era_id,total_exposure_counts,total_duration,cumulative_gap_days,adherence_score_omop
5,1.0,1.0,5.0,0.0,
25,4.0,4.0,2120.0,2004.0,-0.245247
50,1.0,1.0,5.0,0.0,
75,14.0,22.0,1142.0,834.0,0.898314
102,1.0,1.0,29.0,0.0,
...,...,...,...,...,...
502121,1.0,1.0,1.0,0.0,
502163,7.0,15.0,457.0,388.0,0.418332
502166,1.0,1.0,29.0,0.0,
502179,1.0,1.0,5.0,0.0,


In [56]:
# Move 'eid' out of the columns and into the index.
participant_df = participant_df.set_index("eid")

# Both identifier columns carry the participant id taken from the index.
# Each is inserted at position 0, IID first and FID second, so the final
# column order is FID, IID, followed by the trait columns.
participant_df.insert(0, "IID", participant_df.index)
participant_df.insert(0, "FID", participant_df.index)

##### Export and upload DataFrame

In [57]:
# Write the table as tab-separated text. The eid index is not written
# (the FID and IID columns already hold it) and missing values appear as "NA".
participant_df.to_csv(
    path_or_buf="loperamide.phenotype.QT.tsv",
    sep="\t",
    na_rep="NA",
    index=False,
)

In [58]:
# Upload the exported TSV to the /WGS_Lucia/Data/phenotypes/ folder using the
# dx command-line tool (IPython shell escape).
# NOTE(review): presumably a repeated upload adds a second file with the same
# name rather than overwriting the first - confirm before re-running this cell.
! dx upload loperamide.phenotype.QT.tsv --path /WGS_Lucia/Data/phenotypes/

ID                                file-GyKQg08Jb4JPfg06G7176595
Class                             file
Project                           project-GfVK998Jb4JJgVBjKXPyxJ9q
Folder                            /WGS_Lucia/Data/phenotypes
Name                              loperamide.phenotype.QT.tsv
State                             [33mclosing[0m
Visibility                        visible
Types                             -
Properties                        -
Tags                              -
Outgoing links                    -
Created                           Fri Jan 31 17:42:25 2025
Created by                        luciass6
 via the job                      job-GyKKQ4QJb4J5325Z97Xx4bX5
Last modified                     Fri Jan 31 17:42:26 2025
Media type                        
archivalState                     "live"
cloudAccount                      "cloudaccount-dnanexus"
