
    Reads polypharmacy side effects data from a CSV file into a pandas DataFrame
    
    Parameters:
    file_path (str): Path to the CSV file
    
    Returns:
    pandas.DataFrame: DataFrame with polypharmacy data


In [None]:
import pandas as pd


file_path = r"ChChSe-Decagon_polypharmacy.csv"

columns = ['STITCH_1', 'STITCH_2', 'Polypharmacy_Side_Effect', 'Side_Effect_Name']

pairs_df = pd.read_csv(file_path, names=columns)

pairs_df = pairs_df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

pairs_df.head(10)

Unnamed: 0,STITCH_1,STITCH_2,Polypharmacy_Side_Effect,Side_Effect_Name
0,CID000002173,CID000003345,C0151714,hypermagnesemia
1,CID000002173,CID000003345,C0035344,retinopathy of prematurity
2,CID000002173,CID000003345,C0004144,atelectasis
3,CID000002173,CID000003345,C0002063,alkalosis
4,CID000002173,CID000003345,C0004604,Back Ache
5,CID000002173,CID000003345,C0034063,lung edema
6,CID000002173,CID000003345,C0085631,agitated
7,CID000002173,CID000003345,C0013384,abnormal movements
8,CID000002173,CID000003345,C0001122,Acidosis
9,CID000002173,CID000003345,C0034150,peliosis


isolationg just the side-effects

In [18]:
side_effects_df = pairs_df[['Polypharmacy_Side_Effect', 'Side_Effect_Name']]
    
    # Save last 2 rows to a new CSV file

last_column = side_effects_df.columns[-1]

# Convert the last column to uppercase
side_effects_df[last_column] = side_effects_df[last_column].str.upper()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  side_effects_df[last_column] = side_effects_df[last_column].str.upper()


Unnamed: 0,Polypharmacy_Side_Effect,Side_Effect_Name
0,C0151714,HYPERMAGNESEMIA
1,C0035344,RETINOPATHY OF PREMATURITY
2,C0004144,ATELECTASIS
3,C0002063,ALKALOSIS
4,C0004604,BACK ACHE
5,C0034063,LUNG EDEMA
6,C0085631,AGITATED
7,C0013384,ABNORMAL MOVEMENTS
8,C0001122,ACIDOSIS
9,C0034150,PELIOSIS


pandas for the ranking of side-effects

In [24]:
se_ranking_df = pd.read_excel('jmir_v17i3e80_app4.xlsx')
se_ranking_df.to_csv('ranking.csv', index=False)

merging side-effects and rankings

In [None]:
merged_df = side_effects_df.merge(se_ranking_df, left_on="Side_Effect_Name", right_on="Name", how="left")

# Replace NaN values in 'Rank score' with "unknown"
merged_df["Rank score"] = merged_df["Rank score"].fillna("unknown")
merged_df.iloc[:, -1] = merged_df.iloc[:, -1].fillna("unknown")
merged_df.drop(columns=['Name'], inplace=True)

1          11.826728
2          19.910377
3           1.995541
4            unknown
             ...    
4649436      unknown
4649437     7.575071
4649438      unknown
4649439     8.170583
4649440      unknown
Name: Rank Stdev (% out 2929), Length: 4649441, dtype: object' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  merged_df.iloc[:, -1] = merged_df.iloc[:, -1].fillna("unknown")


Unnamed: 0,Polypharmacy_Side_Effect,Side_Effect_Name,Rank score,Rank Stdev (% out 2929)
0,C0151714,HYPERMAGNESEMIA,unknown,unknown
1,C0035344,RETINOPATHY OF PREMATURITY,0.489986,11.826728
2,C0004144,ATELECTASIS,0.693081,19.910377
3,C0002063,ALKALOSIS,0.505357,1.995541
4,C0004604,BACK ACHE,unknown,unknown
5,C0034063,LUNG EDEMA,unknown,unknown
6,C0085631,AGITATED,unknown,unknown
7,C0013384,ABNORMAL MOVEMENTS,unknown,unknown
8,C0001122,ACIDOSIS,0.43851,8.379133
9,C0034150,PELIOSIS,unknown,unknown


merging the dataframes for drgs pairs, side effects and rankinkgs

In [21]:
columns_to_add = pairs_df[["STITCH_1","STITCH_2"]]

final_merged_df = pd.concat([columns_to_add, merged_df ], axis=1)
final_merged_df.head(10)


Unnamed: 0,STITCH_1,STITCH_2,Polypharmacy_Side_Effect,Side_Effect_Name,Rank score,Rank Stdev (% out 2929)
0,CID000002173,CID000003345,C0151714,HYPERMAGNESEMIA,unknown,unknown
1,CID000002173,CID000003345,C0035344,RETINOPATHY OF PREMATURITY,0.489986,11.826728
2,CID000002173,CID000003345,C0004144,ATELECTASIS,0.693081,19.910377
3,CID000002173,CID000003345,C0002063,ALKALOSIS,0.505357,1.995541
4,CID000002173,CID000003345,C0004604,BACK ACHE,unknown,unknown
5,CID000002173,CID000003345,C0034063,LUNG EDEMA,unknown,unknown
6,CID000002173,CID000003345,C0085631,AGITATED,unknown,unknown
7,CID000002173,CID000003345,C0013384,ABNORMAL MOVEMENTS,unknown,unknown
8,CID000002173,CID000003345,C0001122,ACIDOSIS,0.43851,8.379133
9,CID000002173,CID000003345,C0034150,PELIOSIS,unknown,unknown


In [22]:
# Convert 'Rank score' to numeric, setting errors='coerce' to turn non-numeric values into NaN
final_merged_df["Rank score"] = pd.to_numeric(final_merged_df["Rank score"], errors="coerce")

# Sort by 'Rank score', keeping NaNs (strings) at the bottom
final_merged_df = final_merged_df.sort_values(by="Rank score", ascending=False, na_position="last")

final_merged_df["Rank score"] = final_merged_df["Rank score"].fillna("unknown")
final_merged_df.to_csv('se_risks.csv', index=False)

univoc side effects

In [23]:
univoc_merged_df = final_merged_df.drop_duplicates(subset="Side_Effect_Name")
univoc_merged_df.to_csv('se_risks_univoc.csv', index=False)

unknown risk table

In [None]:
df_filtered = univoc_merged_df[univoc_merged_df.iloc[:, -2] == "unknown"]
df_filtered.to_csv('unknown_risk.csv', index=False)
