In [None]:
# Mount Google Drive at /content/drive (Google Colab only) so files stored
# on the Drive can be opened through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
!pip install pytrials

import requests
import json
from pytrials.client import ClinicalTrials
import os
import re
from requests.exceptions import ChunkedEncodingError, ConnectionError, Timeout
import time
from tqdm import tqdm

Collecting pytrials
  Downloading pytrials-1.0.0-py2.py3-none-any.whl (7.0 kB)
Installing collected packages: pytrials
Successfully installed pytrials-1.0.0


In [None]:
# Locations, on the mounted Drive, of the three input text files kept in the
# Data_MedEffect folder: reactions, per-report drug indications, and reports.
path_reactions = '/content/drive/My Drive/Data_MedEffect/reactions.txt'
path_report_drug_indication = '/content/drive/My Drive/Data_MedEffect/report_drug_indication.txt'
path_reports = '/content/drive/My Drive/Data_MedEffect/reports.txt'

In [None]:
def strip_quotes(x):
    """Remove leading and trailing double-quote characters from a string.

    Args:
        x: Any cell value.

    Returns:
        ``x`` without surrounding ``"`` characters when ``x`` is a ``str``;
        otherwise ``x`` itself, untouched.
    """
    if not isinstance(x, str):
        return x
    return x.strip('"')

In [None]:
# Load the "$"-delimited reactions file (no header row). Positions 3 and 4
# (the two DURATION_UNIT columns) are forced to text.
dtype_map = {3: str, 4: str}
df_reactions = pd.read_csv(path_reactions, delimiter="$", header=None, dtype=dtype_map)
df_reactions.columns = ["REACTION_ID", "REPORT_ID", "DURATION", "DURATION_UNIT_ENG", "DURATION_UNIT_FR", "PT_NAME_ENG", "PT_NAME_FR", "SOC_NAME_ENG", "SOC_NAME_FR", "MEDDRA_VERSION"]

# Discard the unused columns BEFORE the element-wise quote stripping: the
# result is the same, but strip_quotes then only visits the three columns
# that are kept instead of all ten.
df_reactions = df_reactions.drop(columns=["DURATION_UNIT_FR", "PT_NAME_FR", "SOC_NAME_FR", "MEDDRA_VERSION", "REACTION_ID", "DURATION", "DURATION_UNIT_ENG"])
df_reactions = df_reactions.applymap(strip_quotes)

# Widen the console display so the printed frame is not wrapped.
pd.set_option('display.width', 500)
print(df_reactions)

         REPORT_ID         PT_NAME_ENG                                       SOC_NAME_ENG
0                1            Pruritus             Skin and subcutaneous tissue disorders
1                1              Oedema  General disorders and administration site cond...
2                2           Urticaria             Skin and subcutaneous tissue disorders
3                3            Erythema             Skin and subcutaneous tissue disorders
4                3  Periorbital oedema                                      Eye disorders
...            ...                 ...                                                ...
4179584  906931316            Vomiting                         Gastrointestinal disorders
4179585  906931316    Weight decreased                                     Investigations
4179586  906931316    Weight increased                                     Investigations
4179587  906931316                Acne             Skin and subcutaneous tissue disorders
4179588  9

In [None]:
# Load the "$"-delimited report/drug/indication file (no header row).
df_drugs = pd.read_csv(path_report_drug_indication, delimiter="$", header=None)
df_drugs.columns = ["REPORT_DRUG_ID", "REPORT_ID", "DRUG_PRODUCT_ID", "DRUGNAME", "INDICATION_NAME_ENG", "INDICATION_NAME_FR"]

# Discard the unused columns BEFORE the element-wise quote stripping: the
# result is the same, but strip_quotes no longer processes columns that are
# thrown away immediately afterwards.
df_drugs = df_drugs.drop(columns=["INDICATION_NAME_FR", "REPORT_DRUG_ID", "DRUG_PRODUCT_ID"])
df_drugs = df_drugs.applymap(strip_quotes)
print(df_drugs)

# Show every column when frames are printed from here on.
pd.set_option('display.max_columns', None)

         REPORT_ID                      DRUGNAME                  INDICATION_NAME_ENG
0           300198           NOVO VENLAFAXINE XR                           Depression
1           300548               BENADRYL ELIXIR  Product used for unknown indication
2           300488                      ELOXATIN                         Colon cancer
3           301050  TWINJECT 0.3MG AUTO-INJECTOR                Anaphylactic reaction
4           300841               APO-PROPAFENONE  Product used for unknown indication
...            ...                           ...                                  ...
2988921    1069622                       ENHERTU          HER2 positive breast cancer
2988922     972418                       XELJANZ  Product used for unknown indication
2988923     972418                    CETIRIZINE  Product used for unknown indication
2988924     972418                     PLAQUENIL                 Rheumatoid arthritis
2988925    1070830                       ENHERTU      

In [None]:
# Column names for the header-less reports file, in file order.
report_column_names = [
    "REPORT_ID", "REPORT_NO", "VERSION_NO", "DATRECEIVED", "DATINTRECEIVED", "MAH_NO",
    "REPORT_TYPE_CODE", "REPORT_TYPE_ENG", "REPORT_TYPE_FR",
    "GENDER_CODE", "GENDER_ENG", "GENDER_FR",
    "AGE", "AGE_Y", "AGE_UNIT_ENG", "AGE_UNIT_FR",
    "OUTCOME_CODE", "OUTCOME_ENG", "OUTCOME_FR",
    "WEIGHT", "WEIGHT_UNIT_ENG", "WEIGHT_UNIT_FR",
    "HEIGHT", "HEIGHT_UNIT_ENG", "HEIGHT_UNIT_FR",
    "SERIOUSNESS_CODE", "SERIOUSNESS_ENG", "SERIOUSNESS_FR",
    "DEATH", "DISABILITY", "CONGENITAL_ANOMALY", "LIFE_THREATENING", "HOSP_REQUIRED",
    "OTHER_MEDICALLY_IMP_COND",
    "REPORTER_TYPE_ENG", "REPORTER_TYPE_FR",
    "SOURCE_CODE", "SOURCE_ENG", "SOURCE_FR",
    "E2B_IMP_SAFETYREPORT_ID", "AUTHORITY_NUMB", "COMPANY_NUMB",
]

# These column positions are read as text rather than type-inferred.
dtype_map = {position: str for position in (1, 5, 16, 39, 40, 41)}

# Read the "$"-delimited reports file, name its columns, then strip any
# surrounding double quotes from every string cell.
df_reports = pd.read_csv(path_reports, delimiter="$", header=None, dtype=dtype_map)
df_reports.columns = report_column_names
df_reports = df_reports.applymap(strip_quotes)

def convert_age(row):
    """Express a report's age in years.

    Args:
        row: Mapping-like record exposing ``"AGE"`` and ``"AGE_UNIT_ENG"``.

    Returns:
        ``AGE`` divided by 12, 52, 365 or 8760 for the units "Months",
        "Weeks", "Days" and "Hours"; multiplied by 10 for "Decade"; and
        returned unchanged for "Years" or any other unit value.
    """
    unit = row["AGE_UNIT_ENG"]
    age = row["AGE"]

    if unit == "Decade":
        return age * 10

    # Units shorter than a year, paired with how many of them make one year.
    units_per_year = (("Months", 12), ("Weeks", 52), ("Days", 365), ("Hours", 8760))
    for unit_name, per_year in units_per_year:
        if unit == unit_name:
            return age / per_year

    # "Years" and unrecognised units pass through untouched.
    return age

def convert_height(row):
    """Express a report's height in metres.

    Args:
        row: Mapping-like record exposing ``"HEIGHT"`` and
            ``"HEIGHT_UNIT_ENG"``.

    Returns:
        ``HEIGHT / 100`` for "Centimeter", ``HEIGHT * 0.0254`` for "Inch",
        and ``HEIGHT`` unchanged for any other unit value.
    """
    unit = row["HEIGHT_UNIT_ENG"]
    height = row["HEIGHT"]
    if unit == "Centimeter":
        return height / 100
    return height * 0.0254 if unit == "Inch" else height

# Normalise a report's weight to kilograms.
def convert_weight(row):
    """Express a report's weight in kilograms.

    Args:
        row: Mapping-like record exposing ``"WEIGHT"`` and
            ``"WEIGHT_UNIT_ENG"``.

    Returns:
        ``WEIGHT * 0.453592`` when the unit is "Pound"; otherwise ``WEIGHT``
        unchanged.
    """
    is_pounds = row["WEIGHT_UNIT_ENG"] == "Pound"
    weight = row["WEIGHT"]
    return weight * 0.453592 if is_pounds else weight

# Row-wise unit normalisation: AGE is overwritten in place, while the
# converted height and weight go into new HEIGHT_M / WEIGHT_KG columns.
df_reports["AGE"] = df_reports.apply(convert_age, axis=1)
df_reports["HEIGHT_M"] = df_reports.apply(convert_height, axis=1)
df_reports["WEIGHT_KG"] = df_reports.apply(convert_weight, axis=1)

# Columns that are removed once the conversions above have consumed the raw
# values and their unit columns.
unused_report_columns = [
    "VERSION_NO", "DATINTRECEIVED", "MAH_NO",
    "REPORT_TYPE_FR", "GENDER_FR", "AGE_Y", "AGE_UNIT_FR", "OUTCOME_CODE", "GENDER_CODE",
    "OUTCOME_FR", "WEIGHT_UNIT_FR", "HEIGHT_UNIT_FR", "SERIOUSNESS_FR", "REPORTER_TYPE_FR",
    "SOURCE_CODE", "SOURCE_FR", "E2B_IMP_SAFETYREPORT_ID", "AUTHORITY_NUMB", "COMPANY_NUMB",
    "REPORTER_TYPE_ENG", "SOURCE_ENG", "REPORT_TYPE_CODE", "REPORT_NO", "OUTCOME_ENG",
    "SERIOUSNESS_CODE", "DISABILITY", "REPORT_TYPE_ENG",
    "AGE_UNIT_ENG", "HEIGHT_UNIT_ENG", "WEIGHT_UNIT_ENG",
    "CONGENITAL_ANOMALY", "HOSP_REQUIRED", "WEIGHT", "HEIGHT", "DATRECEIVED",
    "LIFE_THREATENING", "OTHER_MEDICALLY_IMP_COND", "DEATH",
]
df_reports = df_reports.drop(columns=unused_report_columns)

print(df_reports)


         REPORT_ID GENDER_ENG   AGE SERIOUSNESS_ENG  HEIGHT_M  WEIGHT_KG
0                1     Female  62.0     Not Serious      1.55       66.0
1                2     Female  70.0     Not Serious       NaN        NaN
2                3       Male  83.0         Serious      1.73       44.0
3                4     Female  78.0         Serious       NaN        NaN
4                5     Female  55.0     Not Serious       NaN        NaN
...            ...        ...   ...             ...       ...        ...
1090228  907053719       Male   NaN         Serious       NaN        NaN
1090229  907053726     Female  62.0         Serious       NaN        NaN
1090230  907053728     Female  70.0         Serious       NaN        NaN
1090231  907053730     Female   NaN         Serious       NaN        NaN
1090232  907053734     Female  46.0     Not Serious       NaN       70.0

[1090233 rows x 6 columns]


In [None]:
# Indication names (matched exactly and case-sensitively against
# INDICATION_NAME_ENG) that define each disease group.
acute_myocardial_infarction = ["Acute myocardial infarction", "Myocardial infarction"]
heart_failure = [
    "Left ventricular failure",
    "Cardiac failure congestive",
    "Cardiac failure chronic",
    "Cardiac failure",
    "Right ventricular failure",
]
hypertension = [
    "Hypertension",
    "Blood pressure management",
    "Blood pressure abnormal",
    "HIGH BLOOD PRESSURE MEDS",
    "Blood pressure increased",
]
ischemic_heart_disease = [
    "Myocardial ischaemia",
    "Ischaemic heart disease prophylaxis",
    "Ischaemic cardiomyopathy",
    "Coronary artery disease",
    "Arteriosclerosis coronary artery",
]
stroke = [
    "Transient ischaemic attack",
    "Ischaemic stroke",
    "Haemorrhagic stroke",
    "Middle cerebral artery stroke",
    "Embolic stroke",
    "Lacunar stroke",
    "Thrombotic stroke",
]
chronic_obstructive_pulmonary = [
    "Chronic obstructive pulmonary disease",
    "Obstructive airways disorder",
    "Infective exacerbation of chronic obstructive airways",
    "Emphysema",
    "Chronic respiratory disease",
]
diabetes_names = ["Type 2 diabetes mellitus", "Type 1 diabetes mellitus", "Diabetes mellitus", "Diabetes", "diabetes"]


def _rows_with_indication(indication_names):
    """Return the rows of ``df_drugs`` whose INDICATION_NAME_ENG is one of ``indication_names``."""
    return df_drugs[df_drugs['INDICATION_NAME_ENG'].isin(indication_names)]


# One filtered frame per disease group.
df_acute_myocardial_infarction = _rows_with_indication(acute_myocardial_infarction)
df_heart_failure = _rows_with_indication(heart_failure)
df_hypertension = _rows_with_indication(hypertension)
df_ischemic_heart_disease = _rows_with_indication(ischemic_heart_disease)
df_stroke = _rows_with_indication(stroke)
df_chronic_obstructive_pulmonary = _rows_with_indication(chronic_obstructive_pulmonary)
df_diabetes = _rows_with_indication(diabetes_names)
print(df_diabetes)

# Cardiovascular groups keyed by display label.
df_heart_diseases = {
    "Myocardial infarction": df_acute_myocardial_infarction,
    "Heart failure": df_heart_failure,
    "Hypertension": df_hypertension,
    "Ischemic heart disease": df_ischemic_heart_disease,
    "Stroke": df_stroke,
}

         REPORT_ID                        DRUGNAME       INDICATION_NAME_ENG
309         316858                     AVANDIA 4MG         Diabetes mellitus
747         333113                LANTUS   -(VIAL)  Type 1 diabetes mellitus
784         339922                         ETHANOL  Type 2 diabetes mellitus
893         306970  NOVOLIN GE NPH INJ SUS 100U/ML  Type 1 diabetes mellitus
1101        354424                         HUMALOG         Diabetes mellitus
...            ...                             ...                       ...
2987878  907051727                         TRESIBA         Diabetes mellitus
2987884  905973473                   CANAGLIFLOZIN         Diabetes mellitus
2987926  907037065                         TRURAPI         Diabetes mellitus
2988361  906061918                          LANTUS  Type 1 diabetes mellitus
2988393  906642692                  INSULIN ASPART         Diabetes mellitus

[16867 rows x 3 columns]


In [None]:
# Studies table (presumably a ClinicalTrials.gov export -- confirm); the
# condition and intervention text is upper-cased for comparisons.
ctg_studies_path = '/content/drive/My Drive/Data_MedEffect/ctg-studies.csv'
ctg_studies = pd.read_csv(ctg_studies_path)
ctg_studies["Conditions"] = ctg_studies["Conditions"].str.upper()
ctg_studies["Interventions"] = ctg_studies["Interventions"].str.upper()

# Drug-name equivalency table: the first file line is skipped, the identifier
# columns are dropped, and DRUGNAME / STR are kept.
drug_equivalencies_path = '/content/drive/My Drive/Data_MedEffect/drug_equivalencies.csv'
drug_equivalencies = pd.read_csv(drug_equivalencies_path, skiprows=1, encoding="latin1")
drug_equivalencies = drug_equivalencies.drop(columns=["RXCUI", "RXAUI", "SAB", "TTY", "CODE"])

# Upper-case both name columns, then make sure every value is a string
# (missing values become the empty string).
for name_column in ("DRUGNAME", "STR"):
    drug_equivalencies[name_column] = (
        drug_equivalencies[name_column].str.upper().fillna('').astype(str)
    )

# Choose which disease frame gets its drug names replaced.
# An explicit copy is taken: df_diabetes is a filtered slice of df_drugs, and
# assigning columns on such a slice raises SettingWithCopyWarning. Working on
# a copy also leaves df_diabetes itself unmodified.
curr_df = df_diabetes.copy()
curr_df["DRUGNAME"] = curr_df["DRUGNAME"].fillna('').astype(str).str.upper()
def clean_drug_name(drug):
    """Strip punctuation noise from a drug name.

    Parentheses, commas, ampersands and square brackets are deleted; hyphens
    and asterisks become spaces; surrounding whitespace is trimmed; finally
    any text enclosed between two forward slashes (slashes included) is
    removed.

    Args:
        drug: Drug name as a ``str``.

    Returns:
        The cleaned name. Runs of internal spaces are not collapsed.
    """
    # (old, new) substitutions, applied in the order listed.
    substitutions = (
        ("(", ""), (")", ""), (",", ""),
        ("-", " "), ("&", ""), ("*", " "),
        ("[", ""), ("]", ""), (",", " "),
    )
    cleaned = drug
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)
    # No substitution touches whitespace other than by adding spaces, so one
    # trim at the end removes everything at the edges.
    cleaned = cleaned.strip()
    return re.sub(r'\/.*?\/', '', cleaned)

# Memo of drug name -> resolved alternate name, shared by all lookups.
drug_alternates_cache = {}
def get_alternate_drug(drug):
    """Look up the alternate name (``STR``) for a drug in ``drug_equivalencies``.

    The drug name is searched for as a whole word/phrase inside each
    ``DRUGNAME`` value, in table order, and the ``STR`` of the first matching
    row is returned. Results, including misses, are memoised in
    ``drug_alternates_cache``.

    Args:
        drug: Drug name to resolve.

    Returns:
        The ``STR`` value of the first row whose ``DRUGNAME`` contains
        ``drug`` as a whole token; ``drug`` itself when nothing matches or
        when the name is blank.
    """
    if drug in drug_alternates_cache:
        return drug_alternates_cache[drug]

    alternate = drug
    text = str(drug)
    # A blank name would otherwise build a pattern that matches every row
    # and silently resolve to the first entry of the table.
    if text.strip():
        # Escape the name so characters such as ".", "+", "?" or "(" are
        # matched literally instead of acting as regex syntax or raising
        # re.error. The lookarounds behave like \b for names that start and
        # end with a word character, and still work when they do not.
        pattern = re.compile(r'(?<!\w)%s(?!\w)' % re.escape(text))
        # Iterate over the two columns directly; iterrows() builds a Series
        # per row and is far slower for the same first-match result.
        for name, generic in zip(drug_equivalencies['DRUGNAME'], drug_equivalencies['STR']):
            if pattern.search(name) is not None:
                alternate = generic
                break

    drug_alternates_cache[drug] = alternate
    return alternate

# Register tqdm's pandas integration; it provides the progress_apply method
# used when the chunks are processed.
tqdm.pandas()

# Folder that receives the per-chunk CSV files.
save_directory = '/content/drive/My Drive/CSV/Diabetes'

# Show the selected frame, then normalise the punctuation of every drug name.
print(curr_df)
curr_df['DRUGNAME'] = curr_df['DRUGNAME'].apply(clean_drug_name)

# Resolve alternate drug names for one chunk and persist the result.
def process_and_save_chunk(df_chunk, chunk_index):
    """Add the alternate drug name to a chunk and write it to its own CSV.

    Args:
        df_chunk: DataFrame with a ``DRUGNAME`` column; an
            ``"Alternate drugname"`` column is added to it.
        chunk_index: Number used in the output file name
            (``diabetes_altdrug_chunk_<chunk_index>.csv`` in
            ``save_directory``).

    Returns:
        None. The chunk is written to disk and a short preview is printed.
    """
    # Create the output folder up front so a missing directory cannot make
    # to_csv fail only after the slow lookup below has already run.
    os.makedirs(save_directory, exist_ok=True)
    # Resolve each drug name, with a progress bar.
    df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)
    # Build the destination path and write the chunk without the index.
    csv_file_path = os.path.join(save_directory, f"diabetes_altdrug_chunk_{chunk_index}.csv")
    df_chunk.to_csv(csv_file_path, index=False)
    print(f"Chunk {chunk_index} processed and saved.")
    print(df_chunk.head())  # Print only the first few rows to avoid clutter

# Number of rows handled (and saved) per chunk.
chunk_size = 500
# Index of the first chunk to process; raise it to resume an interrupted run.
starting_chunk_index = 0
# Start the row range at the offset that matches starting_chunk_index, so that
# chunk N always covers rows [N * chunk_size, (N + 1) * chunk_size). Starting
# the range at 0 while only shifting the numbering would save the first rows
# under the wrong chunk number and recompute work that was already done.
first_row = starting_chunk_index * chunk_size
for chunk_index, start_row in enumerate(range(first_row, curr_df.shape[0], chunk_size), start=starting_chunk_index):
    # Copy the slice: the chunk gets a new column added, and mutating a bare
    # iloc slice raises SettingWithCopyWarning.
    df_chunk = curr_df.iloc[start_row:start_row + chunk_size].copy()
    # Process and save the current chunk
    process_and_save_chunk(df_chunk, chunk_index)

# df_hypertension["Alternate drugname"] = df_hypertension["DRUGNAME"].progress_apply(get_alternate_drug)
# csv_file_path = os.path.join(save_directory, "hypertension_altdrug.csv")
# df_hypertension.to_csv(csv_file_path, index=False)
# for key, df in df_heart_diseases.items():

#   df['DRUGNAME'] = df['DRUGNAME'].apply(clean_drug_name)
#   df["Alternate drugname"] = df["DRUGNAME"].progress_apply(get_alternate_drug)
#   csv_file_path = os.path.join(save_directory, f"{key}_altdrug.csv")
#   df.to_csv(csv_file_path, index=False)
#   print(df)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  curr_df["DRUGNAME"] = curr_df["DRUGNAME"].fillna('').astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  curr_df["DRUGNAME"] = curr_df["DRUGNAME"].str.upper()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  curr_df['DRUGNAME'] = curr_df['DRUGNAME'].apply(clean_drug_name)


         REPORT_ID                        DRUGNAME       INDICATION_NAME_ENG
309         316858                     AVANDIA 4MG         Diabetes mellitus
747         333113                  LANTUS    VIAL  Type 1 diabetes mellitus
784         339922                         ETHANOL  Type 2 diabetes mellitus
893         306970  NOVOLIN GE NPH INJ SUS 100U/ML  Type 1 diabetes mellitus
1101        354424                         HUMALOG         Diabetes mellitus
...            ...                             ...                       ...
2987878  907051727                         TRESIBA         Diabetes mellitus
2987884  905973473                   CANAGLIFLOZIN         Diabetes mellitus
2987926  907037065                         TRURAPI         Diabetes mellitus
2988361  906061918                          LANTUS  Type 1 diabetes mellitus
2988393  906642692                  INSULIN ASPART         Diabetes mellitus

[16867 rows x 3 columns]


100%|██████████| 500/500 [31:46<00:00,  3.81s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 0 processed and saved.
      REPORT_ID                        DRUGNAME       INDICATION_NAME_ENG              Alternate drugname
309      316858                     AVANDIA 4MG         Diabetes mellitus                     AVANDIA 4MG
747      333113                  LANTUS    VIAL  Type 1 diabetes mellitus                  LANTUS    VIAL
784      339922                         ETHANOL  Type 2 diabetes mellitus                         ETHANOL
893      306970  NOVOLIN GE NPH INJ SUS 100U/ML  Type 1 diabetes mellitus  NOVOLIN GE NPH INJ SUS 100U/ML
1101     354424                         HUMALOG         Diabetes mellitus                  INSULIN LISPRO


100%|██████████| 500/500 [16:11<00:00,  1.94s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 1 processed and saved.
       REPORT_ID             DRUGNAME INDICATION_NAME_ENG   Alternate drugname
93217     302259              HUMALOG   Diabetes mellitus       INSULIN LISPRO
93486     315739              HUMALOG   Diabetes mellitus       INSULIN LISPRO
93591     319746  LANTUS    CARTRIDGE   Diabetes mellitus  LANTUS    CARTRIDGE
93677     322402               LANTUS   Diabetes mellitus     INSULIN GLARGINE
93682     324055       LANTUS    VIAL   Diabetes mellitus       LANTUS    VIAL


100%|██████████| 500/500 [12:40<00:00,  1.52s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 2 processed and saved.
        REPORT_ID        DRUGNAME       INDICATION_NAME_ENG     Alternate drugname
183349  906803227       METFORMIN  Type 2 diabetes mellitus              METFORMIN
183358  906886012       METFORMIN         Diabetes mellitus              METFORMIN
184030  904428338      GLICLAZIDE         Diabetes mellitus             GLICLAZIDE
184872  903921327  INSULIN ASPART         Diabetes mellitus  INSULIN ASPART, HUMAN
186272  907021206       NOVORAPID  Type 1 diabetes mellitus  INSULIN ASPART, HUMAN


100%|██████████| 500/500 [10:36<00:00,  1.27s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 3 processed and saved.
        REPORT_ID         DRUGNAME       INDICATION_NAME_ENG                                 Alternate drugname
271150     727901          INSULIN  Type 2 diabetes mellitus                     INSULIN DEGLUDEC / LIRAGLUTIDE
271405    1044100  APO SITAGLIPTIN  Type 2 diabetes mellitus                                        SITAGLIPTIN
272004  905831138       GLICLAZIDE         Diabetes mellitus                                         GLICLAZIDE
272060  906494605             INS.         Diabetes mellitus  INSULINS AND ANALOGS FOR INJECTION, INTERMEDIA...
272253  906544998      SEMAGLUTIDE         Diabetes mellitus                                        SEMAGLUTIDE


100%|██████████| 500/500 [09:09<00:00,  1.10s/it]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 4 processed and saved.
        REPORT_ID         DRUGNAME       INDICATION_NAME_ENG        Alternate drugname
374257     328898  GLUCONORM 1.0MG  Type 2 diabetes mellitus               REPAGLINIDE
374312     331435  LEVEMIR PENFILL  Type 2 diabetes mellitus           INSULIN DETEMIR
374408     338596        GLYBURIDE         Diabetes mellitus                 GLYBURIDE
374433     337132           APIDRA         Diabetes mellitus  INSULIN GLULISINE, HUMAN
374462     340643         GLUMETZA         Diabetes mellitus                 METFORMIN


100%|██████████| 500/500 [06:58<00:00,  1.19it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 5 processed and saved.
        REPORT_ID                                           DRUGNAME       INDICATION_NAME_ENG                                 Alternate drugname
456370    1033499                                           INVOKANA         Diabetes mellitus                                      CANAGLIFLOZIN
456406  906248645                                            HUMALOG  Type 1 diabetes mellitus                                     INSULIN LISPRO
456775    1036365  OZEMPIC MULTIDOSE PREFILLED PEN DISPENSES 1MG ...  Type 2 diabetes mellitus  OZEMPIC MULTIDOSE PREFILLED PEN DISPENSES 1MG ...
457258  906364832                                        SEMAGLUTIDE  Type 2 diabetes mellitus                                        SEMAGLUTIDE
457390  906386561                                               INS.         Diabetes mellitus  INSULINS AND ANALOGS FOR INJECTION, INTERMEDIA...


100%|██████████| 500/500 [05:50<00:00,  1.42it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 6 processed and saved.
        REPORT_ID   DRUGNAME       INDICATION_NAME_ENG Alternate drugname
535638  903050048  METFORMIN  Type 2 diabetes mellitus          METFORMIN
535891  903105065    HUMALOG         Diabetes mellitus     INSULIN LISPRO
535915     909324    OZEMPIC  Type 2 diabetes mellitus        SEMAGLUTIDE
536265     909968    OZEMPIC  Type 2 diabetes mellitus        SEMAGLUTIDE
536327     913518    TRESIBA         Diabetes mellitus   INSULIN DEGLUDEC


100%|██████████| 500/500 [06:04<00:00,  1.37it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 7 processed and saved.
        REPORT_ID             DRUGNAME       INDICATION_NAME_ENG Alternate drugname
619925  901741588              HUMALOG         Diabetes mellitus     INSULIN LISPRO
620043  901606978               LANTUS         Diabetes mellitus   INSULIN GLARGINE
620058     708160             INVOKANA         Diabetes mellitus      CANAGLIFLOZIN
620143     708783  SANDOZ METFORMIN FC  Type 2 diabetes mellitus          METFORMIN
620455  901868355      INSULIN DETEMIR  Type 2 diabetes mellitus    INSULIN DETEMIR


100%|██████████| 500/500 [03:48<00:00,  2.18it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 8 processed and saved.
        REPORT_ID               DRUGNAME       INDICATION_NAME_ENG           Alternate drugname
709426  900868085              METFORMIN  Type 2 diabetes mellitus                    METFORMIN
709586  900903699               TRAJENTA  Type 2 diabetes mellitus                  LINAGLIPTIN
709727     681540               INVOKANA  Type 2 diabetes mellitus                CANAGLIFLOZIN
709845  900865087  ROSIGLITAZONE MALEATE  Type 2 diabetes mellitus  GLIMEPIRIDE / ROSIGLITAZONE
710318     687938               INVOKANA  Type 2 diabetes mellitus                CANAGLIFLOZIN


100%|██████████| 500/500 [04:55<00:00,  1.69it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 9 processed and saved.
        REPORT_ID             DRUGNAME INDICATION_NAME_ENG          Alternate drugname
799500  900135635            GLYBURIDE   Diabetes mellitus                   GLYBURIDE
799647  900113574        INSULIN HUMAN   Diabetes mellitus  INSULIN HUMAN, RDNA ORIGIN
799667     628693            METFORMIN   Diabetes mellitus                   METFORMIN
799964     635834  ONGLYZA FILM COATED   Diabetes mellitus                 SAXAGLIPTIN
799984     623725               LANTUS   Diabetes mellitus            INSULIN GLARGINE


100%|██████████| 500/500 [06:45<00:00,  1.23it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 10 processed and saved.
        REPORT_ID                         DRUGNAME       INDICATION_NAME_ENG               Alternate drugname
894392     663687  NOVORAPID 3ML PENFILL CARTRIDGE  Type 2 diabetes mellitus  NOVORAPID 3ML PENFILL CARTRIDGE
894596  900490897                         INVOKANA  Type 2 diabetes mellitus                    CANAGLIFLOZIN
894727  900545752                          JANUMET         Diabetes mellitus          METFORMIN / SITAGLIPTIN
894793     663815                       GLICLAZIDE         Diabetes mellitus                       GLICLAZIDE
894873  900479557                        GLYBURIDE         Diabetes mellitus                        GLYBURIDE


100%|██████████| 500/500 [03:24<00:00,  2.45it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 11 processed and saved.
        REPORT_ID                   DRUGNAME       INDICATION_NAME_ENG              Alternate drugname
987196  900417764                   INVOKANA  Type 2 diabetes mellitus                   CANAGLIFLOZIN
987254  900438808                    INSULIN  Type 2 diabetes mellitus  INSULIN DEGLUDEC / LIRAGLUTIDE
987304  900431712                   TRAJENTA  Type 2 diabetes mellitus                     LINAGLIPTIN
987307     661546                  METFORMIN  Type 2 diabetes mellitus                       METFORMIN
987368  900460534  SAXAGLIPTIN HYDROCHLORIDE         Diabetes mellitus     DAPAGLIFLOZIN / SAXAGLIPTIN


100%|██████████| 500/500 [04:09<00:00,  2.01it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 12 processed and saved.
         REPORT_ID               DRUGNAME       INDICATION_NAME_ENG       Alternate drugname
1073071    1073057  METFORMIN/SITAGLIPTIN  Type 2 diabetes mellitus    METFORMIN/SITAGLIPTIN
1073365  905542156         INSULIN ASPART         Diabetes mellitus    INSULIN ASPART, HUMAN
1073374  904713404            SEMAGLUTIDE         Diabetes mellitus              SEMAGLUTIDE
1073686  903921327                TRESIBA         Diabetes mellitus         INSULIN DEGLUDEC
1075070    1075859                JANUMET  Type 2 diabetes mellitus  METFORMIN / SITAGLIPTIN


100%|██████████| 500/500 [05:41<00:00,  1.47it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 13 processed and saved.
         REPORT_ID                                           DRUGNAME       INDICATION_NAME_ENG                                 Alternate drugname
1160232    1048972  OZEMPIC MULTIDOSE PREFILLED PEN DISPENSES 0.25...         Diabetes mellitus  OZEMPIC MULTIDOSE PREFILLED PEN DISPENSES 0.25...
1160667  906543824                                        SEMAGLUTIDE  Type 2 diabetes mellitus                                        SEMAGLUTIDE
1161068    1054144   APO SITAGLIPTIN MALATE / METFORMIN HYDROCHLORIDE         Diabetes mellitus   APO SITAGLIPTIN MALATE / METFORMIN HYDROCHLORIDE
1161233    1054341                              METFORMIN/SITAGLIPTIN  Type 2 diabetes mellitus                              METFORMIN/SITAGLIPTIN
1161312  906591884              DAPAGLIFLOZIN PROPANEDIOL MONOHYDRATE  Type 2 diabetes mellitus                                      DAPAGLIFLOZIN


100%|██████████| 500/500 [03:58<00:00,  2.10it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 14 processed and saved.
         REPORT_ID                                           DRUGNAME       INDICATION_NAME_ENG                                 Alternate drugname
1248167  905744283                                        SEMAGLUTIDE         Diabetes mellitus                                        SEMAGLUTIDE
1248428    1012899  OZEMPIC MULTIDOSE PREFILLED PEN DISPENSES 0.25...         Diabetes mellitus  OZEMPIC MULTIDOSE PREFILLED PEN DISPENSES 0.25...
1248500    1013640                                          JARDIANCE  Type 2 diabetes mellitus                          DAPAGLIFLOZIN / METFORMIN
1248860  905758859                                        DULAGLUTIDE         Diabetes mellitus                                        DULAGLUTIDE
1249673  903910372                                      CANAGLIFLOZIN  Type 2 diabetes mellitus                                      CANAGLIFLOZIN


100%|██████████| 500/500 [05:15<00:00,  1.58it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 15 processed and saved.
         REPORT_ID                                           DRUGNAME       INDICATION_NAME_ENG                                 Alternate drugname
1335338  904111001                                        LIRAGLUTIDE         Diabetes mellitus                     INSULIN DEGLUDEC / LIRAGLUTIDE
1335455  903588771                                            INSULIN         Diabetes mellitus                     INSULIN DEGLUDEC / LIRAGLUTIDE
1335567     949495                          TRESIBA PENFILL CARTRIDGE  Type 1 diabetes mellitus                          TRESIBA PENFILL CARTRIDGE
1335835  904087469                                     INSULIN ASPART  Type 1 diabetes mellitus                              INSULIN ASPART, HUMAN
1336051     935523  METFORMIN HYDROCHLORIDE/SAXAGLIPTIN HYDROCHLORIDE         Diabetes mellitus  METFORMIN HYDROCHLORIDE/SAXAGLIPTIN HYDROCHLORIDE


100%|██████████| 500/500 [03:39<00:00,  2.27it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 16 processed and saved.
         REPORT_ID          DRUGNAME       INDICATION_NAME_ENG         Alternate drugname
1423580     909059         JARDIANCE  Type 2 diabetes mellitus  DAPAGLIFLOZIN / METFORMIN
1423626     612355  INSULIN GLARGINE         Diabetes mellitus           INSULIN GLARGINE
1423872  903215888         METFORMIN         Diabetes mellitus                  METFORMIN
1424028  903280935         GLYBURIDE         Diabetes mellitus                  GLYBURIDE
1424299     913743           JANUVIA         Diabetes mellitus                SITAGLIPTIN


100%|██████████| 500/500 [04:37<00:00,  1.80it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 17 processed and saved.
         REPORT_ID                                           DRUGNAME INDICATION_NAME_ENG                                 Alternate drugname
1514428  902760241                                     INSULIN LISPRO   Diabetes mellitus                                     INSULIN LISPRO
1514692  901775397  METFORMIN HYDROCHLORIDE/SITAGLIPTIN PHOSPHATE ...   Diabetes mellitus  METFORMIN HYDROCHLORIDE/SITAGLIPTIN PHOSPHATE ...
1515505  902609446                                            HUMALOG   Diabetes mellitus                                     INSULIN LISPRO
1515506  902609446                                            HUMALOG   Diabetes mellitus                                     INSULIN LISPRO
1515507  902609446                                            HUMALOG   Diabetes mellitus                                     INSULIN LISPRO


100%|██████████| 500/500 [02:01<00:00,  4.10it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 18 processed and saved.
         REPORT_ID       DRUGNAME       INDICATION_NAME_ENG         Alternate drugname
1599674     705314      NOVORAPID         Diabetes mellitus      INSULIN ASPART, HUMAN
1599956     703371      JARDIANCE         Diabetes mellitus  DAPAGLIFLOZIN / METFORMIN
1600069  901600218      EXENATIDE  Type 2 diabetes mellitus                  EXENATIDE
1600348  901690677      METFORMIN         Diabetes mellitus                  METFORMIN
1600558  901785817  EMPAGLIFLOZIN  Type 2 diabetes mellitus  EMPAGLIFLOZIN / METFORMIN


100%|██████████| 500/500 [04:28<00:00,  1.86it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 19 processed and saved.
         REPORT_ID        DRUGNAME       INDICATION_NAME_ENG Alternate drugname
1689819  900891473   CANAGLIFLOZIN  Type 2 diabetes mellitus      CANAGLIFLOZIN
1689854  900898470       GLYBURIDE  Type 2 diabetes mellitus          GLYBURIDE
1689963  900872789         FORXIGA         Diabetes mellitus      DAPAGLIFLOZIN
1689980  900943839       METFORMIN  Type 2 diabetes mellitus          METFORMIN
1690010  900869012  NOVOLIN GE NPH  Type 2 diabetes mellitus     NOVOLIN GE NPH


100%|██████████| 500/500 [02:36<00:00,  3.20it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 20 processed and saved.
         REPORT_ID                                          DRUGNAME INDICATION_NAME_ENG                                Alternate drugname
1772900  906947573                                  INSULIN DEGLUDEC   Diabetes mellitus                    INSULIN DEGLUDEC / LIRAGLUTIDE
1773146  905260882                                         METFORMIN   Diabetes mellitus                                         METFORMIN
1773418  902600172                                       LIRAGLUTIDE   Diabetes mellitus                    INSULIN DEGLUDEC / LIRAGLUTIDE
1773557    1076639  APO SITAGLIPTIN MALATE / METFORMIN HYDROCHLORIDE   Diabetes mellitus  APO SITAGLIPTIN MALATE / METFORMIN HYDROCHLORIDE
1773854  906555241                                       SEMAGLUTIDE   Diabetes mellitus                                       SEMAGLUTIDE


100%|██████████| 500/500 [05:36<00:00,  1.48it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 21 processed and saved.
         REPORT_ID                                           DRUGNAME       INDICATION_NAME_ENG                                 Alternate drugname
1850596     963293                                           INVOKANA  Type 2 diabetes mellitus                                      CANAGLIFLOZIN
1850961     969121                                      APO METFORMIN         Diabetes mellitus                                          METFORMIN
1851205     972960                                          NOVORAPID         Diabetes mellitus                              INSULIN ASPART, HUMAN
1852003  905019351                                            HUMALOG         Diabetes mellitus                                     INSULIN LISPRO
1852028     980111  OZEMPIC MULTIDOSE PREFILLED PEN DISPENSES 0.25...  Type 2 diabetes mellitus  OZEMPIC MULTIDOSE PREFILLED PEN DISPENSES 0.25...


100%|██████████| 500/500 [02:21<00:00,  3.54it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 22 processed and saved.
         REPORT_ID                                           DRUGNAME       INDICATION_NAME_ENG                                 Alternate drugname
1941166  902734658                                            INSULIN         Diabetes mellitus                     INSULIN DEGLUDEC / LIRAGLUTIDE
1941205  903820570                                            FORXIGA  Type 2 diabetes mellitus                                      DAPAGLIFLOZIN
1941352     935234  OZEMPIC MULTIDOSE PREFILLED PEN DISPENSES 0.25...         Diabetes mellitus  OZEMPIC MULTIDOSE PREFILLED PEN DISPENSES 0.25...
1941529  903888852                                           ATENOLOL         Diabetes mellitus                                         S-ATENOLOL
1942044     944952                                          JARDIANCE  Type 2 diabetes mellitus                          DAPAGLIFLOZIN / METFORMIN


100%|██████████| 500/500 [03:00<00:00,  2.77it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 23 processed and saved.
         REPORT_ID               DRUGNAME       INDICATION_NAME_ENG           Alternate drugname
2034331     936211              GLYBURIDE  Type 2 diabetes mellitus                    GLYBURIDE
2034431  903779695  ROSIGLITAZONE MALEATE         Diabetes mellitus  GLIMEPIRIDE / ROSIGLITAZONE
2034497     936177                OZEMPIC  Type 2 diabetes mellitus                  SEMAGLUTIDE
2034661  903621160                 LANTUS         Diabetes mellitus             INSULIN GLARGINE
2034722  903820718                FORXIGA  Type 2 diabetes mellitus                DAPAGLIFLOZIN


100%|██████████| 500/500 [03:50<00:00,  2.17it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 24 processed and saved.
         REPORT_ID                                       DRUGNAME       INDICATION_NAME_ENG                             Alternate drugname
2118625  902287640                                      METFORMIN         Diabetes mellitus                                      METFORMIN
2119429  902603157  METFORMIN HYDROCHLORIDE/SITAGLIPTIN PHOSPHATE         Diabetes mellitus  METFORMIN HYDROCHLORIDE/SITAGLIPTIN PHOSPHATE
2119782  902604201                                      METFORMIN  Type 2 diabetes mellitus                                      METFORMIN
2119864  900153621                      SAXAGLIPTIN HYDROCHLORIDE         Diabetes mellitus                    DAPAGLIFLOZIN / SAXAGLIPTIN
2120296  902676479                                      ZOPICLONE         Diabetes mellitus                                    ESZOPICLONE


100%|██████████| 500/500 [02:43<00:00,  3.06it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 25 processed and saved.
         REPORT_ID                                  DRUGNAME       INDICATION_NAME_ENG                        Alternate drugname
2213403  902607773                                 NOVORAPID         Diabetes mellitus                     INSULIN ASPART, HUMAN
2213606     727029                              ATACAND PLUS  Type 2 diabetes mellitus         CANDESARTAN / HYDROCHLOROTHIAZIDE
2214209  902750722                                    LANTUS         Diabetes mellitus                          INSULIN GLARGINE
2214235  902755450                                 METFORMIN  Type 2 diabetes mellitus                                 METFORMIN
2214237  902752753  INSULIN GLULISINE RECOMBINANT DNA ORIGIN         Diabetes mellitus  INSULIN GLULISINE RECOMBINANT DNA ORIGIN


100%|██████████| 500/500 [04:31<00:00,  1.84it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 26 processed and saved.
         REPORT_ID                   DRUGNAME       INDICATION_NAME_ENG         Alternate drugname
2299863     698017                   INVOKANA  Type 2 diabetes mellitus              CANAGLIFLOZIN
2299981     703476                    LEVEMIR  Type 2 diabetes mellitus            INSULIN DETEMIR
2300082  901511894                    LEVEMIR         Diabetes mellitus            INSULIN DETEMIR
2300195  901554480                    HUMALOG         Diabetes mellitus             INSULIN LISPRO
2300386  901528055  APO METFORMIN   TAB 500MG  Type 2 diabetes mellitus  APO METFORMIN   TAB 500MG


100%|██████████| 500/500 [03:04<00:00,  2.71it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 27 processed and saved.
         REPORT_ID   DRUGNAME       INDICATION_NAME_ENG       Alternate drugname
2389905     682113   HIZENTRA         Diabetes mellitus  IMMUNOGLOBULIN G, HUMAN
2390008  900893484   SOLOSTAR  Type 1 diabetes mellitus           INSULIN LISPRO
2390331  901009438  DIAMICRON         Diabetes mellitus               GLICLAZIDE
2390336  900903594    JANUMET  Type 2 diabetes mellitus  METFORMIN / SITAGLIPTIN
2390477  901011930  METFORMIN         Diabetes mellitus                METFORMIN


100%|██████████| 500/500 [04:10<00:00,  1.99it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 28 processed and saved.
         REPORT_ID                 DRUGNAME       INDICATION_NAME_ENG      Alternate drugname
2479410  900002029  METFORMIN HYDROCHLORIDE         Diabetes mellitus  ALOGLIPTIN / METFORMIN
2479598     590500                   LANTUS  Type 1 diabetes mellitus        INSULIN GLARGINE
2479682     592898                 TRAJENTA         Diabetes mellitus             LINAGLIPTIN
2479754     587583                METFORMIN         Diabetes mellitus               METFORMIN
2479794  900088160        INSULIN NPH HUMAN  Type 1 diabetes mellitus       INSULIN NPH HUMAN


100%|██████████| 500/500 [02:56<00:00,  2.83it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 29 processed and saved.
         REPORT_ID                                           DRUGNAME       INDICATION_NAME_ENG                                 Alternate drugname
2571860     472372                                             LANTUS  Type 2 diabetes mellitus                                   INSULIN GLARGINE
2571891     495555                                            HUMULIN         Diabetes mellitus                                   INSULIN ISOPHANE
2571979     485669                                             APIDRA         Diabetes mellitus                           INSULIN GLULISINE, HUMAN
2572116     540512                                          METFORMIN  Type 2 diabetes mellitus                                          METFORMIN
2572233     534531  VICTOZA MULTIDOSE PEN INJECTOR. 30DOSES0.6MG 1...  Type 2 diabetes mellitus  VICTOZA MULTIDOSE PEN INJECTOR. 30DOSES0.6MG 1...


100%|██████████| 500/500 [02:11<00:00,  3.81it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 30 processed and saved.
         REPORT_ID                                           DRUGNAME       INDICATION_NAME_ENG                                 Alternate drugname
2666292  900053212                                            HUMALOG         Diabetes mellitus                                     INSULIN LISPRO
2666404     590981  VICTOZA 1.2MG PRE FILLED PEN INJECTOR. 15 DOSE...  Type 2 diabetes mellitus  VICTOZA 1.2MG PRE FILLED PEN INJECTOR. 15 DOSE...
2666524     610992                                            AVANDIA         Diabetes mellitus                                      ROSIGLITAZONE
2666611     581020                                          METFORMIN         Diabetes mellitus                                          METFORMIN
2666612     611604                                            AVANDIA         Diabetes mellitus                                      ROSIGLITAZONE


100%|██████████| 500/500 [01:54<00:00,  4.38it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 31 processed and saved.
         REPORT_ID   DRUGNAME       INDICATION_NAME_ENG         Alternate drugname
2746364  900854118  METFORMIN         Diabetes mellitus                  METFORMIN
2746489  906413771    TRESIBA  Type 2 diabetes mellitus           INSULIN DEGLUDEC
2746864  906465380  JARDIANCE  Type 2 diabetes mellitus  DAPAGLIFLOZIN / METFORMIN
2747054  902867333     TOUJEO         Diabetes mellitus           INSULIN GLARGINE
2747084  906357241  JARDIANCE         Diabetes mellitus  DAPAGLIFLOZIN / METFORMIN


100%|██████████| 500/500 [02:29<00:00,  3.34it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


Chunk 32 processed and saved.
         REPORT_ID     DRUGNAME       INDICATION_NAME_ENG           Alternate drugname
2828178     919930    METFORMIN         Diabetes mellitus                    METFORMIN
2828192     939149    JARDIANCE  Type 2 diabetes mellitus    DAPAGLIFLOZIN / METFORMIN
2828239  903801416  SAXAGLIPTIN  Type 2 diabetes mellitus  DAPAGLIFLOZIN / SAXAGLIPTIN
2828255     938462    JARDIANCE         Diabetes mellitus    DAPAGLIFLOZIN / METFORMIN
2828676     943239      OZEMPIC  Type 2 diabetes mellitus                  SEMAGLUTIDE


100%|██████████| 367/367 [02:26<00:00,  2.51it/s]

Chunk 33 processed and saved.
         REPORT_ID         DRUGNAME       INDICATION_NAME_ENG     Alternate drugname
2916530     736739        NOVORAPID         Diabetes mellitus  INSULIN ASPART, HUMAN
2916611  903041202        METFORMIN  Type 2 diabetes mellitus              METFORMIN
2916934     908171  LEVEMIR PENFILL  Type 2 diabetes mellitus        INSULIN DETEMIR
2917008  903080950          HUMALOG         Diabetes mellitus         INSULIN LISPRO
2917017     908829          FORXIGA  Type 2 diabetes mellitus          DAPAGLIFLOZIN



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_chunk["Alternate drugname"] = df_chunk["DRUGNAME"].progress_apply(get_alternate_drug)


In [None]:
# Concatenate all per-chunk CSV files into a single CSV file.
save_directory = '/content/drive/My Drive/CSV/Parkinsonism'
output_file = os.path.join(save_directory, 'parkinsonism_altdrug.csv')


def _chunk_sort_key(filename):
    """Return a sort key that orders chunk files by their numeric index.

    A plain ``sorted()`` on the names is lexicographic, which places
    ``..._chunk_10.csv`` before ``..._chunk_2.csv`` once there are ten or
    more chunks. Names without a numeric index sort last, alphabetically.
    """
    match = re.search(r'_chunk_(\d+)\.csv$', filename)
    chunk_index = int(match.group(1)) if match else float('inf')
    return (chunk_index, filename)


# Select the chunk files; the combined output file lives in the same directory
# but does not carry the '_chunk_' prefix, so it is never re-read here.
chunk_filenames = sorted(
    (
        name
        for name in os.listdir(save_directory)
        if name.startswith('parkinsonism_altdrug_chunk_') and name.endswith('.csv')
    ),
    key=_chunk_sort_key,
)

# List to store the per-chunk DataFrames, in chunk order
df_list = []

for filename in chunk_filenames:
    file_path = os.path.join(save_directory, filename)
    # Read the CSV file into a DataFrame
    df_chunk = pd.read_csv(file_path)
    df_list.append(df_chunk)
    print(f"Processed {filename}")

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that names the directory that was searched instead.
if not df_list:
    raise ValueError(
        f"No 'parkinsonism_altdrug_chunk_*.csv' files found in {save_directory}"
    )

# Concatenate all DataFrames in the list
combined_df = pd.concat(df_list, ignore_index=True)

# Save the combined DataFrame to a single CSV file
combined_df.to_csv(output_file, index=False)

Processed parkinsonism_altdrug_chunk_0.csv
Processed parkinsonism_altdrug_chunk_1.csv
Processed parkinsonism_altdrug_chunk_2.csv
Processed parkinsonism_altdrug_chunk_3.csv
Processed parkinsonism_altdrug_chunk_4.csv
