## Setup

In [6]:
import csv
import json
import random
import re
from urllib.parse import quote_plus

import boto3
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from sqlalchemy import create_engine, text
from tqdm import tqdm

In [33]:
def get_secret(secret_name, region_name="us-east-1"):
    """Fetch a secret from AWS Secrets Manager and decode it from JSON.

    Args:
        secret_name: Identifier passed to Secrets Manager as ``SecretId``.
        region_name: AWS region used for the Secrets Manager client.

    Returns:
        The object obtained by JSON-decoding the response's ``SecretString``.
    """
    secrets_client = boto3.session.Session().client(
        region_name=region_name,
        service_name='secretsmanager',
    )
    raw_response = secrets_client.get_secret_value(SecretId=secret_name)
    return json.loads(raw_response['SecretString'])

creds = get_secret("wysde")
USERNAME = creds["RDS_MYSQL_USERNAME"]
PASSWORD = creds["RDS_MYSQL_PASSWORD"]
HOST = creds["RDS_MYSQL_HOST"]
DATABASE = 'sparsh'

conn_str = 'mysql+mysqlconnector://{0}:{1}@{2}/{3}'.format(USERNAME, PASSWORD, HOST, DATABASE)

engine = create_engine(conn_str)
conn = engine.connect()

%config SqlMagic.autopandas=True
%config SqlMagic.displaycon=False
%config SqlMagic.feedback=False
%config SqlMagic.displaylimit=5
%reload_ext sql
%sql {conn_str}

## Data Model

In [34]:
%%sql

-- One row per doctor.
-- NOTE(review) hospital_id looks like the column that should reference
-- hospitals(hospital_id). Adding that constraint needs hospitals to be created
-- and loaded first and the doctors load to supply hospital_id. TODO confirm.
CREATE TABLE doctors
  (
     doctor_id      INT NOT NULL,
     license_number TEXT,
     doctor_name    VARCHAR(150) NOT NULL,
     hospital_id    INT NOT NULL,
     gender         VARCHAR(20) NOT NULL,
     PRIMARY KEY (doctor_id)
  );

-- One row per hospital.
-- The former foreign key from hospital_id to doctors(doctor_id) was removed.
-- It compared two unrelated identifiers and rejected every hospital whose id
-- did not happen to be a doctor id as well.
CREATE TABLE hospitals
  (
     hospital_id INT NOT NULL,
     zip_code    BIGINT NOT NULL,
     PRIMARY KEY (hospital_id)
  );

-- Consultation price of a doctor at a hospital.
CREATE TABLE consultation_cost
  (
     id          INT NOT NULL,
     cost        INT NOT NULL,
     hospital_id INT NOT NULL,
     doctor_id   INT NOT NULL,
     PRIMARY KEY (id),
     FOREIGN KEY (doctor_id) REFERENCES doctors (doctor_id),
     FOREIGN KEY (hospital_id) REFERENCES hospitals (hospital_id)
  );

-- Degree held by a doctor.
CREATE TABLE degree
  (
     doctor_id     INT NOT NULL,
     doctor_degree TEXT,
     FOREIGN KEY (doctor_id) REFERENCES doctors (doctor_id)
  );

-- Lookup table of disease categories.
CREATE TABLE diseasecategory
  (
     disease_category_id   INT NOT NULL,
     disease_category_name VARCHAR(150) NOT NULL UNIQUE,
     PRIMARY KEY (disease_category_id)
  );

-- Diseases with their clinical description, category and treating doctor.
-- The stray UPDATE that used to follow this statement was dropped. It ran
-- against the freshly created, still empty table and changed nothing.
CREATE TABLE diseases
  (
     disease_id          INT,
     disease_name        VARCHAR(100) NOT NULL,
     signs_and_symptoms  TEXT,
     diagnosis           TEXT,
     treatment           TEXT,
     disease_category_id INT,
     doctor_id           INT,
     PRIMARY KEY (disease_id),
     FOREIGN KEY (doctor_id) REFERENCES doctors (doctor_id),
     FOREIGN KEY (disease_category_id) REFERENCES diseasecategory (disease_category_id)
  );

-- Affiliations of a hospital, one row per affiliation.
CREATE TABLE hospital_affiliations
  (
     hospital_id          INT NOT NULL,
     hospital_affliations VARCHAR(300),
     PRIMARY KEY (hospital_id, hospital_affliations),
     FOREIGN KEY (hospital_id) REFERENCES hospitals (hospital_id)
  );

-- Medicines grouped by disease category.
-- NOTE(review) this table declares no primary key. Confirm whether id should be one.
CREATE TABLE medicines
  (
     id                  INT NOT NULL,
     drug_name           VARCHAR(200) NOT NULL,
     manufacturer_name   TEXT,
     price               DECIMAL(10, 2) NOT NULL,
     disease_category_id INT NOT NULL,
     FOREIGN KEY (disease_category_id) REFERENCES diseasecategory (disease_category_id)
  );

-- City, state and zip code of each hospital.
CREATE TABLE pincodes_state_city
  (
     city        VARCHAR(150) NOT NULL,
     state       VARCHAR(20),
     zip_code    INT NOT NULL,
     hospital_id INT NOT NULL,
     PRIMARY KEY (hospital_id),
     FOREIGN KEY (hospital_id) REFERENCES hospitals (hospital_id)
  );

-- Practice specialities of a doctor, one row per speciality.
CREATE TABLE practice_specialities
  (
     doctor_id             INT NOT NULL,
     practice_specialities VARCHAR(300),
     PRIMARY KEY (doctor_id, practice_specialities),
     FOREIGN KEY (doctor_id) REFERENCES doctors (doctor_id)
  );

![](./img/schema.png)

## ETL

In [35]:
# Load the three raw Excel workbooks into DataFrames: two doctor sources
# (massdoc, zocdoc) and the hospital list.
massdoc, zocdoc, hospitals = map(
    pd.read_excel,
    (
        "./data/raw/massdoc.xlsx",
        "./data/raw/zocdoc.xlsx",
        "./data/raw/hospitals.xlsx",
    ),
)

In [36]:
# Preview the first rows of both doctor sources.
display(massdoc.head(), zocdoc.head())

Unnamed: 0,Doctor_Id,License_Number,Doctor_Name,Degree,Practice_Specialities
0,1,54713,Ward Bein,M.D. or M.D. Equivalent,"Child and Adolescent Psychiatry, Psychiatry, F..."
1,2,58952,Donald Condie,M.D. or M.D. Equivalent,"Child and Adolescent Psychiatry, Psychiatry, F..."
2,3,51517,Jeffrey Friedman,M.D. or M.D. Equivalent,"Forensic Psychiatry, Psychiatry, Geriatric Psy..."
3,4,255819,Mihae Platt,M.D. or M.D. Equivalent,"Medical Microbiology Pathology, Pathology, Mol..."
4,5,288074,Allison Nussbaum,D.O.,"Child and Adolescent Psychiatry, Psychiatry, P..."


Unnamed: 0,Doctor_Id,Doctor_Name,Practice_Specialities,Gender
0,976,"Anne Valez, FNP",Family Nurse Practitioner,Female
1,977,"Amy Watson, MD",Pediatrician,Female
2,978,"Christine Cassel, PA",Physician Assistant,Female
3,979,"Elizabeth Braungart, ND",Naturopathic Doctor,Female
4,980,"Rachael Maina-Delgado, PA",Physician Assistant,Female


In [37]:
# Doctors come from two different sources, so stack both frames into one.
df3 = pd.concat([massdoc, zocdoc])

# Doctors without a licence number get the placeholder 'Unknown'.
df4 = df3.fillna({'License_Number': 'Unknown'})

# Fill in a synthetic gender ONLY where none was recorded. The previous code
# overwrote every row (twice), discarding the genders that the source data
# already provides. A dedicated, seeded generator makes the synthetic values
# reproducible across Restart & Run All without touching the global random state.
if 'Gender' in df4.columns:
    recorded_gender = df4['Gender'].to_numpy(dtype=object)
else:
    recorded_gender = np.full(len(df4), np.nan, dtype=object)
gender_rng = random.Random(42)
synthetic_gender = np.array(
    gender_rng.choices(['Male', 'Female', 'Transgender'], weights=[1, 1, 1], k=len(df4)),
    dtype=object,
)
# Positional assignment: the concatenated frame carries duplicate index labels,
# so label-based alignment is deliberately avoided here.
df4['Gender'] = np.where(pd.isna(recorded_gender), synthetic_gender, recorded_gender)

# Split the degree text on '. or' (e.g. "M.D. or M.D. Equivalent") and store
# the pieces; the next cell reads this file back to build the degree table.
df6 = df4['Degree'].str.split('. or', expand=True)

df6.to_csv('./data/cleaned/degree.csv')

In [38]:
# --- Degree table ---------------------------------------------------------
# Reload the split degree pieces; the CSV holds the saved index in an
# "Unnamed" column followed by the pieces in columns "0", "1", ...
df7 = pd.read_csv('./data/cleaned/degree.csv')
df7 = df7.drop(columns=df7.filter(regex="Unname").columns)
df8 = df7.rename(columns={"0": "Doctor_Degree"})

# Use the real doctor ids instead of assuming they run 1..N. The CSV rows are
# in the same order as df4 because they were written from df4['Degree'];
# a length mismatch raises here rather than silently mislabelling degrees.
df8['Doctor_Id'] = df4['Doctor_Id'].to_numpy()

# Keep only the first degree piece plus the id, however many pieces the split produced.
df10 = df8[['Doctor_Degree', 'Doctor_Id']]

# --- Practice_Specialities table -------------------------------------------
# One row per (speciality, doctor). Splitting on the comma together with its
# surrounding whitespace keeps " Psychiatry" and "Psychiatry" from being
# treated as different specialities, so the duplicate removal below works.
df11 = df4[['Practice_Specialities', 'Doctor_Id']]
df13 = df11.assign(
    Practice_Specialities=df11['Practice_Specialities'].str.strip().str.split(r'\s*,\s*')
).explode('Practice_Specialities')
df14 = df13.drop_duplicates()

In [39]:
# Preview the frames that will be written to the degree and
# practice_specialities tables.
display(df10.head(), df14.head())

Unnamed: 0,Doctor_Degree,Doctor_Id
0,M.D,1
1,M.D,2
2,M.D,3
3,M.D,4
4,D.O.,5


Unnamed: 0,Practice_Specialities,Doctor_Id
0,Child and Adolescent Psychiatry,1
0,Psychiatry,1
0,Forensic Psychiatry,1
0,Geriatric Psychiatry,1
1,Child and Adolescent Psychiatry,2


In [40]:
# Write both frames to MySQL without their index.
# NOTE(review): if_exists='replace' drops any existing table of that name and
# recreates it from the DataFrame, so keys declared in the Data Model cell do
# not survive this step - confirm that is intended.
df10.to_sql(
    name='degree',
    con=engine,
    index=False,
    if_exists='replace',
)
df14.to_sql(
    name='practice_specialities',
    con=engine,
    index=False,
    if_exists='replace',
)

In [None]:
%%sql

SELECT *
FROM   degree
LIMIT  10

Unnamed: 0,Doctor_Degree,Doctor_Id
0,M.D,1
1,M.D,2
2,M.D,3
3,M.D,4
4,D.O.,5
5,M.D,6
6,M.D,7
7,M.D,8
8,M.D,9
9,M.D,10


In [None]:
%%sql

SELECT *
FROM   practice_specialities
LIMIT  10

Unnamed: 0,Practice_Specialities,Doctor_Id
0,Child and Adolescent Psychiatry,1
1,Psychiatry,1
2,Forensic Psychiatry,1
3,Geriatric Psychiatry,1
4,Child and Adolescent Psychiatry,2
5,Psychiatry,2
6,Forensic Psychiatry,2
7,Forensic Psychiatry,3
8,Psychiatry,3
9,Geriatric Psychiatry,3


In [23]:
# Doctor attributes for the doctors table: everything except the two columns
# that are normalised into their own tables.
dffinaldoctors = df4.drop(columns=['Practice_Specialities', 'Degree'])

# Disease categories are derived from the doctors' practice specialities:
# split each comma-separated list into columns, flatten all columns into one
# Series and keep the first occurrence of every distinct value.
# NOTE(review): pieces after the first keep the space that follows the comma,
# so " X" and "X" count as different categories. Stripping would renumber the
# category ids, which the hand-written Disease_name mapping presumably relies
# on - verify before changing.
df6 = (
    df4['Practice_Specialities']
    .str.split(',', expand=True)
    .unstack()
    .drop_duplicates()
)
df6.to_csv('./data/cleaned/disease_categories.csv')

In [25]:
# Reload the distinct categories; the saved index levels come back as
# "Unnamed" columns and the category text as column "0".
df7 = pd.read_csv('./data/cleaned/disease_categories.csv')

# Reshape to the disease_category layout: (Disease_Category_Id, Disease_Category_Name).
df7 = df7.drop(columns=df7.filter(regex="Unname").columns)
df8 = df7.rename(columns={"0": "Disease_Category_Name"})
df8['Disease_Category_Id'] = range(1, 1 + len(df8))
df9 = df8.reindex(columns=list(df8.columns)[::-1])

# Assign the filled column back instead of calling fillna(inplace=True) on a
# column selection: under pandas Copy-on-Write the in-place form updates a
# temporary object and leaves df9 unchanged.
# NOTE(review): missing names are labelled "Hepatology" - confirm this default.
df9["Disease_Category_Name"] = df9["Disease_Category_Name"].fillna("Hepatology")

In [26]:
# Zip codes: cast to text and keep at most the first four characters.
hospitals['Zip_Code'] = hospitals['Zip_Code'].astype(str).str.slice(stop=4)

# Only the id and the zip code go into the hospitals table.
dffinalhospital = hospitals.loc[:, ['Hospital_Id', 'Zip_Code']]

# Persist both cleaned frames (without the index) for the database load cells.
dffinalhospital.to_csv('./data/cleaned/hospitals.csv', index=False)
dffinaldoctors.to_csv('./data/cleaned/doctors.csv', index=False)

In [32]:
# Read the cleaned doctors file back and show its first rows as a check of
# what the load cell will insert.
doctors = pd.read_csv(filepath_or_buffer='./data/cleaned/doctors.csv')
doctors.head(n=5)

Unnamed: 0,Doctor_Id,License_Number,Doctor_Name,Gender
0,1,54713.0,Ward Bein,Female
1,2,58952.0,Donald Condie,Transgender
2,3,51517.0,Jeffrey Friedman,Transgender
3,4,255819.0,Mihae Platt,Male
4,5,288074.0,Allison Nussbaum,Transgender


In [None]:
# Load the cleaned doctors CSV into the doctors table.
# CSV columns, in order, map onto these table columns.
DOCTOR_COLUMNS = ('doctor_id', 'license_number', 'doctor_name', 'gender')

# The context manager closes the file; the previous bare open() never did.
with open('./data/cleaned/doctors.csv', newline='', encoding='utf-8') as doctors_file:
    csv_data = csv.reader(doctors_file)
    header = next(csv_data)  # skip the header row
    doctor_rows = [dict(zip(DOCTOR_COLUMNS, row)) for row in csv_data]

# Named bind parameters wrapped in text() are accepted by SQLAlchemy 1.x and
# 2.x alike. The table is qualified with DATABASE, which in MySQL is the schema.
insert_doctor = text(
    f"INSERT INTO {DATABASE}.doctors (doctor_id, license_number, doctor_name, gender) "
    "VALUES (:doctor_id, :license_number, :doctor_name, :gender)"
)

# NOTE(review): doctors.hospital_id is declared NOT NULL in the Data Model cell
# but is not supplied here - confirm how it is meant to be populated.
if doctor_rows:
    # engine.begin() commits when the block succeeds and rolls back on error,
    # so the table never ends up half loaded.
    with engine.begin() as transaction:
        transaction.execute(insert_doctor, doctor_rows)

In [None]:
# Load the cleaned hospitals CSV into the hospitals table.
# CSV columns, in order, map onto these table columns.
HOSPITAL_COLUMNS = ('hospital_id', 'zip_code')

# The context manager closes the file; the previous bare open() never did.
with open('./data/cleaned/hospitals.csv', newline='', encoding='utf-8') as hospitals_file:
    csv_data = csv.reader(hospitals_file)
    header = next(csv_data)  # skip the header row
    hospital_rows = [dict(zip(HOSPITAL_COLUMNS, row)) for row in csv_data]

# Named bind parameters wrapped in text() are accepted by SQLAlchemy 1.x and
# 2.x alike. The table is qualified with DATABASE, which in MySQL is the schema.
insert_hospital = text(
    f"INSERT INTO {DATABASE}.hospitals (hospital_id, zip_code) "
    "VALUES (:hospital_id, :zip_code)"
)

if hospital_rows:
    # engine.begin() commits when the block succeeds and rolls back on error,
    # so the table never ends up half loaded.
    with engine.begin() as transaction:
        transaction.execute(insert_hospital, hospital_rows)

### Diseases

In [None]:
def _replace_literals(column, replacements):
    """Apply (old, new) replacements to a string Series, in order, as plain text.

    regex=False is passed explicitly because the default of Series.str.replace
    differs between pandas versions; a pattern such as '[41]' is a character
    class in regex mode but a literal citation marker in plain-text mode.
    """
    for old, new in replacements:
        column = column.str.replace(old, new, regex=False)
    return column


# ---------------------------------------------------------------------------
# Scrape the table of infectious diseases
# ---------------------------------------------------------------------------
# Site URL
url = "https://en.wikipedia.org/wiki/List_of_infectious_diseases"

# Fetch the raw HTML. The timeout keeps the cell from hanging indefinitely and
# raise_for_status() prevents an HTTP error page from being parsed as data.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_content = response.text

# Parse the HTML of the whole page.
soup = BeautifulSoup(html_content, "html.parser")

# The data sits in a table with the class "sortable"; collect all such tables
# and work with the first one.
dis = soup.find_all("table", attrs={"class": "sortable"})
if not dis:
    raise ValueError(f"No table with class 'sortable' found at {url}")
table1 = dis[0]

# The first <tr> holds the column names, every following <tr> is a data row.
body = table1.find_all("tr")
head = body[0]
body_rows = body[1:]

# Column names: text of every <th>, without trailing newlines.
headings = [item.text.rstrip("\n") for item in head.find_all("th")]

# Data rows: text of every <td> with non-breaking spaces (\xa0), newlines and
# commas removed.
all_rows = [
    [re.sub("(\xa0)|(\n)|,", "", row_item.text) for row_item in body_row.find_all("td")]
    for body_row in body_rows
]

# all_rows becomes the data and headings the column names.
df = pd.DataFrame(data=all_rows, columns=headings)

# ---------------------------------------------------------------------------
# Reformat the data to fit the tables of the database schema
# ---------------------------------------------------------------------------
# Limit 'Infectious agent' to 132 characters and 'Vaccine(s)' to 14 characters.
df['Infectious agent'] = df['Infectious agent'].str[:132]
df['Vaccine(s)'] = df['Vaccine(s)'].str[:14]

# Auditing validity
#
# The scraped data can be validated against its source:
# https://en.wikipedia.org/wiki/List_of_infectious_diseases
#
# For example, the first record is "Acinetobacter baumannii". Independent
# descriptions of it:
#
# CDC info: https://www.cdc.gov/hai/organisms/acinetobacter.html
#
# Virginia.gov info: https://www.vdh.virginia.gov/epidemiology/epidemiology-fact-sheets/acinetobacter-infection/#:~:text=Symptoms%20of%20a%20bloodstream%20infection,and%20pus%20around%20the%20wound

# Auditing Completeness & Auditing Uniformity
#
# Fill empty cells and remove characters that do not belong in the values.
df['Infectious agent'] = _replace_literals(df['Infectious agent'], [
    ('á', 'a'),
    (';', ''),
    ('Yes', 'NA'),
    ('[41]', ''),   # citation marker; must run before the brackets are removed
    ('usually', ''),
    ('[', ''),
    (']', ''),
])
df['Common name'] = _replace_literals(df['Common name'], [
    (';', ''),
    ('–', ''),
    ('ä', 'a'),
    ('’', ''),
])

# Cells that are missing altogether become 'NA'.
text_columns = ['Common name', 'Signs and symptoms', 'Diagnosis', 'Treatment', 'Vaccine(s)']
df[text_columns] = df[text_columns].fillna('NA')

df['Signs and symptoms'] = _replace_literals(df['Signs and symptoms'], [
    ('–', ' to '),
    ('ó', 'o'),
    ('°', ' degrees '),
])

# Turn empty or whitespace-only cells into NaN ...
df2 = df.replace(r'^\s*$', np.nan, regex=True)

# ... and give the three descriptive columns an explicit placeholder instead.
df2 = df2.fillna({
    'Signs and symptoms': 'No symptoms found',
    'Diagnosis': 'No diagnosis found',
    'Treatment': 'No treatment found',
})

# Rename the columns to the names used for the database.
df2 = df2.rename(columns={
    'Vaccine(s)': 'Vaccine_availability',
    'Infectious agent': 'Disease_agent',
    'Common name': 'Disease_name',
    'Signs and symptoms': 'Signs_and_symptoms',
})

# Keep only the columns stored in the diseases table. The explicit copy makes
# df3 independent of df2 before a column is inserted into it.
df3 = df2[['Disease_name', 'Signs_and_symptoms', 'Diagnosis', 'Treatment']].copy()

# Sequential ids starting at 1, placed as the first column.
df3.insert(0, 'Disease_Id', range(1, 1 + len(df3)))

# Cleaning of the diseases data is done; export it for the load cell.
df3.to_csv('./data/cleaned/infectious_diseases.csv', index=False, encoding='utf-8')

In [12]:
# Hand-maintained lookup from cleaned disease name to an integer. The diseases
# load cell looks up the second CSV field (the Disease_name column of
# infectious_diseases.csv) here and stores the result as
# diseases.disease_category_id, so keys must match the cleaned names exactly.
# NOTE(review): the values are presumably ids of the disease category table -
# verify against the category data before editing.
Disease_name={'Acinetobacter infections':18,'Actinomycosis':18,'Adenovirus infection':19,'African sleeping sickness (African trypanosomiasis)':19,'AIDS (acquired immunodeficiency syndrome)':18,'Amoebiasis':19,'Anaplasmosis':19,'Angiostrongyliasis':18,'Anisakiasis'
:25,'Anthrax':37,'Arcanobacterium haemolyticum infection':37,'Argentine hemorrhagic fever':12,'Ascariasis':14,'Aspergillosis':14,'Astrovirus infection':15,'Babesiosis':66,'Bacillus cereus infection':66,'Bacterial meningitis':66,
'Bacterial pneumonia':66,'Bacterial vaginosis':66,'Bacteroides infection':38,'Balantidiasis':38,'Bartonellosis':38,'Baylisascaris infection':38,'BK virus infection':38,'Black piedra':38,'Blastocystosis':66,
'Blastomycosis':66,'Bolivian hemorrhagic fever':66,'Botulism (and Infant botulism)':38,'Brazilian hemorrhagic fever':50,'Brucellosis':50,'Bubonic plague':28,'Burkholderia infection':28,'Buruli ulcer':28,'Calicivirus infection (Norovirus and Sapovirus)':28,'Campylobacteriosis':28,
'Candidiasis (Moniliasis Thrush)':28,'Capillariasis':18,'Dental caries':18,"Carrion's disease":3,'Cat-scratch disease':3,'Cellulitis':3,'Chagas disease (American trypanosomiasis)':18,'Chancroid':18,'Chickenpox':17,'Chikungunya':17,
'Chlamydia':29,'Chlamydophila pneumoniae infection (Taiwan acute respiratory agent or TWAR)':66,'Cholera':66,'Chromoblastomycosis':136,'Chytridiomycosis':28,'Clonorchiasis':28,'Clostridium difficile colitis':29,'Coccidioidomycosis':67,'Colorado tick fever (CTF)':100,
'Common cold (Acute viral rhinopharyngitis Acute coryza)':66,'Coronavirus disease 2019 (COVID-19)':66,'Coxsackie B virus infection':66,'CreutzfeldtJakob disease (CJD)':17,'Crimean-Congo hemorrhagic fever (CCHF)':3,'Cryptococcosis':18,'Cryptosporidiosis':28,'Cutaneous larva migrans (CLM)':28,'Cyclosporiasis':17,'Cysticercosis':17,'Cytomegalovirus infection':19,'Dengue fever':66,
'Desmodesmus infection':66,'Dientamoebiasis':71,'Diphtheria':66,'Diphyllobothriasis':67,'Dracunculiasis':29,'Eastern equine encephalitis (EEE)':3,'Ebola hemorrhagic fever':50,'Echinococcosis':17,'Ehrlichiosis':22,'Enterobiasis (Pinworm infection)':67,'Enterococcus infection':89,
'Enterovirus infection':66,'Epidemic typhus':66,'Erythema infectiosum (Fifth disease)':28,'Exanthem subitum (Sixth disease)':28,'Fasciolasis':17,'Fasciolopsiasis':66,'Fatal familial insomnia (FFI)':56,'Filariasis':45,
'Food poisoning by Clostridium perfringens':33,'Free-living amebic infection':56,'Fusobacterium infection':66,'Gas gangrene (Clostridial myonecrosis)':100,'Geotrichosis':46,'Gerstmann-Straussler-Scheinker syndrome (GSS)':100,'Giardiasis':18,'Glanders':38,
'Gnathostomiasis':12,'Gonorrhea':37,'Granuloma inguinale (Donovanosis)':56,'Group A streptococcal infection':89,'Group B streptococcal infection':78,'Haemophilus influenzae infection':78,'Hand foot and mouth disease (HFMD)':66,'Hantavirus Pulmonary Syndrome (HPS)':17,'Heartland virus disease':66,
'Helicobacter pylori infection':67,'Hemolytic-uremic syndrome (HUS)':34,'Hemorrhagic fever with renal syndrome (HFRS)':89,'Hendra virus infection':66,'Hepatitis A':100,'Hepatitis B':100,'Hepatitis C':100,'Hepatitis D':100,'Hepatitis E':100,'Herpes simplex':66,'Histoplasmosis':136,
'Hookworm infection':78,'Human bocavirus infection':17,'Human ewingii ehrlichiosis':66,'Human granulocytic anaplasmosis (HGA)':100,'Human metapneumovirus infection':89,'Human monocytic ehrlichiosis':77,'Human papillomavirus (HPV) infection':77,'Human parainfluenza virus infection':18,
'Human T-lymphotropic virus 1 infection':37,'Hymenolepiasis':25,'EpsteinBarr virus infectious mononucleosis (Mono)':14,
'Influenza (flu)':56,'Isosporiasis':19,'Japanese encephalitis':89,'Kawasaki disease':37,'Keratitis':77,'Kingella kingae infection':90,'Kuru':78,'Lassa fever':56,'Pontiac fever':100,'Leishmaniasis':100,'Leprosy':66,'Leptospirosis':66,'Listeriosis':66,'Lymphocytic choriomeningitis':98,'Malaria':77,
'Marburg hemorrhagic fever (MHF)':90,'Measles':100,'Middle East respiratory syndrome (MERS)':100,'Melioidosis (Whitmores disease)':14,'Meningitis':14,'Meningococcal disease':14,'Metagonimiasis':14,'Microsporidiosis':100,'Molluscum contagiosum (MC)':18,'Monkeypox':100,'Mumps':100,'Murine typhus (Endemic typhus)':100,
'Mycoplasma pneumonia':77,'Mycoplasma genitalium infection':19,'Mycetoma':12,'Myiasis':14,'Neonatal conjunctivitis (Ophthalmia neonatorum)':37,'Nipah virus infection':100,'Norovirus':99,'(New) Variant CreutzfeldtJakob disease (vCJD nvCJD)':100,'Nocardiosis':17,'Onchocerciasis (River blindness)':17,'Opisthorchiasis':10,
'Paracoccidioidomycosis (South American blastomycosis)':19,'Paragonimiasis':90,'Pasteurellosis':100,'Pediculosis capitis (Head lice)':17,'Pediculosis corporis (Body lice)':14,'Pediculosis pubis (pubic lice crab lice)':14,'Pelvic inflammatory disease (PID)':19,'Pertussis (whooping cough)':19,
'Plague':100,'Pneumococcal infection':100,'Pneumocystis pneumonia (PCP)':66,'Pneumonia':66,'Poliomyelitis':14,'Prevotella infection':100,'Primary amoebic meningoencephalitis (PAM)':100,'Progressive multifocal leukoencephalopathy':77,'Psittacosis':91,'Q fever':14,'Rabies':17,'Relapsing fever':100,
'Respiratory syncytial virus infection':10,'Rhinosporidiosis':18,'Rhinovirus infection':99,'Rickettsial infection':100,'Rickettsialpox':100,'Rift Valley fever (RVF)':19,'Rocky Mountain spotted fever (RMSF)':19,'Rotavirus infection':66,'Rubella':38,'Salmonellosis':66,'Severe acute respiratory syndrome (SARS)':28,
'Scabies':90
}

In [None]:
# Load the cleaned diseases CSV into the diseases table, adding the category
# id looked up in Disease_name for each disease.
DISEASE_COLUMNS = (
    'disease_id', 'disease_name', 'signs_and_symptoms',
    'diagnosis', 'treatment', 'disease_category_id',
)

# Read every row once. The previous code called list(csv_data) twice, and the
# second call always saw an exhausted reader, so tqdm was told total=0.
with open('./data/cleaned/infectious_diseases.csv', newline='', encoding='utf-8') as diseases_file:
    csv_data = csv.reader(diseases_file)
    header = next(csv_data)  # skip the header row
    disease_rows = list(csv_data)

# Named bind parameters wrapped in text() are accepted by SQLAlchemy 1.x and
# 2.x alike. The table is qualified with DATABASE, which in MySQL is the schema.
insert_disease = text(
    f"INSERT INTO {DATABASE}.diseases "
    "(disease_id, disease_name, signs_and_symptoms, diagnosis, treatment, disease_category_id) "
    "VALUES (:disease_id, :disease_name, :signs_and_symptoms, :diagnosis, :treatment, "
    ":disease_category_id)"
)

# As before, rows are committed one at a time and the load stops at the first
# row that cannot be processed; rows inserted up to that point stay in the table.
for row in tqdm(disease_rows):
    try:
        category_id = Disease_name[row[1]]
    except (KeyError, IndexError) as lookup_error:
        # NOTE(review): Disease_name ends at 'Scabies', so the load presumably
        # stops at the first disease listed after it - confirm this is intended.
        print(f"Cannot map row {row!r} to a disease category ({lookup_error!r}); load stopped.")
        break
    try:
        # One transaction per row: committed on success, rolled back on error.
        with engine.begin() as transaction:
            transaction.execute(insert_disease, dict(zip(DISEASE_COLUMNS, row + [category_id])))
    except Exception as e:
        print(e)
        break

In [18]:
%%sql

SELECT *
FROM   diseases
LIMIT  10