# Create and Load Tables using SQL Scripts

Database Used in this Lab
The database used in this lab is internal. You will be working on a sample Cardio-Vascular Diseases (CVD) database. This CVD database schema consists of five tables: PATIENTS, MEDICAL_HISTORY, MEDICAL_PROCEDURES, MEDICAL_DEPARTMENTS, and MEDICAL_LOCATIONS.

Your task is to create this database in MySQL. This task is divided into three parts.


In [25]:

import mysql.connector as sql
import os as os 
from dotenv import load_dotenv
from sqlalchemy import create_engine
import pandas as pd 

In [26]:


load_dotenv('/workspaces/IBM-DS-Course/.env')

user = os.getenv('USER')
password = os.getenv('PASSWORD')



Task 1: Create the database on MySQL using the phpMyAdmin GUI.


In [27]:
conn = sql.connect(
    host='localhost',
    user=user,
    password=password,
    port=3306)

cursor = conn.cursor()


In [28]:
cursor.execute("CREATE DATABASE IF NOT EXISTS CVD")

In [29]:
sql_file_path = '/workspaces/IBM-DS-Course/Course 6 Db and SQL /Module 2 Relational db and Tables/2.2. .sql and .csv/CVD_Database_Create_Tables_Script.sql'

with open(sql_file_path, 'r') as file:
    sql_script = file.read()


Task 2: Create all the tables in MySQL using an SQL script.


In [30]:
cursor.execute("USE CVD")
try:
    for statement in sql_script.split(';'):
        if statement.strip():  # Execute non-empty statements
            cursor.execute(statement)
    conn.commit()  # Commit the changes
    print("SQL script executed successfully")
except Exception as e:
    print(f"An error occurred: {e}")


SQL script executed successfully



Task 3: Populate each table with the data in respective CSV files.

In [47]:
patients = '/workspaces/IBM-DS-Course/Course 6 Db and SQL /Module 2 Relational db and Tables/2.2. .sql and .csv/PATIENTS.csv'
md_procedure = '/workspaces/IBM-DS-Course/Course 6 Db and SQL /Module 2 Relational db and Tables/2.2. .sql and .csv/MEDICAL_PROCEDURES.csv'
md_history = '/workspaces/IBM-DS-Course/Course 6 Db and SQL /Module 2 Relational db and Tables/2.2. .sql and .csv/MEDICAL_HISTORY.csv'
md_department ='/workspaces/IBM-DS-Course/Course 6 Db and SQL /Module 2 Relational db and Tables/2.2. .sql and .csv/MEDICAL_DEPARTMENTS.csv'
md_locations = '/workspaces/IBM-DS-Course/Course 6 Db and SQL /Module 2 Relational db and Tables/2.2. .sql and .csv/MEDICAL_LOCATIONS.csv'

In [48]:
patients_df = pd.read_csv(patients)
md_procedure_df = pd.read_csv(md_procedure)
md_history_df = pd.read_csv(md_history)
md_department_df= pd.read_csv(md_department)
md_locations_df = pd.read_csv(md_locations)

In [61]:
md_locations_df.head()
# !pip install pandas sqlalchemy

Unnamed: 0,LOCATION_ID,DEPT_ID,LOCATION_NAME
0,L002,D002,Medical Center


**NOTE**: the create_engine method creates an engine connection stored in the engine object


'''
create_engine(mysql+mysqlconnector://< username >:< password >@< host >/< database >)
'''


In [33]:
db_name = "CVD"
host = "localhost"
connection_string = f'mysql+mysqlconnector://{user}:{password}@{host}/{db_name}'
engine = create_engine(connection_string)

- to_sql() method inserts DataFrames into the specified tables

- - if_exists='append' means it will add new rows to the table without removing existing ones.
- - If you want to replace the table, use if_exists='replace'


In [49]:
patients_df.columns = ['PATIENT_ID', 'FIRST_NAME', 'LAST_NAME', 'SSN', 'BIRTH_DATE', 'SEX', 'ADDRESS', 'DEPT_ID']
md_procedure_df.columns = ['PROCEDURE_ID', 'PROCEDURE_NAME', 'PROCEDURE_DATE' ,'PATIENT_ID' ,'DEPT_ID']
md_history_df.columns = ['MEDICAL_HISTORY_ID', 'PATIENT_ID' ,'DIAGNOSIS_DATE','DIAGNOSIS_CODE','MEDICAL_CONDITION' ,'DEPT_ID' ]
md_department_df.columns = ['DEPT_ID' , 'DEPT_NAME' , 'MANAGER_ID' ,'LOCATION_ID' ]
md_locations_df.columns = ['LOCATION_ID', 'DEPT_ID' , 'LOCATION_NAME' ]


<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


In [51]:
patients_df.to_sql('PATIENTS', con=engine, if_exists='replace', index=False)
md_procedure_df.to_sql('MEDICAL_PROCEDURES', con=engine, if_exists='replace', index=False)
md_history_df.to_sql('MEDICAL_HISTORY', con=engine, if_exists='replace', index=False)
md_department_df.to_sql('MEDICAL_DEPARTMENTS', con=engine, if_exists='replace', index=False)
md_locations_df.to_sql('MEDICAL_LOCATIONS', con=engine, if_exists='replace', index=False)

1

In [54]:
cursor.execute("SELECT * FROM PATIENTS;")

In [55]:
output = cursor.fetchall()
for row in output:
    print(row)

('P002', 'Jane', 'Smith', 987654321, '1985-10-20', 'F', '456 Oak Ave', 'D002')
('P003', 'Michael', 'Johnson', 111222333, '1975-03-12', 'M', '789 Elm St', 'D003')
('P004', 'Emily', 'Brown', 444555666, '1980-09-25', 'F', '321 Pine Rd', 'D004')
('P005', 'William', 'Miller', 777888999, '1992-11-18', 'M', '567 Maple Ave', 'D003')


In [56]:
cursor.execute("SELECT * FROM MEDICAL_PROCEDURES;")
output = cursor.fetchall()
for row in output:
    print(row)

('PR002', 'Cardiac Catheterization', '2023-08-01', 'P002', 'D002')
('PR003', 'Electrocardiogram', '2023-08-02', 'P003', 'D003')
('PR004', 'Echocardiogram', '2023-08-03', 'P004', 'D004')
('PR005', 'Stress Test', '2023-08-03', 'P005', 'D003')
('PR006', 'Coronary Angiogram', '2023-08-04', 'P003', 'D003')
('PR007', 'Pacemaker Implantation', '2023-08-04', 'P005', 'D003')


In [57]:
cursor.execute("SELECT * FROM MEDICAL_HISTORY;")
output = cursor.fetchall()
for row in output:
    print(row)

('MH002', 'P001', '2023-07-30', 'I25.10', 'Hypertensive Heart Disease', 'D002')
('MH003', 'P002', '2023-08-01', 'I25.10', 'Hypertensive Heart Disease', 'D002')
('MH004', 'P003', '2023-08-01', 'I20.9', 'Unstable Angina', 'D003')
('MH005', 'P004', '2023-08-01', 'I25.5', 'Ischemic Cardiomyopathy', 'D004')
('MH006', 'P005', '2023-08-02', 'I50.9', 'HeartFailure, Unspecified', 'D003')


In [59]:
cursor.execute("SELECT * FROM MEDICAL_DEPARTMENTS;")
output = cursor.fetchall()
for row in output:
    print(row)

('D002', 'Internal Medicine', None, 'L002')
('D003', 'Cardiothoracic Surgery', None, 'L001')
('D004', 'Electrophysiology', None, 'L002')


In [60]:
cursor.execute("SELECT * FROM MEDICAL_LOCATIONS;")
output = cursor.fetchall()
for row in output:
    print(row)

('L002', 'D002', 'Medical Center')
