# Clinic Patient and Appointment Management
In this notebook I have created a new SQL database, where there will be 4 tables: `Patients`, `Appointments`, `Doctors`, `TreatmentPlans`. 

In [195]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [220]:
# Patients.csv 

npatients = 1000

# Primary key: Nominal
PatientId = np.unique(np.random.randint(7330000000, 7340000000, npatients, dtype=np.int64))

# Nominal
random_name_gender = np.loadtxt('../csv files/random-names/random-names-gender.csv', delimiter=',', unpack = True,  dtype=str)
fullname = np.array([])
for i in range(len(random_name_gender[0])):
    fullname = np.append(fullname, random_name_gender[0][i].strip('"') + ' ' + random_name_gender[1][i].strip('"'))

# Nominal
gender = random_name_gender[2]
# Ratio
Age = np.random.randint(18, 100, npatients)

# Nominal
ContactNumber = np.unique(np.random.randint(7900000000, 7910000000, npatients, dtype=np.int64))
ContactNumber = np.array(['0' + str(number) for number in ContactNumber])

patients_df = pd.DataFrame({
    'PatientId': PatientId,
    'Name': fullname,
    'Gender': gender,
    'Age': Age,
    'ContactNumber': ContactNumber
})
patients_df.to_csv('../csv files/clinic-appointment/Patients.csv', index=False)

In [197]:
patients_df.shape

(1000, 5)

In [198]:
patients_df.head()

Unnamed: 0,PatientId,Name,Gender,Age,ContactNumber
0,7330003330,Jordan Alexander,"""Male""",43,7900009640
1,7330033792,Cherry Williams,"""Female""",28,7900024701
2,7330053992,Vanessa Taylor,"""Female""",79,7900033574
3,7330095427,Richard Johnson,"""Male""",68,7900035050
4,7330097134,Rafael Watson,"""Male""",23,7900061069


In [199]:
# Appointments.csv

from datetime import datetime


nappointments = 3000  

# Nominal attribute
patient_ids = np.random.choice(patients_df['PatientId'], nappointments, replace=True)

doctor_names = ['Dr. Smith', 'Dr. Johnson', 'Dr. Williams', 'Dr. Jones', 'Dr. Brown', 
                'Dr. Davis', 'Dr. Miller', 'Dr. Wilson', 'Dr. Moore', 'Dr. Taylor']
# Nominal attribute
appointed_doctor = np.random.choice(doctor_names, nappointments)

current_year = datetime.now().year
years = np.full(nappointments, current_year)  


months = np.random.randint(1, 13, nappointments)
days = np.random.randint(1, 29, nappointments) 

hours = np.random.randint(8, 18, nappointments) 
minutes = np.random.randint(0, 60, nappointments)  

# Interval attribute
appointment_dates = [
    f"{year}-{str(month).zfill(2)}-{str(day).zfill(2)} {str(hour).zfill(2)}:{str(minute).zfill(2)}"
    for year, month, day, hour, minute in zip(years, months, days, hours, minutes)
]


appointments_df = pd.DataFrame({
    'PatientID': patient_ids,
    'DoctorName': appointed_doctor,
    'AppointmentDate': appointment_dates
})
appointments_df.to_csv('../csv files/clinic-appointment/Appointments.csv', index=False)

In [200]:
appointments_df.head()

Unnamed: 0,PatientID,DoctorName,AppointmentDate
0,7332419578,Dr. Wilson,2024-07-05 16:54
1,7336335469,Dr. Wilson,2024-08-06 15:42
2,7335274373,Dr. Moore,2024-10-08 11:55
3,7332456379,Dr. Brown,2024-10-17 16:25
4,7331569279,Dr. Williams,2024-03-11 08:13


In [201]:
# Doctors Table

# Ordinal attribute
doctor_positions = [
    'Consultant', 'Senior Consultant', 'GP Partner', 
    'Salaried GP', 'Locum GP', 'Clinical Lead', 'Medical Director'
]

# Nominal attribute
specialties = [
    'General Medicine', 'Cardiology', 'Dermatology', 'Pediatrics', 'Neurology', 
    'Oncology', 'Orthopedics', 'Gastroenterology', 'Psychiatry', 'Emergency Medicine'
]

doctor_positions_assigned = np.random.choice(doctor_positions, len(doctor_names), replace=True)
doctor_specialties_assigned = np.random.choice(specialties, len(doctor_names), replace=True)

doctors_df = pd.DataFrame({
    'DoctorName': doctor_names,
    'Position': doctor_positions_assigned,
    'Specialty': doctor_specialties_assigned
})
doctors_df.to_csv('../csv files/clinic-appointment/Doctors.csv', index=False)

In [202]:
doctors_df.head()

Unnamed: 0,DoctorName,Position,Specialty
0,Dr. Smith,Salaried GP,Neurology
1,Dr. Johnson,Salaried GP,Orthopedics
2,Dr. Williams,Locum GP,Cardiology
3,Dr. Jones,Locum GP,General Medicine
4,Dr. Brown,GP Partner,Orthopedics


In [203]:
# Treatment table
ntreatments = 3000

# Foreign key: Nominal
patient_ids = np.random.choice(patients_df['PatientId'], ntreatments, replace=True)

current_date = datetime.now()
years = np.full(nappointments, current_year)  

treatment_years = np.random.randint(2010, years[0], ntreatments)
treatment_month = np.random.randint(1, 13, ntreatments)
treatment_days = np.random.randint(1, 29, ntreatments)
treatment_dates = [f'{treatment_years[i]}-{str(treatment_month[i]).zfill(2)}-'
                     f'{str(treatment_days[i]).zfill(2)}' for i in range(ntreatments)]

# Ordinal 
statuses = ["Ongoing", "Completed", "Follow-Up"]

treatment_statuses = np.random.choice(statuses, ntreatments, replace=True)

treatment_plans_df = pd.DataFrame({
    'PatientID': patient_ids,
    'TreatmentDate': treatment_dates,
    'Status': treatment_statuses
})


In [204]:
treatment_plans_df.head()

Unnamed: 0,PatientID,TreatmentDate,Status
0,7337305481,2017-07-05,Follow-Up
1,7332658263,2021-06-28,Completed
2,7337201580,2016-01-26,Completed
3,7334334124,2013-07-22,Ongoing
4,7339936017,2017-05-24,Follow-Up


In [205]:
# Introducing deliberate missing values

n_points = 50  
random_indices = np.random.choice(treatment_plans_df.index, n_points, replace=False)

# Set selected rows' TreatmentDate and Status to NaN
treatment_plans_df.loc[random_indices, 'TreatmentDate'] = np.nan
treatment_plans_df.loc[random_indices, 'Status'] = np.nan

treatment_plans_df.to_csv('../csv files/clinic-appointment/Treatments.csv', index=False)
# Check and print how many values are NaN
print(treatment_plans_df.isna().sum()) 


PatientID         0
TreatmentDate    50
Status           50
dtype: int64


In [206]:
# Introducing duplicate values to Patients table

n_points = 5
random_indices = np.random.choice(patients_df.index, n_points, replace=False)

duplicates = patients_df.loc[random_indices].copy()

patients_with_duplicates = pd.concat([patients_df, duplicates], ignore_index=True)

patients_with_duplicates = patients_with_duplicates.sample(frac=1).reset_index(drop=True)

print(random_indices)
print(duplicates)
print(patients_with_duplicates.shape)
patients_with_duplicates.tail()

patients_with_duplicates.to_csv('../csv files/clinic-appointment/Patients.csv', index=False)

[631 886  30 905 371]
      PatientId              Name    Gender  Age ContactNumber
631  7336248749        Max Harper    "Male"   44   07906516527
886  7338978948      Mike Chapman    "Male"   29   07908960041
30   7330348121  Camila Armstrong  "Female"   85   07900253048
905  7339173412       Lana Turner  "Female"   58   07909119035
371  7333837418     Violet Morgan  "Female"   79   07903770585
(1005, 5)
