In [1]:
#required packages
import pandas as pd
import mysql.connector as connection
import re
from datetime import datetime
from functools import reduce
import getpass
import warnings
import time
warnings.filterwarnings('ignore')

# Record real start time for the SQL execution
v_start_time = datetime.now()

# Print the real start time for SQL portion
print(f"Real start time for SQL portion: {v_start_time }")

Real start time for SQL portion: 2024-01-09 19:45:13.170439


In [2]:
# data base connection
def f_connect_to_database(p_host, p_user, p_password, p_database):
    while True:
        try:

            # Setup database connection
            v_mydb = connection.connect(
                host=p_host,
                user=p_user,
                password=p_password,
                database=p_database,
                use_pure=True
            )
            v_cursor = v_mydb.cursor()
            print("Connected to the database")
            return v_mydb, v_cursor

        except connection.Error as err:
            print(f"Error: {err}")
            print("\n please enter the correct credential.")

            return None, None

In [3]:
# Function to close the database connection
def f_close_database_connection(p_mydb, p_cursor):
    try:
        if p_cursor is not None:
            p_cursor.close()
            print("Cursor closed.")
        if p_mydb.is_connected():
            p_mydb.close()
            print("Database connection closed.")
    except Exception as e:
        return f"An error occurred while closing the database connection: {e}"

In [4]:
# enter credentials for data base connection
while True:
    v_host_input = input("Enter the host: ")
    v_user_input = input("Enter the username: ")
    v_password_input = getpass.getpass("Enter the password: ")
    v_database_input = input("Enter the database: ")

    v_mydb,v_cursor=f_connect_to_database(v_host_input, v_user_input, v_password_input, v_database_input)
    
    if v_mydb is not None:
        # Close the database connection
        f_close_database_connection(v_mydb, v_cursor)
        break

Enter the host: localhost
Enter the username: root
Enter the password: ········
Enter the database: newschema
Connected to the database
Cursor closed.
Database connection closed.


In [None]:
# Record real start time for the SQL execution
v_sql_start_time = datetime.now()

# Print the real start time for SQL portion
print(f"Real start time for SQL portion: {v_sql_start_time}")

# define you queries  
v_queries = {
    "visit_info": """
        SELECT
            v.visit_id,
            v.patient_id,
            p.gender AS Gender,
            YEAR(CURDATE()) - YEAR(p.birthdate) AS Age,
            v.date_Started AS Visit_started_date,
            v.date_stopped AS Visit_ended_date,
            v.date_created AS Visit_created_date,
            v.creator AS Creator_Id
        FROM visit v
        JOIN person p ON v.patient_id = p.person_id
        WHERE v.voided = 0
        AND v.date_Started BETWEEN %s AND %s;
    """,
    "doctor_id": """
        SELECT v.visit_id,
               e.creator AS Doctor_Id
        FROM visit v
        JOIN encounter e ON e.visit_id=v.visit_id 
        WHERE v.voided=0
              AND e.encounter_type=9
              AND e.voided=0
              AND v.date_Started BETWEEN %s AND %s;  
    """,
    "vitals_info": """
        SELECT
            v.visit_id,
            MAX(CASE 
                WHEN o.concept_id = 5085 THEN o.value_numeric END) AS Sbp,
            MAX(CASE 
                WHEN o.concept_id = 5086 THEN o.value_numeric END) AS Dbp,
            MAX(CASE 
                WHEN o.concept_id = 5087 THEN o.value_numeric END) AS Pulse,
            MAX(CASE 
                WHEN o.concept_id = 5088 THEN o.value_numeric END) AS Temperature,
            MAX(CASE 
                WHEN o.concept_id = 5089 THEN o.value_numeric END) AS Weight,
            MAX(CASE 
                WHEN o.concept_id = 5090 THEN o.value_numeric END) AS Height,
             MAX(CASE 
                WHEN o.concept_id = 5091 THEN o.value_numeric END) AS RR,
            MAX(CASE 
                WHEN o.concept_id = 5092 THEN o.value_numeric END) AS SPO2,
            MAX(CASE 
                WHEN o.concept_id = 301 THEN o.value_numeric END) AS HB,
            MAX(CASE 
                WHEN o.concept_id = 163177 THEN o.value_numeric END) AS Uric_acid,
            MAX(CASE 
                WHEN o.concept_id = 1006 THEN o.value_numeric END) AS Toal_cholestrol,
            MAX(CASE 
                WHEN o.concept_id = 728 THEN o.value_numeric END) AS sugar_random,
            MAX(CASE 
                WHEN o.concept_id = 163167 THEN o.value_numeric END) AS sugar_pp,
            MAX(CASE 
                WHEN o.concept_id = 163355 THEN o.value_numeric END) AS Sugar_after_meal
        FROM visit v
        JOIN encounter e ON e.visit_id=v.visit_id
        JOIN obs o ON e.encounter_id = o.encounter_id
        WHERE v.voided=0 
              AND e.encounter_type=6
              AND e.voided=0
              AND o.voided=0
              AND v.date_Started BETWEEN %s AND %s
        GROUP BY v.visit_id;
    """,
    "patient_info": """
        SELECT 
            v.visit_id,
            GROUP_CONCAT(distinct CASE 
                         WHEN o.concept_id = 163212 THEN o.value_text ELSE NULL END) AS Cheif_Complaint,
            GROUP_CONCAT(distinct CASE 
                         WHEN o.concept_id = 163213 THEN o.value_text ELSE NULL END) AS Physical_examination,
            GROUP_CONCAT(distinct CASE 
                         WHEN o.concept_id = 163210 THEN o.value_text ELSE NULL END) AS Patient_Medical_History,
            GROUP_CONCAT(distinct CASE 
                         WHEN o.concept_id = 163211 THEN o.value_text ELSE NULL END) AS Family_History
        FROM visit v
        JOIN encounter e ON v.visit_id = e.visit_id
        JOIN obs o ON e.encounter_id = o.encounter_id
        WHERE v.voided = 0 
            AND e.encounter_type = 1
            AND e.voided = 0
            AND o.voided = 0
            AND v.date_Started BETWEEN %s AND %s
        GROUP BY v.visit_id;
    """,
    "visit_note": """
        SELECT v.visit_id,
               CASE WHEN MAX(CASE 
                        WHEN o.concept_id = 163219 THEN o.value_text END) IS NOT NULL THEN 'yes' ELSE 'no' END AS Diagnosis_provided,
               GROUP_CONCAT(distinct CASE
                            WHEN o.concept_id = 163219 THEN o.value_text ELSE NULL END) AS Diagnosis,
               GROUP_CONCAT(distinct CASE
                            WHEN o.concept_id = 163202 THEN o.value_text ELSE NULL END) AS Medications,
               GROUP_CONCAT(distinct CASE
                            WHEN o.concept_id = 163206 THEN o.value_text ELSE NULL END) AS Medical_test,
               GROUP_CONCAT(distinct CASE
                            WHEN o.concept_id = 163205 THEN o.value_text ELSE NULL END) AS Medical_advice,
               GROUP_CONCAT(distinct CASE
                            WHEN o.concept_id = 163205 THEN o.value_text ELSE NULL END) AS Notes,
               GROUP_CONCAT(distinct CASE
                            WHEN o.concept_id = 163345 THEN o.value_text ELSE NULL END) AS Follow_up_date         
        FROM visit v
        JOIN encounter e ON e.visit_id=v.visit_id
        JOIN obs o ON o.encounter_id=e.encounter_id
        WHERE v.voided=0 
              AND e.encounter_type=9 
              AND e.voided=0 
              AND o.voided=0
              AND v.date_Started BETWEEN %s AND %s
        GROUP BY v.visit_id;
    """
}


In [6]:
# Function to get the date range from SQL tables
def f_get_date_range_from_tables(p_mydb, p_cursor):
    try:
        if p_mydb.is_connected():
            # SQL query to fetch the minimum and maximum dates from "visit" table
            date_query = "SELECT MIN(date_Started) AS min_date, MAX(date_Started) AS max_date FROM visit WHERE voided = 0;"

            p_cursor.execute(date_query)
            v_date_results = p_cursor.fetchall()

            if v_date_results:
                v_min_date, v_max_date = v_date_results[0]

                return v_min_date, v_max_date
            
            else:
                print("No data available in the tables.")
                return None, None
        else:
            print("Database connection is not opened.")
            return None, None
    except Exception as e:
        print(f"An error occurred: {e}")
    
        
# Connect to the database
v_mydb, v_cursor = f_connect_to_database(v_host_input, v_user_input, v_password_input, v_database_input)

# Get the date range from the SQL tables
v_min_table_date, v_max_table_date = f_get_date_range_from_tables(v_mydb, v_cursor)

# Close the database connection
f_close_database_connection(v_mydb, v_cursor)

Connected to the database
Cursor closed.
Database connection closed.


In [7]:
# Function to execute a query with date input parameters
def f_execute_query_with_params(p_mydb, p_cursor, p_query, p_params):
    try:
        if p_mydb.is_connected():
            p_cursor.execute(p_query, p_params)
            v_results = p_cursor.fetchall()
            v_columns = [desc[0] for desc in p_cursor.description]
            df = pd.DataFrame(v_results, columns=v_columns)
            
            return df
        else:
            print("Database connection is not opened.")
            return None
    except Exception as e:
        print(f"An error occurred: {e}")
        return None

In [8]:
# Print the date range to inform the user
print(f"Date range available in the SQL table: {v_min_table_date} to {v_max_table_date}")

# Get input parameters with validation
while True:
    # Get input parameters
    v_visit_started_date = input("Enter the Visit Started Date (YYYY-MM-DD): ")
    try:
        pd.to_datetime(v_visit_started_date, format='%Y-%m-%d')
        v_start_date = v_visit_started_date +' 00:00:00'
        
        while True:
            v_visit_ended_date = input("Enter the Visit Ended Date (YYYY-MM-DD): ")
            try:
                pd.to_datetime(v_visit_ended_date, format='%Y-%m-%d')
                v_end_date = v_visit_ended_date +' 23:59:59'
        
                # Check if the start date is before the end date
                if v_start_date <  v_end_date :
                    break
                else:
                    print("Error: Start date must be before the end date. Please try again.")
            
            except Exception as e:
                print(e)
        break
    except ValueError:
        print("Error: Invalid date format. Please enter dates in the format YYYY-MM-DD.")
        
# Execute each query with input parameters
v_params = (v_start_date, v_end_date)

Date range available in the SQL table: 2019-08-03 05:14:19 to 2019-08-08 06:46:15
Enter the Visit Started Date (YYYY-MM-DD): 2019-08-01
Enter the Visit Ended Date (YYYY-MM-DD): 2019-08-10


In [9]:
# Execute each query with input parameters and store the output in a dictionary
v_output_dataframes = {}
for query_name, query in v_queries.items():
    #connect database and cursor     
    v_mydb, v_cursor = f_connect_to_database(v_host_input, v_user_input, v_password_input, v_database_input)
    
    df = f_execute_query_with_params(v_mydb, v_cursor, query, v_params)
    if df is not None:
        v_output_dataframes[query_name] = df
        print(f"{query_name} executed successfully.")
    
    # Close the database connection
    f_close_database_connection(v_mydb, v_cursor)

Connected to the database
visit_info executed successfully.
Cursor closed.
Database connection closed.
Connected to the database
doctor_id executed successfully.
Cursor closed.
Database connection closed.
Connected to the database
vitals_info executed successfully.
Cursor closed.
Database connection closed.
Connected to the database
patient_info executed successfully.
Cursor closed.
Database connection closed.
Connected to the database
visit_note executed successfully.
Cursor closed.
Database connection closed.


In [10]:
# Merge all the data frames based on 'visit_id' using 'outer' join
merged_df = None
for df in v_output_dataframes.values():
    if merged_df is None:
        merged_df = df
    else:
        merged_df = pd.merge(merged_df, df, on='visit_id', how='outer')

In [11]:
# Record real start time for the complete execution
v_sql_end_time = datetime.now()
# Print the SQL real end time
print(f"SQL Real End time: {v_sql_end_time}")

# Calculate and print the total execution time
v_sql_duration = (v_sql_end_time - v_sql_start_time).total_seconds()
print(f"\nTotal Execution Time of SQL Portion: {v_sql_duration:.4f} seconds")


# Create a new DataFrame for time information
sql_time_df = pd.DataFrame({'sql_Start_Time': [v_sql_start_time], 'sql_End_Time': [v_sql_end_time], 'sql_Duration': [v_sql_duration]})

# Append the time information DataFrame to the merged_df
#final_sql_df = pd.concat([merged_df, sql_time_df], axis=1)

# Save the final DataFrame to Excel file
merged_df.to_excel('final_sql_output_dataframe.xlsx', index=False)

sql_time_df.to_excel("Time calc.xlsx",index=False)

# let's remove the "time" columns before moving to the seggregation part to avoid bulk columns in the final output
#final_sql_df.drop(columns=['sql_Start_Time','sql_End_Time','sql_Duration'],inplace=True)

SQL Real End time: 2024-01-09 19:45:31.591938

Total Execution Time of SQL Portion: 8.9131 seconds


In [12]:
merged_df

Unnamed: 0,visit_id,patient_id,Gender,Age,Visit_started_date,Visit_ended_date,Visit_created_date,Creator_Id,Doctor_Id,Sbp,...,Physical_examination,Patient_Medical_History,Family_History,Diagnosis_provided,Diagnosis,Medications,Medical_test,Medical_advice,Notes,Follow_up_date
0,1,1409,M,10,2019-08-03 10:05:57,NaT,2019-08-03 10:07:42,28,1.0,100.0,...,<b>General exams: </b><br/>• Eyes: Jaundice-[p...,• Alcohol use - Yes<br/>,•Diabetes : Mother.<br/>,yes,Malaria:Primary & Confirmed,,,,,
1,2,1410,F,10,2019-08-03 05:14:19,2019-08-07 06:20:05,2019-08-03 10:46:46,28,1.0,100.0,...,<b>General exams: </b><br/>• Eyes: Jaundice-ja...,• Pregnancy status - Not pregnant.<br/>• Alcoh...,•Heart Disease : Mother.<br/>,no,,,,,,
2,3,1411,M,9,2019-08-03 05:19:33,2019-08-07 07:26:54,2019-08-03 10:50:39,28,1.0,100.0,...,<b>General exams: </b><br/>• Eyes: Jaundice-no...,• Smoking history,•Diabetes : Mother.,yes,Fever of unknown origin:Primary & Provisional,,cbc,DO NOT SMOKE BIDIS OF CIGARETTES,DO NOT SMOKE BIDIS OF CIGARETTES,05-08-2019
3,4,1412,F,39,2019-08-03 10:51:40,NaT,2019-08-03 10:53:54,28,,100.0,...,<b>General exams: </b><br/>• Eyes: Jaundice-ja...,• Alcohol use - No/Denied.<br/>• Smoking histo...,•Tuberculosis : Mother.<br/>,,,,,,,
4,5,1413,M,32,2019-08-03 10:55:44,NaT,2019-08-03 10:56:58,28,22.0,100.0,...,<b>General exams: </b><br/>• Eyes: Jaundice-ja...,• Allergies - No known allergies.<br/>,•Diabetes : Mother.<br/>,yes,Malaria:Primary & Provisional,"PANTOCID 20MG: 1, Tablet Once daily (null) fev...","CBC: HB/WBC/RBC PLATELET,HB,WIDAL TEST (TYPHOI...","DO NOT EAT RAW ONION,DO NOT WALK BAREFOOT,take...","DO NOT EAT RAW ONION,DO NOT WALK BAREFOOT,take...","05-08-2019, Advice: get hospital documents,15-..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,64,1491,M,23,2019-08-07 14:33:30,NaT,2019-08-07 14:36:28,28,,90.0,...,<b>General exams: </b><br/>• Eyes: Jaundice-no...,• Allergies - No known allergies.<br/>• Alcoho...,•High BP : Father.<br/>,,,,,,,
64,65,1492,M,9,2019-08-07 10:53:58,2019-08-07 16:27:15,2019-08-07 16:25:23,28,1.0,,...,<b>General exams: </b><br/>• Eyes: Jaundice-no...,test,test,yes,Hematuria:Primary & Provisional,,,,,
65,66,1492,M,9,2019-08-07 16:27:56,NaT,2019-08-07 16:28:54,28,,,...,<b>General exams: </b><br/>• Eyes: Jaundice-no...,,,,,,,,,
66,67,1493,M,9,2019-08-07 17:43:44,NaT,2019-08-07 17:56:22,28,,,...,<b>General exams: </b><br/>• Eyes: Jaundice-no...,• Alcohol use - No/Denied.<br/>,•Other : Sister.<br/>,,,,,,,


<div style=" padding:20px; border-radius:5px;">
    <h1 style="color:black; font-weight:bold; text-align:center;">Data Seggregation</h1>
</div>

## Cheif Complaint

In [13]:
# copy the datafrmae
df_1=merged_df.copy()

# selecting the required columns for 'chief complaint' to make it as a dataframe
df_cc = df_1[['visit_id', 'patient_id','Visit_started_date','Follow_up_date', 'Cheif_Complaint']]

In [14]:
#Imputing the nan values in the chief complaint data with null
df_cc['Cheif_Complaint'] = df_cc['Cheif_Complaint'].fillna('null')

#Removing the unwanted symbols in the chief complaint data
df_cc['Cheif_Complaint'] = df_cc['Cheif_Complaint'].replace('\xa0', ' ', regex=True)

In [15]:
#Creating a function that will extract the data before the associated symptoms
def f_extract_section(p_text):
    try:
        v_matches = re.findall(r'(.*?)Associated symptoms', p_text, re.DOTALL)
        if v_matches:
            return v_matches[0].strip()
        else:
            return p_text.strip()
    except Exception as e:
        print(f"Error: {e}")
        return p_text.strip()

In [16]:
# Extracting the Chief complaint from the data
def f_extract_all_symptoms(p_text):
    try:
        v_matches = re.findall(r'►<b>(.*?)</b>', p_text)
        if v_matches:
            v_formatted_symptoms = ', '.join(v_matches)
            return v_formatted_symptoms
    except:
        print("An error occurred while extracting symptoms.")
    return None

In [17]:
#Combining both the functions to extract the all the chief complaints
def f_combined_extraction(p_text):
    v_section = f_extract_section(p_text)
    if v_section:
        return f_extract_all_symptoms(v_section)
    return None

In [18]:
# Extracting the associated symptoms from the visit samples
def f_extract_associated_symptoms(p_text):
    try:
        v_pattern = r'Associated symptoms</b>: <br/>• Patient reports -<br/>(.*?)<br/>'
        v_matches = re.findall(v_pattern, p_text, re.DOTALL)

        v_associated_symptoms = []
        if v_matches:
            v_cleaned_text = re.sub(r'<.*?>', '', v_matches[0]).strip().replace('•', '')
            v_cleaned_text = v_cleaned_text.replace('Patient denies', '').strip()
            v_associated_symptoms = [part.split('-')[0].strip() for part in v_cleaned_text.split(',')]

        v_formatted_symptoms = ', '.join(v_associated_symptoms)
        return v_formatted_symptoms if v_formatted_symptoms else 'Null'
    except Exception as e:
        # Handle the exception here, e.g., log the error or return a default value
        return 'An error occurred: {}'.format(e)

In [19]:
#Applying the function to the dataframe
df_cc['All_Symptoms'] = df_cc['Cheif_Complaint'].apply(f_combined_extraction)

#Applying the function to the dataframe
df_cc['Associated_Symptoms'] = df_cc['Cheif_Complaint'].apply(f_extract_associated_symptoms)

## Diagnosis

In [20]:
# copy the dataframe
df_2=merged_df.copy()

# selecting the required columns for 'Diagnosis' to make it as a dataframe
df_diag=df_2[['visit_id','patient_id','Visit_started_date','Diagnosis']]

In [21]:
# Split the strings in the 'Diagnosis' column at the ':' and ',' symbol and extract the unique strings
v_diagnosis_unique = df_2['Diagnosis'].str.split(':',expand=True)[1].str.split(',',expand=True)[0].str.strip().dropna().unique()
# print(v_diagnosis_unique)

# Create a new columns for each unique diagnosis category
for diag in v_diagnosis_unique:
    df_diag[diag] = 'NaN'

In [22]:
# Columns to match in the 'Diagnosis' column values
v_columns_to_match = ['Primary & Provisional', 'Secondary & Confirmed', 'Primary & Confirmed', 'Secondary & Provisional']

# Iterate through each column
for col in v_columns_to_match:
    # Extract values before the first ':' using regular expression and also in between ':' as well.
    df_diag[col] = df_diag['Diagnosis'].apply(lambda x: ''.join(re.findall(fr'([^:]+):{col}', str(x))) if re.search(fr'([^:]+):{col}', str(x)) else None)

In [23]:
# Remove the specified values from the specified columns
v_values_to_remove = ['Primary & Provisional,', 'Secondary & Confirmed,', 'Primary & Confirmed,', 'Secondary & Provisional,']

for col in v_columns_to_match:
    df_diag[col] = df_diag[col].str.replace('|'.join(v_values_to_remove), '', regex=True)
    
    
# changing the columns order
df_diag = df_diag.reindex(columns=['visit_id', 'patient_id', 'Visit_started_date', 'Diagnosis',
       'Primary & Provisional', 'Secondary & Provisional','Primary & Confirmed','Secondary & Confirmed'])

## Medication

In [24]:
# copy the dataframe
df_3=merged_df.copy()

# Selecting the necessary columns
df_medi = df_3[['visit_id','patient_id','Visit_started_date','Medications']]

In [25]:
# Filling the nan values with Null in 'Medication' column
df_medi['Medications'] = df_medi['Medications'].fillna('Null')

In [26]:
# Creating a function for parsing the data
def f_extracting_medication(p_data):
    v_data = p_data
    try:
        # Creating an empty list for saving the parsed data
        v_medicines = []
        v_strength = []
        v_dosage = []
        # Passing the data into loop and parse the columns
        for i in range(0, len(v_data)):
            # Checking if there is any null values
            if v_data[i] == 'Null':
                #print(i, df2['Medication'][i])
                v_medicines.append('null')
                v_strength.append('null')
                v_dosage.append('null')
            else:
                # List for Medicines
                v_medi = [''.join(x.strip().split(':')[0]) for x in v_data[i].split(';')]
                v_m = ', '.join(v_medi)
                v_medicines.append(v_m)

                # List for Strength
                v_stro = [''.join(x.strip().split(':')[1].strip().split(',')[0]) for x in v_data[i].split(';')]
                v_stren = ', '.join(v_stro)
                v_strength.append(v_stren)

                # List for Dosage
                v_dos = [''.join(x.strip().split(':')[1].strip().split(',')[1:]) for x in v_data[i].split(';')]
                v_dosa = ', '.join(v_dos)
                v_dosage.append(v_dosa)
        
        # Return the remaining columns
        return v_medicines, v_strength, v_dosage

    except Exception as e:
        print(e)

        
# Passing the medication text into function to parse the data
df2_medicine = list(f_extracting_medication(df_medi['Medications']))

In [27]:
# Copying the dataframe to df2_medicine
df_medicine = df_medi.copy()

In [28]:
# Inserting the parsed data into particular columns
df_medicine.insert(4, 'Medicines', df2_medicine[0])
df_medicine.insert(5, 'Strength', df2_medicine[1])
df_medicine.insert(6, 'Dosage', df2_medicine[2])

## Final DataFrame

In [29]:
# Merge "chief complaint" and "Diagnosis" DataFrames based on 'visit_id', 'patient_id', 'Visit_Started_Date' 
segg_merged_df = pd.merge(df_cc, df_diag, on=['visit_id', 'patient_id', 'Visit_started_date'], how='inner')

# Merge " merged_df" and "Medications" DataFrames based on 'visit_id', 'patient_id', 'Visit_Started_Date'
final_segg_df=pd.merge(segg_merged_df, df_medicine, on=['visit_id', 'patient_id', 'Visit_started_date'], how='inner')

In [30]:
# removing some columns before merging with the "final_sql_df" to avoid duplicate rows
final_segg_df.drop(columns=['Follow_up_date','Cheif_Complaint','Diagnosis','Medications'],axis=1,inplace=True)

In [31]:
# Merge final_sql_df and final_df using 'inner' join
complete_df = pd.merge(merged_df, final_segg_df, on=['visit_id', 'patient_id', 'Visit_started_date'], how='inner')

In [32]:
# Record real start time for the complete execution
v_end_time = datetime.now()

# Print the real end time for complete execution
print(f"Real End Time for complete execution: {v_end_time}")

# Calculate and print the total execution time
v_complete_duration = (v_end_time - v_start_time).total_seconds()
print(f"\nTotal Execution Time for complete execution: {v_complete_duration:.4f} seconds")

# Create a new DataFrame for time information
complete_time_df = pd.DataFrame({'complete_Start_Time': [v_start_time], 'complete_End_Time': [v_end_time], 'complete_Duration': [v_complete_duration]})

# Append the time information DataFrame to the "complete_df"
#complete_final_df = pd.concat([complete_df, complete_time_df], axis=1)

# Save the final DataFrame to Excel file
complete_df.to_excel('final_complete_dataframe.xlsx', index=False)
complete_time_df.to_excel('seg_time.xlsx', index=False)

Real End Time for complete execution: 2024-01-09 19:45:32.716071

Total Execution Time for complete execution: 19.5456 seconds


In [33]:
complete_df

Unnamed: 0,visit_id,patient_id,Gender,Age,Visit_started_date,Visit_ended_date,Visit_created_date,Creator_Id,Doctor_Id,Sbp,...,Follow_up_date,All_Symptoms,Associated_Symptoms,Primary & Provisional,Secondary & Provisional,Primary & Confirmed,Secondary & Confirmed,Medicines,Strength,Dosage
0,1,1409,M,10,2019-08-03 10:05:57,NaT,2019-08-03 10:07:42,28,1.0,100.0,...,,Abdominal Pain,Null,,,Malaria,,,,
1,2,1410,F,10,2019-08-03 05:14:19,2019-08-07 06:20:05,2019-08-03 10:46:46,28,1.0,100.0,...,,Back & Neck pain,Null,,,,,,,
2,3,1411,M,9,2019-08-03 05:19:33,2019-08-07 07:26:54,2019-08-03 10:50:39,28,1.0,100.0,...,05-08-2019,Fever,Null,Fever of unknown origin,,,,,,
3,4,1412,F,39,2019-08-03 10:51:40,NaT,2019-08-03 10:53:54,28,,100.0,...,,Diarrhea,Null,,,,,,,
4,5,1413,M,32,2019-08-03 10:55:44,NaT,2019-08-03 10:56:58,28,22.0,100.0,...,"05-08-2019, Advice: get hospital documents,15-...",Fever,Null,Malaria,,,,PANTOCID 20MG,1,Tablet Once daily (null) fever for 10 1 nullt...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63,64,1491,M,23,2019-08-07 14:33:30,NaT,2019-08-07 14:36:28,28,,90.0,...,,Back & Neck pain,Null,,,,,,,
64,65,1492,M,9,2019-08-07 10:53:58,2019-08-07 16:27:15,2019-08-07 16:25:23,28,1.0,,...,,Sick child (under 5years),Null,Hematuria,,,,,,
65,66,1492,M,9,2019-08-07 16:27:56,NaT,2019-08-07 16:28:54,28,,,...,,,Null,,,,,,,
66,67,1493,M,9,2019-08-07 17:43:44,NaT,2019-08-07 17:56:22,28,,,...,,Fever,Null,,,,,,,
