In [2]:
import re
from phe import paillier

class Lexer:
    """Split free text into word-character tokens.

    A token is a maximal run of regex word characters (letters, digits,
    underscore); whitespace and punctuation act as separators. A mixed run
    such as ``mmHg120`` therefore stays a single token.
    """

    # `\w` already matches digits, so the former `|\d+` alternative could
    # never fire; it is dropped without changing the tokens produced.
    _TOKEN_PATTERN = re.compile(r'\w+')

    def __init__(self, text):
        self.text = text
        self.tokens = []

    def tokenize(self):
        """Populate ``self.tokens`` from ``self.text`` and return the list.

        ``None`` and float NaN (the usual markers for a missing cell in
        tabular data) produce an empty token list instead of a ``TypeError``.
        """
        text = self.text
        # `text != text` is True only for NaN.
        if text is None or (isinstance(text, float) and text != text):
            self.tokens = []
        else:
            self.tokens = self._TOKEN_PATTERN.findall(text)
        return self.tokens

class Parser:
    """Separate lexer tokens into lower-cased terms and integer values."""

    def __init__(self, tokens):
        self.tokens = tokens

    def parse(self):
        """Classify every token as either a number or a term.

        Returns:
            tuple[list[str], list[int]]: ``(medical_terms, numbers)`` where
            ``numbers`` holds the tokens made up solely of decimal digits,
            converted with ``int``, and ``medical_terms`` holds every other
            token lower-cased. Both lists keep the original token order.
        """
        medical_terms = []
        numbers = []
        for token in self.tokens:
            # isdecimal() mirrors what int() can parse. isdigit() is also True
            # for characters such as the superscript two, on which int() raises
            # ValueError.
            if token.isdecimal():
                numbers.append(int(token))
            else:
                medical_terms.append(token.lower())
        return medical_terms, numbers

def main(text="Patient Neha Shah, age 50,  has blood pressure (180/120)"):
    """Run the single-report demo: tokenize, parse, average under encryption, classify.

    The numbers found in ``text`` are encrypted with a freshly generated
    Paillier key pair, summed and divided by their count while still
    encrypted, and only the resulting mean is decrypted and compared with the
    fixed threshold ranges.

    Args:
        text: The report to analyse. Defaults to the built-in sample report.

    Prints the extracted terms, the ciphertexts of the numbers, the decrypted
    mean and the resulting level. A report without any number prints a mean of
    ``None`` and the level "Unknown".
    """
    # Step 1: Tokenization (Lexer)
    lexer = Lexer(text)
    lexer.tokenize()
    tokens = lexer.tokens

    # Step 2: Parsing
    parser = Parser(tokens)
    medical_terms, numbers = parser.parse()

    # NOTE(review): every numeric token of the report is averaged below, so for
    # the default sample the age (50) is mixed in with the 180/120 reading.
    # Confirm whether only the blood-pressure values should be used.

    # Step 3: Homomorphic Encryption (HE)
    public_key, private_key = paillier.generate_paillier_keypair()
    encrypted_numbers = [public_key.encrypt(num) for num in numbers]

    # Threshold ranges, checked in this order with inclusive bounds, so a mean
    # of exactly 70 or 110 is reported as "Normal". They are public constants:
    # encrypting them only to decrypt them again right away added twelve
    # Paillier operations and no protection, so they are compared in the clear.
    bp_ranges = (
        ("Normal", (70, 110)),  # Adjusted normal range
        ("High", (110, 140)),
        ("Low", (40, 70)),
    )

    if encrypted_numbers:
        # The sum and the division by the count both operate on ciphertexts.
        encrypted_mean_blood_pressure = sum(encrypted_numbers) / len(encrypted_numbers)
        decrypted_mean_blood_pressure = private_key.decrypt(encrypted_mean_blood_pressure)
        bp_level = next(
            (
                label
                for label, (lower, upper) in bp_ranges
                if lower <= decrypted_mean_blood_pressure <= upper
            ),
            "Unknown",
        )
    else:
        # No number in the report: there is nothing to average (this used to
        # fail with ZeroDivisionError).
        decrypted_mean_blood_pressure = None
        bp_level = "Unknown"

    # Print results
    print("Medical Terms:", medical_terms)
    print("Encrypted Numbers:", [num.ciphertext() for num in encrypted_numbers])
    print("Decrypted Mean Blood Pressure:", decrypted_mean_blood_pressure)
    print("Blood Pressure Level:", bp_level)

# Run the single-report demo when this code is executed directly (as a
# notebook cell or a script) rather than imported as a module.
if __name__ == '__main__':
    main()


Medical Terms: ['patient', 'neha', 'shah', 'age', 'has', 'blood', 'pressure']
Encrypted Numbers: [98011659338815702584401693007702575846288207517981641004737626709227454031503319982263032829450117269837283802166467063046417222775993985079564175291488950201298093224739547267973050124535191692605315047132780320510607483536777268153109578788520055845603048390929723566490076880930365440922063089665840482325055882001319045328145214256357515918630680065138663520021650575574601932210519822053538223593988586246593304839945430298507658316480235923954166729516779070068374039669454221508779709225066647526793529150606196498531754794359124392856814728760016609654771204081639960893025797713337866279898459968331726311801930430552695473458459695441095231271031992080333691584776090673500400517843731595057094659455871729392684733522127163197453633159824944576774887756634903024558670667016130946959523530128772542455910293972929995897798582071914382616116751723202943456475450198750699013050458966891068946

In [3]:
import pandas as pd
import re
from phe import paillier

class Lexer:
    """Split free text into word-character tokens.

    A token is a maximal run of regex word characters (letters, digits,
    underscore); whitespace and punctuation act as separators. A mixed run
    such as ``mmHg120`` therefore stays a single token.
    """

    # `\w` already matches digits, so the former `|\d+` alternative could
    # never fire; it is dropped without changing the tokens produced.
    _TOKEN_PATTERN = re.compile(r'\w+')

    def __init__(self, text):
        self.text = text
        self.tokens = []

    def tokenize(self):
        """Populate ``self.tokens`` from ``self.text`` and return the list.

        ``None`` and float NaN (what an empty spreadsheet cell typically
        becomes when loaded with pandas) produce an empty token list instead
        of a ``TypeError``.
        """
        text = self.text
        # `text != text` is True only for NaN.
        if text is None or (isinstance(text, float) and text != text):
            self.tokens = []
        else:
            self.tokens = self._TOKEN_PATTERN.findall(text)
        return self.tokens

class Parser:
    """Separate lexer tokens into lower-cased terms and integer values."""

    def __init__(self, tokens):
        self.tokens = tokens

    def parse(self):
        """Classify every token as either a number or a term.

        Returns:
            tuple[list[str], list[int]]: ``(medical_terms, numbers)`` where
            ``numbers`` holds the tokens made up solely of decimal digits,
            converted with ``int``, and ``medical_terms`` holds every other
            token lower-cased. Both lists keep the original token order.
        """
        medical_terms = []
        numbers = []
        for token in self.tokens:
            # isdecimal() mirrors what int() can parse. isdigit() is also True
            # for characters such as the superscript two, on which int() raises
            # ValueError.
            if token.isdecimal():
                numbers.append(int(token))
            else:
                medical_terms.append(token.lower())
        return medical_terms, numbers

def encrypted_mean(numbers, public_key):
    """Return the mean of ``numbers``, computed on their encrypted forms.

    Every value is encrypted with ``public_key.encrypt``; the ciphertexts are
    summed and the total is divided by the plaintext count.

    Args:
        numbers: Non-empty sized collection of plaintext numbers.
        public_key: Object exposing ``encrypt(value)`` whose results support
            addition with each other and division by an integer (in this file
            a Paillier public key from ``phe``).

    Returns:
        The encrypted mean, of whatever type that sum/division yields.

    Raises:
        ValueError: If ``numbers`` is empty. This used to surface as an opaque
            ``ZeroDivisionError`` from ``0 / 0``.
    """
    count = len(numbers)
    if count == 0:
        raise ValueError("encrypted_mean() requires at least one number")

    encrypted_numbers = [public_key.encrypt(num) for num in numbers]
    # sum() starts from the plain integer 0, so the ciphertext type must accept
    # being added to an int.
    encrypted_sum = sum(encrypted_numbers)
    return encrypted_sum / count

def encrypted_in_range(encrypted_val, encrypted_range_start, encrypted_range_end, private_key):
    """Tell whether an encrypted value lies within an encrypted, inclusive range.

    All three inputs are decrypted with ``private_key`` and the comparison is
    carried out on the resulting plaintexts.

    Args:
        encrypted_val: Ciphertext of the value under test.
        encrypted_range_start: Ciphertext of the lower bound (inclusive).
        encrypted_range_end: Ciphertext of the upper bound (inclusive).
        private_key: Object exposing ``decrypt(ciphertext)``.

    Returns:
        True when ``start <= value <= end`` holds for the decrypted values,
        otherwise False.
    """
    lower_bound = private_key.decrypt(encrypted_range_start)
    upper_bound = private_key.decrypt(encrypted_range_end)
    plain_value = private_key.decrypt(encrypted_val)

    # Guard clause: a value below the range can be rejected immediately.
    if not (lower_bound <= plain_value):
        return False
    return plain_value <= upper_bound

def main(input_path="C:\\Users\\Varshini\\Downloads\\imcd_dataset.xlsx",
         output_path="patients_bpnewwwwwww.csv"):
    """Classify every patient report in a workbook and export the result as CSV.

    For each entry of the ``Patient_Report`` column the numbers in the text are
    averaged under Paillier encryption, the mean is checked against the fixed
    threshold ranges, and two columns are added to the frame:
    ``Encrypted_Mean_BP`` (the ciphertext of the mean as a string) and
    ``BP_Level`` ("Normal", "High", "Low" or "Unknown").

    Args:
        input_path: Excel workbook to read. Defaults to the original
            machine-specific location; pass a path valid on your system.
        output_path: Destination of the CSV export.

    Reports that are blank or contain no number get an empty
    ``Encrypted_Mean_BP`` and the level "Unknown" instead of aborting the run.
    """
    # Read the Excel workbook (the export at the end is CSV, the input is not).
    df = pd.read_excel(input_path)
    reports = df["Patient_Report"]

    # NOTE(review): the key pair only lives for the duration of this call and is
    # never saved, so the ciphertexts written to the CSV cannot be decrypted
    # afterwards. Confirm that this is intended.
    public_key, private_key = paillier.generate_paillier_keypair()

    # Threshold ranges, checked in this order with inclusive bounds, so a mean
    # of exactly 70 or 110 is reported as "Normal".
    bp_ranges = (
        ("Normal", (70, 110)),  # Adjusted normal range
        ("High", (110, 140)),
        ("Low", (40, 70)),
    )
    # The bounds do not depend on the report: encrypt them once instead of
    # repeating up to six Paillier encryptions for every row.
    encrypted_ranges = [
        (label, public_key.encrypt(lower), public_key.encrypt(upper))
        for label, (lower, upper) in bp_ranges
    ]

    encrypted_mean_bps = []
    bp_levels = []

    for report in reports:
        # An empty spreadsheet cell arrives as NaN, which the regex-based lexer
        # cannot process; treat it as a report without content.
        text = "" if pd.isna(report) else str(report)

        # Tokenize and parse
        lexer = Lexer(text)
        lexer.tokenize()
        _, numbers = Parser(lexer.tokens).parse()

        # NOTE(review): every numeric token of the report is averaged, not only
        # blood-pressure values. Confirm against the actual report format.
        if not numbers:
            # Nothing to average (this used to fail with ZeroDivisionError).
            encrypted_mean_bps.append(None)
            bp_levels.append("Unknown")
            continue

        # Compute mean blood pressure in encrypted mode
        encrypted_mean_bp = encrypted_mean(numbers, public_key)

        # First matching range wins; no match means "Unknown".
        bp_level = next(
            (
                label
                for label, encrypted_lower, encrypted_upper in encrypted_ranges
                if encrypted_in_range(encrypted_mean_bp, encrypted_lower, encrypted_upper, private_key)
            ),
            "Unknown",
        )

        encrypted_mean_bps.append(str(encrypted_mean_bp.ciphertext()))
        bp_levels.append(bp_level)

    # Add computed values to DataFrame
    df["Encrypted_Mean_BP"] = encrypted_mean_bps
    df["BP_Level"] = bp_levels

    # Write the result as CSV
    df.to_csv(output_path, index=False)

# Run the workbook-to-CSV pipeline when this code is executed directly (as a
# notebook cell or a script) rather than imported as a module.
if __name__ == '__main__':
    main()


In [None]:
!pip install phe