In [20]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import joblib
import os

# Core medical knowledge base about Parkinson's Disease.
# Each entry is a dict with a "question" key (the text that gets matched
# against user input) and an "answer" key (the text returned to the user).
# The literal below is written as (question, answer) tuples and expanded
# into dicts so the two keys are spelled only once.
parkinson_qa = [
    {"question": question_text, "answer": answer_text}
    for question_text, answer_text in (

        # --- What the disease is and how it arises ---
        ("What is Parkinson's Disease?",
         "Parkinson's Disease (PD) is a progressive neurological disorder that primarily affects movement. It occurs when certain nerve cells in the brain, especially in an area called the substantia nigra, become damaged or die. These cells normally produce dopamine—a chemical essential for coordinating smooth and controlled body movements."),

        ("How does Parkinson's Disease occur?",
         "PD occurs due to the gradual loss of dopamine-producing neurons in the brain. Without sufficient dopamine, the brain cannot send proper signals to control movement. The exact cause is unknown, but it likely involves a combination of genetic and environmental factors. Abnormal protein deposits called Lewy bodies are commonly found in the brains of PD patients."),

        ("What are the main symptoms of Parkinson's Disease?",
         "Motor Symptoms:\n- Bradykinesia: Slowness of movement\n- Tremor: Shaking, typically at rest\n- Rigidity: Muscle stiffness\n- Postural instability: Poor balance\n\nNon-Motor Symptoms:\n- Depression/anxiety\n- Memory problems\n- Sleep disturbances\n- Loss of smell\n- Constipation"),

        ("What causes Parkinson's Disease?",
         "Contributing factors include:\n- Genetic mutations (LRRK2, PARK7 genes)\n- Environmental toxins (pesticides, heavy metals)\n- Age (risk increases after 60)\n- Gender (men are more commonly affected)"),

        # --- Prevention, diagnosis and early signs ---
        ("Can Parkinson's Disease be prevented?",
         "While no guaranteed prevention exists, these may help:\n- Regular exercise\n- Antioxidant-rich diet (Mediterranean)\n- Avoiding toxins like pesticides\n- Mental stimulation\n- Good sleep and stress management"),

        ("How is Parkinson's Disease diagnosed?",
         "Diagnosis involves:\n- Neurological exams\n- UPDRS rating scale\n- DAT-SPECT scans (dopamine activity)\n- MRI/CT to rule out other conditions\n- Emerging voice/spiral analysis tools"),

        ("What are early warning signs?",
         "Early signs include:\n- Slight finger/hand tremor\n- Small handwriting (micrographia)\n- Slowness of movement\n- Loss of smell\n- Soft speech\n- Reduced facial expression"),

        # --- Outlook, risk and treatment ---
        ("Is Parkinson's Disease curable?",
         "Currently no cure, but treatments can significantly improve quality of life:\n- Medications (Levodopa, dopamine agonists)\n- Physical/occupational therapy\n- Deep Brain Stimulation (DBS) in advanced cases"),

        ("What are the risk factors?",
         "Risk factors include:\n- Age (60+)\n- Family history\n- Male gender\n- Toxin exposure\n- Head trauma"),

        ("What is the progression rate?",
         "PD progresses differently for each person, typically over years to decades. Factors affecting progression:\n- Age at onset\n- Symptom type\n- Overall health\n- Treatment response"),

        ("How is Parkinson's treated?",
         "Treatment options:\n\nMedications:\n- Levodopa/Carbidopa\n- Dopamine agonists\n- MAO-B inhibitors\n\nTherapies:\n- Physical/speech therapy\n- Cognitive behavioral therapy\n\nSurgical:\n- Deep Brain Stimulation (DBS)"),

        ("What complications can occur?",
         "Possible complications:\n- Swallowing difficulties (aspiration risk)\n- Depression/anxiety\n- Dementia\n- Blood pressure drops\n- Falls/fractures\n- Sleep disorders"),

        # --- Living with the disease ---
        ("How to improve quality of life?",
         "Quality of life tips:\n- Early diagnosis\n- Regular exercise (yoga, tai chi)\n- Support groups\n- Home safety modifications\n- Medication adherence\n- Routine specialist visits"),

        ("Recommended lifestyle changes?",
         "Lifestyle recommendations:\n- Regular exercise\n- Healthy diet (fiber, antioxidants)\n- Consistent sleep schedule\n- Stay hydrated\n- Mental stimulation (puzzles, reading)"),

        ("Biomarkers for early detection?",
         "Promising biomarkers:\n- Alpha-synuclein in cerebrospinal fluid\n- DAT-SPECT neuroimaging\n- Blood/CSF protein patterns\n- Voice/spiral drawing AI analysis"),
    )
]

# Q&A entries that explain individual model input features.
# Same record layout as the medical entries: one dict per entry with a
# "question" key and an "answer" key, in that order.
feature_qa = [
    # --- UPDRS score history features ---
    dict(
        question="What is baseline_motor in Parkinson's analysis?",
        answer="Baseline motor UPDRS is the first recorded motor score for a patient. It helps in tracking disease progression by comparing future scores to this initial baseline.",
    ),
    dict(
        question="What does previous_motor_UPDRS mean?",
        answer="This feature represents the motor UPDRS score from the patient's last visit. It helps the model understand recent trends in motor symptoms.",
    ),
    dict(
        question="What does previous_total_UPDRS mean?",
        answer="Previous total UPDRS is the overall Parkinson’s rating from the previous visit, which helps compare past and present symptom severity.",
    ),
    dict(
        question="What is motor_diff?",
        answer="Motor_diff is the change in motor UPDRS since the previous visit. A positive value may indicate worsening symptoms.",
    ),
    dict(
        question="What is total_diff?",
        answer="Total_diff is the difference in total UPDRS between two time points, reflecting the overall change in symptoms.",
    ),
    dict(
        question="What does progress_rate indicate?",
        answer="Progress rate estimates how quickly the motor UPDRS is increasing over time since the first test. It provides insight into how rapidly the disease is progressing.",
    ),

    # --- Time and demographic features ---
    dict(
        question="What does months_since_first_test mean?",
        answer="This feature shows how much time has passed since a patient’s first test. It helps estimate disease progression over time.",
    ),
    dict(
        question="How does age affect Parkinson's prediction?",
        answer="Older age is a known risk factor for Parkinson’s and can affect the severity and progression of symptoms. Age is an important feature in the model.",
    ),

    # --- Voice-signal features ---
    dict(
        question="What is NHR in Parkinson's analysis?",
        answer="NHR (Noise-to-Harmonics Ratio) measures the noisiness in a patient's voice. Higher NHR values may indicate more severe vocal impairment in PD patients.",
    ),
    dict(
        question="What is HNR and why is it used?",
        answer="HNR (Harmonics-to-Noise Ratio) quantifies the quality of voice. Lower HNR values often indicate vocal issues common in Parkinson's Disease.",
    ),
    dict(
        question="What is RPDE and its role in PD detection?",
        answer="RPDE (Recurrence Period Density Entropy) is a measure of signal complexity. It's used to detect irregularities in speech that may be related to PD.",
    ),
    dict(
        question="What does DFA stand for in Parkinson's models?",
        answer="DFA (Detrended Fluctuation Analysis) quantifies long-term correlations in time series data, often applied to voice signals to detect neurological disorders like PD.",
    ),

    # --- Derived / combined features ---
    dict(
        question="What is NHR_age?",
        answer="NHR_age is a combination of a patient’s age and voice noisiness. It enhances the model's understanding of how age influences vocal degradation in PD.",
    ),
    dict(
        question="What is Shimmer_trend?",
        answer="Shimmer_trend captures short-term fluctuations in voice amplitude. Averaged over several tests, it reflects changes in speech control in Parkinson’s.",
    ),
]

# Combine all questions: append the feature-specific entries to the core
# list in place, so `parkinson_qa` holds the full knowledge base from here on.
parkinson_qa.extend(feature_qa)

# Initialize the embedding model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Create DataFrame and compute embeddings.
# All questions are encoded in ONE batched call instead of one model call
# per row; the result is a 2-D array with one row per question, which is
# split back into per-row vectors so the 'embedding' column keeps holding a
# single 1-D vector per entry.
qa_df = pd.DataFrame(parkinson_qa)
question_embeddings = model.encode(qa_df['question'].tolist())
qa_df['embedding'] = list(question_embeddings)

def get_answer(user_question, threshold=0.4):
    """Return the stored answer whose question best matches ``user_question``.

    The user's text is embedded with the module-level ``model`` and compared
    by cosine similarity against every vector in ``qa_df['embedding']``.

    Args:
        user_question: Free-text question typed by the user.
        threshold: The best similarity score must be strictly greater than
            this value for its answer to be returned.

    Returns:
        The ``answer`` of the best-matching entry, or a fixed fallback
        message when the input is blank, the knowledge base is empty, or no
        entry scores above ``threshold``.

    Side effects:
        Overwrites the ``similarity`` column of the module-level ``qa_df``
        with the scores computed for this question.
    """
    fallback = "I couldn't find a precise answer. Would you like information about common Parkinson's symptoms?"

    # Blank input has nothing meaningful to match; without this guard it
    # would still be embedded and could clear the threshold by accident.
    if not user_question or not user_question.strip():
        return fallback

    # idxmax() raises on an empty frame, so bail out before scoring.
    if qa_df.empty:
        return fallback

    user_embedding = model.encode([user_question])

    # Stack the per-row vectors into one matrix and score all entries in a
    # single call rather than invoking cosine_similarity once per row.
    stored_embeddings = np.vstack(qa_df['embedding'].tolist())
    qa_df['similarity'] = cosine_similarity(stored_embeddings, user_embedding)[:, 0]

    best_match = qa_df.loc[qa_df['similarity'].idxmax()]

    if best_match['similarity'] > threshold:
        return best_match['answer']
    return fallback

def save_models(model_dir='parkinson_models'):
    """Save the Q&A table and the embedding model to disk.

    Writes two files inside ``model_dir`` (created if it does not exist):
    ``qa_data.pkl`` (the module-level ``qa_df``, pickled by pandas) and
    ``embedding_model.joblib`` (the module-level ``model``, dumped with
    joblib).

    Args:
        model_dir: Target directory. Defaults to ``'parkinson_models'``,
            the location previously hard-coded in this function.
    """
    os.makedirs(model_dir, exist_ok=True)

    # Save Q&A data
    qa_df.to_pickle(os.path.join(model_dir, 'qa_data.pkl'))

    # Save embedding model
    joblib.dump(model, os.path.join(model_dir, 'embedding_model.joblib'))

    print(f"All models saved successfully to '{model_dir}' directory")

def load_models(model_dir='parkinson_models'):
    """Load the saved Q&A table and embedding model from disk.

    Reads ``qa_data.pkl`` and ``embedding_model.joblib`` from ``model_dir``
    and rebinds the module-level ``qa_df`` and ``model`` to the results.

    Args:
        model_dir: Directory to read from. Defaults to
            ``'parkinson_models'``, the location previously hard-coded here.

    Raises:
        FileNotFoundError: If a saved file is missing. In that case the
            module-level ``qa_df`` and ``model`` are left untouched.
    """
    global qa_df, model

    # SECURITY NOTE: both loaders unpickle their input, which can execute
    # arbitrary code. Only point this at files you created yourself.
    loaded_qa = pd.read_pickle(os.path.join(model_dir, 'qa_data.pkl'))
    loaded_model = joblib.load(os.path.join(model_dir, 'embedding_model.joblib'))

    # Rebind both globals only after BOTH loads succeeded, so a failure on
    # the second file cannot leave a new table paired with the old model.
    qa_df, model = loaded_qa, loaded_model
    print("Models loaded successfully")

# Example usage: persist the artefacts, then answer questions interactively
# until the user types 'quit' or 'exit' (or closes the input stream).
if __name__ == "__main__":
    # Save models (run once)
    save_models()

    # Interactive QA
    while True:
        try:
            # Strip so that inputs such as "quit " or " exit" are recognised.
            user_input = input("\nAsk about Parkinson's (or 'quit'): ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave the loop instead of a traceback.
            print()
            break

        if user_input.lower() in ('quit', 'exit'):
            break

        # Nothing typed: prompt again rather than querying the matcher.
        if not user_input:
            continue

        response = get_answer(user_input)
        print("\nAnswer:", response)


All models saved successfully to 'parkinson_models' directory

Ask about Parkinson's (or 'quit'): hnr score

Answer: HNR (Harmonics-to-Noise Ratio) quantifies the quality of voice. Lower HNR values often indicate vocal issues common in Parkinson's Disease.

Ask about Parkinson's (or 'quit'): quit
