# **1.DATA PRE-PROCESSING.**


In [165]:
import csv

file_path = 'FMD cases.csv'

def preprocess_dataset(file_path):
    """
    Preprocess the dataset by converting it into a dictionary format.

    Args:
        file_path (str): The path to the CSV file containing the dataset.

    Returns:
        dict: A dictionary keyed by case ID, where each value is a dictionary containing the input parameters,
              diagnosis, treatment, and outcome of that case. 'Symptoms' and 'Treatment' are lists of strings
              (empty lists when the CSV field is blank) and 'Animal Age (Months)' is an int.

    Raises:
        KeyError: If a required column is missing from the CSV file.
        ValueError: If an age value cannot be converted to an integer.
    """
    def split_items(field):
        # ''.split(', ') yields [''], which would later be treated as a real
        # symptom/treatment, so blank pieces are dropped.
        return [item for item in field.split(', ') if item]

    case_database = {}

    # newline='' lets the csv module do its own line-ending handling, so quoted
    # fields that contain embedded newlines are parsed correctly.
    with open(file_path, 'r', newline='') as file:
        reader = csv.DictReader(file)

        # Each row of the CSV file is one case; a repeated case ID overwrites the earlier row.
        for row in reader:
            case_database[row['Case ID']] = {
                'Symptoms': split_items(row['Symptoms']),
                'Animal Age (Months)': int(row['Animal Age (Months)']),
                'Animal Sex': row['Animal Sex'],
                'Environmental Conditions': row['Environmental Conditions'],
                'Diagnosis': row['Diagnosis'],
                'Treatment': split_items(row['Treatment']),
                'Outcome': row['Outcome'],
            }

    return case_database

# Example usage: build the in-memory case base from the CSV file named by `file_path`.
# `case_database` is a module-level name that later cells read.
case_database = preprocess_dataset(file_path)

# Dump the whole database as one dictionary literal
print("Case Database:")
print(case_database)

# Print each case individually
print("\nIndividual Cases:")
for case_id, case in case_database.items():
    print(f"\nCase ID: {case_id}")
    # One "field: value" line per attribute stored for the case
    for key, value in case.items():
        print(f"{key}: {value}")


Case Database:
{'FMD001': {'Symptoms': ['Blisters on mouth', 'tongue', 'and feet', 'excessive salivation', 'fever (104°F)', 'reduced appetite'], 'Animal Age (Months)': 18, 'Animal Sex': 'Female', 'Environmental Conditions': 'Recent introduction of new cattle', 'Diagnosis': 'Foot-and-Mouth Disease', 'Treatment': ['Antibiotics', 'anti-inflammatory drugs', 'fluid therapy'], 'Outcome': 'Recovered'}, 'FMD002': {'Symptoms': ['Blisters on udder', 'fever (103°F)', 'decreased milk production', 'lethargy'], 'Animal Age (Months)': 72, 'Animal Sex': 'Female', 'Environmental Conditions': 'Shared grazing pasture with infected herd', 'Diagnosis': 'Foot-and-Mouth Disease', 'Treatment': ['Antibiotics', 'anti-inflammatory drugs', 'supportive care'], 'Outcome': 'Recovered with prolonged milk production loss'}, 'FMD003': {'Symptoms': ['Mouth blisters', 'tongue lesions', 'lameness', 'fever (102°F)', 'nasal discharge'], 'Animal Age (Months)': 24, 'Animal Sex': 'Male', 'Environmental Conditions': 'Poor biose

# **2.CALCULATE SIMILARITY MEASURES.**

In [166]:
import difflib

def calculate_symptom_similarity(new_symptoms, existing_symptoms):
    """
    Calculate the similarity between the symptoms of a new case and an existing case.

    Symptoms are compared case-insensitively and with surrounding whitespace ignored, so
    'Fever' and 'fever ' count as the same symptom. Blank entries are ignored.

    Args:
        new_symptoms (list): A list of symptoms for the new case.
        existing_symptoms (list): A list of symptoms for an existing case.

    Returns:
        float: The number of shared symptoms divided by the size of the larger symptom set, between 0 and 1,
               where 1 indicates an exact match. Returns 0.0 when both lists are empty.
    """
    def normalise(symptoms):
        # casefold() is the Unicode-aware form of lower(); blank strings are not symptoms.
        return {symptom.strip().casefold() for symptom in symptoms if symptom.strip()}

    new_symptom_set = normalise(new_symptoms)
    existing_symptom_set = normalise(existing_symptoms)

    # Calculate the ratio of common symptoms; the trailing 1 avoids dividing by zero
    # when both sets are empty.
    common_symptoms = new_symptom_set & existing_symptom_set
    larger_size = max(len(new_symptom_set), len(existing_symptom_set), 1)

    return len(common_symptoms) / larger_size

def calculate_age_similarity(new_age, existing_age):
    """
    Score how close two animal ages are, relative to the older of the two.

    Args:
        new_age (int): The age (in months) of the new case.
        existing_age (int): The age (in months) of an existing case.

    Returns:
        float: 1 minus the age gap divided by the larger age; 1 indicates identical ages.
               When the larger age is not positive, 1 is returned without dividing.
    """
    older_age = max(new_age, existing_age)

    # Guard against division by zero (both ages zero) before computing the ratio.
    if not older_age > 0:
        return 1

    age_gap = abs(new_age - existing_age)
    return 1 - (age_gap / older_age)

def calculate_environmental_similarity(new_conditions, existing_conditions):
    """
    Score how alike two environmental-condition descriptions are.

    The score is the ratio reported by difflib.SequenceMatcher for the two strings,
    with no junk filter supplied.

    Args:
        new_conditions (str): A string describing the environmental conditions for the new case.
        existing_conditions (str): A string describing the environmental conditions for an existing case.

    Returns:
        float: A similarity score between 0 and 1, where 1 indicates an exact match.
    """
    matcher = difflib.SequenceMatcher(a=new_conditions, b=existing_conditions)
    return matcher.ratio()

def calculate_overall_similarity(new_case, existing_case, weights):
    """
    Combine the per-feature similarities of two cases into one weighted score.

    Only 'Symptoms', 'Animal Age (Months)' and 'Environmental Conditions' are compared;
    any other keys in the case dictionaries are not read here.

    Args:
        new_case (dict): A dictionary representing the new case.
        existing_case (dict): A dictionary representing an existing case.
        weights (dict): A dictionary containing weights for each feature, keyed by the same
                        three feature names used in the case dictionaries.

    Returns:
        float: The weighted sum of the three feature similarities. The weights are not
               normalised here, so the result stays within 0..1 only if they sum to 1.
    """
    symptoms_key = 'Symptoms'
    age_key = 'Animal Age (Months)'
    environment_key = 'Environmental Conditions'

    # Per-feature scores
    symptom_score = calculate_symptom_similarity(new_case[symptoms_key], existing_case[symptoms_key])
    age_score = calculate_age_similarity(new_case[age_key], existing_case[age_key])
    environment_score = calculate_environmental_similarity(new_case[environment_key],
                                                           existing_case[environment_key])

    # Weighted contributions, added in a fixed order
    weighted_symptoms = weights[symptoms_key] * symptom_score
    weighted_age = weights[age_key] * age_score
    weighted_environment = weights[environment_key] * environment_score

    return weighted_symptoms + weighted_age + weighted_environment

In [167]:
# Define the weights for each feature.
# `weights` is a module-level name: retrieve_similar_cases reads it as a global
# rather than taking it as a parameter.
weights = {
    'Symptoms': 0.8,
    'Animal Age (Months)': 0.1,
    'Environmental Conditions': 0.1
}

# Sanity check: two identical cases should score 100%.
new_case = {
    'Symptoms': ['Blisters on mouth', 'Blisters on feet', 'Excessive salivation', 'Fever', 'Reduced appetite'],
    'Animal Age (Months)': 18,
    'Environmental Conditions': 'Recent introduction of new cattle'
}

existing_case = {
    'Symptoms': ['Blisters on mouth', 'Blisters on feet', 'Excessive salivation', 'Fever', 'Reduced appetite'],
    'Animal Age (Months)': 18,
    'Environmental Conditions': 'Recent introduction of new cattle'
}

overall_similarity = calculate_overall_similarity(new_case, existing_case, weights)
# Convert the 0..1 score to a percentage for display
overall_similarity_percentage = overall_similarity * 100
print(f"Overall similarity score: {overall_similarity_percentage:.2f}%")


Overall similarity score: 100.00%


# **3. IMPLEMENTING CASE RETRIEVAL**

In [168]:
from collections import defaultdict

def retrieve_similar_cases(new_case, case_database, similarity_threshold=0.5, top_n=5):
    """
    Find the stored cases that most closely resemble a new case.

    Every case in the database is scored with calculate_overall_similarity, using the
    module-level `weights` dictionary (it is not a parameter of this function).

    Args:
        new_case (dict): A dictionary representing the new case.
        case_database (dict): A dictionary containing the existing cases.
        similarity_threshold (float): The minimum similarity score required to consider a case as similar.
        top_n (int): The maximum number of similar cases to retrieve.

    Returns:
        list: A list of (case ID, case dictionary) tuples ordered from highest to lowest
              similarity score; cases with equal scores keep their database order.
    """
    # Bucket the qualifying cases by their exact score.
    cases_by_score = defaultdict(list)
    for case_id, existing_case in case_database.items():
        score = calculate_overall_similarity(new_case, existing_case, weights)
        if not score >= similarity_threshold:
            continue
        cases_by_score[score].append((case_id, existing_case))

    # Walk the buckets from best score down, taking cases until the quota is used up.
    selected_cases = []
    remaining = top_n
    for score in sorted(cases_by_score, reverse=True):
        bucket = cases_by_score[score]
        selected_cases.extend(bucket[:remaining])
        remaining -= len(bucket)
        if remaining <= 0:
            break

    return selected_cases

In [169]:
# Query case used to demonstrate retrieval against the loaded case base.
new_case = {
    'Symptoms':  ['Udder lesions', 'fever (103°F)', 'decreased milk yield (>30% drop)', 'weight loss'],
    'Animal Age (Months)': 96,
    'Animal Sex': 'Female',
    'Environmental Conditions': 'Poor hygiene practices during milking'
}

# Keep only cases scoring at least 0.7, and at most five of them.
top_similar_cases = retrieve_similar_cases(new_case, case_database, similarity_threshold=0.7, top_n=5)

print(f"Number of similar cases retrieved: {len(top_similar_cases)}")

if not top_similar_cases:
    print("No similar cases found above the specified threshold.")
else:
    # NOTE: this two-name unpacking matches the (case_id, case) tuples returned by the
    # retrieve_similar_cases defined in this section. The section-4 redefinition returns
    # 3-tuples, so this cell fails if re-run after that definition has been executed.
    for case_id, case in top_similar_cases:
        print(f"Case ID: {case_id}")
        print(f"Symptoms: {case['Symptoms']}")
        print(f"Age: {case['Animal Age (Months)']} months")
        print(f"Environmental Conditions: {case['Environmental Conditions']}")
        print("---")

Number of similar cases retrieved: 1
Case ID: FMD009
Symptoms: ['Udder lesions', 'fever (103°F)', 'decreased milk yield (>30% drop)', 'weight loss']
Age: 96 months
Environmental Conditions: Poor hygiene practices during milking
---


# **4. DETERMINING THE DIAGNOSIS AND TREATMENT**

In [170]:
from collections import Counter

def diagnose_and_treat(new_case, similar_cases):
    """
    Determine the diagnosis and treatment for a new case based on the most similar cases.

    The diagnosis is the one that occurs most often among the similar cases (ties go to the
    case listed first). A treatment is recommended when it was used in at least half of the
    similar cases; each case counts a given treatment once, even if it lists it twice.

    Args:
        new_case (dict): A dictionary representing the new case. Not consulted by the current logic;
                         kept for interface compatibility.
        similar_cases (list): A list of tuples whose second element is the case dictionary, i.e. either
                              (case ID, case) or (case ID, case, similarity score).

    Returns:
        tuple: A tuple containing the determined diagnosis (str) and the recommended treatment (list),
               most frequently used treatment first. ("Unknown", []) when there are no similar cases.
    """
    # Index rather than unpack so both the 2-tuple and 3-tuple retrieval results are accepted.
    cases = [entry[1] for entry in similar_cases]

    # If no similar cases were found, return default values
    if not cases:
        return "Unknown", []

    # Determine the most common diagnosis
    diagnosis_counter = Counter(case['Diagnosis'] for case in cases)
    most_common_diagnosis, _ = diagnosis_counter.most_common(1)[0]

    # Count in how many cases each treatment was used. dict.fromkeys de-duplicates while
    # preserving order; list() is needed because Counter.update treats a mapping as counts.
    treatment_counter = Counter()
    for case in cases:
        treatment_counter.update(list(dict.fromkeys(case['Treatment'])))

    # "At least half" is tested as 2 * count >= n. The former `count >= n // 2` floored the
    # threshold, so with up to three cases every treatment passed (e.g. 1 of 3).
    most_common_treatment = [
        item for item, count in treatment_counter.most_common() if 2 * count >= len(cases)
    ]

    return most_common_diagnosis, most_common_treatment

def retrieve_similar_cases(new_case, case_database, similarity_threshold=0.5, top_n=3):
    """
    Find the stored cases that most closely resemble a new case, together with their scores.

    Every case in the database is scored with calculate_overall_similarity, using the
    module-level `weights` dictionary (it is not a parameter of this function).

    Args:
        new_case (dict): A dictionary representing the new case.
        case_database (dict): A dictionary containing the existing cases.
        similarity_threshold (float): The minimum similarity score required to consider a case as similar.
        top_n (int): The maximum number of similar cases to retrieve.

    Returns:
        list: A list of (case ID, case dictionary, similarity score) tuples ordered from highest
              to lowest similarity score; cases with equal scores keep their database order.
    """
    # Bucket the qualifying cases by their exact score.
    cases_by_score = defaultdict(list)
    for case_id, existing_case in case_database.items():
        score = calculate_overall_similarity(new_case, existing_case, weights)
        if not score >= similarity_threshold:
            continue
        cases_by_score[score].append((case_id, existing_case, score))

    # Walk the buckets from best score down, taking cases until the quota is used up.
    selected_cases = []
    remaining = top_n
    for score in sorted(cases_by_score, reverse=True):
        bucket = cases_by_score[score]
        selected_cases.extend(bucket[:remaining])
        remaining -= len(bucket)
        if remaining <= 0:
            break

    return selected_cases



In [171]:
# Query case used to demonstrate diagnosis and treatment recommendation.
new_case = {
    'Symptoms': ['Udder lesions', 'fever (103°F)', 'decreased milk yield (>30% drop)'],
    'Animal Age (Months)': 24,  # Adjusted key to match the expected format
    'Animal Sex': 'Male',
    'Environmental Conditions': 'Poor hygiene practices during milking'
}


# Retrieve (case_id, case, similarity_score) tuples, then derive the diagnosis and treatment from them.
top_similar_cases = retrieve_similar_cases(new_case, case_database, similarity_threshold=0.7, top_n=5)
diagnosis, treatment = diagnose_and_treat(new_case, top_similar_cases)

print(f"Diagnosis: {diagnosis}")
print(f"Recommended Treatment: {', '.join(treatment)}")

# Print the retrieved similar cases with their similarity scores
print("\nRetrieved Similar Cases:")
for case_id, case, similarity_score in top_similar_cases:
    print(f"Case ID: {case_id}, Similarity Score: {similarity_score:.2f}")
    print(f"Symptoms: {case['Symptoms']}")
    print(f"Age: {case['Animal Age (Months)']} months")
    print(f"Sex: {case['Animal Sex']}")
    print(f"Environmental Conditions: {case['Environmental Conditions']}")
    print("---")

Diagnosis: Foot-and-Mouth Disease
Recommended Treatment: Antibiotics, anti-inflammatory drugs, supportive care

Retrieved Similar Cases:
Case ID: FMD009, Similarity Score: 0.73
Symptoms: ['Udder lesions', 'fever (103°F)', 'decreased milk yield (>30% drop)', 'weight loss']
Age: 96 months
Sex: Female
Environmental Conditions: Poor hygiene practices during milking
---


# **5.PREDICTING THE PROGNOSIS**

In [172]:
from collections import Counter

def predict_prognosis(new_case, similar_cases):
    """
    Derive a prognosis for a new case from the outcomes recorded for its most similar cases.

    The most frequent outcome string among the similar cases is mapped to one of three
    fixed prognosis messages by substring matching.

    Args:
        new_case (dict): A dictionary representing the new case. Not consulted by the current logic.
        similar_cases (list): A list of (case ID, case dictionary, similarity score) tuples
                              for the most similar cases.

    Returns:
        str: The predicted prognosis for the new case, or a fixed "unable to predict" message
             when no similar cases are supplied.
    """
    # Collect outcomes from the similar cases
    outcomes = [case['Outcome'] for _case_id, case, _score in similar_cases]

    # Nothing to base a prediction on
    if not outcomes:
        return "Unable to predict prognosis due to lack of similar cases."

    # The single most frequent outcome decides the prognosis
    most_common_outcome = Counter(outcomes).most_common(1)[0][0]

    # "Recovered" is tested first, so it takes precedence over the fatal markers.
    if "Recovered" in most_common_outcome:
        return "Likely to recover"

    if any(marker in most_common_outcome for marker in ("Euthanized", "Died")):
        return "High risk of complications or mortality"

    return "Possible long-term effects or complications"

In [173]:
# Query case used to demonstrate the full pipeline: retrieval, diagnosis/treatment, prognosis.
new_case = {
    'Symptoms': ['Blisters on mouth', 'Excessive salivation', 'Fever (104°F)', 'Lameness'],
    'Animal Age (Months)': 30,
    'Animal Sex': 'Male',
    'Environmental Conditions': 'Overcrowded holding pen during transportation'
}

# The same retrieved cases feed both the diagnosis/treatment step and the prognosis step.
top_similar_cases = retrieve_similar_cases(new_case, case_database, similarity_threshold=0.5, top_n=5)
diagnosis, treatment = diagnose_and_treat(new_case, top_similar_cases)
prognosis = predict_prognosis(new_case, top_similar_cases)

print(f"Diagnosis: {diagnosis}")
print(f"Recommended Treatment: {', '.join(treatment)}")
print(f"Prognosis: {prognosis}")

Diagnosis: Foot-and-Mouth Disease
Recommended Treatment: Antibiotics, Anti-inflammatory drugs, Fluid therapy
Prognosis: Likely to recover


# **6.UPDATING THE CASE BASE**

In [174]:
import csv
import os
from datetime import datetime

# def update_case_database(case_database, new_case, diagnosis, treatment, outcome):
#     """
#     Update the case database by adding a new case and its outcome if it doesn't already exist.

#     Args:
#         case_database (dict): The existing case database.
#         new_case (dict): A dictionary representing the new case.
#         diagnosis (str): The diagnosed condition for the new case.
#         treatment (list): A list of treatments applied for the new case.
#         outcome (str): The outcome of the new case.

#     Returns:
#         dict: The updated case database with the new case added.
#     """
#     # Check if the case already exists in the database
#     for case_id, case in case_database.items():
#         if case['Symptoms'] == new_case['Symptoms'] and \
#            case['Animal Age (Months)'] == new_case['Animal Age (Months)'] and \
#            case['Animal Sex'] == new_case['Animal Sex'] and \
#            case['Environmental Conditions'] == new_case['Environmental Conditions'] and \
#            case['Diagnosis'] == diagnosis and \
#            case['Treatment'] == treatment and \
#            case['Outcome'] == outcome:
#             # Case already exists, no need to add it again
#             return case_database

#     # Case doesn't exist, generate a unique case ID
#     num_cases = len(case_database)
#     case_id = f"FMD{num_cases + 1:03d}"

#     # Add the new case to the database
#     new_case_entry = {
#         'Case ID': case_id,
#         'Symptoms': new_case['Symptoms'],  # Ensure 'symptoms' matches the key used in the new_case dictionary
#         'Animal Age (Months)': new_case['Animal Age (Months)'],
#         'Animal Sex': new_case['Animal Sex'],
#         'Environmental Conditions': new_case['Environmental Conditions'],
#         'Diagnosis': diagnosis,
#         'Treatment': (treatment),
#         'Outcome': outcome
#     }
#     case_database[case_id] = new_case_entry

#     return case_database
from collections import defaultdict

def update_case_database(case_database, new_case, diagnosis, treatment, outcome, similarity_threshold=0.5):
    """
    Update the case database by adding a new case and its outcome if it's sufficiently dissimilar to existing cases.

    The new case is added only when retrieve_similar_cases finds no existing case whose similarity
    reaches `similarity_threshold`. The database is modified in place and also returned.

    Args:
        case_database (dict): The existing case database.
        new_case (dict): A dictionary representing the new case.
        diagnosis (str): The diagnosed condition for the new case.
        treatment (list): A list of treatments applied for the new case.
        outcome (str): The outcome of the new case.
        similarity_threshold (float): The similarity score at or above which an existing case counts as
                                      similar to the new one. Defaults to 0.5.

    Returns:
        dict: The same case database object, with the new case added under a fresh "FMDnnn" ID when
              no existing case was similar enough, and unchanged otherwise.
    """
    # Retrieve similar cases from the case database
    similar_cases = retrieve_similar_cases(new_case, case_database, similarity_threshold=similarity_threshold)

    # A sufficiently similar case is already stored: nothing to add.
    if similar_cases:
        return case_database

    # Generate a unique case ID. Start from the case count, then skip forward past any ID
    # already in use: IDs need not be contiguous, and reusing one would silently overwrite
    # an existing case.
    next_number = len(case_database) + 1
    case_id = f"FMD{next_number:03d}"
    while case_id in case_database:
        next_number += 1
        case_id = f"FMD{next_number:03d}"

    # Add the new case to the database
    case_database[case_id] = {
        'Case ID': case_id,
        'Symptoms': new_case['Symptoms'],
        'Animal Age (Months)': new_case['Animal Age (Months)'],
        'Animal Sex': new_case['Animal Sex'],
        'Environmental Conditions': new_case['Environmental Conditions'],
        'Diagnosis': diagnosis,
        'Treatment': treatment,
        'Outcome': outcome,
    }

    return case_database



def save_case_database(case_database, file_path):
    """
    Save the case database to a CSV file, replacing any existing file at that path.

    All rows are built before the file is opened, so a malformed case raises before the
    existing file is truncated.

    Symptoms and treatments are written as one ', '-joined field each. An item that itself
    contains ', ' will therefore come back as several items when the file is re-read by
    splitting on ', '.

    Args:
        case_database (dict): The case database to be saved.
        file_path (str): The path to the CSV file where the database will be saved.

    Raises:
        KeyError: If a case is missing one of the expected fields; the file is left untouched.
    """
    fieldnames = ['Case ID', 'Symptoms', 'Animal Age (Months)', 'Animal Sex', 'Environmental Conditions', 'Diagnosis', 'Treatment', 'Outcome']

    # Build every row first: opening with mode 'w' truncates the file immediately, so any
    # error after that point would lose the data already on disk.
    rows = [
        {
            'Case ID': case_id,
            'Symptoms': ', '.join(case['Symptoms']),
            'Animal Age (Months)': case['Animal Age (Months)'],
            'Animal Sex': case['Animal Sex'],
            'Environmental Conditions': case['Environmental Conditions'],
            'Diagnosis': case['Diagnosis'],
            'Treatment': ', '.join(case['Treatment']),
            'Outcome': case['Outcome'],
        }
        for case_id, case in case_database.items()
    ]

    with open(file_path, 'w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)

        # Mode 'w' always starts from an empty file, so the header is always needed.
        writer.writeheader()
        writer.writerows(rows)



def load_case_database(file_path):
    """
    Load the case database from a CSV file.

    Args:
        file_path (str): The path to the CSV file containing the case database.

    Returns:
        dict: A dictionary representing the case database, keyed by case ID. 'Symptoms' and
              'Treatment' are lists of strings (empty lists when the CSV field is blank) and
              'Animal Age (Months)' is an int. An empty dictionary is returned, after printing
              an error message, when the file does not exist.

    Raises:
        KeyError: If a required column is missing from the CSV file.
        ValueError: If an age value cannot be converted to an integer.
    """
    def split_items(field):
        # ''.split(', ') yields [''], which would later be treated as a real
        # symptom/treatment, so blank pieces are dropped.
        return [item for item in field.split(', ') if item]

    case_database = {}

    try:
        # newline='' lets the csv module do its own line-ending handling, so quoted
        # fields that contain embedded newlines are parsed correctly.
        with open(file_path, 'r', newline='') as file:
            reader = csv.DictReader(file)
            for row in reader:
                case_database[row['Case ID']] = {
                    'Symptoms': split_items(row['Symptoms']),
                    'Animal Age (Months)': int(row['Animal Age (Months)']),
                    'Animal Sex': row['Animal Sex'],
                    'Environmental Conditions': row['Environmental Conditions'],
                    'Diagnosis': row['Diagnosis'],
                    'Treatment': split_items(row['Treatment']),
                    'Outcome': row['Outcome'],
                }
    except FileNotFoundError:
        # Best-effort load: a missing file is reported and treated as an empty case base.
        print(f"Error: The file '{file_path}' does not exist.")

    return case_database


# Load the existing case database


In [177]:
# Load the current case base from disk so retrieval and the update below work on the
# saved state rather than on a database built by an earlier cell.
existing_case_database = load_case_database('FMD cases.csv')

# Define the new case
new_case = {
    'Symptoms': ['Blisters on mouth, tongue, and feet',  'Fever (110°F)'],
    'Animal Age (Months)': 102,
    'Animal Sex': 'Female',
    'Environmental Conditions': 'Shared water trough with infected animals'
}

# Retrieve against the freshly loaded database (previously the stale global `case_database`
# was used here, leaving `existing_case_database` unused).
top_similar_cases = retrieve_similar_cases(new_case, existing_case_database, similarity_threshold=0.5, top_n=5)
diagnosis, treatment = diagnose_and_treat(new_case, top_similar_cases)
prognosis = predict_prognosis(new_case, top_similar_cases)

# The diagnosis, treatment and prognosis printed with each case are the aggregate values
# computed above, not values specific to that case.
for case_id, case, similarity_score in top_similar_cases:
    print(f"Case ID: {case_id}, Similarity Score: {similarity_score:.2f}")
    print(f"Symptoms: {case['Symptoms']}")
    print(f"Age: {case['Animal Age (Months)']} months")
    print(f"Sex: {case['Animal Sex']}")
    print(f"Environmental Conditions: {case['Environmental Conditions']}")
    print(f"Diagnosis: {diagnosis}")
    print(f"Recommended Treatment: {', '.join(treatment)}")
    print(f"Prognosis: {prognosis}")

# Confirmed values for the new case; these replace the predicted ones before it is stored.
diagnosis = "Foot-and-Mouth Disease"
treatment = ['Antibiotics', 'Anti-inflammatory drugs', 'Fluid therapy']
outcome = "Recovered"

# Update the case database with the new case
updated_case_database = update_case_database(existing_case_database, new_case, diagnosis, treatment, outcome, similarity_threshold=0.5)

# Save the updated case database to the existing file
save_case_database(updated_case_database, 'FMD cases.csv')


Case ID: FMD023, Similarity Score: 0.94
Symptoms: ['Blisters on mouth, tongue, and feet', 'Fever (110°F)']
Age: 42 months
Sex: Male
Environmental Conditions: Shared water trough with infected animals
Diagnosis: Foot-and-Mouth Disease
Recommended Treatment: Antibiotics, Anti-inflammatory drugs, Fluid therapy
Prognosis: Likely to recover
