In [23]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import re
import numpy as np
pd.options.display.max_columns = None
pd.set_option('display.max_rows', None)

In [24]:
# Load the CSV into the DataFrame
pseudomonadaceae_df = pd.read_csv(r"D:\Vivli 2024\Jupiter Notebooks\Vidata Analysis\Filtered Data for the Species and Families\pseudomonadaceae_df.csv")

# Display the DataFrame (optional)
pseudomonadaceae_df.head()



Columns (9,11,13,17,19,21,23,25,29,31,39) have mixed types. Specify dtype option on import or set low_memory=False.



Unnamed: 0,Isolate Number,Data Source,Species,Family,Gender,Age Group,Country,Year,Source of Infection,Amoxicillin-clavulanate,Amoxicillin-clavulanate_I,Ampicillin,Ampicillin_I,Piperacillin tazobactam,Piperacillin tazobactam_I,Ceftriaxone,Ceftriaxone_I,Ceftazidime,Ceftazidime_I,Cefepime,Cefepime_I,Imipenem,Imipenem_I,Meropenem,Meropenem_I,Ciprofloxacin,Ciprofloxacin_I,Levofloxacin,Levofloxacin_I,Amikacin,Amikacin_I,Gentamicin,Gentamicin_I,Vancomycin,Vancomycin_I,Azithromycin,Azithromycin_I,Clarithromycin,Clarithromycin_I,Tigecycline,Tigecycline_I,Linezolid,Linezolid_I,Trimethoprim-sulfamethoxazole,Trimethoprim-sulfamethoxazole_I
0,1773202,ATLAS,Pseudomonas aeruginosa,Pseudomonadaceae,Female,65 to 84 Years,Spain,2018,Sputum,>16,,>16,,1,,,,2,,4,,1,,<=0.06,,0.25,,2,,2,,1,,,,,,,,>8,,,,16,
1,1773203,ATLAS,Pseudomonas aeruginosa,Pseudomonadaceae,Male,65 to 84 Years,Spain,2018,Sputum,>16,,>16,,64,,,,16,,16,,>8,,>16,,0.5,,4,,4,,2,,,,,,,,>8,,,,>32,
2,1773204,ATLAS,Pseudomonas aeruginosa,Pseudomonadaceae,Female,85 and Over,Spain,2018,Wound,>16,,>16,,>64,,,,32,,16,,8,,2,,>4,,>8,,16,,8,,,,,,,,>8,,,,>32,
3,1773205,ATLAS,Pseudomonas aeruginosa,Pseudomonadaceae,Female,85 and Over,Spain,2018,Abscess,>16,,>16,,>64,,,,32,,16,,2,,2,,>4,,>8,,16,,8,,,,,,,,>8,,,,>32,
4,1773206,ATLAS,Pseudomonas aeruginosa,Pseudomonadaceae,Male,85 and Over,Spain,2018,Ulcer,>16,,>16,,64,,,,16,,2,,2,,0.25,,<=0.12,,0.5,,8,,2,,,,,,,,8,,,,16,


In [25]:
# Function to extract numeric value from a string, handling cases like '<', '>', etc.
def extract_numeric_value(value):
    if isinstance(value, str):
        # Use regex to remove any non-numeric characters except for '.' and '-'
        cleaned_value = re.sub(r'[^\d.-]', '', value)
        try:
            return float(cleaned_value)
        except ValueError:
            return np.nan  # Return NaN if conversion fails
    return value  # Return the value as is if it's already numeric

In [26]:
# Function to interpret antibiotic values based on the given conditions for Pseudomonadaceae
def interpret_antibiotic_pseudomonadaceae(row, antibiotic, year):
    value = extract_numeric_value(row[antibiotic])  # Clean the value

    # Handle missing or non-numeric values
    if pd.isna(value):
        return 'Use Not recommended'

    # Interpret for Penicillins (Piperacillin tazobactam - TZP)
    if antibiotic == 'Piperacillin tazobactam':
        if year in [2020, 2021, 2022]:
            if value <= 0.001:
                return 'Susceptible'
            elif 0.002 <= value <= 16:
                return 'Intermediate'
            else:
                return 'Resistant'
        elif year in [2018, 2019]:
            return 'Susceptible' if value <= 16 else 'Resistant'

    # Interpret for Cephalosporins (Ceftazidime - CAZ, Cefepime - PIP)
    elif antibiotic in ['Ceftazidime', 'Cefepime']:
        if year in [2020, 2021, 2022]:
            if value <= 0.001:
                return 'Susceptible'
            elif 0.002 <= value <= 8:
                return 'Intermediate'
            else:
                return 'Resistant'
        elif year in [2018, 2019]:
            return 'Susceptible' if value <= 8 else 'Resistant'

    # Interpret for Carbapenems (Imipenem - IPM, Meropenem - MEM)
    elif antibiotic == 'Imipenem':
        if year == 2018:
            if value <= 4:
                return 'Susceptible'
            elif 4.1 <= value <= 8:
                return 'Intermediate'
            else:
                return 'Resistant'
        elif year == 2019:
            return 'Susceptible' if value <= 4 else 'Resistant'
        elif year in [2020, 2021, 2022]:
            if value <= 0.001:
                return 'Susceptible'
            elif 0.002 <= value <= 4:
                return 'Intermediate'
            else:
                return 'Resistant'
    elif antibiotic == 'Meropenem':
        if year in [2018, 2019, 2020, 2021, 2022]:
            if value <= 2:
                return 'Susceptible'
            elif 2.1 <= value <= 8:
                return 'Intermediate'
            else:
                return 'Resistant'

    # Interpret for Fluoroquinolones (Ciprofloxacin - CIP, Levofloxacin - LEV)
    elif antibiotic == 'Ciprofloxacin':
        if year in [2020, 2021, 2022]:
            if value <= 0.001:
                return 'Susceptible'
            elif 0.002 <= value <= 0.5:
                return 'Intermediate'
            else:
                return 'Resistant'
        elif year in [2018, 2019]:
            return 'Susceptible' if value <= 0.5 else 'Resistant'
    elif antibiotic == 'Levofloxacin':
        if year == 2022:
            if value <= 0.001:
                return 'Susceptible'
            elif 0.002 <= value <= 2:
                return 'Intermediate'
            else:
                return 'Resistant'
        elif year in [2020, 2021]:
            if value <= 0.001:
                return 'Susceptible'
            elif 0.002 <= value <= 1:
                return 'Intermediate'
            else:
                return 'Resistant'
        elif year in [2018, 2019]:
            return 'Susceptible' if value <= 1 else 'Resistant'

    # Interpret for Aminoglycosides (Amikacin - AMK, Gentamicin - GEN)
    elif antibiotic == 'Amikacin':
        if year in [2020, 2021, 2022]:
            return 'Susceptible' if value <= 16 else 'Resistant'
        elif year in [2018, 2019]:
            if value <= 8:
                return 'Susceptible'
            elif 8.1 <= value <= 16:
                return 'Intermediate'
            else:
                return 'Resistant'
    elif antibiotic == 'Gentamicin':
        if year in [2020, 2021, 2022]:
            return 'Insufficient evidence that the organism or group is a good target for therapy with the agent'
        elif year in [2018, 2019]:
            return 'Susceptible' if value <= 4 else 'Resistant'

    return 'Use Not recommended'

# Apply the function to the relevant columns
def apply_interpretation_pseudomonadaceae(row):
    antibiotics = [
        'Piperacillin tazobactam', 'Ceftazidime', 'Cefepime', 'Imipenem', 'Meropenem',
        'Ciprofloxacin', 'Levofloxacin', 'Amikacin', 'Gentamicin'
    ]
    year = row['Year']
    
    for antibiotic in antibiotics:
        interpretation = interpret_antibiotic_pseudomonadaceae(row, antibiotic, year)
        row[antibiotic + '_I'] = interpretation  # Fill the interpretation into the respective "_I" column
    return row

# Apply the interpretation across the dataframe for Pseudomonadaceae family
pseudomonadaceae_df = pseudomonadaceae_df.apply(apply_interpretation_pseudomonadaceae, axis=1)

# Display the result
pseudomonadaceae_df.head()


Unnamed: 0,Isolate Number,Data Source,Species,Family,Gender,Age Group,Country,Year,Source of Infection,Amoxicillin-clavulanate,Amoxicillin-clavulanate_I,Ampicillin,Ampicillin_I,Piperacillin tazobactam,Piperacillin tazobactam_I,Ceftriaxone,Ceftriaxone_I,Ceftazidime,Ceftazidime_I,Cefepime,Cefepime_I,Imipenem,Imipenem_I,Meropenem,Meropenem_I,Ciprofloxacin,Ciprofloxacin_I,Levofloxacin,Levofloxacin_I,Amikacin,Amikacin_I,Gentamicin,Gentamicin_I,Vancomycin,Vancomycin_I,Azithromycin,Azithromycin_I,Clarithromycin,Clarithromycin_I,Tigecycline,Tigecycline_I,Linezolid,Linezolid_I,Trimethoprim-sulfamethoxazole,Trimethoprim-sulfamethoxazole_I
0,1773202,ATLAS,Pseudomonas aeruginosa,Pseudomonadaceae,Female,65 to 84 Years,Spain,2018,Sputum,>16,,>16,,1,Susceptible,,,2,Susceptible,4,Susceptible,1,Susceptible,<=0.06,Susceptible,0.25,Susceptible,2,Resistant,2,Susceptible,1,Susceptible,,,,,,,>8,,,,16,
1,1773203,ATLAS,Pseudomonas aeruginosa,Pseudomonadaceae,Male,65 to 84 Years,Spain,2018,Sputum,>16,,>16,,64,Resistant,,,16,Resistant,16,Resistant,>8,Intermediate,>16,Resistant,0.5,Susceptible,4,Resistant,4,Susceptible,2,Susceptible,,,,,,,>8,,,,>32,
2,1773204,ATLAS,Pseudomonas aeruginosa,Pseudomonadaceae,Female,85 and Over,Spain,2018,Wound,>16,,>16,,>64,Resistant,,,32,Resistant,16,Resistant,8,Intermediate,2,Susceptible,>4,Resistant,>8,Resistant,16,Intermediate,8,Resistant,,,,,,,>8,,,,>32,
3,1773205,ATLAS,Pseudomonas aeruginosa,Pseudomonadaceae,Female,85 and Over,Spain,2018,Abscess,>16,,>16,,>64,Resistant,,,32,Resistant,16,Resistant,2,Susceptible,2,Susceptible,>4,Resistant,>8,Resistant,16,Intermediate,8,Resistant,,,,,,,>8,,,,>32,
4,1773206,ATLAS,Pseudomonas aeruginosa,Pseudomonadaceae,Male,85 and Over,Spain,2018,Ulcer,>16,,>16,,64,Resistant,,,16,Resistant,2,Susceptible,2,Susceptible,0.25,Susceptible,<=0.12,Susceptible,0.5,Susceptible,8,Susceptible,2,Susceptible,,,,,,,8,,,,16,


In [27]:
import pandas as pd

# List of antibiotics of interest with interpretations (_I)
antibiotics_list = [
    'Amoxicillin-clavulanate_I', 'Ampicillin_I', 'Piperacillin tazobactam_I', 
    'Ceftriaxone_I', 'Ceftazidime_I', 'Cefepime_I', 'Imipenem_I', 'Meropenem_I', 
    'Ciprofloxacin_I', 'Levofloxacin_I', 'Amikacin_I', 'Gentamicin_I', 
    'Vancomycin_I', 'Azithromycin_I', 'Clarithromycin_I', 'Tigecycline_I', 
    'Linezolid_I', 'Trimethoprim-sulfamethoxazole_I'
]

# List of years of interest
years_list = [2018, 2019, 2020, 2021, 2022]

# Dictionary to store susceptibility results for Pseudomonas aeruginosa
pseudomonadaceae_results = {}

# Filter the data for Pseudomonas aeruginosa
df_species = pseudomonadaceae_df[pseudomonadaceae_df['Species'] == 'Pseudomonas aeruginosa']

# Loop through each antibiotic of interest
for antibiotic in antibiotics_list:
    susceptibility_rates_list = []

    # Loop through each year
    for year in years_list:
        year_data = df_species[df_species['Year'] == year]
        total_count = len(year_data)

        if total_count > 0:
            # Calculate susceptibility counts
            susceptible_count = (year_data[antibiotic] == 'Susceptible').sum()
            intermediate_count = (year_data[antibiotic] == 'Intermediate').sum()
            resistant_count = (year_data[antibiotic] == 'Resistant').sum()

            # Additional handling for Gentamicin with special comment
            if antibiotic == 'Gentamicin_I':
                insufficient_evidence_count = (year_data[antibiotic] == 'Insufficient evidence').sum()
            
            # Calculate rates
            total_reported = susceptible_count + intermediate_count + resistant_count
            if total_reported > 0:
                susceptible_rate = (susceptible_count / total_reported) * 100
                intermediate_rate = (intermediate_count / total_reported) * 100
                resistant_rate = (resistant_count / total_reported) * 100

                # Round rates to 2 decimal places
                susceptible_rate = round(susceptible_rate, 2)
                intermediate_rate = round(intermediate_rate, 2)
                resistant_rate = round(resistant_rate, 2)

                # Append the data to the list
                rates_dict = {
                    'Year': year,
                    'Susceptible': susceptible_rate,
                    'Intermediate': intermediate_rate,
                    'Resistant': resistant_rate
                }

                # Add insufficient evidence for Gentamicin
                if antibiotic == 'Gentamicin_I' and insufficient_evidence_count > 0:
                    rates_dict['Insufficient Evidence'] = 'Insufficient evidence that the organism or group is a good target for therapy with the agent'
                
                susceptibility_rates_list.append(rates_dict)

    # Store the results for this antibiotic
    if susceptibility_rates_list:
        pseudomonadaceae_results[antibiotic] = pd.DataFrame(susceptibility_rates_list)

# Display the results for each antibiotic in a table format
for antibiotic, df in pseudomonadaceae_results.items():
    print(f"\nResults for {antibiotic}:")
    print(df.to_string(index=False))  # Display table without the index column



Results for Piperacillin tazobactam_I:
 Year  Susceptible  Intermediate  Resistant
 2018        73.83          0.00      26.17
 2019        71.36          0.00      28.64
 2020         0.00         74.30      25.70
 2021         0.00         72.28      27.72
 2022         0.00         75.52      24.48

Results for Ceftazidime_I:
 Year  Susceptible  Intermediate  Resistant
 2018        76.53          0.00      23.47
 2019        76.53          0.00      23.47
 2020         0.00         77.72      22.28
 2021         0.00         77.35      22.65
 2022         0.00         79.17      20.83

Results for Cefepime_I:
 Year  Susceptible  Intermediate  Resistant
 2018        78.84          0.00      21.16
 2019        78.16          0.00      21.84
 2020         0.00         79.69      20.31
 2021         0.00         78.18      21.82
 2022         0.00         81.03      18.97

Results for Imipenem_I:
 Year  Susceptible  Intermediate  Resistant
 2018        72.86         23.86       3.28
 2

In [28]:
# Dictionary grouping antibiotics by their pharmacological classes
antibiotic_groups = {
    'Penicillins': ['Amoxicillin-clavulanate_I', 'Ampicillin_I', 'Piperacillin tazobactam_I'],
    'Cephalosporins': ['Ceftriaxone_I', 'Ceftazidime_I', 'Cefepime_I'],
    'Carbapenems': ['Imipenem_I', 'Meropenem_I'],
    'Fluoroquinolones': ['Ciprofloxacin_I', 'Levofloxacin_I'],
    'Aminoglycosides': ['Amikacin_I', 'Gentamicin_I'],
    'Glycopeptides': ['Vancomycin_I'],
    'Macrolides': ['Azithromycin_I', 'Clarithromycin_I'],
    'Tetracyclines': ['Tigecycline_I'],
    'Oxazolidinones': ['Linezolid_I'],
    'Antifolates': ['Trimethoprim-sulfamethoxazole_I']
}

# Loop through antibiotic groups to visualize the trends for Pseudomonas aeruginosa
print("Visualizing results for Species: Pseudomonas aeruginosa")

# Loop through each antibiotic group
for group, antibiotics in antibiotic_groups.items():
    print(f"Visualizing results for Antibiotic Group: {group}")

    # Prepare a combined dataframe for the current group
    combined_df_list = []

    for antibiotic in antibiotics:
        if antibiotic in pseudomonadaceae_results:
            # Add the 'Antibiotic' column to identify each antibiotic in the group
            df = pseudomonadaceae_results[antibiotic].copy()
            df['Antibiotic'] = antibiotic.replace('_I', '')  # Removing the "_I" suffix for readability
            combined_df_list.append(df)
        else:
            print(f"No data available for {antibiotic}")

    # If there is data for this group, combine and plot
    if combined_df_list:
        combined_df = pd.concat(combined_df_list)

        # Plotting trends for each antibiotic in the group
        fig = px.line(
            combined_df,
            x='Year',
            y=['Susceptible', 'Intermediate', 'Resistant'],
            color='Antibiotic',  # Different colors for each antibiotic
            line_dash='variable',  # Different dash patterns for susceptible, intermediate, and resistant
            title=f"Susceptibility Trends for Pseudomonas aeruginosa ({group} Group)",
            labels={'value': 'Percentage (%)', 'variable': 'Resistance Category'},
            markers=True
        )

        # Customize the layout
        fig.update_layout(
            xaxis_title="Year",
            yaxis_title="Percentage (%)",
            legend_title="Antibiotic / Resistance Category",
            template="plotly",
            xaxis=dict(tickmode='linear', tick0=2018, dtick=1),  # Ensure years are displayed as integers
            width=1000,  # Adjust the width
            height=600   # Adjust the height
        )

        # Show the plot
        fig.show()
    else:
        print(f"No data to visualize for group {group}")


Visualizing results for Species: Pseudomonas aeruginosa
Visualizing results for Antibiotic Group: Penicillins
No data available for Amoxicillin-clavulanate_I
No data available for Ampicillin_I


Visualizing results for Antibiotic Group: Cephalosporins
No data available for Ceftriaxone_I


Visualizing results for Antibiotic Group: Carbapenems


Visualizing results for Antibiotic Group: Fluoroquinolones


Visualizing results for Antibiotic Group: Aminoglycosides


Visualizing results for Antibiotic Group: Glycopeptides
No data available for Vancomycin_I
No data to visualize for group Glycopeptides
Visualizing results for Antibiotic Group: Macrolides
No data available for Azithromycin_I
No data available for Clarithromycin_I
No data to visualize for group Macrolides
Visualizing results for Antibiotic Group: Tetracyclines
No data available for Tigecycline_I
No data to visualize for group Tetracyclines
Visualizing results for Antibiotic Group: Oxazolidinones
No data available for Linezolid_I
No data to visualize for group Oxazolidinones
Visualizing results for Antibiotic Group: Antifolates
No data available for Trimethoprim-sulfamethoxazole_I
No data to visualize for group Antifolates


In [30]:

def check_increasing_trend(resistant_rates):
    """
    This function checks if the resistance trend is increasing year over year.
    Returns True if resistance is increasing, False otherwise.
    """
    # Compare each element with the next one to ensure it's increasing
    for i in range(len(resistant_rates) - 1):
        if resistant_rates[i] > resistant_rates[i + 1]:
            return False  # If any rate decreases, return False
    return True  # Return True if all rates are increasing

def calculate_absolute_change(start_value, end_value):
    """
    This function calculates the absolute change between two values.
    """
    return end_value - start_value

# List to store results of increasing resistance trends for Pseudomonas aeruginosa
increasing_resistance_results = []

# Loop through each antibiotic in the results for Pseudomonas aeruginosa
for antibiotic, df in pseudomonadaceae_results.items():
    # Extract the resistant rates over the years
    resistant_rates = df['Resistant'].values
    years = df['Year'].values
    
    # Check if the resistance trend is increasing
    if check_increasing_trend(resistant_rates):
        # Calculate absolute changes between consecutive years
        absolute_changes = []
        for i in range(1, len(resistant_rates)):
            start_value = resistant_rates[i - 1]
            end_value = resistant_rates[i]
            absolute_change = calculate_absolute_change(start_value, end_value)
            absolute_changes.append(absolute_change)
        
        # Calculate the mean of the absolute changes
        if absolute_changes:
            mean_absolute_change = sum(absolute_changes) / len(absolute_changes)
        else:
            mean_absolute_change = 0.0
        
        # Store the information in the results
        increasing_resistance_results.append({
            'Antibiotic': antibiotic.replace('_I', ''),  # Remove "_I" for readability
            'Years': ', '.join(map(str, years)),  # Concatenate years for display
            'Resistant Rates': ', '.join(map(str, resistant_rates)),  # Concatenate rates for display
            'Overall Resistance Rate Increase': round(mean_absolute_change, 2)  # Round to 2 decimal places
        })

# Convert the results into a DataFrame for easy display
increasing_resistance_df = pd.DataFrame(increasing_resistance_results)

# Display the results
if not increasing_resistance_df.empty:
    print("Antibiotics showing increasing resistance trends for Pseudomonas aeruginosa with Overall Resistance Rate Increase:")
    print(increasing_resistance_df.to_string(index=False))  # Display DataFrame without the index
else:
    print("No increasing resistance trends identified.")


Antibiotics showing increasing resistance trends for Pseudomonas aeruginosa with Overall Resistance Rate Increase:
Antibiotic      Years Resistant Rates  Overall Resistance Rate Increase
Gentamicin 2018, 2019     16.3, 17.13                              0.83
