In [14]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import re
import numpy as np
pd.options.display.max_columns = None
pd.set_option('display.max_rows', None)

In [15]:
# Load the CSV into the DataFrame
streptococcus_pneumoniae_df = pd.read_csv(r"D:\Vivli 2024\Jupiter Notebooks\Vidata Analysis\Filtered Data for the Species and Families\streptococcus_pneumoniae_df.csv")

# Display the DataFrame (optional)
streptococcus_pneumoniae_df.head()


Unnamed: 0,Isolate Number,Data Source,Species,Family,Gender,Age Group,Country,Year,Source of Infection,Amoxicillin-clavulanate,Amoxicillin-clavulanate_I,Ampicillin,Ampicillin_I,Piperacillin tazobactam,Piperacillin tazobactam_I,Ceftriaxone,Ceftriaxone_I,Ceftazidime,Ceftazidime_I,Cefepime,Cefepime_I,Imipenem,Imipenem_I,Meropenem,Meropenem_I,Ciprofloxacin,Ciprofloxacin_I,Levofloxacin,Levofloxacin_I,Amikacin,Amikacin_I,Gentamicin,Gentamicin_I,Vancomycin,Vancomycin_I,Azithromycin,Azithromycin_I,Clarithromycin,Clarithromycin_I,Tigecycline,Tigecycline_I,Linezolid,Linezolid_I,Trimethoprim-sulfamethoxazole,Trimethoprim-sulfamethoxazole_I
0,1773302,ATLAS,Streptococcus pneumoniae,Streptococcaceae,Female,19 to 64 Years,Spain,2018,Sputum,,,,,<=0.25,,<=0.015,,,,,,,,<=0.03,,,,1,,,,,,0.25,,,,,,0.015,,1,,,
1,1773303,ATLAS,Streptococcus pneumoniae,Streptococcaceae,Male,19 to 64 Years,Spain,2018,Sputum,,,,,<=0.25,,<=0.015,,,,,,,,<=0.03,,,,1,,,,,,0.25,,,,,,0.015,,1,,,
2,1773304,ATLAS,Streptococcus pneumoniae,Streptococcaceae,Female,65 to 84 Years,Spain,2018,Sputum,,,,,<=0.25,,<=0.015,,,,,,,,<=0.03,,,,1,,,,,,0.25,,,,,,0.03,,1,,,
3,1773305,ATLAS,Streptococcus pneumoniae,Streptococcaceae,Female,0 to 2 Years,Spain,2018,Sputum,,,,,<=0.25,,0.03,,,,,,,,<=0.03,,,,1,,,,,,0.25,,,,,,0.015,,1,,,
4,1773306,ATLAS,Streptococcus pneumoniae,Streptococcaceae,Male,19 to 64 Years,Spain,2018,Sputum,,,,,0.5,,0.12,,,,,,,,0.12,,,,1,,,,,,0.5,,,,,,0.015,,1,,,


In [16]:
# Function to extract numeric value from a string, handling cases like '<', '>', etc.
def extract_numeric_value(value):
    if isinstance(value, str):
        # Use regex to remove any non-numeric characters except for '.' and '-'
        cleaned_value = re.sub(r'[^\d.-]', '', value)
        try:
            return float(cleaned_value)
        except ValueError:
            return np.nan  # Return NaN if conversion fails
    return value  # Return the value as is if it's already numeric

In [17]:
# Function to interpret antibiotic values based on the given conditions for Streptococcus pneumoniae
def interpret_antibiotic_streptococcus_pneumoniae(row, antibiotic, year):
    value = extract_numeric_value(row[antibiotic])  # Clean the value

    # Handle missing or non-numeric values
    if pd.isna(value):
        return 'Use Not recommended'

    # Interpret for Penicillins (Amoxicillin-clavulanate - AMC, Ampicillin - AMP, Piperacillin tazobactam - TZP)
    if antibiotic in ['Amoxicillin-clavulanate', 'Ampicillin', 'Piperacillin tazobactam']:
        if year == 2022:
            if value <= 0.5:
                return 'Susceptible'
            elif 0.6 <= value <= 1:
                return 'Intermediate'
            else:
                return 'Resistant'
        elif year in [2018, 2019, 2020, 2021]:
            if value <= 0.5:
                return 'Susceptible'
            elif 0.6 <= value <= 2:
                return 'Intermediate'
            else:
                return 'Resistant'

    # Interpret for Cephalosporins (Ceftriaxone - AXO, Cefepime - PIP)
    elif antibiotic == 'Ceftriaxone':
        if year in [2018, 2019, 2020, 2021, 2022]:
            if value <= 0.5:
                return 'Susceptible'
            elif 0.6 <= value <= 2:
                return 'Intermediate'
            else:
                return 'Resistant'
    elif antibiotic == 'Cefepime':
        if year in [2018, 2019, 2020, 2021, 2022]:
            if value <= 1:
                return 'Susceptible'
            elif 1.1 <= value <= 2:
                return 'Intermediate'
            else:
                return 'Resistant'

    # Interpret for Carbapenems (Imipenem - IPM, Meropenem - MEM)
    elif antibiotic in ['Imipenem', 'Meropenem']:
        if year in [2018, 2019, 2020, 2021, 2022]:
            return 'Susceptible' if value <= 2 else 'Resistant'

    # Interpret for Fluoroquinolones (Levofloxacin - LEV)
    elif antibiotic == 'Levofloxacin':
        if year in [2020, 2021, 2022]:
            if value <= 0.001:
                return 'Susceptible'
            elif 0.002 <= value <= 2:
                return 'Intermediate'
            else:
                return 'Resistant'
        elif year in [2018, 2019]:
            return 'Susceptible' if value <= 2 else 'Resistant'

    # Interpret for Glycopeptides (Vancomycin - VAN)
    elif antibiotic == 'Vancomycin':
        if year in [2018, 2019, 2020, 2021, 2022]:
            return 'Susceptible' if value <= 2 else 'Resistant'

    # Interpret for Macrolides (Azithromycin - AZM, Clarithromycin - CAM)
    elif antibiotic in ['Azithromycin', 'Clarithromycin']:
        if year in [2018, 2019, 2020, 2021, 2022]:
            if value <= 0.25:
                return 'Susceptible'
            elif 0.26 <= value <= 5:
                return 'Intermediate'
            else:
                return 'Resistant'

    # Interpret for Tetracyclines (Tigecycline - TIG)
    elif antibiotic == 'Tigecycline':
        if year in [2018, 2019, 2020, 2021, 2022]:
            return 'Insufficient evidence that the organism or group is a good target for therapy with the agent'

    # Interpret for Oxazolidines (Linezolid - LZD)
    elif antibiotic == 'Linezolid':
        if year in [2020, 2021, 2022]:
            return 'Susceptible' if value <= 2 else 'Resistant'
        elif year in [2018, 2019]:
            if value <= 2:
                return 'Susceptible'
            elif 2.1 <= value <= 4:
                return 'Intermediate'
            else:
                return 'Resistant'

    # Interpret for Antifolates (Trimethoprim-sulfamethoxazole - SXT)
    elif antibiotic == 'Trimethoprim-sulfamethoxazole':
        if year in [2018, 2019, 2020, 2021, 2022]:
            if value <= 1:
                return 'Susceptible'
            elif 1.1 <= value <= 2:
                return 'Intermediate'
            else:
                return 'Resistant'

    return 'Use Not recommended'

# Apply the function to the relevant columns for Streptococcus pneumoniae species
def apply_interpretation_streptococcus_pneumoniae(row):
    antibiotics = [
        'Amoxicillin-clavulanate', 'Ampicillin', 'Piperacillin tazobactam', 'Ceftriaxone', 'Cefepime', 
        'Imipenem', 'Meropenem', 'Levofloxacin', 'Vancomycin', 'Azithromycin', 'Clarithromycin', 
        'Tigecycline', 'Linezolid', 'Trimethoprim-sulfamethoxazole'
    ]
    year = row['Year']
    
    for antibiotic in antibiotics:
        interpretation = interpret_antibiotic_streptococcus_pneumoniae(row, antibiotic, year)
        row[antibiotic + '_I'] = interpretation  # Fill the interpretation into the respective "_I" column
    return row

# Apply the interpretation across the filtered dataframe for Streptococcus pneumoniae
streptococcus_pneumoniae_df = streptococcus_pneumoniae_df.apply(apply_interpretation_streptococcus_pneumoniae, axis=1)

# Display the result
streptococcus_pneumoniae_df.head()


Unnamed: 0,Isolate Number,Data Source,Species,Family,Gender,Age Group,Country,Year,Source of Infection,Amoxicillin-clavulanate,Amoxicillin-clavulanate_I,Ampicillin,Ampicillin_I,Piperacillin tazobactam,Piperacillin tazobactam_I,Ceftriaxone,Ceftriaxone_I,Ceftazidime,Ceftazidime_I,Cefepime,Cefepime_I,Imipenem,Imipenem_I,Meropenem,Meropenem_I,Ciprofloxacin,Ciprofloxacin_I,Levofloxacin,Levofloxacin_I,Amikacin,Amikacin_I,Gentamicin,Gentamicin_I,Vancomycin,Vancomycin_I,Azithromycin,Azithromycin_I,Clarithromycin,Clarithromycin_I,Tigecycline,Tigecycline_I,Linezolid,Linezolid_I,Trimethoprim-sulfamethoxazole,Trimethoprim-sulfamethoxazole_I
0,1773302,ATLAS,Streptococcus pneumoniae,Streptococcaceae,Female,19 to 64 Years,Spain,2018,Sputum,,Use Not recommended,,Use Not recommended,<=0.25,Susceptible,<=0.015,Susceptible,,,,Use Not recommended,,Use Not recommended,<=0.03,Susceptible,,,1,Susceptible,,,,,0.25,Susceptible,,Use Not recommended,,Use Not recommended,0.015,Insufficient evidence that the organism or gro...,1,Susceptible,,Use Not recommended
1,1773303,ATLAS,Streptococcus pneumoniae,Streptococcaceae,Male,19 to 64 Years,Spain,2018,Sputum,,Use Not recommended,,Use Not recommended,<=0.25,Susceptible,<=0.015,Susceptible,,,,Use Not recommended,,Use Not recommended,<=0.03,Susceptible,,,1,Susceptible,,,,,0.25,Susceptible,,Use Not recommended,,Use Not recommended,0.015,Insufficient evidence that the organism or gro...,1,Susceptible,,Use Not recommended
2,1773304,ATLAS,Streptococcus pneumoniae,Streptococcaceae,Female,65 to 84 Years,Spain,2018,Sputum,,Use Not recommended,,Use Not recommended,<=0.25,Susceptible,<=0.015,Susceptible,,,,Use Not recommended,,Use Not recommended,<=0.03,Susceptible,,,1,Susceptible,,,,,0.25,Susceptible,,Use Not recommended,,Use Not recommended,0.03,Insufficient evidence that the organism or gro...,1,Susceptible,,Use Not recommended
3,1773305,ATLAS,Streptococcus pneumoniae,Streptococcaceae,Female,0 to 2 Years,Spain,2018,Sputum,,Use Not recommended,,Use Not recommended,<=0.25,Susceptible,0.03,Susceptible,,,,Use Not recommended,,Use Not recommended,<=0.03,Susceptible,,,1,Susceptible,,,,,0.25,Susceptible,,Use Not recommended,,Use Not recommended,0.015,Insufficient evidence that the organism or gro...,1,Susceptible,,Use Not recommended
4,1773306,ATLAS,Streptococcus pneumoniae,Streptococcaceae,Male,19 to 64 Years,Spain,2018,Sputum,,Use Not recommended,,Use Not recommended,0.5,Susceptible,0.12,Susceptible,,,,Use Not recommended,,Use Not recommended,0.12,Susceptible,,,1,Susceptible,,,,,0.5,Susceptible,,Use Not recommended,,Use Not recommended,0.015,Insufficient evidence that the organism or gro...,1,Susceptible,,Use Not recommended


In [18]:
# Dictionary for Streptococcaceae species
streptococcaceae_species_list = [
    'Streptococcus pneumoniae'
]

# List of antibiotics of interest with interpretations (_I)
antibiotics_list = [
    'Amoxicillin-clavulanate_I', 'Ampicillin_I', 'Piperacillin tazobactam_I', 
    'Ceftriaxone_I', 'Ceftazidime_I', 'Cefepime_I', 'Imipenem_I', 'Meropenem_I', 
    'Ciprofloxacin_I', 'Levofloxacin_I', 'Amikacin_I', 'Gentamicin_I', 
    'Vancomycin_I', 'Azithromycin_I', 'Clarithromycin_I', 'Tigecycline_I', 
    'Linezolid_I', 'Trimethoprim-sulfamethoxazole_I'
]

# List of years of interest
years_list = [2018, 2019, 2020, 2021, 2022]

# Dictionary to store susceptibility results for Streptococcaceae
streptococcaceae_results = {}

# Loop through each species of interest in Streptococcaceae
for species in streptococcaceae_species_list:
    # Filter the data for the current species
    df_species = streptococcus_pneumoniae_df[streptococcus_pneumoniae_df['Species'] == species]

    # Dictionary to store antibiotic results
    antibiotic_results = {}

    # Loop through each antibiotic of interest
    for antibiotic in antibiotics_list:
        susceptibility_rates_list = []
        
        # Loop through each year
        for year in years_list:
            year_data = df_species[df_species['Year'] == year]
            total_count = len(year_data)
            
            if total_count > 0:
                # Calculate susceptibility counts
                susceptible_count = (year_data[antibiotic] == 'Susceptible').sum()
                intermediate_count = (year_data[antibiotic] == 'Intermediate').sum()
                resistant_count = (year_data[antibiotic] == 'Resistant').sum()

                # Calculate rates
                total_reported = susceptible_count + intermediate_count + resistant_count
                if total_reported > 0:
                    susceptible_rate = (susceptible_count / total_reported) * 100
                    intermediate_rate = (intermediate_count / total_reported) * 100
                    resistant_rate = (resistant_count / total_reported) * 100

                    # Round rates to 2 decimal places
                    susceptible_rate = round(susceptible_rate, 2)
                    intermediate_rate = round(intermediate_rate, 2)
                    resistant_rate = round(resistant_rate, 2)

                    # Append the data to the list
                    susceptibility_rates_list.append({
                        'Year': year,
                        'Susceptible': susceptible_rate,
                        'Intermediate': intermediate_rate,
                        'Resistant': resistant_rate
                    })
        
        # Store the results for this antibiotic
        if susceptibility_rates_list:
            antibiotic_results[antibiotic] = pd.DataFrame(susceptibility_rates_list)

    # Store the results for this species
    if antibiotic_results:
        streptococcaceae_results[species] = antibiotic_results

# Final results: Display susceptibility rates for Streptococcus pneumoniae grouped by years
for species, antibiotics_data in streptococcaceae_results.items():
    print(f"Results for Species: {species}")
    for antibiotic, df in antibiotics_data.items():
        print(f"\nSusceptibility rates for {antibiotic}:")
        print(df)
        print("\n" + "="*50 + "\n")


Results for Species: Streptococcus pneumoniae

Susceptibility rates for Amoxicillin-clavulanate_I:
   Year  Susceptible  Intermediate  Resistant
0  2018        82.66         12.06       5.28
1  2019        79.86         16.59       3.55
2  2020        81.50         13.79       4.70
3  2021        77.02         18.01       4.97
4  2022        81.05          6.21      12.75



Susceptibility rates for Piperacillin tazobactam_I:
   Year  Susceptible  Intermediate  Resistant
0  2018        78.53          9.18      12.29
1  2019        77.72          9.43      12.84
2  2020        72.28          8.16      19.56
3  2021        68.31         10.63      21.06
4  2022        71.60          3.81      24.59



Susceptibility rates for Ceftriaxone_I:
   Year  Susceptible  Intermediate  Resistant
0  2018        86.29         12.78       0.94
1  2019        84.82         13.79       1.39
2  2020        85.89         13.14       0.97
3  2021        79.10         17.64       3.26
4  2022        83.27 

In [19]:
# Dictionary grouping antibiotics by their pharmacological classes
antibiotic_groups = {
    'Penicillins': ['Amoxicillin-clavulanate_I', 'Ampicillin_I', 'Piperacillin tazobactam_I'],
    'Cephalosporins': ['Ceftriaxone_I', 'Ceftazidime_I', 'Cefepime_I'],
    'Carbapenems': ['Imipenem_I', 'Meropenem_I'],
    'Fluoroquinolones': ['Ciprofloxacin_I', 'Levofloxacin_I'],
    'Aminoglycosides': ['Amikacin_I', 'Gentamicin_I'],
    'Glycopeptides': ['Vancomycin_I'],
    'Macrolides': ['Azithromycin_I', 'Clarithromycin_I'],
    'Tetracyclines': ['Tigecycline_I'],
    'Oxazolidinones': ['Linezolid_I'],
    'Antifolates': ['Trimethoprim-sulfamethoxazole_I']
}

# Loop through each species - specifically for 'Streptococcus pneumoniae'
for species, antibiotics_data in streptococcaceae_results.items():
    if species == 'Streptococcus pneumoniae':
        print(f"Visualizing results for Species: {species}")

        # Loop through each antibiotic group
        for group, antibiotics in antibiotic_groups.items():
            print(f"Visualizing results for Antibiotic Group: {group}")

            # Prepare a combined dataframe for the current group
            combined_df_list = []

            for antibiotic in antibiotics:
                if antibiotic in antibiotics_data:
                    # Add the 'Antibiotic' column to identify each antibiotic in the group
                    df = antibiotics_data[antibiotic].copy()
                    df['Antibiotic'] = antibiotic.replace('_I', '')  # Removing the "_I" suffix for readability
                    combined_df_list.append(df)

            # If there is data for this group, combine and plot
            if combined_df_list:
                combined_df = pd.concat(combined_df_list)

                # Plotting trends for each antibiotic in the group
                fig = px.line(
                    combined_df,
                    x='Year',
                    y=['Susceptible', 'Intermediate', 'Resistant'],
                    color='Antibiotic',  # Different colors for each antibiotic
                    line_dash='variable',  # Different dash patterns for susceptible, intermediate, and resistant
                    title=f"Susceptibility Trends for {species} ({group} Group)",
                    labels={'value': 'Percentage (%)', 'variable': 'Resistance Category'},
                    markers=True
                )

                # Customize the layout
                fig.update_layout(
                    xaxis_title="Year",
                    yaxis_title="Percentage (%)",
                    legend_title="Antibiotic / Resistance Category",
                    template="plotly",
                    width=1000,  # Adjust the width
                    height=600,  # Adjust the height
                    xaxis=dict(tickmode='linear', tick0=2018, dtick=1)  # Ensure integer ticks for years
                )

                # Show the plot
                fig.show()


Visualizing results for Species: Streptococcus pneumoniae
Visualizing results for Antibiotic Group: Penicillins


Visualizing results for Antibiotic Group: Cephalosporins


Visualizing results for Antibiotic Group: Carbapenems


Visualizing results for Antibiotic Group: Fluoroquinolones


Visualizing results for Antibiotic Group: Aminoglycosides
Visualizing results for Antibiotic Group: Glycopeptides


Visualizing results for Antibiotic Group: Macrolides


Visualizing results for Antibiotic Group: Tetracyclines
Visualizing results for Antibiotic Group: Oxazolidinones


Visualizing results for Antibiotic Group: Antifolates


In [20]:
def check_increasing_trend(resistant_rates): 
    """
    This function checks if the resistance trend is increasing year over year.
    Returns True if resistance is increasing, False otherwise.
    """
    # Compare each element with the next one to ensure it's increasing
    for i in range(len(resistant_rates) - 1):
        if resistant_rates[i] > resistant_rates[i + 1]:
            return False  # If any rate decreases, return False
    return True  # Return True if all rates are increasing

def calculate_absolute_change(start_value, end_value):
    """
    This function calculates the absolute change between two values.
    """
    return end_value - start_value

# List to store results of increasing resistance trends
increasing_resistance_results = []

# Loop through each species in the results, specifically focusing on Streptococcus pneumoniae
for species, antibiotics_data in streptococcaceae_results.items():
    if species == 'Streptococcus pneumoniae':
        # Loop through each antibiotic in the results
        for antibiotic, df in antibiotics_data.items():
            # Extract the resistant rates over the years
            resistant_rates = df['Resistant'].values
            years = df['Year'].values
            
            # Check if the resistance trend is increasing
            if check_increasing_trend(resistant_rates):
                # Calculate absolute changes between consecutive years
                absolute_changes = []
                for i in range(1, len(resistant_rates)):
                    start_value = resistant_rates[i - 1]
                    end_value = resistant_rates[i]
                    absolute_change = calculate_absolute_change(start_value, end_value)
                    absolute_changes.append(absolute_change)
                
                # Calculate the mean of the absolute changes
                if absolute_changes:
                    mean_absolute_change = sum(absolute_changes) / len(absolute_changes)
                else:
                    mean_absolute_change = 0.0
                
                # Store the information in the results
                increasing_resistance_results.append({
                    'Species': species,
                    'Antibiotic': antibiotic.replace('_I', ''),  # Remove "_I" for readability
                    'Years': ', '.join(map(str, years)),  # Concatenate years for display
                    'Resistant Rates': ', '.join(map(str, resistant_rates)),  # Concatenate rates for display
                    'Overall Resistance Rate Increase': round(mean_absolute_change, 2)  # Round to 2 decimal places
                })

# Convert the results into a DataFrame for easy display
increasing_resistance_df = pd.DataFrame(increasing_resistance_results)

# Display the results
if not increasing_resistance_df.empty:
    print("Species and Antibiotics showing increasing resistance trends with Overall Resistance Rate Increase:")
    print(increasing_resistance_df.to_string(index=False))  # Display DataFrame without the index
else:
    print("No increasing resistance trends identified.")


Species and Antibiotics showing increasing resistance trends with Overall Resistance Rate Increase:
                 Species              Antibiotic                        Years                   Resistant Rates  Overall Resistance Rate Increase
Streptococcus pneumoniae Piperacillin tazobactam 2018, 2019, 2020, 2021, 2022 12.29, 12.84, 19.56, 21.06, 24.59                              3.08
Streptococcus pneumoniae               Meropenem 2018, 2019, 2020, 2021, 2022           0.0, 0.0, 0.0, 0.0, 0.0                              0.00
Streptococcus pneumoniae              Vancomycin 2018, 2019, 2020, 2021, 2022           0.0, 0.0, 0.0, 0.0, 0.0                              0.00
Streptococcus pneumoniae               Linezolid 2018, 2019, 2020, 2021, 2022           0.0, 0.0, 0.0, 0.0, 0.0                              0.00
