In [1]:
import pandas as pd

# Importing Datasets on Microbial Consumption

The source of these datasets is the [European Centre for Disease Prevention and Control's Annual 2021 Report](https://www.ecdc.europa.eu/en/data/downloadable-datasets) 

They feature a dataset per [antimicrobial category](https://www.who.int/tools/atc-ddd-toolkit/atc-classification) as defined by the WHO. 

## The WHO's ATC Classification
* The WHO has designed the Anatomical Therapeutic Chemical (ATC) classification system for indexing drug compunds.
* The classification system is divided into five different levels.
* As an example: The 1st level classifies drugs into anatomical groups - see the calssification here.
<img src="Images/who_1st classification.png" alt="My Image" width="800" height="600">
* Given their nature, antimicrobial drugs are classified under letter J.


## The types of antimicrobial drugs discussed here
(needs to be written)

## Community vs Hospital administration
(needs to be written)

## The ECDC's Data Dictionary

* All country data are shown as they are reported to The European Surveillance System. 

* EU/EEA refers to the population-weighted mean consumption based on reported or imputed antimicrobial consumption data from all 29 EU/EEA countries, and excludes the United Kingdom. 
* Country adjustments were applied as detailed in the Methods chapter.
* Crude EU/EEA refers to the population-weighted mean consumption based on reported data available for the specific year, with no imputations for missing data or adjustments for change in data source and includes the UK for the years 2012−2019.
* N/A = Not applicable. Trend analyses was not performed and CAGR not calculated because of missing data, changes in the type of data or change in data process. † = Spain reported reimbursement data for 2011-2015 and changed to sales data in 2016.
* Luxembourg changed data process in 2020, which could impact comparability with previous years.
* For details, please refer to the Methods chapter.

# Step 1: Loading the Data
## Antimicrobial Data Administered between 2021 and 2022

In [2]:
community_betalactams_main = pd.read_csv('Data/community_betalactams.csv')
community_betalactams_others = pd.read_csv('Data/community_betalactams_others.csv')
community_macrolides_lincosamides_streptogramins= pd.read_csv('Data/community_macrolides_lincosamides_streptogramins.csv')
community_others= pd.read_csv('Data/community_others.csv')
community_quinolones= pd.read_csv('Data/community_quinolones.csv')
community_sulfonamides_trimethoprim= pd.read_csv('Data/community_sulfonamides_trimethoprim.csv')
community_tetracyclines= pd.read_csv('Data/community_tetracyclines.csv')
hospital_betalactams = pd.read_csv('Data/hospital_betalactams.csv')
hospital_betalactams_others= pd.read_csv('Data/hospital_betalactams_others.csv')
hospital_carbapenems = pd.read_csv('Data/hospital_carbapenems.csv')
hospital_macrolides_lincosamides_streptogramins = pd.read_csv('Data/hospital_macrolides_lincosamides_streptogramins.csv')
hospital_others = pd.read_csv('Data/hospital_others.csv')
hospital_polymyxins = pd.read_csv('Data/hospital_polymyxins.csv')
hospital_quinolones = pd.read_csv('Data/hospital_quinolones.csv')
hospital_reserve = pd.read_csv('Data/hospital_reserve.csv')
hospital_sulfonamides_trimethoprim = pd.read_csv('Data/hospital_sulfonamides_trimethoprim.csv')
hospital_tetracyclines = pd.read_csv('Data/hospital_tetracyclines.csv')

In [3]:
# Visualising the data
print(hospital_tetracyclines[:5])
print(community_betalactams_others[:5])
print(community_macrolides_lincosamides_streptogramins[:5])
print(community_others[:5])
print(community_quinolones[:5])
print(community_sulfonamides_trimethoprim[:5])
print(community_tetracyclines[:5])
print(hospital_betalactams[:5])
print(hospital_betalactams_others[:5])
print(hospital_carbapenems[:5])
print(hospital_macrolides_lincosamides_streptogramins[:5])
print(hospital_others[:5])
print(hospital_polymyxins[:5])
print(hospital_quinolones[:5])
print(hospital_reserve[:5])
print(hospital_sulfonamides_trimethoprim[:5])
print(hospital_tetracyclines[:5])

  Country name  2012  2013  2014  2015  2016  2017  2018  2019  2020  2021  \
0      Austria   NaN   NaN   NaN   NaN   NaN   NaN   NaN  0.05  0.06  0.05   
1      Belgium  0.02  0.02  0.02  0.02  0.02  0.02  0.02  0.02  0.02  0.02   
2     Bulgaria  0.02  0.02  0.02  0.02  0.02  0.01  0.01  0.02  0.04  0.06   
3      Croatia  0.06  0.05  0.04  0.04  0.04  0.04  0.03  0.04  0.03  0.03   
4      Czechia   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN   NaN  0.05   

  Compound annual growth rate (CAGR)  Unnamed: 12  Unnamed: 13  Unnamed: 14  
0                                NaN          NaN          NaN          NaN  
1                              -1.4%          NaN          NaN          NaN  
2                              12.1%          NaN          NaN          NaN  
3                              -9.2%          NaN          NaN          NaN  
4                                NaN          NaN          NaN          NaN  
  Country name  2012  2013  2014  2015  2016  2017  2018  2019 

In [4]:
hospital_tetracyclines[:5]

Unnamed: 0,Country name,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,Compound annual growth rate (CAGR),Unnamed: 12,Unnamed: 13,Unnamed: 14
0,Austria,,,,,,,,0.05,0.06,0.05,,,,
1,Belgium,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,0.02,-1.4%,,,
2,Bulgaria,0.02,0.02,0.02,0.02,0.02,0.01,0.01,0.02,0.04,0.06,12.1%,,,
3,Croatia,0.06,0.05,0.04,0.04,0.04,0.04,0.03,0.04,0.03,0.03,-9.2%,,,
4,Czechia,,,,,,,,,,0.05,,,,


### Observations
* I should compile all these dataframes into a list
* CAGR column should be deleted
* Same for columns "Unnamed: 12", "Unnamed: 13" and "Unnamed: 14"
* Rows with NaNs should be deleted
* Luxembourg should be deleted since data acquisition has been inconsistent
* Rename 'Czech Republic' to 'Czechia' for consistency

In [5]:
list_of_drug_dfs = [community_betalactams_main, community_betalactams_others,
                    community_macrolides_lincosamides_streptogramins, community_others, community_quinolones, 
                    community_sulfonamides_trimethoprim, community_tetracyclines, hospital_betalactams_others, 
                    hospital_carbapenems, hospital_macrolides_lincosamides_streptogramins, hospital_others, 
                    hospital_polymyxins, hospital_quinolones, hospital_reserve, hospital_sulfonamides_trimethoprim, 
                    hospital_tetracyclines]

In [6]:
def cleanup(table):
    table.drop('Unnamed: 12', axis = 1, inplace = True)
    table.drop('Unnamed: 13', axis = 1, inplace = True)
    table.drop('Unnamed: 14', axis = 1, inplace = True)
    table.drop('Compound annual growth rate (CAGR)', axis = 1, inplace = True)
    table.dropna(axis = 0, inplace = True)
    table[table['Country name']] == 'Czech Republic'
    table = table[table['Country name'] != 'Luxembourg']
    return table

In [7]:
cleaned_up_dfs = [cleanup(table) for table in list_of_drug_dfs]

ValueError: Need to specify at least one of 'labels', 'index' or 'columns'