# Analysis of outputs

In [6]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import math
import numpy as np
from textwrap import wrap
import sys
if ".." not in sys.path:
    sys.path.insert(0, "..")
from analysis_data_processing import (
    create_population_df,
    redact_and_round_column,
    redact_and_round_df,
    code_specific_analysis,
    further_redaction,
    further_redaction_all,
    redact_to_five_and_round,
    produce_plot,
)

## Data Processing

In [7]:
# Build the population data frame covering all weeks, together with a dictionary
# giving the cohort size for each individual week
population_df, cohort_size = create_population_df("../../output/")

# Read the pulse oximetry codelist and turn it into a lookup: code -> term it refers to
oximetry_codes_df = pd.read_csv("../../codelists/opensafely-pulse-oximetry.csv")
oximetry_codes_dict = (
    oximetry_codes_df
    .set_index("code")["term"]
    .to_dict()
)

# Define the edges of the age categories, for use as right-inclusive bins
# (e.g. [-1, 17] will find ages such that -1<age<=17 - age is a whole number in the data frame
# so this is anyone aged 0 to 17 inclusive)
age_bins = [-1, 39, 49, 64, 200]
# Not used in the visible part of this cell; kept in case later cells read it
n = len(age_bins)

# Create list of labels for the different age groups.
# For every category except the final one the label is 'Age between <youngest> and <oldest>',
# with both ends inclusive: the youngest age in a bin is its lower edge + 1 (the lower edge
# itself is excluded), while the oldest age is the upper edge itself (the upper edge is included).
age_group_labels = [
    f'Age between {lower_edge + 1} and {upper_edge}'
    for lower_edge, upper_edge in zip(age_bins[:-2], age_bins[1:-1])
]
# The final category is open-ended, so its label is 'Age ... or over'
age_group_labels.append(f'Age {age_bins[-2]+1} or over')

# Work out the age category of each patient, then store it as the first column
# ('age_group') of the population data frame
age_category = pd.cut(
    population_df["age"],
    bins=age_bins,
    labels=age_group_labels,
)
population_df.insert(loc=0, column='age_group', value=age_category)



# Define the edges of the second set of age categories, for use as right-inclusive bins
# (e.g. [-1, 17] will find ages such that -1<age<=17 - age is a whole number in the data frame
# so this is anyone aged 0 to 17 inclusive)
# NOTE(review): these edges are currently identical to age_bins - confirm whether a different
# banding was intended for this second grouping.
age_bins_2 = [-1, 39, 49, 64, 200]
# Not used in the visible part of this cell; kept in case later cells read it
n = len(age_bins_2)

# Create list of labels for the different age groups.
# For every category except the final one the label is 'Age between <youngest> and <oldest>',
# with both ends inclusive: the youngest age in a bin is its lower edge + 1 (the lower edge
# itself is excluded), while the oldest age is the upper edge itself (the upper edge is included).
age_group_labels_2 = [
    f'Age between {lower_edge + 1} and {upper_edge}'
    for lower_edge, upper_edge in zip(age_bins_2[:-2], age_bins_2[1:-1])
]
# The final category is open-ended, so its label is 'Age ... or over'
age_group_labels_2.append(f'Age {age_bins_2[-2]+1} or over')

# Work out each patient's category under the second set of age bins, then store it as the
# first column ('age_group_2') of the population data frame
age_category_2 = pd.cut(
    population_df["age"],
    bins=age_bins_2,
    labels=age_group_labels_2,
)
population_df.insert(loc=0, column='age_group_2', value=age_category_2)

# Add column to population data frame for combined age group and shielding status.
# Each row takes the label of the first condition it satisfies; a row that satisfies none of
# them is labelled 'Not Specified'.
conditionlist = [
    population_df['shielding'].eq(1),
    population_df['shielding'].eq(0) & population_df['age'].ge(65),
    population_df['shielding'].eq(0) & population_df['age'].ge(50) & population_df['age'].lt(65),
    population_df['shielding'].eq(0) & population_df['age'].lt(50),
]
# Labels, in the same order as the conditions above
choicelist = [
    '1: Shielding',
    '2: Aged 65 or over and not shielding',
    '3: Aged 50 to 64 and not shielding',
    '4: Aged 49 or under and not shielding',
]
population_df['age_and_shielding'] = np.select(
    conditionlist,
    choicelist,
    default='Not Specified',
)

# List of region names
region_list = [
    "North East",
    "North West",
    "Yorkshire and the Humber",
    "East Midlands",
    "West Midlands",
    "East of England",
    "London",
    "South East",
    "South West",
]

# Create dictionary of oximetry headers:
# keys are the oximetry headers in the input csv files (i.e. pulse_oximetry_<SNOMED code>),
# values are the terms those codes refer to.
# oximetry_codes_df / oximetry_codes_dict (SNOMED code -> term) are already built from the
# codelist csv at the top of this cell, so they are reused here rather than reading the same
# file and rebuilding the same dictionary a second time.
oximetry_headers_dict = {
    f"pulse_oximetry_{code}": term
    for code, term in oximetry_codes_dict.items()
}



## Create dataframe of sums of pulse oximetry codes for each index week

In [8]:
# NOTE: every line in this cell is commented out, so nothing below is executed.
# # Create population data frame which includes all weeks and dictionary of cohort size for each individual week
# population_df, cohort_size = create_population_df("../../output/")

# # Create lists of current and required headers
# # Convert pulse oximetry codelist csv into data frame
# oximetry_codes_df = pd.read_csv('../../codelists/opensafely-pulse-oximetry.csv')
# # Extract list of SNOMED codes
# oximetry_codes_list = oximetry_codes_df['code'].tolist()
# # List of pulse oximetry headers in population dataframe
# oximetry_codes_headers = [f'pulse_oximetry_{x}' for x in oximetry_codes_list]
# # List of headers using descriptions as required
# oximetry_headers = oximetry_codes_df['term'].tolist()

# # Create dictionary for renaming oximetry headers
# oximetry_dictionary = {}
# for n in range(0,len(oximetry_codes_df)):
#     oximetry_dictionary[oximetry_codes_headers[n]] = oximetry_headers[n]

# #Create data frame of sum totals for each index date for each oximetry code
# oximetry_sum = population_df.groupby(['index_date'], as_index=False)[oximetry_codes_headers].sum()

# # Rename oximetry headers in oximetry sums data frame
# oximetry_sum.rename(columns=oximetry_dictionary,inplace=True)

# # Save the dataframe in outputs folder
# #oximetry_sum.to_csv('../../output/oximetry_sums.csv') 
