# Analysis of outputs

In [None]:
import os
import sys  # required by the sys.path manipulation below (was missing -> NameError)
import pandas as pd
import matplotlib.pyplot as plt

# Put the parent directory on the import path before importing the local helper
# module (presumably analysis_data_processing lives one level up -- confirm).
if ".." not in sys.path:
    sys.path.insert(0, "..")
from analysis_data_processing import create_population_df, redact_and_round_column, redact_and_round_df

## Create dataframe of sums of pulse oximetry codes for each index week

In [None]:
# Build the population data frame covering all weeks; the second value returned
# is the dictionary of cohort sizes for each individual week.
population_df, cohort_size = create_population_df("../../output/")

# Load the pulse oximetry codelist and derive the two parallel header lists:
#   * oximetry_codes_headers - column names as they appear in population_df
#   * oximetry_headers       - the code descriptions used as display names
oximetry_codes_df = pd.read_csv('../../codelists/opensafely-pulse-oximetry.csv')
oximetry_codes_list = oximetry_codes_df['code'].tolist()
oximetry_codes_headers = [f'pulse_oximetry_{code}' for code in oximetry_codes_list]
oximetry_headers = oximetry_codes_df['term'].tolist()

# Map each code-based column name to its description (the lists are in the same order)
oximetry_dictionary = dict(zip(oximetry_codes_headers, oximetry_headers))

# Sum every oximetry code column per index date, then swap the code-based
# column names for their descriptions
oximetry_sum = (
    population_df
    .groupby(['index_date'], as_index=False)[oximetry_codes_headers]
    .sum()
)
oximetry_sum = oximetry_sum.rename(columns=oximetry_dictionary)

# Saving to the outputs folder is currently disabled
#oximetry_sum.to_csv('../../output/oximetry_sums.csv')


## Create test dataframe to test redact_and_round_df function

In [None]:
# Work on a copy so the real sums are left untouched
test_df = oximetry_sum.copy()

# Row 2: plant a zero and a five in columns 2 and 3
test_df.iloc[2, 2] = 0
test_df.iloc[2, 3] = 5

# Row 1: columns 1..11 receive the consecutive values 2..12 ...
for col_position, planted_value in enumerate(range(2, 13), start=1):
    test_df.iloc[1, col_position] = planted_value
# ... and column 12 receives a 1
test_df.iloc[1, 12] = 1

# Run the function under test over the doctored frame
test_df = redact_and_round_df(test_df)


## Create timeseries of pulse oximetry code sums (using the test dataframe)

In [None]:
plt.figure(figsize=(20, 10))
plt.rcParams['font.size'] = 20
for header in oximetry_headers:
    # Values that cannot be parsed as numbers (the redacted cells, presumably)
    # become NaN; interpolation then fills those gaps before plotting.
    numeric_values = pd.to_numeric(test_df[header], errors='coerce')
    plt.plot(test_df["index_date"], numeric_values.interpolate())
# Legend sits outside the axes, anchored to the top-right corner of the plot area
plt.legend(oximetry_headers, loc='upper left', bbox_to_anchor=(1.0, 1.0), fontsize=20)
plt.xlabel("Date", fontsize=25)
plt.title('Use of Pulse Oximetry Codes Over Time', fontsize=40)


In [None]:
# # Create plot
# plot_1 = oximetry_sum.plot.line('index_date',oximetry_headers, figsize=(20, 10), fontsize = 20).get_figure()
# plt.legend(loc='upper left', bbox_to_anchor=(1.0, 1.0), fontsize = 20)
# plt.xlabel("Date", fontsize = 25)
# plt.title('Use of Pulse Oximetry Codes Over Time', fontsize = 40)
# plot_1.savefig("../../output/oximetry_timeseries.png", bbox_inches ="tight")
