generated from opensafely/research-template
/
plot_cusum.py
68 lines (48 loc) · 2.53 KB
/
plot_cusum.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utilities import OUTPUT_DIR, drop_irrelevant_practices, get_practice_deciles
def plot_cusum(results, filename):
    """Plot the CUSUM bounds and alert thresholds and save them as an image.

    Four lines are drawn, each an element-wise offset of
    ``results['target_mean']``: ``+ smin`` (red), ``+ smax`` (turquoise) and
    ``+ alert_threshold`` / ``- alert_threshold`` (black, dashed). The figure
    is written to ``OUTPUT_DIR / filename``.

    The x tick labels are read from the module-level ``df['date']``, so ``df``
    must be bound before this function is called.

    Args:
        results: Mapping holding sequences under the keys ``'target_mean'``,
            ``'smin'``, ``'smax'`` and ``'alert_threshold'``. They are paired
            with ``zip``, so each line stops at the shorter sequence.
        filename: Name of the output image, relative to ``OUTPUT_DIR``.
    """
    target_mean = results['target_mean']
    threshold = results['alert_threshold']

    # Keep a handle on the figure so it can be closed afterwards; pyplot keeps
    # every figure alive until it is closed, and clearing it is not enough.
    fig = plt.figure(figsize=(15, 8))
    try:
        plt.plot(
            [mean + low for mean, low in zip(target_mean, results['smin'])],
            color='red',
        )
        plt.plot(
            [mean + high for mean, high in zip(target_mean, results['smax'])],
            color='turquoise',
        )
        plt.plot(
            [mean + limit for mean, limit in zip(target_mean, threshold)],
            color='black',
            linestyle='--',
        )
        plt.plot(
            [mean - limit for mean, limit in zip(target_mean, threshold)],
            color='black',
            linestyle='--',
        )
        plt.ylabel('value')
        plt.xlabel('date')

        # NOTE(review): one tick is placed per row of the global `df`, which
        # is not necessarily the length of the plotted series - confirm.
        dates = df['date']
        plt.xticks(ticks=list(range(len(dates))), labels=dates.values, rotation=90)

        plt.tight_layout()
        plt.savefig(OUTPUT_DIR / filename)
    finally:
        plt.close(fig)
def plot_median(array, results, filename):
    """Plot a series against its target mean, marking alerts, and save it.

    ``array`` is drawn with the default line style, ``results['target_mean']``
    is drawn in red, and every position listed in ``results['alert']`` is
    marked with a green dot at ``array[position]``. The figure is written to
    ``OUTPUT_DIR / filename``.

    The x tick labels are read from the module-level ``df['date']``, so ``df``
    must be bound before this function is called.

    Args:
        array: Indexable sequence of values to plot.
        results: Mapping with a ``'target_mean'`` sequence and an ``'alert'``
            iterable of integer positions into ``array``.
        filename: Name of the output image, relative to ``OUTPUT_DIR``.
    """
    # Keep a handle on the figure so it can be closed afterwards; pyplot keeps
    # every figure alive until it is closed, and clearing it is not enough.
    fig = plt.figure(figsize=(15, 8))
    try:
        plt.plot(array)
        plt.plot(results['target_mean'], color='red')
        plt.ylabel('value')
        plt.xlabel('date')

        # NOTE(review): one tick is placed per row of the global `df`, which
        # is not necessarily the length of `array` - confirm.
        dates = df['date']
        plt.xticks(ticks=list(range(len(dates))), labels=dates.values, rotation=90)

        # Draw all alert markers with one scatter call instead of one per point.
        alert_positions = list(results['alert'])
        if alert_positions:
            plt.scatter(
                x=alert_positions,
                y=[array[position] for position in alert_positions],
                color='green',
                s=50,
            )

        plt.tight_layout()
        plt.savefig(OUTPUT_DIR / filename)
    finally:
        plt.close(fig)
# Read the CUSUM results once; the file does not need to stay open while
# the plots are generated.
with open(OUTPUT_DIR / 'cusum_results.json') as file:
    cusum_results = json.load(file)

# For each indicator, plot the first practice that has at least one alert.
for indicator_key, indicator_value in cusum_results.items():
    print(indicator_key)
    for practice_key, practice_value in indicator_value.items():
        if not practice_value['alert']:
            continue

        # `df` is intentionally a module-level name: plot_cusum and
        # plot_median read df['date'] for their x tick labels.
        df = pd.read_csv(OUTPUT_DIR / f'measure_indicator_{indicator_key}_rate.csv')

        # Treat infinite rates as missing and drop rows without a value.
        df = df.replace(np.inf, np.nan)
        df = df[df['value'].notnull()]
        df = drop_irrelevant_practices(df)
        df['value'] = df['value'] * 1000
        df = get_practice_deciles(df, 'value')

        # JSON object keys are strings; the 'practice' column is compared
        # against the integer form of the key.
        practice_rows = df[df['practice'] == int(practice_key)]
        percentile_array = np.array(practice_rows['percentile'])

        plot_cusum(
            practice_value,
            f'cusum_indicator_{indicator_key}_{practice_key}.jpeg',
        )
        plot_median(
            percentile_array,
            practice_value,
            f'alerts_indicator_{indicator_key}_{practice_key}.jpeg',
        )

        # NOTE(review): stops after the first alerting practice of each
        # indicator - confirm this is the intended nesting of `break`.
        break