# Import libs

In [None]:
import os
import glob
import pandas as pd
from datetime import datetime
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## NHS App Analysis

In [None]:
# Load every NHS App extract CSV found directly under data_path into one frame.
data_path = r'data/NHS_app_extract'
# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# the row order of the combined frame is the same on every run and machine.
file_list = sorted(glob.glob(os.path.join(data_path, "*.csv")))
# Each file keeps its own row index, so index labels can repeat across files.
nhsapp_df = pd.concat((pd.read_csv(file) for file in file_list))
# Last expression in the cell: rendered as the cell output in a notebook.
nhsapp_df

In [None]:
# Per-region mean of the numeric columns, shown as the cell output.
# numeric_only=True states explicitly that text columns are skipped
# (First_Report_Date is presumably still a string here; it is only parsed
# to datetime in a later cell). Without it pandas >= 2.0 raises TypeError on
# non-numeric columns instead of silently dropping them.
nhsapp_df.groupby(['First_Region']).mean(numeric_only=True)

In [None]:
# Roll the extract up to one row per (report date, region) by summing.

# Usage counters that get summed within each date/region group.
activity_cols = [
    'Sum_Usage_LoginSessions_Login_Sessions',
    'Sum_Usage_Appointments_Appointments_booked',
    'Sum_Usage_CancelledAppointments_Cancellation_Count',
    'Sum_Usage_Appointments_monthly_Unique_Visitors',
    'Sum_Usage_MedicalRecords_Medical_record_views',
    'Sum_Usage_medicalrecord_monthly_Unique_Visitors',
    'Sum_Usage_OrganDonationRegUpdates_SuccessfulUpdates',
    'Sum_Usage_OrganDonationRegWithdrawals_SuccessfulUpdates',
    'Sum_Usage_OrganDonation_RegistrationsODR',
    'Sum_Usage_Prescriptions_Prescriptions_Ordered',
    'Sum_Usage_Prescriptions_monthly_Unique_Visitors',
]

# Registered-patient and app-registration columns, summed the same way.
uptake_cols = ['Max_GP_Registered_Patients', 'Sum_NHSApp_RegistrationsCount']

# One grouped sum over both column sets; uptake columns come first in the
# output, followed by the activity columns.
region_date_keys = ['First_Report_Date', 'First_Region']
nhsapp_df_analysis = (
    nhsapp_df
    .groupby(region_date_keys)[uptake_cols + activity_cols]
    .sum()
    .reset_index()
)

In [None]:
# Express every activity measure as a rate per 1,000 registered patients.
activity_cols_per1000pop = [f"{col}_per1000pop" for col in activity_cols]

# Registered patients in thousands, one value per date/region row.
patients_in_thousands = nhsapp_df_analysis['Max_GP_Registered_Patients'] / 1000

# axis=0 lines the divisor up with the rows, so each row's activity values are
# divided by that same row's patient count.
nhsapp_df_analysis[activity_cols_per1000pop] = (
    nhsapp_df_analysis[activity_cols].div(patients_in_thousands, axis=0)
)

In [None]:
# Keep only rows inside the reporting window, then save the result as CSV.
start_date = '2019-01-01'
end_date = '2021-01-01'

# Parse the report dates to datetimes before comparing them to the bounds.
nhsapp_df_analysis['First_Report_Date'] = pd.to_datetime(
    nhsapp_df_analysis['First_Report_Date']
)

# NOTE(review): the window excludes start_date itself (>) but includes
# end_date (<=) — confirm this asymmetry is intended.
mask = (
    (nhsapp_df_analysis['First_Report_Date'] > start_date)
    & (nhsapp_df_analysis['First_Report_Date'] <= end_date)
)

nhsapp_df_analysis = nhsapp_df_analysis.loc[mask]

data_path = r'data/NHS_app_extract/outputs'
# to_csv does not create missing folders; make sure the output directory
# exists so a fresh checkout does not fail on the write.
os.makedirs(data_path, exist_ok=True)
nhsapp_df_analysis.to_csv(os.path.join(data_path, 'nhsapp_df_analysis.csv'))

In [None]:
# Collapse the filtered date/region table to one summary row per region:
#   - Max_GP_Registered_Patients: largest value across the report dates
#   - Sum_NHSApp_RegistrationsCount: total across the report dates
#   - activity counts and per-1000 rates: mean across the report dates
regional = nhsapp_df_analysis.groupby(['First_Region'])
nhsapp_df_analysis_final = pd.concat(
    [
        regional['Max_GP_Registered_Patients'].max(),
        regional['Sum_NHSApp_RegistrationsCount'].sum(),
        regional[activity_cols + activity_cols_per1000pop].mean(),
    ],
    axis=1,
).reset_index()

data_path = r'data/NHS_app_extract/outputs'
# to_csv does not create missing folders; make sure the output directory
# exists before writing.
os.makedirs(data_path, exist_ok=True)
nhsapp_df_analysis_final.to_csv(os.path.join(data_path, 'nhsapp_df_analysis_final.csv'))

# Kept as the last expression so the notebook actually renders the table
# (a bare expression in the middle of a cell produces no output).
nhsapp_df_analysis_final

## POMI

In [None]:
# Load every POMI CSV found directly under data_path into one frame.
data_path = r'data/Pomi_latest'
# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# the row order of the combined frame is the same on every run and machine.
file_list = sorted(glob.glob(os.path.join(data_path, "*.csv")))
# Each file keeps its own row index, so index labels can repeat across files.
pomi_df = pd.concat((pd.read_csv(file) for file in file_list))

In [None]:
# Sum 'value' per (region_name, field), then pivot so each distinct 'field'
# becomes its own column, giving one row per region.
# NOTE: this overwrites pomi_df with the pivoted table, so re-running this
# cell presumably needs the import cell to be re-run first (the 'field' and
# 'value' columns no longer exist afterwards).
pomi_df = (
    pomi_df
    .groupby(['region_name', 'field'])['value']
    .sum()
    .unstack('field')
    .reset_index()
)

data_path = r'data/Pomi_latest/outputs'
# to_csv does not create missing folders; make sure the output directory
# exists before writing.
os.makedirs(data_path, exist_ok=True)
pomi_df.to_csv(os.path.join(data_path, 'pomi_df_analysis.csv'))

## GP Survey

In [None]:
# Read the GPPS 2021 CCG data file from the GP survey folder.
data_path = r'data/GP_survey'
file_name = r'GPPS_2021_CCG_data_(weighted)_(csv)_PUBLIC.csv'

# The file name contains no wildcard characters, so glob yields the file's
# path when it exists and an empty list otherwise.
file_list = glob.glob(os.path.join(data_path, file_name))
gpSurvey_df = pd.concat(
    [pd.read_csv(csv_path) for csv_path in file_list]
)

# Last expression in the cell: lists the available columns as cell output.
gpSurvey_df.columns

In [None]:
# Read the GPPS 2021 "list of reporting variables" file from the GP survey
# folder.
data_path = r'data/GP_survey'
file_name = r'GPPS_2021_List_of_reporting_variables_(csv)_PUBLIC.csv'

# The file name contains no wildcard characters, so glob yields the file's
# path when it exists and an empty list otherwise.
file_list = glob.glob(os.path.join(data_path, file_name))
gpSurvey_questions_df = pd.concat(
    [pd.read_csv(csv_path) for csv_path in file_list]
)

# Last expression in the cell: previews the first ten rows as cell output.
gpSurvey_questions_df.head(10)