In [1]:
from os.path import join
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings(action='ignore')

In [4]:
# Location of the cleaned, wide-format constructs file, relative to this
# notebook. The folder is assembled component by component so that
# os.path.join picks the separator instead of hard-coding backslashes.
in_folder = join('..', '..', '..', '..',
                 'BRC - Digital Programme',
                 'NetworkAnalysis',
                 'BRC_RC',
                 'CleanData',
                )
in_file = 'BRC_data_wide_BlanchardSchumacherProtocol.csv'
in_path = join(in_folder, in_file)

# To load
constructs_data = pd.read_csv(in_path,
                              # select the date column by name so the
                              # parse does not depend on column order
                              parse_dates=['encounter_date'],
                             )
# Quick sanity check of what was loaded: first rows, dtypes and shape.
display(constructs_data.head(),
        constructs_data.dtypes,
        constructs_data.shape,
       )

Unnamed: 0,dataset,participant_id,encounter_date,day_n,hopelessness,depression,voices,visions,anxiety,paranoia,delusions
0,clintouch,10142,2018-08-09,1,4.5,2.25,1.5,1.0,2.25,1.75,1.0
1,clintouch,10142,2018-08-10,2,7.0,2.0,1.0,1.0,3.0,2.25,1.0
2,clintouch,10142,2018-08-11,3,7.0,3.0,1.0,1.0,2.0,5.5,1.0
3,clintouch,10142,2018-08-12,4,7.0,2.0,1.0,1.0,1.0,1.75,1.5
4,clintouch,10142,2018-08-13,5,7.0,2.0,1.0,1.0,1.0,1.5,1.5


dataset                   object
participant_id             int64
encounter_date    datetime64[ns]
day_n                      int64
hopelessness             float64
depression               float64
voices                   float64
visions                  float64
anxiety                  float64
paranoia                 float64
delusions                float64
dtype: object

(4467, 11)

In [5]:
def ContinousBlocks(df):
    """Flag the rows of ``df`` whose ``day_n`` value is missing.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``day_n`` column.

    Returns
    -------
    pandas.Series
        int64 series with one entry per row of ``df``, on a fresh
        0..n-1 index: 1 where ``day_n`` is missing, 0 otherwise.
    """
    # ``isna`` is vectorised and, unlike a per-element ``np.isnan``, also
    # accepts None / object values and yields a well-defined result for
    # an empty frame.
    # ``to_numpy`` drops the frame's index so the result is positional,
    # as it was when the series was built from a plain list.
    cb = pd.Series(df['day_n']
                   .isna()
                   .to_numpy(dtype='int64')
                  )
    return cb

In [6]:
def MaxDays(df,
            final_buffer=42,
            study_length=None,
           ):
    """Build the daily calendar that one participant's series should cover.

    The calendar starts at the earliest ``encounter_date`` in ``df``.
    If the observed span (last minus first encounter, in whole days) does
    not exceed the study length of the participant's dataset, the calendar
    ends ``final_buffer`` days after the last encounter, capped at
    first encounter + study length. Otherwise it ends at the last
    encounter.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single participant with a ``dataset`` column (the first
        row's value is used) and a datetime ``encounter_date`` column.
    final_buffer : int, default 42
        Days appended after the last encounter, subject to the cap above.
    study_length : mapping of dataset name -> int, optional
        Study length in days per dataset. Defaults to
        ``{'clintouch': 84, 'careloop': 84, 'empower': 365}``.

    Returns
    -------
    pandas.DataFrame
        Single column ``encounter_date`` with one row per calendar day.

    Raises
    ------
    KeyError
        If the participant's dataset has no configured study length.
    """
    if study_length is None:
        study_length = {'clintouch': 84,
                        'careloop': 84,
                        'empower': 365,
                       }
    OffSet = pd.DateOffset

    dataset = df['dataset'].iloc[0]
    if dataset not in study_length:
        raise KeyError(f'No study length configured for dataset '
                       f'{dataset!r}; known datasets: '
                       f'{list(study_length)}'
                      )
    planned_days = study_length[dataset]

    # min()/max() give the first and last encounter directly, without
    # sorting the whole column.
    min_date = df['encounter_date'].min()
    last_date = df['encounter_date'].max()

    theoretical_max = min_date + OffSet(planned_days)
    # observed span in whole days
    encounters_length = (last_date - min_date).days

    if encounters_length <= planned_days:
        # still inside the planned study window: pad after the last
        # encounter, but never beyond the planned end of the study
        max_date = min(last_date + OffSet(final_buffer),
                       theoretical_max,
                      )
    else:
        # participant ran past the planned length: stop at the last
        # observed encounter
        max_date = last_date

    date_range = pd.date_range(min_date,
                               max_date,
                              )
    max_days = pd.DataFrame({'encounter_date': date_range})

    return max_days

In [7]:
def MissedEncounters(continous_blocks,
                     incremental_step=0.5,
                     min_scale_lim=1,
                     max_scale_lim=7,
                    ):
    """Score each position by the length of the current run of misses.

    For every position the number of consecutive 1s ending at that
    position is counted and multiplied by ``incremental_step``. The
    result is then mapped onto the scale:

    * below ``incremental_step`` (no current miss) -> ``min_scale_lim``
    * otherwise -> ``min(value, max_scale_lim - 1) + 1``

    With the defaults a run of k misses scores ``min(0.5 * k, 6) + 1``,
    i.e. 1.5, 2.0, ... up to 7.

    Parameters
    ----------
    continous_blocks : pandas.Series
        0/1 indicator series, 1 marking a missed encounter.
    incremental_step : float, default 0.5
        Score added per consecutive miss.
    min_scale_lim : number, default 1
        Score given when there is no current miss.
    max_scale_lim : number, default 7
        Upper end of the scale.

    Returns
    -------
    pandas.Series
        Float series on the same index as ``continous_blocks``.
    """
    is_missed = continous_blocks.eq(1)

    # Every non-missed entry opens a new block, so the cumulative sum of
    # misses inside a block is the length of the run ending at each
    # position. This is linear in the series length and avoids
    # ``Series.replace(method=...)``, which pandas has deprecated.
    block_id = (~is_missed).cumsum().to_numpy()
    run_length = (is_missed
                  .astype('int64')
                  .groupby(block_id)
                  .cumsum()
                 )
    raw_score = run_length.astype('float64') * incremental_step

    # limits to 7 to match
    # Likert's scale
    # NOTE(review): the upper branch offsets by a literal 1 rather than by
    # min_scale_lim, as it always has; the two only coincide for the
    # default min_scale_lim=1.
    me = ((raw_score.clip(upper=max_scale_lim - 1) + 1)
          .mask(raw_score < incremental_step,
                min_scale_lim,
               )
         )
    return me

In [8]:
def Disengagement(missed_encounters,
                  min_scale_lim=1,
                  max_scale_lim=7,
                 ):
    """Collapse missed-encounter scores into a two-level indicator.

    Parameters
    ----------
    missed_encounters : pandas.Series
        Numeric scores, e.g. the output of ``MissedEncounters``.
    min_scale_lim : number, default 1
        Value returned for scores below ``max_scale_lim``.
    max_scale_lim : number, default 7
        Threshold, and the value returned for scores that are not below
        it. NaN compares as "not below" and therefore also maps to
        ``max_scale_lim``.

    Returns
    -------
    pandas.Series
        Series on the same index holding ``min_scale_lim`` or
        ``max_scale_lim``.
    """
    # "not below the maximum" rather than ">= maximum" so that NaN falls
    # on the max_scale_lim side.
    at_max = ~(missed_encounters < max_scale_lim)
    d = at_max.map({True: max_scale_lim,
                    False: min_scale_lim,
                   }
                  )

    return d

In [9]:
identifier= 'B'
constructs_data['participant_id'] = (constructs_data
                                     ['participant_id']
                                     .astype(int)
                                    )
data = []
# Unique (dataset, participant_id) pairs; the third row of the transposed
# table holds their counts. These names are not used further in this cell.
# NOTE(review): the loop below groups by participant_id only — confirm
# that ids are unique across datasets.
datasets, participants, _ = (constructs_data[['dataset',
                                              'participant_id',
                                             ]]
                             .value_counts()
                             .reset_index()
                             .T
                             .values
                            )
for participant, df in (constructs_data
                        .groupby('participant_id'
                                )
                       ):
    # daily calendar this participant's series should cover
    max_days=MaxDays(df,
                 final_buffer=42,
                )
    # calendar days without an encounter become rows of NaN
    df = (max_days
          .merge(df,
                 on="encounter_date",
                 how='outer',
                )
         )

    # This part identifies the continuous gap blocks; it must run before
    # the imputation below because it relies on day_n being NaN on the
    # added rows.
    continous_blocks = ContinousBlocks(df)
    df['missed_encounters'] = MissedEncounters(continous_blocks)
    # This part adds the _disengagement_ construct
    df['disengagement'] = Disengagement(df['missed_encounters'])
    # impute the whole df with the last observed values first, then
    # back-fill whatever is still missing at the start of the series.
    # ffill()/bfill() replace fillna(method=..., inplace=True), which
    # pandas has deprecated.
    df = df.ffill().bfill()
    # renumber the days over the completed calendar, starting at 1
    df['day_n'] = np.arange(df.shape[0])+1
    # the outer merge introduced NaN, turning the id column into floats
    df['participant_id'] = df['participant_id'].astype(int)
    data.append(df)

df_plussynthetics = pd.concat(data)

In [10]:
# Write the combined original + synthetic rows next to the notebook,
# without the DataFrame index.
out_folder = r'.'
out_path = join(out_folder, 'Originals+Synthetics.csv')
df_plussynthetics.to_csv(out_path, index=False)