# Loading relevant libraries

In [1]:
from os.path import join
import pandas as pd
import numpy as np

### Silence every warning for the rest of the session.
### NOTE(review): this blanket filter also hides library deprecation
### warnings -- consider narrowing it to specific categories.
import warnings
warnings.filterwarnings(action='ignore')

### Uncomment if changes are made to ToolKit.py and need to reload
# %load_ext autoreload
# %autoreload 2
### Star import from the project-local ToolKit module.
### NOTE(review): the helpers called later in this notebook (MaxDays,
### ContinousBlocks, MissedEncounters, Disengagement) are not defined here,
### so they presumably come from this import -- confirm, and prefer
### importing them explicitly by name.
from ToolKit import *

# Loading data

In [2]:
### Build the path to the input file
in_folder = '.'
in_file = 'BRC_data_wide_BlanchardSchumacherProtocol.csv'
in_path = join(in_folder, in_file)

### Load the data. The date column is selected by name rather than by
### position, so a change in the CSV column order cannot silently make
### pandas parse the wrong column as dates.
constructs_data = pd.read_csv(in_path,
                              parse_dates=['encounter_date'],
                             )

### Review the data: first rows, column dtypes and (rows, columns) shape
display(constructs_data.head(),
        constructs_data.dtypes,
        constructs_data.shape,
       )

Unnamed: 0,dataset,participant_id,encounter_date,day_n,hopelessness,depression,voices,visions,anxiety,paranoia,delusions
0,clintouch,10142,2018-08-09,1,4.5,2.25,1.5,1.0,2.25,1.75,1.0
1,clintouch,10142,2018-08-10,2,7.0,2.0,1.0,1.0,3.0,2.25,1.0
2,clintouch,10142,2018-08-11,3,7.0,3.0,1.0,1.0,2.0,5.5,1.0
3,clintouch,10142,2018-08-12,4,7.0,2.0,1.0,1.0,1.0,1.75,1.5
4,clintouch,10142,2018-08-13,5,7.0,2.0,1.0,1.0,1.0,1.5,1.5


dataset                   object
participant_id             int64
encounter_date    datetime64[ns]
day_n                      int64
hopelessness             float64
depression               float64
voices                   float64
visions                  float64
anxiety                  float64
paranoia                 float64
delusions                float64
dtype: object

(4467, 11)

# Producing synthetic constructs to model missing data and imputing the last observed value in data gaps

In [3]:
### Value forwarded to MaxDays as `final_buffer`.
### NOTE(review): presumably a number of days added after the last
### encounter -- confirm against ToolKit.MaxDays.
FINAL_BUFFER = 42

### Make sure participant ids are integers before grouping on them
constructs_data['participant_id'] = (constructs_data['participant_id']
                                     .astype(int)
                                    )

### One row per (dataset, participant_id) pair, transposed into arrays.
### These names are not used again in this cell; they are kept in case
### other cells rely on them.
datasets, participants, _ = (constructs_data[['dataset',
                                              'participant_id',
                                             ]]
                             .value_counts()
                             .reset_index()
                             .T
                             .values
                            )

### Build one processed frame per participant, then stack them
data = []
for participant, df in constructs_data.groupby('participant_id'):
    # Frame returned by the ToolKit helper, keyed on `encounter_date`.
    # NOTE(review): presumably the participant's full date range -- confirm.
    max_days = MaxDays(df,
                       final_buffer=FINAL_BUFFER,
                      )
    # Outer merge keeps every date from both frames; dates without a
    # recorded encounter end up with NaN in the measurement columns.
    df = max_days.merge(df,
                        on="encounter_date",
                        how='outer',
                       )

    # Identify the continuous gap blocks and derive the synthetic
    # constructs. This must run BEFORE imputation, while gaps are still NaN.
    continous_blocks = ContinousBlocks(df)
    df['missed_encounters'] = MissedEncounters(continous_blocks)
    df['disengagement'] = Disengagement(df['missed_encounters'])

    # Impute: carry the last observed value forward through each gap, then
    # back-fill whatever is still missing at the start of the series.
    # `.ffill().bfill()` replaces the deprecated
    # `fillna(method=..., inplace=True)` calls with the same result.
    df = df.ffill().bfill()

    # Renumber days consecutively (1-based) over the completed date range
    df['day_n'] = np.arange(df.shape[0]) + 1
    # The merge introduced NaN ids on gap rows (turning the column into
    # floats); now that they are filled, restore the integer dtype.
    df['participant_id'] = df['participant_id'].astype(int)
    data.append(df)

df_plussynthetics = pd.concat(data)

# Saving the processed data to CSV

In [4]:
### Write the processed frame to CSV, leaving out the index column
out_folder = '.'
out_path = join(out_folder, 'Originals+Synthetics.csv')
df_plussynthetics.to_csv(out_path, index=False)