Code to demonstrate basic FastCDA capabilities in a Jupyter notebook.

In [1]:
from fastcda import FastCDA
from dgraph_flex import DgraphFlex
import semopy
import pprint as pp

# create  an instance of FastCDA
fc = FastCDA()

In [2]:
# read in the sample ema dataset and view it
df = fc.getSampleData('R34B01.csv')
df

Unnamed: 0,PainIntensity,PainControl,PainConcerned,PainAffectFunc,PainFearfulMove,P_Active,P_Determined,P_Attentive,P_Inspired,P_Alert,...,P_Ashamed_,ControlLife_,BodyEnergy_,BodyRelax_,MindRelax_,SpatialSpan3_,SpatialSpan4_,SpatialSpan5_,GridA_secs_,GridB_secs_
0,-0.165406,0.294884,-0.151717,-0.229416,-0.112509,-1.735667,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,-1.676163,-2.380476,0.284747,0.229416,-0.273519,-1.662505,4.054276,3.936321
1,-0.165406,0.294884,-0.151717,-0.229416,-0.112509,-3.975237,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,0.596601,0.420084,0.284747,0.229416,0.677851,-3.055179,-0.099869,-0.081198
2,-0.165406,0.294884,-0.151717,-0.229416,-0.112509,-1.735667,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,-1.676163,-2.380476,0.284747,0.229416,-0.273519,-0.966168,-0.167015,-0.145093
3,-0.165406,0.294884,-0.151717,-0.229416,-0.112509,0.503903,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,-1.676163,0.420084,0.284747,0.229416,-1.224889,1.122843,-0.235814,-0.173425
4,-0.165406,0.294884,-0.151717,-0.229416,-0.112509,0.503903,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,-1.676163,0.420084,0.284747,0.229416,-2.176259,1.122843,-0.236837,-0.242645
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,-2.055755,0.294884,-0.151717,-0.229416,-0.112509,0.503903,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,-1.676163,0.420084,0.284747,0.229416,0.677851,0.426506,-0.119494,-0.157161
76,-2.055755,0.294884,-0.151717,-0.229416,-0.112509,0.503903,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,0.596601,0.420084,0.284747,0.229416,0.677851,1.122843,-0.246599,-0.267317
77,-0.165406,0.294884,-0.151717,-0.229416,-0.112509,0.503903,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,0.596601,0.420084,0.284747,0.229416,-0.273519,1.122843,1.889929,1.787556
78,-0.165406,0.294884,-0.151717,-0.229416,-0.112509,0.503903,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,0.596601,0.420084,0.284747,-2.064742,0.677851,-0.269831,-0.243537,-0.262229


In [3]:
# remove columns that end with _
# Identify columns to drop
cols_to_drop = [col for col in df.columns if col.endswith('_')]

# Drop the identified columns
df_cleaned = df.drop(columns=cols_to_drop)
df_cleaned.shape

(80, 24)

In [4]:
# add the lags, with a suffix of '_lag'
df_lag = fc.add_lag_columns(df_cleaned, lag_stub='_lag')
df_lag

Unnamed: 0,PainIntensity,PainControl,PainConcerned,PainAffectFunc,PainFearfulMove,P_Active,P_Determined,P_Attentive,P_Inspired,P_Alert,...,P_Ashamed_lag,ControlLife_lag,BodyEnergy_lag,BodyRelax_lag,MindRelax_lag,SpatialSpan3_lag,SpatialSpan4_lag,SpatialSpan5_lag,GridA_secs_lag,GridB_secs_lag
0,-0.165406,0.294884,-0.151717,-0.229416,-0.112509,-3.975237,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,0.577350,0.420084,0.284747,0.229416,0.664411,-2.920630,-0.052046,-0.032113
1,-0.165406,0.294884,-0.151717,-0.229416,-0.112509,-1.735667,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,-1.732051,-2.380476,0.284747,0.229416,-0.284747,-0.912174,-0.127481,-0.103354
2,-0.165406,0.294884,-0.151717,-0.229416,-0.112509,0.503903,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,-1.732051,0.420084,0.284747,0.229416,-1.233905,1.096282,-0.204772,-0.134944
3,-0.165406,0.294884,-0.151717,-0.229416,-0.112509,0.503903,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,-1.732051,0.420084,0.284747,0.229416,-2.183063,1.096282,-0.205921,-0.212122
4,-0.165406,0.294884,-0.151717,-0.229416,-0.112509,0.503903,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,0.577350,0.420084,0.284747,0.229416,0.664411,-1.581659,-0.069697,-0.054554
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,-2.055755,0.294884,-0.151717,-0.229416,-0.112509,0.503903,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,-1.732051,0.420084,0.284747,0.229416,0.664411,0.426797,-0.074094,-0.116810
75,-2.055755,0.294884,-0.151717,-0.229416,-0.112509,0.503903,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,0.577350,0.420084,0.284747,0.229416,0.664411,1.096282,-0.216889,-0.239632
76,-0.165406,0.294884,-0.151717,-0.229416,-0.112509,0.503903,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,0.577350,0.420084,0.284747,0.229416,-0.284747,1.096282,2.183365,2.051512
77,-0.165406,0.294884,-0.151717,-0.229416,-0.112509,0.503903,0.112509,0.160128,-0.112509,0.0,...,0.0,0.0,0.577350,0.420084,0.284747,-2.064742,0.664411,-0.242688,-0.213448,-0.233958


In [5]:
# standardize the data
df_lag_std = fc.standardize_df_cols(df_lag)
df_lag_std

Unnamed: 0,PainIntensity,PainControl,PainConcerned,PainAffectFunc,PainFearfulMove,P_Active,P_Determined,P_Attentive,P_Inspired,P_Alert,...,P_Ashamed_lag,ControlLife_lag,BodyEnergy_lag,BodyRelax_lag,MindRelax_lag,SpatialSpan3_lag,SpatialSpan4_lag,SpatialSpan5_lag,GridA_secs_lag,GridB_secs_lag
0,-0.166478,0.296908,-0.152696,-0.23094,-0.113228,-4.050119,0.113228,0.161165,-0.113228,0.0,...,0.0,0.0,0.582223,0.402200,0.286691,0.230940,0.670478,-3.111857,-0.054213,-0.034771
1,-0.166478,0.296908,-0.152696,-0.23094,-0.113228,-1.780903,0.113228,0.161165,-0.113228,0.0,...,0.0,0.0,-1.717556,-2.486326,0.286691,0.230940,-0.275375,-0.998648,-0.129193,-0.105589
2,-0.166478,0.296908,-0.152696,-0.23094,-0.113228,0.488312,0.113228,0.161165,-0.113228,0.0,...,0.0,0.0,-1.717556,0.402200,0.286691,0.230940,-1.221229,1.114562,-0.206018,-0.136990
3,-0.166478,0.296908,-0.152696,-0.23094,-0.113228,0.488312,0.113228,0.161165,-0.113228,0.0,...,0.0,0.0,-1.717556,0.402200,0.286691,0.230940,-2.167082,1.114562,-0.207160,-0.213710
4,-0.166478,0.296908,-0.152696,-0.23094,-0.113228,0.488312,0.113228,0.161165,-0.113228,0.0,...,0.0,0.0,0.582223,0.402200,0.286691,0.230940,0.670478,-1.703051,-0.071757,-0.057078
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74,-2.045301,0.296908,-0.152696,-0.23094,-0.113228,0.488312,0.113228,0.161165,-0.113228,0.0,...,0.0,0.0,-1.717556,0.402200,0.286691,0.230940,0.670478,0.410159,-0.076128,-0.118964
75,-2.045301,0.296908,-0.152696,-0.23094,-0.113228,0.488312,0.113228,0.161165,-0.113228,0.0,...,0.0,0.0,0.582223,0.402200,0.286691,0.230940,0.670478,1.114562,-0.218062,-0.241055
76,-0.166478,0.296908,-0.152696,-0.23094,-0.113228,0.488312,0.113228,0.161165,-0.113228,0.0,...,0.0,0.0,0.582223,0.402200,0.286691,0.230940,-0.275375,1.114562,2.167730,2.036463
77,-0.166478,0.296908,-0.152696,-0.23094,-0.113228,0.488312,0.113228,0.161165,-0.113228,0.0,...,0.0,0.0,0.582223,0.402200,0.286691,-2.049593,0.670478,-0.294244,-0.214642,-0.235416


In [6]:
# method to automatically create the knowledge for lag variables
def create_lag_knowledge(columns, lag_stub = '_'):
    """
    Create a lag knowledge structure based on the columns

    Sample structure
    
    knowledge = {'addtemporal': {
                            0: ['alcohol_bev_lag',
                                'TIB_lag',
                                'TST_lag',
                                'PANAS_PA_lag',
                                'PANAS_NA_lag',
                                'worry_scale_lag',
                                'PHQ9_lag'],
                            1: ['alcohol_bev',
                                'TIB',
                                'TST',
                                'PANAS_PA',
                                'PANAS_NA',
                                'worry_scale',
                                'PHQ9']
                            }
            }
    Args:
        columns (List): list of column names
        lag_stub (str, optional): lag stub suffix. Defaults to '_'.
    """
    
    # create the 1 list
    list_1 = [col for col in columns]
    # create the 0 list, the lagged variables
    list_0 = [col+lag_stub for col in columns]
    
    knowledge = {"addtemporal": {
                                    0: list_0,
                                    1: list_1
                                    }
                 }
    return knowledge

knowledge = create_lag_knowledge(df_cleaned.columns, lag_stub='_lag')
pass

In [7]:
knowledge = fc.create_lag_knowledge(df_cleaned.columns, lag_stub = '_lag')

In [8]:
# run model with run_model_search
result, graph = fc.run_model_search(df_lag_std, 
                             model = 'gfci',
                             score={'sem_bic': {'penalty_discount': 1.0}},
                             test={"fisher_z": {"alpha": .01}},
                             knowledge=knowledge
                             )
pass

Singularity encountered when scoring PainIntensity | P_Alert
Singularity encountered when scoring PainIntensity | P_Afraid
Singularity encountered when scoring PainIntensity | P_Nervous
Singularity encountered when scoring PainIntensity | P_Hostile
Singularity encountered when scoring PainIntensity | P_Ashamed
Singularity encountered when scoring PainIntensity | ControlLife
Singularity encountered when scoring PainIntensity | P_Alert_lag
Singularity encountered when scoring PainIntensity | P_Afraid_lag
Singularity encountered when scoring PainIntensity | P_Nervous_lag
Singularity encountered when scoring PainIntensity | P_Hostile_lag
Singularity encountered when scoring PainIntensity | P_Ashamed_lag
Singularity encountered when scoring PainIntensity | ControlLife_lag
Singularity encountered when scoring PainControl | P_Alert
Singularity encountered when scoring PainControl | P_Afraid
Singularity encountered when scoring PainControl | P_Nervous
Singularity encountered when scoring PainC

In [9]:
graph.show_graph()

AttributeError: 'NoneType' object has no attribute 'show_graph'