Code to demonstrate basic tetrad_plus capabilities.

In [1]:
from tetrad_plus import TetradPlus
from dgraph_flex import DgraphFlex
import semopy
import pprint as pp

# Build the TetradPlus object that the following cells use as `tp`.
tp = TetradPlus()

In [None]:
# Locations of the Boston sample data and its prior-knowledge file.
# Both are relative paths, so they resolve against the working directory.
data_file = 'tetrad_plus/data/boston_data_raw.csv'
knowledge_file = 'tetrad_plus/data/boston_prior.txt'

# Also fetch the EMA data through the TetradPlus helper.
# NOTE(review): df_ema is not referenced in the cells shown here - confirm
# it is still needed.
df_ema = tp.getEMAData()

In [None]:
# Read the raw CSV at data_file through the TetradPlus reader.
df = tp.read_csv(data_file)
# Bare expression as the last statement so the notebook displays the result.
df

In [None]:
# Add lagged columns to df, using '_lag' as the suffix for the new columns.
# Any other lag settings are left at add_lag_columns' defaults.
df_lag = tp.add_lag_columns(df, lag_stub='_lag')
# Display the lagged data.
df_lag

In [None]:
# Standardize the columns of the lagged data; the result is what the
# searches and the SEM fit below are run on.
df_lag_std = tp.standardize_df_cols(df_lag)
# Display the standardized data.
df_lag_std

In [None]:
# Read the lines of the prior-knowledge file set in knowledge_file.
prior_lines = tp.read_prior_file(knowledge_file)
# Turn the raw prior lines into a knowledge structure.
knowledge = tp.extract_knowledge(prior_lines)
# Register the knowledge with the TetradPlus instance so that it is
# available to the searches run through tp.
tp.load_knowledge(knowledge)

# Pretty-print the extracted knowledge for inspection.
pp.pprint(knowledge)

In [None]:
# Run GFCI on the standardized, lagged data (no jitter) and extract the edges.
search = tp.run_gfci(df_lag_std)
edges = tp.extract_edges(search)
print(f"Number of edges (no jitter): {len(edges)}")

# Keep a separate reference to this run's edges: `edges` is rebound by the
# jittered run further down, and the final comparison needs these results.
edges_from_run_gfci = edges

# A notebook only echoes a bare expression when it is the last statement of
# the cell, so the display goes at the very end.
edges

In [None]:
# Draw the raw search result (before any SEM estimates are attached).
dg = DgraphFlex()
# Add every edge from the search to the graph object.
dg.add_edges(edges)
# Render the graph.
dg.show_graph()

In [None]:
# prepare the SEM: this cell only builds the model description;
# the fit itself happens when run_semopy is called

# convert the edges extracted from the search result to lavaan format
lavaan_model = tp.edges_to_lavaan(edges)
# display the generated model
lavaan_model

In [None]:
# Fit the lavaan model to the standardized, lagged data with semopy.
# The result is indexed with 'estimates' and 'model' in the cells below.
sem_results = tp.run_semopy(lavaan_model, df_lag_std)


In [None]:
# The main output of interest is the table of parameter estimates;
# leaving it as the last expression makes the notebook display it.
sem_results['estimates']

In [None]:
# Have semopy draw the fitted model, covariances included, and write the
# picture to a PNG file.
png_path = 'boston_data.png'
g = semopy.semplot(
    sem_results['model'],
    png_path,
    plot_covs=True,
)


In [None]:
# Attach the SEM parameter estimates to the existing graph object.

tp.add_sem_results_to_graph(dg, sem_results['estimates'])
# Save the annotated graph in PNG format under the name
# 'boston_graph_single_sem'.
dg.save_graph(plot_format='png', plot_name='boston_graph_single_sem')

# Render the annotated graph in the notebook.
dg.show_graph()

In [None]:
# Example of reading the test data that ships inside the package.
from importlib.resources import as_file
from importlib.resources import files as pkg_resources_files
import glob

# Locate the package's data directory as an importlib.resources traversable.
path = pkg_resources_files('tetrad_plus.data')
print(path)
data_filename = 'boston_data_raw.csv'
data_resource = path.joinpath(data_filename)

# A traversable is not guaranteed to be a real file on disk (for example
# when the package is imported from a zip archive), so str() on it may not
# be a usable path. as_file() yields a concrete filesystem path for the
# duration of the block, extracting to a temporary file only when needed.
with as_file(data_resource) as resolved_path:
    # read_csv is given a plain string path rather than a Path object.
    data_path = str(resolved_path)
    df_raw = tp.read_csv(data_path)

In [None]:
# Repeat the GFCI search, this time with jitter enabled.
search = tp.run_gfci(df_lag_std, jitter=True)
edges = tp.extract_edges(search)
print(f"Number of edges with jitter added: {len(edges)}")

# Start a fresh graph and add the edges, leaving out the
# 'o-o', '---' and '<->' edge types.
dg = DgraphFlex()
dg.add_edges(edges, exclude=['o-o', '---', '<->'])

# Render the graph.
dg.show_graph()

In [None]:
# Convert the edges from the jittered search to lavaan format.
# NOTE(review): dg was built with the 'o-o', '---' and '<->' edges excluded,
# while this model is built from the full edge list - confirm that
# edges_to_lavaan treats those edge types as intended.
lavaan_model = tp.edges_to_lavaan(edges)
# Fit the SEM on the standardized, lagged data.
sem_results = tp.run_semopy(lavaan_model, df_lag_std)
# Attach the parameter estimates to the graph object.
tp.add_sem_results_to_graph(dg, sem_results['estimates'])
# Render the annotated graph.
dg.show_graph()

In [None]:
# Run the same kind of search through the generic run_model_search entry
# point, spelling out the model, score, test and knowledge explicitly.
result2 = tp.run_model_search(
    df_lag_std,
    model='gfci',
    score={'sem_bic': {'penalty_discount': 1.0}},
    test={'fisher_z': {'alpha': 0.01}},
    knowledge=knowledge,
)

print(f"Number of edges: {len(result2['edges'])}")

In [None]:
# Compare the two searches as sets, so the order in which edges were
# returned does not matter. edges_from_run_gfci holds the edges of the
# non-jittered run_gfci call.
edges_gfci = set(edges_from_run_gfci)
edges_model_search = set(result2['edges'])

print(f"Test if edges_gfci and edges_model_search are equal: {edges_gfci == edges_model_search}")

# Report the size of each edge set.
print(f"Number of edges from run_gfci: {len(edges_gfci)}")
print(f"Number of edges from run_model_search: {len(edges_model_search)}")