Code to demonstrate pytetrad_plus capabilities.

In [None]:
from pytetrad_plus import MyTetradSearch
from dgraph_flex import DgraphFlex
import semopy
import pprint as pp

In [None]:
# Paths to the input data file and the prior-knowledge file
data_file = 'pytetrad_plus/boston_data_raw.csv'
knowledge_file = 'pytetrad_plus/boston_prior.txt'

# Create the MyTetradSearch instance used by every later cell
ts = MyTetradSearch()


In [None]:
# Read the raw data from data_file using the MyTetradSearch CSV reader,
# then display it as the cell output
df = ts.read_csv(data_file)
df

In [None]:
# Add the lag columns; lag_stub='_lag' is the suffix used for the new columns.
# NOTE(review): which columns are lagged, and by how many steps, is decided
# inside add_lag_columns — confirm against pytetrad_plus.
df_lag = ts.add_lag_columns(df, lag_stub='_lag')
df_lag

In [None]:
# Standardize the lagged data. The result (df_lag_std) is what the model
# search and both SEM fits in this notebook are run on.
df_lag_std = ts.standardize_df_cols(df_lag)
df_lag_std

In [None]:
# Read the prior file from knowledge_file (set in the setup cell) instead of
# repeating the path literal, so the path only has to be changed in one place
prior_lines = ts.read_prior_file(knowledge_file)
# Extract the knowledge from the prior lines
knowledge = ts.extract_knowledge(prior_lines)

# Display the extracted knowledge as the cell output
knowledge

In [None]:
# Run a single 'gfci' search on the lagged, standardized data, passing the
# prior knowledge along with the score and test settings
searchResult = ts.run_model_search(
    df_lag_std,
    model='gfci',
    knowledge=knowledge,
    score=dict(sem_bic=dict(penalty_discount=4.0)),
    test=dict(fisher_z=dict(alpha=0.05)),
)

# Display the search result
searchResult

In [None]:
# Collect the edges from the search result into a list
edges = [*searchResult['setEdges']]

# Build a graph object from those edges and show the raw graph
obj = DgraphFlex()
obj.add_edges(edges)
obj.show_graph()

In [None]:
# Run the SEM, step 1: convert the edges from searchResult into a lavaan-format
# model description, which is what run_semopy is given in the next cell

lavaan_model = ts.edges_to_lavaan(edges)
lavaan_model

In [None]:
# Run semopy with the lavaan model on the lagged, standardized data,
# then display the results
sem_results = ts.run_semopy(lavaan_model, df_lag_std)
sem_results

In [None]:
# The main output we are interested in is the parameter estimates, stored
# under the 'estimates' key of the run_semopy result
sem_results['estimates']

In [None]:
# Have semopy plot the fitted model, with plot_covs enabled, into a PNG file
png_path = 'pytetrad_plus/boston_data.png'
g = semopy.semplot(
    sem_results['model'],
    png_path,
    plot_covs=True,
)


In [None]:
# Attach the SEM parameter estimates to the existing graph object
ts.add_sem_results_to_graph(obj, sem_results['estimates'])

# Write the graph to a PNG file, then display it
obj.save_graph(plot_name='boston_graph_single_sem', plot_format='png')
obj.show_graph()

In the next section, we perform the stability analysis.

We do 100 runs, each on a 90% subsample drawn without replacement.

We keep edges that are present in at least 75% of the runs.

`stable_edges` contains the edges that were found in at least 75% of the runs.

In [None]:
# Stability search: repeat the 'gfci' search over subsamples of the data and
# keep the edges that recur often enough.
# NOTE(review): this call is given the raw `df` and penalty_discount=1.0,
# whereas the single search earlier used `df_lag_std` and penalty_discount=4.0
# — confirm both differences are intended.
# NOTE(review): no seed is passed here, so the subsamples (and therefore the
# stable edges) may differ between runs — confirm whether a seed can be set.
stable_edges, sorted_edges = ts.run_stability_search(
    df,
    model='gfci',
    knowledge=knowledge,
    score=dict(sem_bic=dict(penalty_discount=1.0)),
    test=dict(fisher_z=dict(alpha=0.05)),
    runs=100,                # number of subsampled searches
    min_fraction=0.75,       # keep edges present in at least this share of runs
    subsample_fraction=0.9,  # share of the data used in each run
)

# Display the edges that passed the min_fraction threshold
stable_edges

In [None]:
# Let's show the sorted edges (the second value returned by
# run_stability_search)
sorted_edges

Let's create a graph of these edges

In [None]:
# Start a fresh graph object (replacing the earlier `obj`) that holds only
# the stable edges
obj = DgraphFlex()
obj.add_edges(stable_edges)
# show the graph
obj.show_graph()

Now that we have the edges, let's run the SEM.

In [None]:
# Convert the stable edges into a lavaan-format model description
lavaan_model = ts.edges_to_lavaan(stable_edges)

# Run semopy on the lagged, standardized data
sem_results = ts.run_semopy(lavaan_model, df_lag_std)

# Attach the estimates to the graph object, write it to a PNG file, and show it
ts.add_sem_results_to_graph(obj, sem_results['estimates'])
obj.save_graph(plot_name='boston_graph_stable_sem', plot_format='png')
obj.show_graph()