Stability analysis for alcohol

In [1]:
# Notebook configuration: analysis label plus the two input files.
group = 'alcohol'  # prefix used when naming the saved plot files
data_file = 'canue_alcohol_global.csv'  # data set that is read and analysed
knowledge_file = 'prior_run8.txt'  # prior-knowledge file used by the searches

In [4]:
from pytetrad_plus import MyTetradSearch
from dgraph_flex import DgraphFlex
import semopy
import pprint as pp

# create the objects we will need

# ts is the MyTetradSearch helper; the following cells call it to read the
# input files, standardize the data, run the searches and fit the SEM
ts = MyTetradSearch()

# get the Tetrad version
# (printed so the version in use is recorded in the cell output)
print("Tetrad version: ", ts.getTetradVersion())

['dummy1', 'dummy2']
Tetrad version:  7.6.6-0


In [None]:
# read the prior knowledge file named by knowledge_file
prior_lines = ts.read_prior_file(knowledge_file)
# extract knowledge from the prior lines; the result is what gets passed as
# the knowledge= argument of the model search and the stability search
knowledge = ts.extract_knowledge(prior_lines)

# pretty-print the extracted knowledge so it can be inspected
pp.pprint(knowledge)

In [None]:
# read in the data from data_file using the search helper
# (presumably returns a pandas DataFrame - confirm against pytetrad_plus)
df = ts.read_csv(data_file)

# bare last expression: the notebook renders df as this cell's output
df

In [None]:
# standardize the data
# df is left bound to the raw data; the result is stored under a new name,
# and df_std is what the searches and SEM fits below operate on
# (presumably each column is z-scored - confirm against standardize_df_cols)
df_std = ts.standardize_df_cols(df)
# bare last expression: the notebook renders df_std as this cell's output
df_std

In [None]:
# Single GFCI search on the standardized data, constrained by the prior
# knowledge, using the 'sem_bic' score setting and the 'fisher_z' test setting.
searchResult = ts.run_model_search(
    df_std,
    model='gfci',
    knowledge=knowledge,
    score={'sem_bic': {'penalty_discount': 1.0}},
    test={'fisher_z': {'alpha': .05}},
)

# Show the edges stored under 'setEdges' and how many of them there are.
pp.pprint(searchResult['setEdges'])
print(f"num edges: {len(searchResult['setEdges'])}")



In [None]:
# Second GFCI search: same data, knowledge, score and test settings as the
# first search, with max_degree and complete_rule_set_used added.
# The outcome is kept under its own name, searchResult2.
searchResult2 = ts.run_model_search(
    df_std,
    model='gfci',
    knowledge=knowledge,
    score={'sem_bic': {'penalty_discount': 1.0}},
    test={'fisher_z': {'alpha': .05}},
    max_degree=1000,
    complete_rule_set_used=True,
)

# Show the edges stored under 'setEdges' and how many of them there are.
pp.pprint(searchResult2['setEdges'])
print(f"num edges: {len(searchResult2['setEdges'])}")


In [None]:
# display the graph
# build a fresh DgraphFlex object and load it with the edges found by the
# first search (searchResult); this same dgraph object is reused by the
# cell that adds the SEM results
dgraph = DgraphFlex()
dgraph.add_edges(searchResult['setEdges'])
dgraph.show_graph()

In [None]:
# run the SEM on the edges found by the single search (searchResult)

# convert the edges from searchResult to lavaan format
# (presumably a lavaan-style model description - confirm against edges_to_lavaan)
lavaan_model = ts.edges_to_lavaan(searchResult['setEdges'])

# fit the model to the standardized data; the 'estimates' entry of the
# result is what gets added to the graph afterwards
sem_results = ts.run_semopy(lavaan_model, df_std)

In [None]:
# add the sem results to our graph object
# (relies on the cells being run top to bottom: dgraph and sem_results must
# still be the single-search graph and its SEM fit at this point)

ts.add_sem_results_to_graph(dgraph, sem_results['estimates'])
# save the graph to a png file; the plot name is '<group>_single_sem'
dgraph.save_graph(plot_format='png', plot_name=f'{group}_single_sem')

# show the graph
dgraph.show_graph()

In [None]:
# Stability search with the same model, knowledge, score and test settings
# as the single search. Presumably the search is repeated `runs` times on
# subsamples of df_std and edges seen in at least `min_fraction` of the runs
# are reported as stable - confirm against pytetrad_plus.
# NOTE(review): no random seed is passed here; if the subsampling is random,
# the stable edge set may differ between executions - confirm whether
# run_stability_search accepts a seed.
stable_edges, sorted_edges, sorted_edges_raw, run_results = ts.run_stability_search(
    df_std,
    model='gfci',
    knowledge=knowledge,
    score={'sem_bic': {'penalty_discount': 1.0}},
    test={'fisher_z': {'alpha': .05}},
    runs=100,
    min_fraction=0.75,
    subsample_fraction=0.9,
    lag_flag=False,
    save_file='stability_search_results.json',
)

# Bare last expression: the notebook renders stable_edges as the cell output.
stable_edges

In [None]:
# let's create the new graph with the stable edges
# NOTE: this cell rebinds dgraph, lavaan_model and sem_results, replacing the
# single-search objects created in the earlier cells
dgraph = DgraphFlex()
dgraph.add_edges(stable_edges)

# convert the stable edges to lavaan format
lavaan_model = ts.edges_to_lavaan(stable_edges)

# run semopy, using the standardized data (df_std); the data is not lagged
sem_results = ts.run_semopy(lavaan_model, df_std)
# add the semopy results to the graph object
ts.add_sem_results_to_graph(dgraph, sem_results['estimates'])
# save the graph to a png file; the plot name is '<group>_stable_sem'
dgraph.save_graph(plot_format='png', plot_name=f'{group}_stable_sem')
# show the graph
dgraph.show_graph()