Mount Google Drive.

In [None]:
# Colab-only step: attach Google Drive at /content/drive so that the Drive
# paths used by the following cells resolve.
from google.colab import drive

drive.mount("/content/drive")

Set the path for this notebook, the input data, and the output files.  

In [None]:
# Every location must end with "/": later cells build file names by plain
# string concatenation (for example input_path + 'gravityTrainData.csv').
notebook_path = '/content/drive/MyDrive/gravity/'  # conjecturing.py and the expressions executable are copied here
input_path = '/content/drive/MyDrive/gravity/'     # the input CSV is read from here
output_path = '/content/drive/MyDrive/gravity/'    # conjecture output files are written here

Copy the `expressions` executable and the `conjecturing.py` module into the notebook folder.

In [None]:
# Stage the external conjecturing tools in notebook_path: a later cell puts
# notebook_path on sys.path to import conjecturing.py, and the conjecturing
# calls receive notebook_path as an argument.  {notebook_path} is filled in
# by IPython from the Python variable of that name.
!cp /content/drive/MyDrive/conjecturing/c/build/expressions {notebook_path}
!cp /content/drive/MyDrive/conjecturing/python/conjecturing.py {notebook_path}
# Mark the copied expressions file as executable.
!chmod +x {notebook_path}/expressions

Import libraries, specify settings, and prepare data.

In [None]:
import pandas as pd
import numpy as np
import sys
from sklearn.model_selection import train_test_split
import sys
sys.path.insert(0, notebook_path)
from conjecturing import *

# Preprocessing:
# 1. Ensure that column names are strings and have no special characters other
#    than "_".
# 2. No column name (other than the target) should have "TARGET" in
#    the name.
# 3. Define inv_fname, prop_fname, my_data, invariant_names,
#    categorical_names, target, num_train, my_skips, my_time,
#    complexity, use_operators.  Optionally, define debug and verbose.

# Output file names; the conjecturing cells open them under output_path.
inv_fname = '2025_07_10_inv.txt'
prop_fname = '2025_07_10_prop.txt'

# Settings handed to the conjecturing calls and the train/test split below.
my_time = 5
complexity = 10
my_skips = 0.0  # share of examples with missing data before ignoring a conjecture (fraction vs. percent: TODO confirm)
num_train = 900  # used as train_size for the train/test split

# Operators offered to invariant_conjecturing.
use_operators = [
    '-1', '+1', '*2', '/2', '^2', '-()', '1/',
    'sqrt', 'ln', 'log10', 'exp', '10^', 'ceil',
    'floor', 'abs', '+', '*', 'max', 'min', '-', '/', '^',
]
# Alternative, smaller operator list (kept commented out):
# use_operators = ['-1', '+1', '*2']

# Announce the load and flush right away so the message shows up before the
# (possibly slow) read from Drive.
print("Reading data! \n", flush=True)

# header=None: the first CSV row is treated as data, so pandas assigns
# integer column labels until the columns are named below.
gravity_df = pd.read_csv(input_path + "gravityTrainData.csv", header=None)

print("Done reading data! \n")
gravity_df.info()

# Replace the integer labels with string column names.
gravity_df.columns = ['F', 'm1', 'm2', 'r']

# Column to predict; it is also listed among the invariants.
target = "F"
invariant_names = ["F", "m1", "m2", "r"]
categorical_names = []

# my_data refers to the same DataFrame object as gravity_df (no copy).
my_data = gravity_df

You probably do not need to edit the code from here to the end of the file.

Rename the target column TARGET.  Convert categorical variables to dummies.  One dummy for each binary variable and one dummy for each level for variables with more than two levels.

In [None]:
# Rename the target to "TARGET" in whichever name list contains it.  The lists
# are edited in place, so each branch is guarded: when this cell is re-run the
# original name has already been replaced, and an unguarded
# list.index(target) would raise ValueError.
if target in categorical_names:
    categorical_names[categorical_names.index(target)] = "TARGET"
elif target in invariant_names:
    invariant_names[invariant_names.index(target)] = "TARGET"
elif "TARGET" not in invariant_names + categorical_names:
    # Neither the original name nor its replacement is present: the
    # configuration is wrong, so fail with an explicit message.
    raise ValueError(
        f"target {target!r} is not listed in invariant_names or categorical_names"
    )

# DataFrame.rename ignores labels that are absent by default, so this line is
# harmless when the column was already renamed by an earlier run.
my_data = my_data.rename(columns={target: "TARGET"})
# Keep only the configured columns, invariants first, then categoricals.
my_data = my_data[invariant_names + categorical_names]

# Encode the categorical columns as dummy columns (NaN counted as a level).
my_df, property_names, target_property_names = create_dummies(
    my_data,
    categorical_names,
    nan_is_level=True,
)

Define class, invariants, properties, and target properties (if applicable).

In [None]:
# Build the Example class from the encoded frame and the name lists; it is
# instantiated later with (index label, my_df).
Example = create_example_class(
    my_df,
    invariant_names,
    property_names,
    categorical_names,
    target_property_names,
)

Split into training and testing data.

In [None]:
# Split the row index labels into train and test sets.  Stratify on the
# TARGET levels only when the target is categorical; stratify=None is the
# scikit-learn default, i.e. a plain shuffled split.  The fixed random_state
# makes the split reproducible.
X_train, X_test = train_test_split(
    my_df.index,
    train_size=num_train,
    random_state=12345,
    stratify=my_data["TARGET"] if "TARGET" in categorical_names else None,
)

Create examples for conjecturing.

In [None]:
# One Example per index label in each split; all of them reference my_df.
train_examples = [Example(row_label, my_df) for row_label in X_train]
test_examples = [Example(row_label, my_df) for row_label in X_test]

Get lists of invariant and property functions.

In [None]:
# Collect the invariant, property, and target-property functions defined on
# the Example class.
invariants, properties, target_properties = get_invariants_properties(
    Example,
    invariant_names,
    property_names,
    categorical_names,
    target_property_names,
)


Invariant conjecturing - upper and lower bounds.

In [None]:
# Run invariant conjecturing on the training examples, writing its output to
# output_path + inv_fname.  The with-block guarantees the file is flushed and
# closed even if invariant_conjecturing raises part-way through.
with open(output_path + inv_fname, "w") as inv_file:
    inv_conjectures = invariant_conjecturing(
        Example,
        train_examples,
        categorical_names,
        target_property_names,
        invariants,
        use_operators,
        complexity,
        my_time,
        my_skips,
        inv_file,
        notebook_path=notebook_path,
    )

Property conjecturing - sufficient conditions for the values of a categorical target.  For a binary target, get sufficient conditions for the positive class and necessary conditions for the negative class.

In [None]:
# Property conjecturing only applies when the target is categorical.
if "TARGET" in categorical_names:
    # The with-block guarantees the output file is flushed and closed even if
    # property_conjecturing raises part-way through.
    with open(output_path + prop_fname, "w") as prop_file:
        prop_conjs, conditions = property_conjecturing(
            Example,
            properties,
            inv_conjectures,
            categorical_names,
            target_property_names,
            train_examples,
            my_time,
            my_skips,
            prop_file,
            notebook_path=notebook_path,
        )

Apply property conjectures to train and test data if target is categorical.

In [None]:
# Only meaningful for a categorical target: `conditions` is produced by the
# property-conjecturing cell, which runs under the same check.
if "TARGET" in categorical_names:
    X_train_df, X_test_df, y_train_df, y_test_df = apply_property_conjectures(
        my_data,
        my_df,
        X_train,
        X_test,
        property_names,
        invariant_names,
        categorical_names,
        target_property_names,
        conditions,
        train_examples,
        test_examples,
    )



If the target is categorical, calculate support, precision, recall, lift, and F1 of each conjecture on the test data.  The F1 score is only for the class for the sufficient condition it was derived for.

In [None]:
# Score the property conjectures on the held-out examples (categorical target
# only) and print the resulting table.
if "TARGET" in categorical_names:
    results_df = evaluate_property_conjectures(
        categorical_names,
        target_property_names,
        Example,
        conditions,
        test_examples,
        y_test_df,
    )
    print(results_df)

If the target is an invariant/numerical, calculate the mean absolute error of each conjecture on the test data.

In [None]:
# Score the invariant conjectures on the held-out examples when the target is
# numerical.  The print lives inside the branch: results_df is only assigned
# here in that case, so printing unconditionally would either repeat the
# categorical results from the previous cell or raise NameError.
if "TARGET" in invariant_names:
    results_df = evaluate_invariant_conjectures(
        Example,
        inv_conjectures,
        test_examples,
    )
    print(results_df)