In [None]:
import torch
from transformers import AutoModelForCausalLM
from transformers import AutoTokenizer
import sys
from datetime import datetime
# Append the parent and grandparent directories (relative to the working
# directory) to the import path — presumably so the project-local packages
# imported in later cells can be resolved.
sys.path.extend(["../", "../../"])

# Seed PyTorch's RNG and pick the GPU when one is available.
torch.manual_seed(42)
device = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "gpt2"

# Fast tokenizer created with add_prefix_space=True.
tokenizer_with_prefix_space = AutoTokenizer.from_pretrained(
    model_id,
    use_fast=True,
    add_prefix_space=True,
    local_files_only=False,
)

# Causal LM whose pad token id is the tokenizer's EOS token id; it is moved to
# the selected device in a separate step.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    return_dict_in_generate=True,
    pad_token_id=tokenizer_with_prefix_space.eos_token_id,
)
model = model.to(device)

In [None]:
from gpt2.gpt2_probabilistic_model_wrapper import GPT2_probabilistic_model_wrapper
from guiding_wfa_doctor_a_the import alphabet_A, alphabet_B
def _build_wrapper(alphabet):
    """Wrap the shared GPT-2 model and tokenizer for the given alphabet.

    NOTE(review): the leading 100 is passed positionally; its meaning is not
    visible here (presumably a maximum sequence length) — confirm against
    GPT2_probabilistic_model_wrapper.
    """
    return GPT2_probabilistic_model_wrapper(100, alphabet, device, model, tokenizer_with_prefix_space)


# One wrapper per alphabet; both share the same model, tokenizer and device.
wrapper_with_prefix_space_A = _build_wrapper(alphabet_A)
wrapper_with_prefix_space_B = _build_wrapper(alphabet_B)


In [None]:
from guiding_wfa_doctor_a_the import get_doctor_wfa_A, get_doctor_wfa_B
from pythautomata.utilities.guiding_wfa_sequence_generator import GuidingWDFASequenceGenerator
# Build the guiding WFA for variant A from the A wrapper's terminal symbol.
guiding_wfa_A = get_doctor_wfa_A(wrapper_with_prefix_space_A.terminal_symbol)
# NOTE(review): the second argument is None; its meaning is not visible here —
# confirm against GuidingWDFASequenceGenerator.
guiding_generator = GuidingWDFASequenceGenerator(guiding_wfa_A, None)
# Sanity check: call generate_words(15); being the cell's last expression,
# the result is shown as the cell output.
guiding_generator.generate_words(15)

In [None]:
# Build the guiding WFA for variant B from the B wrapper's terminal symbol.
guiding_wfa_B = get_doctor_wfa_B(wrapper_with_prefix_space_B.terminal_symbol)
# This rebinds `guiding_generator`: from here on the name refers to the
# generator over guiding_wfa_B, not the one built for guiding_wfa_A.
# NOTE(review): the second argument is None; its meaning is not visible here —
# confirm against GuidingWDFASequenceGenerator.
guiding_generator = GuidingWDFASequenceGenerator(guiding_wfa_B, None)
# Sanity check: call generate_words(15); being the cell's last expression,
# the result is shown as the cell output.
guiding_generator.generate_words(15)

# Using "A" (`guiding_wfa_A`)

In [None]:
from pythautomata.model_exporters.dot_exporters.wfa_dot_exporting_strategy import WFADotExportingStrategy
from IPython.display import display

# Turn the guiding WFA for A into a graph object with the DOT exporting
# strategy and show it inline.
exporter = WFADotExportingStrategy()
graph = exporter.create_graph(guiding_wfa_A)

display(graph)

In [None]:
# Export the guiding WFA for A, passing the current directory ("./") as target.
# NOTE(review): no file name is given, so the exporter chooses one — confirm it
# cannot collide with the export of guiding_wfa_B.
exporter.export(guiding_wfa_A,"./")

# Using "The" (`guiding_wfa_B`)

In [None]:
from pythautomata.model_exporters.dot_exporters.wfa_dot_exporting_strategy import WFADotExportingStrategy
from IPython.display import display

# Turn the guiding WFA for B into a graph object with the DOT exporting
# strategy and show it inline. `exporter` and `graph` are rebound here.
exporter = WFADotExportingStrategy()
graph = exporter.create_graph(guiding_wfa_B)

display(graph)

In [None]:
# Export the guiding WFA for B, passing the current directory ("./") as target.
# NOTE(review): no file name is given, so the exporter chooses one — confirm it
# cannot collide with the export of guiding_wfa_A.
exporter.export(guiding_wfa_B,"./")

In [None]:
from src.synchronic_model_guided_language_model import SynchronicModelGuidedLanguageModel

# Property models: new WFA instances built with the same factories and
# terminal symbols as the guiding WFAs.
property_model_A = get_doctor_wfa_A(wrapper_with_prefix_space_A.terminal_symbol)
property_model_B = get_doctor_wfa_B(wrapper_with_prefix_space_B.terminal_symbol)

# Keyword options shared by both guided language models.
guided_model_options = dict(
    model_name="GUIDED_GPT2",
    max_seq_length=6,
    normalize_outputs=True,
    top_k=2,
)

# Pair each GPT-2 wrapper with its property model.
synchronic_model_with_prefix_space_A = SynchronicModelGuidedLanguageModel(
    wrapper_with_prefix_space_A, property_model_A, **guided_model_options
)
synchronic_model_with_prefix_space_B = SynchronicModelGuidedLanguageModel(
    wrapper_with_prefix_space_B, property_model_B, **guided_model_options
)



In [None]:
from pymodelextractor.teachers.pac_probabilistic_teacher import PACProbabilisticTeacher
from src.hypothesis_aware_sample_probabilistic_teacher import HypothesisAwareSampleProbabilisticTeacher
from pymodelextractor.learners.observation_tree_learners.bounded_pdfa_quantization_n_ary_tree_learner import BoundedPDFAQuantizationNAryTreeLearner
from pythautomata.utilities.probability_partitioner import TopKProbabilityPartitioner, QuantizationProbabilityPartitioner, RankingPartitioner
from pythautomata.model_comparators.wfa_partition_comparison_strategy import WFAPartitionComparator
from pythautomata.utilities.uniform_word_sequence_generator import UniformWordSequenceGenerator


# Partitioner shared by the comparator and the learner. The argument 2 matches
# the top_k = 2 given to the guided models (presumably the same "top-k" notion;
# TODO confirm).
partitioner = TopKProbabilityPartitioner(2)
comparator = WFAPartitionComparator(partitioner)

# NOTE(review): epsilon and delta are not read by any cell visible here.
epsilon = delta = 0.1

# Bounds handed to the learner.
max_states, max_query_length = 30, 100

# `guiding_generator` was last bound to the generator over guiding_wfa_B, so
# this alias refers to the B generator.
sequence_generator = guiding_generator


In [None]:
# Arguments common to both teachers: the shared comparator and the literal 30.
# NOTE(review): the meaning of 30 is not visible here (presumably a sample
# size) — confirm against HypothesisAwareSampleProbabilisticTeacher.
shared_teacher_args = (comparator, 30)

teacher_with_prefix_space_A = HypothesisAwareSampleProbabilisticTeacher(
    synchronic_model_with_prefix_space_A, *shared_teacher_args
)
teacher_with_prefix_space_B = HypothesisAwareSampleProbabilisticTeacher(
    synchronic_model_with_prefix_space_B, *shared_teacher_args
)

In [None]:
# Keyword options for the learner. The "hipothesis" spelling is kept exactly
# as written because these are the keyword names the call uses.
learner_options = dict(
    max_seconds_run=None,
    generate_partial_hipothesis=True,
    pre_cache_queries_for_building_hipothesis=True,
    check_probabilistic_hipothesis=False,
    omit_zero_transitions=True,
)

# A single learner instance, bounded by max_states and max_query_length.
learner = BoundedPDFAQuantizationNAryTreeLearner(
    partitioner, max_states, max_query_length, **learner_options
)

In [None]:
from pythautomata.model_exporters.dot_exporters.wfa_dot_exporting_strategy import WFADotExportingStrategy
import time


# Learn a model from the teacher for variant A and report how long it took.
# perf_counter() is a monotonic clock, so the elapsed time cannot be skewed by
# system clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()
learning_result_with_prefix_space_A = learner.learn(teacher_with_prefix_space_A, verbose = True)
print("--- %s seconds ---" % (time.perf_counter() - start_time))

# Export the learned model into the current directory. The file name carries
# the variant tag "A" plus a timestamp so the output can be told apart from
# the variant-B export, which shares the "pdfa_doctor_with_prefix_" prefix.
exporter = WFADotExportingStrategy()
actual_date = datetime.now().strftime("%d_%m_%Y_%H_%M_%S")
exporter.export(learning_result_with_prefix_space_A.model,"./", "pdfa_doctor_with_prefix_A_"+actual_date)


In [None]:
from pythautomata.model_exporters.dot_exporters.wfa_dot_exporting_strategy import WFADotExportingStrategy

# Learn a model from the teacher for variant B and report how long it took.
# perf_counter() is a monotonic clock, so the elapsed time cannot be skewed by
# system clock adjustments the way a time.time() difference can.
# NOTE(review): this reuses the same `learner` instance as the variant-A run —
# confirm that learn() does not carry state over between calls.
start_time = time.perf_counter()
learning_result_with_prefix_space_B = learner.learn(teacher_with_prefix_space_B, verbose = True)
print("--- %s seconds ---" % (time.perf_counter() - start_time))

# Export the learned model into the current directory. The file name carries
# the variant tag "B" plus a timestamp so the output can be told apart from
# the variant-A export, which shares the "pdfa_doctor_with_prefix_" prefix.
exporter = WFADotExportingStrategy()
actual_date = datetime.now().strftime("%d_%m_%Y_%H_%M_%S")
exporter.export(learning_result_with_prefix_space_B.model,"./", "pdfa_doctor_with_prefix_B_"+actual_date)


In [None]:
from pythautomata.model_exporters.dot_exporters.wfa_dot_exporting_strategy import WFADotExportingStrategy
from IPython.display import display

# Draw the model learned for variant A, reusing the `exporter` created in an
# earlier cell, and show it inline.
graph1 = exporter.create_graph(learning_result_with_prefix_space_A.model)
display(graph1)

In [None]:
# Draw the model learned for variant B, reusing the `exporter` created in an
# earlier cell, and show it inline.
graph2 = exporter.create_graph(learning_result_with_prefix_space_B.model)
display(graph2)