In [54]:
from comparisonframe import ComparisonFrame

### 1. Creating validation set

#### 1.1 Initialize comparison class

In [55]:
# Build the comparison frame that will hold the validation set.
comparer = ComparisonFrame(
    # Optional: default parameters for the underlying mocker storage.
    mocker_params={
        "file_path": "./comparisonframe_storage",
        "persist": True,
    },
    # Scores calculated for each run/record comparison.
    compare_scores=["word_count_diff", "semantic_similarity"],
    # Aggregations applied to those scores (shows up later as `median_<score>`).
    aggr_scores=["median"],
)



#### 1.2 Recording queries and expected responses (validation set)

In [56]:
# Store two validation entries: queries[i] is paired with expected_texts[i],
# and the batch is tagged with the 'metal_bands' metadata used for filtering later.
comparer.record_queries(
    queries=[
        "Black metal",
        "Tribulation",
    ],
    expected_texts=[
        "Black metal is an extreme subgenre of heavy metal music.",
        "Tribulation are a Swedish heavy metal band from Arvika that formed in 2005.",
    ],
    metadata={"name": "metal_bands"},
)

### 2. Comparing newly generated data with expected results 

#### 2.1 Initialize new comparison class

In [57]:
# Create a fresh comparison frame pointing at the same storage path and
# configured with the same scores as the one used to record the validation set.
comparer = ComparisonFrame(
    # Optional: default parameters for the underlying mocker storage.
    mocker_params={
        "file_path": "./comparisonframe_storage",
        "persist": True,
    },
    # Scores calculated for each run/record comparison.
    compare_scores=["word_count_diff", "semantic_similarity"],
    # Aggregations applied to those scores.
    aggr_scores=["median"],
)

#### 2.2 Show validation set

In [58]:
# List the recorded queries. The metadata filter is optional; here it selects
# the entries tagged {'name': 'metal_bands'}.
untested_queries = comparer.get_all_queries(metadata_filters={"name": "metal_bands"})
print(untested_queries)

['Black metal', 'Tribulation']


In [59]:
# Show every stored validation record as a list of dicts
# (expected_text, record_id, query).
comparer.get_all_records()

[{'expected_text': 'Black metal is an extreme subgenre of heavy metal music.',
  'record_id': '0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8bd7b663c92f2f16e87',
  'query': 'Black metal'},
 {'expected_text': 'Tribulation are a Swedish heavy metal band from Arvika that formed in 2005.',
  'record_id': 'eecd9c2a5b25ee6053891b894157fa30372ed694763385e1ada1dc9ad8e41625',
  'query': 'Tribulation'}]

In [60]:
# Same validation records in tabular form, one row per record.
comparer.get_all_records_df()

Unnamed: 0,expected_text,record_id,query
0,Black metal is an extreme subgenre of heavy me...,0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8...,Black metal
1,Tribulation are a Swedish heavy metal band fro...,eecd9c2a5b25ee6053891b894157fa30372ed694763385...,Tribulation


#### 2.3 Insert newly generated runs for the recorded queries

In [61]:
# Three candidate answers for the "Black metal" query, from best to worst match.
# Identical to the expected text recorded for this query.
valid_answer_query_1 = "Black metal is an extreme subgenre of heavy metal music."
# Same sentence with "an extreme" shortened to "a".
very_similar_answer_query_1 = "Black metal is a subgenre of heavy metal music."
# Off-topic answer (about jewelry, not the music genre).
unexpected_answer_query_1 = "Black metals are beautiful and are often used in jewelry design."

In [62]:
# Record the three candidate answers as runs, tagged with 'desc' metadata.
# Only one query is listed for three texts; the saved run listing shows the
# "Black metal" query attached to each of them.
comparer.record_runs(
    queries=["Black metal"],
    provided_texts=[
        valid_answer_query_1,
        very_similar_answer_query_1,
        unexpected_answer_query_1,
    ],
    metadata={"desc": "definitions"},
)

In [63]:
# List the recorded runs as dicts; each carries the query, the provided_text,
# a run_id and a timestamp.
comparer.get_all_runs()

[{'query': 'Black metal',
  'provided_text': 'Black metal is an extreme subgenre of heavy metal music.',
  'run_id': 'faf5aab28ee8d460cbb69c6f434bee622aff8cdfb8796282bdc547fff2c1abf8',
  'timestamp': '2024-09-26 01:36:13'},
 {'query': 'Black metal',
  'provided_text': 'Black metal is a subgenre of heavy metal music.',
  'run_id': '9fbd80050d382972c012ffcb4641f48d6220afb2210a20a11da5c7a48664f033',
  'timestamp': '2024-09-26 01:36:13'},
 {'query': 'Black metal',
  'provided_text': 'Black metals are beautiful and are often used in jewelry design.',
  'run_id': 'e4fc3f56c95d4266b6543a306c4305e0d8b960a1e0196d05cfc8ee4ea0bd7129',
  'timestamp': '2024-09-26 01:36:13'}]

In [64]:
# Same runs in tabular form; kept in `df` and shown as the cell output.
df = comparer.get_all_runs_df()
df

Unnamed: 0,query,provided_text,run_id,timestamp
0,Black metal,Black metal is an extreme subgenre of heavy me...,faf5aab28ee8d460cbb69c6f434bee622aff8cdfb87962...,2024-09-26 01:36:13
1,Black metal,Black metal is a subgenre of heavy metal music.,9fbd80050d382972c012ffcb4641f48d6220afb2210a20...,2024-09-26 01:36:13
2,Black metal,Black metals are beautiful and are often used ...,e4fc3f56c95d4266b6543a306c4305e0d8b960a1e0196d...,2024-09-26 01:36:13


#### 2.4 Comparing runs with records

In [65]:
# Score the recorded runs against the stored records; the resulting scores
# are read back in the following cells.
comparer.compare_runs_with_records()



In [66]:
# Per-run scores as a list of dicts: each run is linked to a record_id and gains
# word_count_diff, semantic_similarity and a comparison_id.
comparer.get_all_run_scores()

[{'query': 'Black metal',
  'provided_text': 'Black metal is an extreme subgenre of heavy metal music.',
  'run_id': 'faf5aab28ee8d460cbb69c6f434bee622aff8cdfb8796282bdc547fff2c1abf8',
  'timestamp': '2024-09-26 01:36:13',
  'record_id': '0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8bd7b663c92f2f16e87',
  'word_count_diff': 0,
  'semantic_similarity': 0.9999999403953552,
  'comparison_id': 'cdb16a8d16a95e85d879c29aaf9762c9e2776843f2a01d6ef9154daacd9b732d'},
 {'query': 'Black metal',
  'provided_text': 'Black metal is a subgenre of heavy metal music.',
  'run_id': '9fbd80050d382972c012ffcb4641f48d6220afb2210a20a11da5c7a48664f033',
  'timestamp': '2024-09-26 01:36:13',
  'record_id': '0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8bd7b663c92f2f16e87',
  'word_count_diff': 1,
  'semantic_similarity': 0.9859851002693176,
  'comparison_id': '16472e44ac7d2d74e18ea583490c2f6b8661cc8b48cc9b7480a51dc8c6796c41'},
 {'query': 'Black metal',
  'provided_text': 'Black metals are beautiful and are of

In [67]:
# Same per-run scores in tabular form.
comparer.get_all_run_scores_df()

Unnamed: 0,query,provided_text,run_id,timestamp,record_id,word_count_diff,semantic_similarity,comparison_id
0,Black metal,Black metal is an extreme subgenre of heavy me...,faf5aab28ee8d460cbb69c6f434bee622aff8cdfb87962...,2024-09-26 01:36:13,0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8...,0,1.0,cdb16a8d16a95e85d879c29aaf9762c9e2776843f2a01d...
1,Black metal,Black metal is a subgenre of heavy metal music.,9fbd80050d382972c012ffcb4641f48d6220afb2210a20...,2024-09-26 01:36:13,0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8...,1,0.985985,16472e44ac7d2d74e18ea583490c2f6b8661cc8b48cc9b...
2,Black metal,Black metals are beautiful and are often used ...,e4fc3f56c95d4266b6543a306c4305e0d8b960a1e0196d...,2024-09-26 01:36:13,0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8...,1,0.494053,966c1da5e641480e8ccd33a7d0f544d9ec6c4e2e799be1...


### 3. Calculating aggregate comparison scores

In [68]:
# Aggregate the comparison scores, grouped by the 'desc' metadata key that was
# attached to the runs in record_runs.
comparer.calculate_aggr_scores(group_by = ['desc'])



In [69]:
# List aggregate scores as dicts.
# NOTE(review): the saved output shows a group keyed by 'query', not 'desc' —
# confirm which grouping this call returns when no `grouped_by` is passed.
comparer.get_all_aggr_scores()

[{'timestamp': '2024-09-26 01:36:13',
  'comparison_id': ['cdb16a8d16a95e85d879c29aaf9762c9e2776843f2a01d6ef9154daacd9b732d',
   '16472e44ac7d2d74e18ea583490c2f6b8661cc8b48cc9b7480a51dc8c6796c41',
   '966c1da5e641480e8ccd33a7d0f544d9ec6c4e2e799be11529d2cf7a222deb9a'],
  'query': ['Black metal'],
  'grouped_by': ['query'],
  'group': {'query': 'Black metal'},
  'median_word_count_diff': 1.0,
  'median_semantic_similarity': 0.9859851002693176,
  'record_status_id': 'dc1126e128d42f74bb98bad9ce4101fe1a4ea5a46df57d430dea99fdd4b8c628'}]

In [70]:
# Aggregate scores in tabular form for the 'desc' grouping.
# Note: the keyword here is `grouped_by`, while calculate_aggr_scores takes `group_by`.
comparer.get_all_aggr_scores_df(grouped_by = ['desc'])

Unnamed: 0,timestamp,comparison_id,query,grouped_by,group,median_word_count_diff,median_semantic_similarity,record_status_id
0,2024-09-26 01:36:13,[cdb16a8d16a95e85d879c29aaf9762c9e2776843f2a01...,[Black metal],[desc],{'desc': 'definitions'},1.0,0.985985,c9d97729c5b03641fbf8fd35d257f2f1024a812f097ffb...


### 4. Recording test statuses

In [71]:
# Evaluate a pass/fail test expression over the aggregate scores:
# the median semantic similarity must be above 0.9.
comparer.calculate_test_statuses(test_query = "median_semantic_similarity > 0.9")


In [72]:
# Test outcomes as a list of dicts; `test` holds the expression and `valid` its result.
comparer.get_test_statuses()

[{'timestamp': '2024-09-26 01:36:13',
  'record_id': '0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8bd7b663c92f2f16e87',
  'record_status_id': 'dc1126e128d42f74bb98bad9ce4101fe1a4ea5a46df57d430dea99fdd4b8c628',
  'query': 'Black metal',
  'test': 'median_semantic_similarity > 0.9',
  'valid': True}]

In [73]:
# Same test outcomes in tabular form.
comparer.get_test_statuses_df()

Unnamed: 0,timestamp,record_id,record_status_id,query,test,valid
0,2024-09-26 01:36:13,0cc157453395b440f36d1a1aee24aa76a03f5f9ab0a7a8...,dc1126e128d42f74bb98bad9ce4101fe1a4ea5a46df57d...,Black metal,median_semantic_similarity > 0.9,True


### 5. Resetting statuses, flushing records and comparison results

In [74]:
# Cleanup: flush the validation records held by the comparer.
comparer.flush_records()

In [75]:
# Cleanup: flush the recorded runs.
comparer.flush_runs()

In [76]:
# Cleanup: flush the per-run comparison scores.
comparer.flush_comparison_scores()

In [77]:
# Cleanup: flush the aggregate scores.
comparer.flush_aggregate_scores()

In [78]:
# Cleanup: flush the recorded test statuses.
comparer.flush_test_statuses()