In [6]:
import json
from analysis_funcs import *

In [2]:
# Location of the parsed Jekyll step records (JSON), relative to the notebook's working directory.
jekyll_data_file = "./data/jekyll/jekyll_parsed_steps.json"

In [3]:
# Load the parsed Jekyll step records into memory.
# JSON text is UTF-8, so the encoding is pinned explicitly instead of relying
# on the platform's locale default (which is not UTF-8 on every system).
with open(jekyll_data_file, "r", encoding="utf-8") as f:
    jekyll_data = json.load(f)

In [4]:
# Keep only the records whose "feature_file" is cache.feature.
# A comprehension builds the list in one expression and does not leave a
# stray loop variable behind in the kernel namespace.
similar_tests = [
    record for record in jekyll_data if record["feature_file"] == "cache.feature"
]

In [5]:
# Inspect the filtered records; in the recorded output each one carries
# 'feature_file', 'test_num', 'test_case' and a list of 'steps'.
similar_tests

[{'feature_file': 'cache.feature',
  'test_num': 1,
  'test_case': 'Default Cache directory',
  'steps': [{'step_num': 1,
    'step_name': 'I have an "index.md" page that contains "{{ site.title }}"',
    'step_definition': 'do\nFile.write(file, <<-HEREDOC)\n---\n#{key || "layout"}: #{value || "none"}\n---\n\n#{text}\nHEREDOC\nend',
    'step_definition_file': 'features/step_definitions.rb'},
   {'step_num': 2,
    'step_name': 'I have a configuration file with "title" set to "Hello World"',
    'step_definition': 'do\nconfig = if source_dir.join("_config.yml").exist?\n  SafeYAML.load_file(source_dir.join("_config.yml"))\nelse\n  {}\nend\nconfig.[]=(key, SafeYAML.load(value))\nif key == "timezone"\n  Jekyll.set_timezone(value)\nend\nFile.write("_config.yml", YAML.dump(config))\nend',
    'step_definition_file': 'features/step_definitions.rb'},
   {'step_num': 3,
    'step_name': 'I run jekyll build',
    'step_definition': 'do\nrun_jekyll(args)\nif args.include?("--verbose") || ENV.[](

In [7]:
# Build one string per filtered test case from its steps' "step_name" fields.
# NOTE(review): stringify_test_cases is not defined in this notebook; presumably
# it arrives via the wildcard import from analysis_funcs — confirm.
test_strings = stringify_test_cases(similar_tests, "step_name")

In [8]:
# Inspect the stringified tests; the recorded output shows one
# "<step number>: <step name>" line per step, joined with newlines.
test_strings

['1: I have an "index.md" page that contains "{{ site.title }}"\n2: I have a configuration file with "title" set to "Hello World"\n3: I run jekyll build\n4: I should get a zero exit status\n5: the .jekyll-cache directory should exist\n6: the .jekyll-cache/Jekyll/Cache/Jekyll--Cache directory should exist\n7: the _site directory should exist\n8: I should see "<p>Hello World</p>" in "_site/index.html"\n',
 '1: I have an "index.md" page that contains "{{ site.title }}"\n2: I have a configuration file with\n3: I run jekyll build\n4: I should get a zero exit status\n5: the .foo-cache directory should exist\n6: the .foo-cache/Jekyll/Cache/Jekyll--Cache directory should exist\n7: the .jekyll-cache directory should not exist\n8: the _site directory should exist\n9: I should see "<p>Hello World</p>" in "_site/index.html"\n',
 '1: I have an "index.md" page that contains "{{ site.title }}"\n2: I have a configuration file with "title" set to "Hello World"\n3: I run jekyll build --safe\n4: I should

In [9]:
# Pairwise comparison of the stringified tests.
# NOTE(review): "ncd" presumably means normalized compression distance; the
# helper's definition is not visible here — confirm in analysis_funcs.
ncd_matrix = calculate_pairwise_ncd(test_strings)

In [10]:
# Display the result; the recorded output is a 4x4 symmetric array with a zero diagonal.
ncd_matrix

array([[0.        , 0.0673913 , 0.08277405, 0.08810573],
       [0.0673913 , 0.        , 0.11358575, 0.10964912],
       [0.08277405, 0.11358575, 0.        , 0.07223476],
       [0.08810573, 0.10964912, 0.07223476, 0.        ]])

In [11]:
# Pairwise cosine similarity between the stringified tests.
# NOTE(review): how the strings are turned into vectors is decided inside the
# helper and is not visible here — confirm in analysis_funcs.
cosine_matrix = calculate_cosine_similarity(test_strings)

In [12]:
# Display the result; the recorded output is a 4x4 symmetric array with 1.0 on the diagonal.
cosine_matrix

array([[1.        , 0.91591687, 0.90542215, 0.90707567],
       [0.91591687, 1.        , 0.81838103, 0.82737403],
       [0.90542215, 0.81838103, 1.        , 0.93123048],
       [0.90707567, 0.82737403, 0.93123048, 1.        ]])

In [13]:
# Pairwise Euclidean distance between the stringified tests.
# NOTE(review): the vector representation of each string is chosen inside the
# helper and is not visible here — confirm in analysis_funcs.
euclidean_matrix = calculate_euclidean_distance(test_strings)

In [14]:
# Display the result; the recorded output is a 4x4 symmetric array with a zero diagonal.
euclidean_matrix

array([[0.        , 0.41008079, 0.43492035, 0.43110169],
       [0.41008079, 0.        , 0.60269224, 0.58758143],
       [0.43492035, 0.60269224, 0.        , 0.37086256],
       [0.43110169, 0.58758143, 0.37086256, 0.        ]])

In [15]:
# Pairwise Manhattan distance between the stringified tests.
# NOTE(review): the vector representation of each string is chosen inside the
# helper and is not visible here — confirm in analysis_funcs.
manhattan_matrix = calculate_manhattan_distance(test_strings)

In [16]:
# Display the result; the recorded output is a 4x4 symmetric array with a zero diagonal.
manhattan_matrix

array([[0.        , 1.43233841, 1.69855468, 1.6754928 ],
       [1.43233841, 0.        , 2.67442886, 2.67735303],
       [1.69855468, 2.67442886, 0.        , 0.90542937],
       [1.6754928 , 2.67735303, 0.90542937, 0.        ]])