In [2]:
import json
import pandas as pd

In [3]:
### Step 1: retire the cluster_setup category. We believe every category is required for
### cluster setup, so its entries are redistributed to the categories they belong to.

# Load the cluster_setup entries that will be moved into the other category files.
# Read explicitly as UTF-8 (the category files are written as UTF-8 with non-ASCII text
# left unescaped), instead of relying on the platform default encoding.
with open('cluster_setup.json', encoding='utf-8') as f:
    superHeroSquad = json.load(f)

# cluster_create_map.csv names, row by row and in the same order as the entries in
# cluster_setup.json, the file each entry should move to. reset_index() exposes the row
# position as an "index" column so it can be used to look the entries up later.
cluster_create_map = pd.read_csv("cluster_create_map.csv").reset_index()

# Map each destination filename to the positions of the cluster_setup entries that belong
# there. Destinations are kept in order of first appearance in the csv.
destination = {
    category: cluster_create_map.loc[
        cluster_create_map["destination"] == category, "index"
    ].to_list()
    for category in cluster_create_map["destination"].drop_duplicates()
}

def append_entries(filename,entries):
    """Append a list of entries to the JSON list stored in `<filename>.json`.

    Args:
        filename: path of the target file without the ".json" extension.
        entries: list of new entries to add after the existing ones.

    Prints the filename (as passed in) and the number of new entries, then
    rewrites the file with the combined list (UTF-8, indent of 2).

    Note: this is not idempotent - calling it again with the same entries
    appends them a second time.
    """
    print(filename)
    print(len(entries))
    path = filename + ".json"
    # Read as UTF-8: the file is written back as UTF-8 with ensure_ascii=False,
    # so relying on the platform default encoding here could mis-decode
    # non-ASCII text on systems whose default is not UTF-8.
    with open(path, encoding='utf-8') as f:
        content = json.load(f)
    # existing entries first, new entries after them
    combined = content + entries
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(combined, f, ensure_ascii=False, indent=2)

# Move the cluster_setup entries into their destination files, one category at a time.
for category, positions in destination.items():
    # look up each cluster_setup entry by its position, then hand the whole batch
    # to append_entries so it is added to the file for that category
    append_entries(category, [superHeroSquad[position] for position in positions])

operations
6
container
8
identity_access_management
3
resource_management
3
network
1
disaster_recovery
1


In [4]:
### Step 2: convert the old items to the format required by the FTA checklist
import uuid
import os
# Mapping table from json filename (without extension) to the category name used in
# FTA's checklist.
filename_map = pd.read_csv('filename_map.csv')

# Turn the two columns into a plain lookup dictionary: filename -> FTA category name.
# If a filename appears more than once, the last row wins.
filename_map_dict = dict(zip(filename_map['filename'], filename_map['actualname']))

def get_subcategory(tags):
    """Return the first tag that is not "all", or "other" when there is none."""
    return next((tag for tag in tags if tag != "all"), "other")

def get_link(entry):
    """Return the URL of the first documentation (or, failing that, tools) reference.

    Args:
        entry: checklist item dict; it may carry 'documentation' and/or 'tools',
            each expected to be a list of dicts with a "url" key.

    Returns:
        The "url" of the first reference in 'documentation'. If that list is
        missing or empty, the "url" of the first reference in 'tools'. If
        neither provides a reference (or the reference has no "url"), "".
    """
    # 'documentation' takes precedence; 'tools' is the fallback. An empty
    # 'documentation' list no longer hides a usable 'tools' link.
    for key in ('documentation', 'tools'):
        references = entry.get(key)
        if references:
            return references[0].get("url", "")
    return ""

def transform(item,filename):
    """Convert one source checklist item into the FTA checklist item format.

    Args:
        item: source item dict; 'title', 'description', 'tags' and 'priority'
            are read directly, and the link is resolved through get_link.
        filename: source file name without extension, looked up in
            filename_map_dict to obtain the category.

    Returns:
        A new dict with 'text', 'description', 'subcategory', 'category',
        'guid', 'severity' and 'link'. When `filename` is not in
        filename_map_dict a message is printed and 'category' is left out.
    """
    transformed_item = {
        'text': item['title'],
        'description': item['description'],
        'subcategory': get_subcategory(item['tags']),
    }
    if filename in filename_map_dict:
        transformed_item['category'] = filename_map_dict[filename]
    else:
        print(f'cant find {filename} in the dictionary')
    # every transformed item gets a freshly generated random identifier
    transformed_item['guid'] = str(uuid.uuid4())
    transformed_item['severity'] = item['priority']
    transformed_item['link'] = get_link(item)
    return transformed_item

dir_path = r'./'
# Names of all json files in dir_path. os.listdir returns entries in arbitrary order,
# so sort them to make the order of the transformed items reproducible between runs.
filenames = sorted(name for name in os.listdir(dir_path) if name.endswith('.json'))

## Collect the items from all the category files in dir_path, transformed to the FT
## data format. Start from an empty list so re-running the cell does not duplicate items.
items = []
# Only files whose name is in the mapping table are processed, so other json files in
# the directory (e.g. cluster_setup) are skipped.
categories = filename_map_dict.keys()

for file in filenames:
    # Strip only the trailing extension, so a name such as "container.old.json" is not
    # mistaken for the "container" category.
    file2 = os.path.splitext(file)[0]
    if file2 not in categories:
        print(f'cant find {file2} in the dictionary')
        continue
    # Open relative to dir_path (where the name was listed) and decode as UTF-8
    # instead of the platform default encoding.
    with open(os.path.join(dir_path, file), encoding='utf-8') as f:
        content = json.load(f)
    # transform each item in the file to the FT data format
    items.extend(transform(item, file2) for item in content)
    print(f"finished {file2}")


{'storage': 'Storage',
 'development': 'Application Deployment',
 'container': 'Governance and Security',
 'disaster_recovery': 'BC and DR',
 'network': 'Network Topology and Connectivity',
 'resource_management': 'Resource management',
 'windows': 'Windows',
 'operations': 'Operations',
 'identity_access_management': 'Identity and Access Management'}

In [44]:
# Load the existing FT checklist data. Its "items" list is combined with the transformed
# items in the cells below. Decode explicitly as UTF-8 rather than the platform default.
with open("./ft_data.json", encoding="utf-8") as f:
    content = json.load(f)

# Disabled: combining in this cell and saving the result back to a json file.
#     combined_list = content["items"] + items
#     content["items"] = combined_list

# with open("appended_ft_data.json", 'w', encoding='utf-8') as f:
#     json.dump(content, f, ensure_ascii=False, indent=2)

In [50]:
# Number of entries under "items" in the content loaded from ft_data.json.
len(content["items"])

79

In [49]:
# Number of transformed items collected from the category files.
len(items)

103

In [39]:
# Tabular view of the transformed items (one row per item), used for the
# duplicate-link check in the next cell.
data = pd.DataFrame(items)

In [40]:
# Number of rows left when only one row per distinct "link" value is kept.
# Items for which get_link found no reference all share the empty link ""
# and therefore count as a single row here.
len(data.drop_duplicates("link"))

90

In [51]:
# Existing FT checklist items followed by the newly transformed items.
# This builds a new list; neither content["items"] nor items is modified here.
combined_list = content["items"] + items

In [52]:
# Sanity check: should equal the number of FT items plus the number of transformed items.
len(combined_list)

182

In [53]:
# Replace the checklist's items with the combined list.
# NOTE: this mutates `content` in place. Re-running the cell that builds combined_list
# after this one would add `items` a second time; reload ft_data.json first.
content["items"] = combined_list

In [54]:
# Confirm the checklist now holds the combined items.
len(content["items"] )

182

In [58]:
# Export the combined checklist items to combined.csv, one row per item, without the
# DataFrame index column. Note that `data` is rebound here: it previously held only
# the transformed items.
data = pd.DataFrame(content["items"] )
data.to_csv("combined.csv",index=False)