In [None]:
import argparse
import json
import os
import logging
import yaml
from kafka import KafkaProducer
from schema import Schema, SchemaError
log = logging.getLogger()
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))

In [None]:
def add_value(key):
    match key:
        case 'technology':
            print(key)
            
        case 'hostedAt':
            print(key)

In [None]:
import re

def parse_yaml(yaml_file: str) -> dict:
    with open(yaml_file, mode='r', encoding='utf-8') as file:
        data = yaml.safe_load(file)
    return data

def find_main_language(full_output = False):
  print('language')
  matches = []
  for root, directory, filenames in os.walk(os.getcwd()):
      for filename in filenames:
        if re.search(".(\b.py\b)", filename):
          print(filename)
          print(os.path.getsize(root + '/' + filename))
  print(matches)

find_main_language()

In [None]:
from schema import Schema, SchemaError, Optional, Hook, Or

schema_val = {
    "name": str,
    "description": str,
    "status": str,

    "consumers": {
        "name": str,
        "description": str,
        "type" : str
    },
    "containers": {
        "name": str,
        "sysnonyms": str,
        "description": str,
        Optional("technology", default= lambda : add_value('technology')): str,
        "parentSystem": str,
        "ciDataOwner": str,
        "productOwner": str,
        "applicationType": Or("Business", "Customer Facing", "External Service", "Infrastructure", "Interface", "Office", "Tool", "Unknown"),
        Optional("hostedAt", default = lambda : add_value('hostedAt')): Or("Amazon Web Services (AWS Cloud)", "AT&T", "Azure CF1", "Azure CF2", "Azure Cloud", "DXC", "Equinix", "Google Cloud Platform", "Hybric", "Inlumi", "Local server", "Multi-Cloud", "Not Applicable", "Other", "Salesforce", "ServiceNow", "Solvinity", "Unit4", "Unknown", "User device", "Azure"),
        "deploymentModel": Or("BPO", "CaaS", "IaaS", "On-Premise", "PaaS", "SaaS"),
        "personalData": bool,
        "confidentiality": str,
        "mcv": Or("Highly business critical", "Business critical", "Not business critical", "Not applicable"),
        "maxSeverityLevel": Or(1,2,3,4, "Not applicable"),
        Optional("sox", default= lambda : add_value('sox')): bool,
        Optional("icfr", default= lambda : add_value('icfr')): bool,
        "assignementGroup": str,
        "operationalStatus": Or("Pipelined", "Operational", "Non-Operational", "Submitted for decommissioning", "Decommissioned", "In decommissioning process"),
        "environments": Or("nl", "be"),
        "relationships": {
            "type": str,
            "container": {
                "name": str,
            },
        },
        "components": {
            "name": str,
            "description": str,
            "exposedAPIs": {
                "name": str,
                "description": str,
                "type": str,
                "status": str,
            },
            "consumedAPIs": {
                "name": str,
                "description": str,
                "status": str
            }
        },
    }
}

In [None]:
def validate_yaml(yaml_data):
    #schema = eval(open('./schema.yml', 'r').read())
    validator = Schema(schema_val)
    try:
        validator.validate(yaml_data)
        print('YML valid')
    except SchemaError as se:
        print(se)

In [None]:
def load_doc():
    with open('./test.yml', 'r', encoding='utf8') as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as e:
            print(e)

In [None]:
import avro.schema
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter
avro_schema = avro.schema.parse(open("avro_schema.avsc", "rb").read())

writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), avro_schema)
writer.append(load_doc())
writer.close()

reader = DataFileReader(open("users.avro", "rb"), DatumReader())

for item in reader:
    print(item)

os.remove("users.avro")


In [None]:
doc = load_doc()
validate_yaml(doc)

In [None]:
schema = {
  "type" : "record",
  "namespace" : "com.test.avro",
  "name" : "SystemModel",
  "fields" : [ {
    "name" : "name",
    "type" : "string"
  }, {
    "name" : "description",
    "type" : "string"
  }, {
    "name" : "status",
    "type" : "string"
  }, {
    "name" : "consumers",
    "type" : {
      "type" : "record",
      "name" : "consumers",
      "fields" : [ {
        "name" : "name",
        "type" : "string"
      }, {
        "name" : "description",
        "type" : "string"
      }, {
        "name" : "type",
        "type" : "string"
      } ]
    }
  }, {
    "name" : "containers",
    "type" : {
      "type" : "record",
      "name" : "containers",
      "fields" : [ {
        "name" : "name",
        "type" : "string"
      }, {
        "name" : "sysnonyms",
        "type" : "string"
      }, {
        "name" : "description",
        "type" : "string"
      }, {
        "name" : "technology",
        "type" : "string"
      }, {
        "name" : "parentSystem",
        "type" : "string"
      }, {
        "name" : "ciDataOwner",
        "type" : "string"
      }, {
        "name" : "productOwner",
        "type" : "string"
      }, {
        "name" : "applicationType",
        "type" : "string"
      }, {
        "name" : "hostedAt",
        "type" : "string"
      }, {
        "name" : "deploymentModel",
        "type" : "string"
      }, {
        "name" : "personalData",
        "type" : "boolean"
      }, {
        "name" : "confidentiality",
        "type" : "string"
      }, {
        "name" : "mcv",
        "type" : "string"
      }, {
        "name" : "maxSeverityLevel",
        "type" : "long"
      }, {
        "name" : "sox",
        "type" : "boolean"
      }, {
        "name" : "icfr",
        "type" : "boolean"
      }, {
        "name" : "assignementGroup",
        "type" : "string"
      }, {
        "name" : "operationalStatus",
        "type" : "string"
      }, {
        "name" : "environments",
        "type" : "string"
      }, {
        "name" : "relationships",
        "type" : {
          "type" : "record",
          "name" : "relationships",
          "fields" : [ {
            "name" : "type",
            "type" : "string"
          }, {
            "name" : "container",
            "type" : {
              "type" : "record",
              "name" : "container",
              "fields" : [ {
                "name" : "name",
                "type" : "string"
              } ]
            }
          } ]
        }
      }, {
        "name" : "components",
        "type" : {
          "type" : "record",
          "name" : "components",
          "fields" : [ {
            "name" : "name",
            "type" : "string"
          }, {
            "name" : "description",
            "type" : "string"
          }, {
            "name" : "exposedAPIs",
            "type" : {
              "type" : "record",
              "name" : "exposedAPIs",
              "fields" : [ { "name" : "name", "type" : "string" }, 
              { "name" : "description", "type" : "string" }, 
              { "name" : "type", "type" : "string" }, 
              { "name" : "status", "type" : "string" } ]
            }
          }, {
            "name" : "consumedAPIs",
            "type" : {
              "type" : "record",
              "name" : "consumedAPIs",
              "fields" : [ {
                "name" : "name",
                "type" : "string"
              }, {
                "name" : "description",
                "type" : "string"
              }, {
                "name" : "status",
                "type" : "string"
              } ]
            }
          } ]
        }
      } ]
    }
  } 
  ]
}

In [3]:
schema_str = """
{
  "type" : "record",
  "name" : "SystemModel",
  "namespace" : "org.example.models.SystemModel",
  "fields" : [ {
    "name" : "name",
    "type" : "string"
  }, {
    "name" : "description",
    "type" : "string"
  }, {
    "name" : "status",
    "type" : "string"
  }, {
    "name" : "consumers",
    "type" : {
      "type" : "record",
      "name" : "consumers",
      "fields" : [ {
        "name" : "name",
        "type" : "string"
      }, {
        "name" : "description",
        "type" : "string"
      }, {
        "name" : "type",
        "type" : "string"
      } ]
    }
  }, {
    "name" : "containers",
    "type" : {
      "type" : "record",
      "name" : "containers",
      "fields" : [ {
        "name" : "name",
        "type" : "string"
      }, {
        "name" : "sysnonyms",
        "type" : "string"
      }, {
        "name" : "description",
        "type" : "string"
      }, {
        "name" : "technology",
        "type" : "string"
      }, {
        "name" : "parentSystem",
        "type" : "string"
      }, {
        "name" : "ciDataOwner",
        "type" : "string"
      }, {
        "name" : "productOwner",
        "type" : "string"
      }, {
        "name" : "applicationType",
        "type" : "string"
      }, {
        "name" : "hostedAt",
        "type" : "string"
      }, {
        "name" : "deploymentModel",
        "type" : "string"
      }, {
        "name" : "personalData",
        "type" : "boolean"
      }, {
        "name" : "confidentiality",
        "type" : "string"
      }, {
        "name" : "mcv",
        "type" : "string"
      }, {
        "name" : "maxSeverityLevel",
        "type" : "long"
      }, {
        "name" : "sox",
        "type" : "boolean"
      }, {
        "name" : "icfr",
        "type" : "boolean"
      }, {
        "name" : "assignementGroup",
        "type" : "string"
      }, {
        "name" : "operationalStatus",
        "type" : "string"
      }, {
        "name" : "environments",
        "type" : "string"
      }, {
        "name" : "relationships",
        "type" : {
          "type" : "record",
          "name" : "relationships",
          "fields" : [ {
            "name" : "type",
            "type" : "string"
          }, {
            "name" : "container",
            "type" : {
              "type" : "record",
              "name" : "container",
              "fields" : [ {
                "name" : "name",
                "type" : "string"
              } ]
            }
          } ]
        }
      }, {
        "name" : "components",
        "type" : {
          "type" : "record",
          "name" : "components",
          "fields" : [ {
            "name" : "name",
            "type" : "string"
          }, {
            "name" : "description",
            "type" : "string"
          }, {
            "name" : "exposedAPIs",
            "type" : {
              "type" : "array",
              "name" : "exposedAPIs",
              "fields" : [ {
                "name" : "name",
                "type" : "string"
              }, {
                "name" : "description",
                "type" : "string"
              }, {
                "name" : "type",
                "type" : "string"
              }, {
                "name" : "status",
                "type" : "string"
              } ]
            }
          }, {
            "name" : "consumedAPIs",
            "type" : {
              "type" : "array",
              "name" : "consumedAPIs",
              "fields" : [ {
                "name" : "name",
                "type" : "string"
              }, {
                "name" : "description",
                "type" : "string"
              }, {
                "name" : "status",
                "type" : "string"
              } ]
            }
          } ]
        }
      } ]
    }
  } ]
}
"""

In [None]:
schema_key_str = """{
    "type": "record",
    "name": "TestObject",
    "namespace": "System-key",
    "fields": [{
        "name": "key",
        "type": "string"
    }]
}"""

In [None]:
data = load_doc()

In [None]:
from confluent_kafka import Producer
from confluent_kafka.serialization import StringSerializer, SerializationContext, MessageField
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer

In [None]:
with open('avro_schema.avsc') as f:
    schema_str = f.read()

print(schema_str)

In [None]:
topic = "topic5"

with open("avro_schema.avsc") as f:
    schema_str = f.read()

schema_registry_client = SchemaRegistryClient({'url': 'http://10.152.183.242:8081'})

avro_serializer = AvroSerializer(schema_registry_client, schema_str)

string_serializer = StringSerializer('utf_8')

producer = Producer({'bootstrap.servers': '10.152.183.181:9094'})

producer.produce(topic=topic, key=string_serializer('testkey', None), value=avro_serializer(data, SerializationContext(topic, MessageField.VALUE)))

In [None]:
from confluent_kafka.schema_registry import SchemaRegistryClient, Schema

avro_schema = Schema(schema_str, 'AVRO')

client = SchemaRegistryClient("http://10.152.183.242:8081")

schema_id = client.register_schema('test', avro_schema)

In [None]:
from confluent_kafka.avro import AvroProducer

producer = AvroProducer({'bootstrap.servers': '10.152.183.181:9094', 'schema.registry.url': 'http://10.152.183.242:8081'})

producer.produce(topic="topic4", value=data, value_schema=schema_str, key_schema=schema_key_str, key="testkey")

In [None]:
producer = KafkaProducer(
                             value_serializer=lambda v: json.dumps(v).encode('utf-8'),
                             bootstrap_servers="10.152.183.181:9094")
producer.send('topic2', value=data)

In [None]:
from kafka_schema_registry import prepare_producer

producer = prepare_producer(bootstrap_servers=["10.152.183.181:9094"], avro_schema_registry="http://10.152.183.242:8081", topic_name="topic1", value_schema=schema, num_partitions=1, replication_factor=1)

producer.send("topic1",data)

In [70]:
import re
import yaml
from collections import defaultdict

def parse_yaml(yaml_file: str) -> dict:
    with open(yaml_file, mode='r', encoding='utf-8') as file:
        data = yaml.safe_load(file)
    return data

def find_main_language(full_output = False):
  languages = parse_yaml("languages.yml")
  print(languages.items())
  matches = defaultdict(int)
  for root, directory, filenames in os.walk(os.getcwd()):
      for filename in filenames:
        for key, value in languages.items():
            for type in value:
                if re.search(f".({type}$)", filename):
                    size = os.path.getsize(root + '/' + filename)
                    matches[key] += size
  if(full_output):
    return matches
  else:
    return max(matches, key=matches.get)
str(find_main_language())

dict_items([('Python', ['tewst', 'py']), ('Java', ['java'])])


'Java'

In [125]:
from contextlib import suppress

YAML_DATA = {'name': 'poc-git-to-cmdb',
 'description': 'POC to send information about the app to Kafka',
 'containers': [{'name': 'poc-git-to-cmdb',
   'synonyms': 'poc-git-to-kafka-cmdb-sync',
   'description': 'POC',
   'technology': None,
   'team': None,
   'applicationType': 'Tool',
   'hostedAt': 'Azure Cloud',
   'deploymentModel': 'On-Premise',
   'containsPersonalData': False,
   'confidentiality': 'Internal use',
   'mcv': 'Not business critical',
   'maxSeverityLevel': 4,
   'containsFinancialData': False,
   'assignementGroup': 'te',
   'operationalStatus': 'Pipelined',
   'environments': 'nl',
   'components': {'name': 'Component name',
    'description': 'what the system does',
    'exposedAPIs': [{'name': 'Unique API name',
      'description': 'What it can be used for',
      'type': 'HTTP/JSON',
      'status': 'TO_BE_IMPLEMENTED'},
     {'name': 'Unique API name2',
      'description': 'What it can be used for2',
      'type': 'HTTP/JSON2',
      'status': 'TO_BE_IMPLEMENTED2'}],
    'consumedAPIs': [{'name': 'test100',
      'description': 'What is it used for',
      'status': 'TO_BE_IMPLEMENTED',
      'read': True,
      'write': True,
      'execute': False},
     {'name': 'test2000',
      'description': 'What is it used for2',
      'status': 'TO_BE_IMPLEMENTED2',
      'read': True,
      'write': False,
      'execute': True}]}}]}

def delete_keys_from_dict(d, to_delete):
    if isinstance(to_delete, str):
        to_delete = [to_delete]
    if isinstance(d, dict):
        for single_to_delete in set(to_delete):
            if single_to_delete in d:
                del d[single_to_delete]
        for k, v in d.items():
            delete_keys_from_dict(v, to_delete)
    elif isinstance(d, list):
        for i in d:
            delete_keys_from_dict(i, to_delete)

def check_values(d):
    stack = list(d.items()) 
    visited = set()
    while stack: 
        k, v = stack.pop()
        if isinstance(v, dict):
            print(k) 
            if k not in visited: 
                stack.extend(v.items()) 
        else: 
            if v == None or v == '':
                print("%s: %s" % (k, v)) 
                delete_keys_from_dict(d, k)
        visited.add(k)

def filter_none(d): 
    global YAML_DATA
    stack = list(d.items())
    print(stack[:2])
    check_values(stack[:2])
    for item in YAML_DATA['containers']:
        print(item)
    


filter_none(YAML_DATA)
print(YAML_DATA)

[('name', 'poc-git-to-cmdb'), ('description', 'POC to send information about the app to Kafka')]


AttributeError: 'list' object has no attribute 'items'

In [129]:
YAML_DATA = {'name': 'poc-git-to-cmdb',
 'description': 'POC to send information about the app to Kafka',
 'containers': [{'name': 'poc-git-to-cmdb',
   'synonyms': 'poc-git-to-kafka-cmdb-sync',
   'description': 'POC',
   'technology': None,
   'team': None,
   'applicationType': 'Tool',
   'hostedAt': 'Azure Cloud',
   'deploymentModel': 'On-Premise',
   'containsPersonalData': False,
   'confidentiality': 'Internal use',
   'mcv': 'Not business critical',
   'maxSeverityLevel': 4,
   'containsFinancialData': False,
   'assignementGroup': 'te',
   'operationalStatus': 'Pipelined',
   'environments': 'nl',
   'components': {'name': 'Component name',
    'description': 'what the system does',
    'exposedAPIs': [{'name': 'Unique API name',
      'description': 'What it can be used for',
      'type': 'HTTP/JSON',
      'status': 'TO_BE_IMPLEMENTED'},
     {'name': 'Unique API name2',
      'description': 'What it can be used for2',
      'type': 'HTTP/JSON2',
      'status': 'TO_BE_IMPLEMENTED2'}],
    'consumedAPIs': [{'name': 'test100',
      'description': 'What is it used for',
      'status': 'TO_BE_IMPLEMENTED',
      'read': True,
      'write': True,
      'execute': False},
     {'name': 'test2000',
      'description': 'What is it used for2',
      'status': 'TO_BE_IMPLEMENTED2',
      'read': True,
      'write': False,
      'execute': True}]}}]}

def remove_empties_from_dict(a_dict):
    new_dict = {}
    for k, v in a_dict.items():
        if isinstance(v, dict):
            v = remove_empties_from_dict(v)
        if v is not None:
            new_dict[k] = v
    return new_dict or None

def remove_none(obj):
  if isinstance(obj, (list, tuple, set)):
    return type(obj)(remove_none(x) for x in obj if x is not None or '')
  elif isinstance(obj, dict):
    return type(obj)((remove_none(k), remove_none(v))
      for k, v in obj.items() if k is not None and v is not None or '')
  else:
    return obj

remove_none(YAML_DATA)

{'name': 'poc-git-to-cmdb',
 'description': 'POC to send information about the app to Kafka',
 'containers': [{'name': 'poc-git-to-cmdb',
   'synonyms': 'poc-git-to-kafka-cmdb-sync',
   'description': 'POC',
   'applicationType': 'Tool',
   'hostedAt': 'Azure Cloud',
   'deploymentModel': 'On-Premise',
   'containsPersonalData': False,
   'confidentiality': 'Internal use',
   'mcv': 'Not business critical',
   'maxSeverityLevel': 4,
   'containsFinancialData': False,
   'assignementGroup': 'te',
   'operationalStatus': 'Pipelined',
   'environments': 'nl',
   'components': {'name': 'Component name',
    'description': 'what the system does',
    'exposedAPIs': [{'name': 'Unique API name',
      'description': 'What it can be used for',
      'type': 'HTTP/JSON',
      'status': 'TO_BE_IMPLEMENTED'},
     {'name': 'Unique API name2',
      'description': 'What it can be used for2',
      'type': 'HTTP/JSON2',
      'status': 'TO_BE_IMPLEMENTED2'}],
    'consumedAPIs': [{'name': 'test100

In [4]:
import fastavro

# with open('avro_schema.avsc') as f:
#       schema_str = f.read()

parsed_schema = fastavro.parse_schema(schema_str)
# fastavro.validate(data, parsed_schema)

UnknownType: 
{
  "type" : "record",
  "name" : "SystemModel",
  "namespace" : "org.example.models.SystemModel",
  "fields" : [ {
    "name" : "name",
    "type" : "string"
  }, {
    "name" : "description",
    "type" : "string"
  }, {
    "name" : "status",
    "type" : "string"
  }, {
    "name" : "consumers",
    "type" : {
      "type" : "record",
      "name" : "consumers",
      "fields" : [ {
        "name" : "name",
        "type" : "string"
      }, {
        "name" : "description",
        "type" : "string"
      }, {
        "name" : "type",
        "type" : "string"
      } ]
    }
  }, {
    "name" : "containers",
    "type" : {
      "type" : "record",
      "name" : "containers",
      "fields" : [ {
        "name" : "name",
        "type" : "string"
      }, {
        "name" : "sysnonyms",
        "type" : "string"
      }, {
        "name" : "description",
        "type" : "string"
      }, {
        "name" : "technology",
        "type" : "string"
      }, {
        "name" : "parentSystem",
        "type" : "string"
      }, {
        "name" : "ciDataOwner",
        "type" : "string"
      }, {
        "name" : "productOwner",
        "type" : "string"
      }, {
        "name" : "applicationType",
        "type" : "string"
      }, {
        "name" : "hostedAt",
        "type" : "string"
      }, {
        "name" : "deploymentModel",
        "type" : "string"
      }, {
        "name" : "personalData",
        "type" : "boolean"
      }, {
        "name" : "confidentiality",
        "type" : "string"
      }, {
        "name" : "mcv",
        "type" : "string"
      }, {
        "name" : "maxSeverityLevel",
        "type" : "long"
      }, {
        "name" : "sox",
        "type" : "boolean"
      }, {
        "name" : "icfr",
        "type" : "boolean"
      }, {
        "name" : "assignementGroup",
        "type" : "string"
      }, {
        "name" : "operationalStatus",
        "type" : "string"
      }, {
        "name" : "environments",
        "type" : "string"
      }, {
        "name" : "relationships",
        "type" : {
          "type" : "record",
          "name" : "relationships",
          "fields" : [ {
            "name" : "type",
            "type" : "string"
          }, {
            "name" : "container",
            "type" : {
              "type" : "record",
              "name" : "container",
              "fields" : [ {
                "name" : "name",
                "type" : "string"
              } ]
            }
          } ]
        }
      }, {
        "name" : "components",
        "type" : {
          "type" : "record",
          "name" : "components",
          "fields" : [ {
            "name" : "name",
            "type" : "string"
          }, {
            "name" : "description",
            "type" : "string"
          }, {
            "name" : "exposedAPIs",
            "type" : {
              "type" : "array",
              "name" : "exposedAPIs",
              "fields" : [ {
                "name" : "name",
                "type" : "string"
              }, {
                "name" : "description",
                "type" : "string"
              }, {
                "name" : "type",
                "type" : "string"
              }, {
                "name" : "status",
                "type" : "string"
              } ]
            }
          }, {
            "name" : "consumedAPIs",
            "type" : {
              "type" : "array",
              "name" : "consumedAPIs",
              "fields" : [ {
                "name" : "name",
                "type" : "string"
              }, {
                "name" : "description",
                "type" : "string"
              }, {
                "name" : "status",
                "type" : "string"
              } ]
            }
          } ]
        }
      } ]
    }
  } ]
}


In [10]:
import yaml

languagess = {}

with open("languagestest.yml", mode='r', encoding='utf-8') as file:
        data = yaml.safe_load(file)
for key, value in data.items():
        if 'extensions' in value:
                extensions = []
                for ext in value['extensions']:
                        extensions.append(ext.replace(".",""))
                languagess[key] = extensions
                        
with open('languagess.yml', 'w') as outfile:
        yaml.dump(languagess, outfile)

In [None]:
import yaml

def parse_yaml() -> dict:
    with open("test.yml", mode='r', encoding='utf-8') as file:
        data = yaml.safe_load(file)
    return data

data = parse_yaml()



In [37]:
import yaml
import re
import copy
from itertools import islice

#Make recursice for dictionaries in the list of data

def replace_key(data, keys, index = 0):
    temp_data = {}
    
    for i, old_key in enumerate(data):
        list_data = list(data.values())
        temp_data[keys[i + index]] = list_data[i]
    return temp_data


def translate_keys():
    with open("test.yml", mode='r', encoding='utf-8') as file:
        data = yaml.safe_load(file)

    with open('avro_schema.avsc') as f:
      schema_str = f.read()

    schema_str = re.findall('(?<=\"name"\ : ")(.*?)(?=\")',schema_str)
    del schema_str[0]
    del schema_str[3]


    print(schema_str)
    first_data = replace_key(dict(islice(data.items(), 2)), schema_str)
    second_data = replace_key(dict(islice(data.items(), 2, 3)), schema_str, 2)
    third_data = replace_key(second_data["containers"], schema_str, 3)
    print(third_data)
    fourth_data = replace_key(third_data["components"], ["name", "description", "exposedAPIs", "consumedAPIs"])
    fifth_data = list()

    for value in fourth_data["exposedAPIs"]:
        fifth_data.append(replace_key(value, ["name", "description", "type", "status"]))
    fourth_data["exposedAPIs"] = fifth_data

    sixth_data = list()

    for value in fourth_data["consumedAPIs"]:
        sixth_data.append(replace_key(value, ["name", "description", "status", "read", "write", "execute"]))

    fourth_data["consumedAPIs"] = sixth_data

    data = first_data 
    data["containers"] = third_data
    data["containers"]["components"] = fourth_data

    return data
        

translate_keys()

['name', 'description', 'containers', 'name', 'sysnonyms', 'description', 'technology', 'ciDataOwner', 'productOwner', 'applicationType', 'hostedAt', 'deploymentModel', 'personalData', 'confidentiality', 'mcv', 'maxSeverityLevel', 'icfr', 'assignementGroup', 'operationalStatus', 'environments', 'components', 'components', 'name', 'description', 'exposedAPIs', 'name', 'description', 'type', 'status', 'consumedAPIs', 'name', 'description', 'status', 'read', 'write', 'execute']
{'name': 'test', 'sysnonyms': 'poc-git-to-kafka-cmdb-sync', 'description': 'POC', 'technology': 'kafka', 'ciDataOwner': 'Aede van der Weij', 'productOwner': 'Thomas de Vries', 'applicationType': 'Tool', 'hostedAt': 'Azure Cloud', 'deploymentModel': 'On-Premise', 'personalData': False, 'confidentiality': 'Internal use', 'mcv': 'Not business critical', 'maxSeverityLevel': 4, 'icfr': False, 'assignementGroup': 'AMS_ITOnline_L3_Lists_and_Orders', 'operationalStatus': 'Pipelined', 'environments': 'nl', 'components': {'n

{'name': 'poc-git-to-cmdb',
 'description': 'POC to send information about the app to Kafka',
 'containers': {'name': 'test',
  'sysnonyms': 'poc-git-to-kafka-cmdb-sync',
  'description': 'POC',
  'technology': 'kafka',
  'ciDataOwner': 'Aede van der Weij',
  'productOwner': 'Thomas de Vries',
  'applicationType': 'Tool',
  'hostedAt': 'Azure Cloud',
  'deploymentModel': 'On-Premise',
  'personalData': False,
  'confidentiality': 'Internal use',
  'mcv': 'Not business critical',
  'maxSeverityLevel': 4,
  'icfr': False,
  'assignementGroup': 'AMS_ITOnline_L3_Lists_and_Orders',
  'operationalStatus': 'Pipelined',
  'environments': 'nl',
  'components': {'name': 'Component name',
   'description': 'what the system does',
   'exposedAPIs': [{'name': 'Unique API name',
     'description': 'What it can be used for',
     'type': 'HTTP/JSON',
     'status': 'TO_BE_IMPLEMENTED'},
    {'name': 'Unique API name2',
     'description': 'What it can be used for2',
     'type': 'HTTP/JSON2',
     '

In [102]:
import argparse
import json
import os
import logging
import yaml
from kafka import KafkaProducer
from schema import Schema, SchemaError, Optional, Hook, Or
from confluent_kafka import Producer, Consumer
from confluent_kafka.serialization import StringSerializer, SerializationContext, MessageField
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer
from confluent_kafka import Producer, Consumer, DeserializingConsumer
from confluent_kafka.serialization import StringSerializer, SerializationContext, MessageField, StringDeserializer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer, AvroDeserializer
from confluent_kafka import Message
import re
from collections import defaultdict
from collections.abc import Iterable
from itertools import islice
import time
import uuid

#Make recursice for dictionaries in the list of data

def replace_key(data, keys, index = 0):
    temp_data = {}

    for i, old_key in enumerate(data):
        list_data = list(data.values())
        temp_data[keys[i + index]] = list_data[i]

    return temp_data


def translate_keys(data):
    with open('avro_schema.avsc') as f:
      schema_str = f.read()

    schema_str = re.findall('(?<=\"name"\ : ")(.*?)(?=\")',schema_str)
    del schema_str[0]
    del schema_str[3]

    first_data = replace_key(dict(islice(data.items(), 2)), schema_str)
    second_data = replace_key(dict(islice(data.items(), 2, 3)), schema_str, 2)
    third_data = replace_key(second_data["containers"], schema_str, 3)
    fourth_data = replace_key(third_data["components"], ["name", "description", "exposedAPIs", "consumedAPIs"])
    fifth_data = list()

    for value in fourth_data["exposedAPIs"]:
        fifth_data.append(replace_key(value, ["name", "description", "type", "status"]))
    fourth_data["exposedAPIs"] = fifth_data

    sixth_data = list()

    for value in fourth_data["consumedAPIs"]:
        sixth_data.append(replace_key(value, ["name", "description", "status", "read", "write", "execute"]))

    fourth_data["consumedAPIs"] = sixth_data

    data = first_data 
    data["containers"] = third_data
    data["containers"]["components"] = fourth_data

    return data

def parse_yaml() -> dict:
    with open("test.yml", mode='r', encoding='utf-8') as file:
        data = yaml.safe_load(file)
    return data

# translate_keys(parse_yaml())
# print(translate_keys())
parse_yaml()

{'name': 'poc-git-to-cmdb',
 'description': 'POC to send information about the app to Kafka',
 'containers': [{'name': 'poc-git-to-cmdb',
   'synonyms': 'poc-git-to-kafka-cmdb-sync',
   'description': 'POC',
   'technology': None,
   'team': None,
   'applicationType': 'Tool',
   'hostedAt': 'Azure Cloud',
   'deploymentModel': 'On-Premise',
   'containsPersonalData': False,
   'confidentiality': 'Internal use',
   'mcv': 'Not business critical',
   'maxSeverityLevel': 4,
   'containsFinancialData': False,
   'assignementGroup': 'te',
   'operationalStatus': 'Pipelined',
   'environments': 'nl',
   'components': {'name': 'Component name',
    'description': 'what the system does',
    'exposedAPIs': [{'name': 'Unique API name',
      'description': 'What it can be used for',
      'type': 'HTTP/JSON',
      'status': 'TO_BE_IMPLEMENTED'},
     {'name': 'Unique API name2',
      'description': 'What it can be used for2',
      'type': 'HTTP/JSON2',
      'status': 'TO_BE_IMPLEMENTED2'}]

In [5]:
import yaml

def include_constructor(loader, node):
  selector = loader.construct_sequence(node)
  name = selector.pop(0)
  print(selector)
  with open(name) as f:
    content = yaml.safe_load(f)
  # walk over the selector items and descend into the loaded structure each time.
  data = {}
  for item in selector:
    for key, value in content.items():
      if key == item:
        for name in selector:
          content = content[name] 
        return content

  return None

yaml.add_constructor('!include', include_constructor, Loader=yaml.SafeLoader)

with open("test.yml") as f:
    data = yaml.safe_load(f)

data


['labels', 'devops.ah.it/support-assignment-group']


{'sname': 'poc-git-to-cmdb',
 'sdescription': 'POC to send information about the app to Kafka',
 'sstatus': 'pipelined',
 'scontainers': {'sname': 'poc-git-tocmdb',
  'ssysnonyms': 'poc-git-to-kafka-cmdb-sync',
  'sdescription': 'POC',
  'stechnology': None,
  'sparentSystem': 'CMDB',
  'sciDataOwner': 'Aede van der Weij',
  'sproductOwner': 'Thomas de Vries',
  'sapplicationType': 'Tool',
  'shostedAt': 'Azure Cloud',
  'sdeploymentModel': 'On-Premise',
  'spersonalData': False,
  'sconfidentiality': 'Internal use',
  'smcv': 'Not business critical',
  'smaxSeverityLevel': 4,
  'ssox': None,
  'sicfr': None,
  'sassignementGroup': 'AMS_ITOnline_L3_Lists_and_Orders',
  'soperationalStatus': 'Pipelined',
  'senvironments': 'nl',
  'scomponents': {'sname': 'Component name',
   'sdescription': 'what the system does',
   'sexposedAPIs': [{'sname': 'Unique API name',
     'sdescription': 'What it can be used for',
     'stype': 'HTTP/JSON',
     'sstatus': 'TO_BE_IMPLEMENTED'},
    {'sname'

In [82]:
from Loader import Loader
import yaml

with open("test.yml") as f:
    data = yaml.load(f, Loader=Loader)

print(data)

{'sname': 'poc-git-to-cmdb', 'sdescription': 'POC to send information about the app to Kafka', 'sstatus': 'pipelined', 'scontainers': {'sname': 'poc-git-tocmdb', 'ssysnonyms': 'poc-git-to-kafka-cmdb-sync', 'sdescription': 'POC', 'stechnology': None, 'sparentSystem': 'CMDB', 'sciDataOwner': 'Aede van der Weij', 'sproductOwner': 'Thomas de Vries', 'sapplicationType': 'Tool', 'shostedAt': 'Azure Cloud', 'sdeploymentModel': 'On-Premise', 'spersonalData': False, 'sconfidentiality': 'Internal use', 'smcv': 'Not business critical', 'smaxSeverityLevel': 4, 'ssox': None, 'sicfr': None, 'sassignementGroup': 'AMS_ITOnline_L3_SRE_Infra', 'soperationalStatus': 'Pipelined', 'senvironments': 'nl', 'scomponents': {'sname': 'Component name', 'sdescription': 'what the system does', 'sexposedAPIs': [{'sname': 'Unique API name', 'sdescription': 'What it can be used for', 'stype': 'HTTP/JSON', 'sstatus': 'TO_BE_IMPLEMENTED'}, {'sname': 'Unique API name2', 'sdescription': 'What it can be used for2', 'stype'

In [25]:
from confluent_kafka import Producer, Consumer, DeserializingConsumer
from confluent_kafka.serialization import StringSerializer, SerializationContext, MessageField, StringDeserializer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer, AvroDeserializer
from confluent_kafka import Message
import time

topic = ["topic10"]

original = {'name': 'poc-git-to-cmdb', 'description': 'POC to send information about the app to Kafka', 'containers': {'name': 'poc-git-to-cmdb', 'sysnonyms': 'poc-git-to-kafka-cmdb-sync', 'description': 'POC', 'technology': 'kafka', 'ciDataOwner': 'Aede van der Weij', 'productOwner': 'Thomas de Vries', 'applicationType': 'Tool', 'hostedAt': 'Azure Cloud', 'deploymentModel': 'On-Premise', 'personalData': False, 'confidentiality': 'Internal use', 'mcv': 'Not business critical', 'maxSeverityLevel': 4, 'icfr': False, 'assignementGroup': 'AMS_ITOnline_L3_Lists_and_Orders', 'operationalStatus': 'Pipelined', 'environments': 'nl', 'components': {'name': 'Component name', 'description': 'what the system does', 'exposedAPIs': [{'name': 'Unique API name', 'description': 'What it can be used for', 'type': 'HTTP/JSON', 'status': 'TO_BE_IMPLEMENTED'}, {'name': 'Unique API name2', 'description': 'What it can be used for2', 'type': 'HTTP/JSON2', 'status': 'TO_BE_IMPLEMENTED2'}], 'consumedAPIs': [{'name': 'test1', 'description': 'What is it used for', 'status': 'TO_BE_IMPLEMENTED', 'read': True, 'write': True, 'execute': False}, {'name': 'test2', 'description': 'What is it used for2', 'status': 'TO_BE_IMPLEMENTED2', 'read': True, 'write': False, 'execute': True}]}}}

def validate_names():

    with open('avro_schema.avsc') as f:
      schema_str = f.read()

    schema_registry_client = SchemaRegistryClient({'url': 'http://10.152.183.242:8081'})

    avro_deserializer = AvroDeserializer(schema_registry_client, schema_str)

    string_deserializer = StringDeserializer('utf_8')


    config = {'bootstrap.servers': '10.152.183.52:9094',
    'group.id': 'aiufdsgdfjhdsagjhdsfjhfjdhajad;lkhkj',
    'auto.offset.reset': 'earliest',
    'value.deserializer': avro_deserializer,
    'key.deserializer': string_deserializer}
    consumer = DeserializingConsumer(config)
    try:
        consumer.subscribe(topic)

        timeout = time.time() + 60

        while time.time() < timeout:
            message = consumer.poll(timeout=1.0)
            
            if message is None: continue
            if message.error():
                print(message)
            else:
                if message.value() == original:
                    print('same')
    finally:
        consumer.close()
    return True

validate_names()

same


True

In [14]:
import argparse
import json
import os
import logging
import yaml
from kafka import KafkaProducer
from schema import Schema, SchemaError, Optional, Hook, Or
from confluent_kafka import Producer, Consumer, Avro
from confluent_kafka.serialization import StringSerializer, SerializationContext, MessageField
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer
from confluent_kafka import Producer, Consumer, DeserializingConsumer
from confluent_kafka.serialization import StringSerializer, SerializationContext, MessageField, StringDeserializer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer, AvroDeserializer
from confluent_kafka import Message
import re
from collections import defaultdict
from collections.abc import Iterable
from itertools import islice
import time
import uuid

orginial_data = {'name': 'poc-git-to-cmdb', 'description': 'POC to send information about the app to Kafka', 'containers': {'name': 'poc-git-to-cmdb', 'sysnonyms': 'poc-git-to-kafka-cmdb-sync', 'description': 'POC', 'technology': 'kafka', 'ciDataOwner': 'Aede van der Weij', 'productOwner': 'Thomas de Vries', 'applicationType': 'Tool', 'hostedAt': 'Azure Cloud', 'deploymentModel': 'On-Premise', 'personalData': False, 'confidentiality': 'Internal use', 'mcv': 'Not business critical', 'maxSeverityLevel': 4, 'icfr': False, 'assignementGroup': 'AMS_ITOnline_L3_Lists_and_Orders', 'operationalStatus': 'Pipelined', 'environments': 'nl', 'components': {'name': 'Component name', 'description': 'what the system does', 'exposedAPIs': [{'name': 'Unique API name', 'description': 'What it can be used for', 'type': 'HTTP/JSON', 'status': 'TO_BE_IMPLEMENTED'}, {'name': 'Unique API name2', 'description': 'What it can be used for2', 'type': 'HTTP/JSON2', 'status': 'TO_BE_IMPLEMENTED2'}], 'consumedAPIs': [{'name': 'test1', 'description': 'What is it used for', 'status': 'TO_BE_IMPLEMENTED', 'read': True, 'write': True, 'execute': False}, {'name': 'test2', 'description': 'What is it used for2', 'status': 'TO_BE_IMPLEMENTED2', 'read': True, 'write': False, 'execute': True}]}}}

def send_to_kafka(settings: dict, data: dict):
    global YAML_DATA

    topic = "topic10"

    with open('avro_schema.avsc') as f:
      schema_str = f.read()

    schema_registry_client = SchemaRegistryClient({'url': 'http://10.152.183.242:8081'})

    avro_serializer = AvroSerializer(schema_registry_client, schema_str)

    string_serializer = StringSerializer('utf_8')

    producer = Producer({'bootstrap.servers': '10.152.183.52:9094'})

    producer.produce(topic=topic, key=string_serializer(data['name'], None), value=avro_serializer(data, SerializationContext(topic, MessageField.VALUE)))

    producer.flush()

for i in range(500):
  orginial_data['name'] = orginial_data['name'] + str(i)
  send_to_kafka({}, data=orginial_data)

In [29]:
from confluent_kafka import TopicPartition

messages = []

def validate_names():
    with open('avro_schema.avsc') as f:
      schema_str = f.read()

    schema_registry_client = SchemaRegistryClient({'url': 'http://10.152.183.242:8081'})

    avro_deserializer = AvroDeserializer(schema_registry_client, schema_str)

    string_deserializer = StringDeserializer('utf_8')


    config = {'bootstrap.servers': '10.152.183.52:9094',
    'group.id': str(uuid.uuid4()),
    'auto.offset.reset': 'earliest',
    'value.deserializer': avro_deserializer,
    'key.deserializer': string_deserializer}
    consumer = DeserializingConsumer(config)
    

    topic_partition = TopicPartition("topic10", partition=0)
    low, high = consumer.get_watermark_offsets(topic_partition)
    print(low)
    print(high)
    current_offset = 0

    try:
        consumer.subscribe(["topic10"])
        
        timeout = time.time() + 30

        # log.info('Consuming data to see if data is already present')
        while current_offset < high:
            message = consumer.poll(timeout=1.0)
            current_offset += 1
            if message is None: continue
            if message.error():
                print(message)
            else:
                # log.info('Consumed data: %s', message.value())
                messages.append(message.value())
    finally:
        consumer.close()
    return True

validate_names()

print(len(messages))

0
1013
1013


In [17]:
def validate_names():
    with open('avro_schema.avsc') as f:
      schema_str = f.read()

    schema_registry_client = SchemaRegistryClient({'url': 'http://10.152.183.242:8081'})

    avro_deserializer = AvroDeserializer(schema_registry_client, schema_str)

    string_deserializer = StringDeserializer('utf_8')


    config = {'bootstrap.servers': '10.152.183.52:9094',
    'group.id': str(uuid.uuid4()),
    'auto.offset.reset': 'earliest',
    'value.deserializer': avro_deserializer,
    'key.deserializer': string_deserializer}
    consumer = DeserializingConsumer(config)

    try:
        consumer.subscribe(["topic10"])

        topic_partition = TopicPartition("topic10", partition=0)
        low, high = consumer.get_watermark_offsets(topic_partition)
        current_offset = 0

        log.info('Consuming data to see if data is already present')
        while current_offset < high:
            message = consumer.poll(timeout=1.0)
            current_offset += 1
            if message is None: continue
            if message.error():
                print(message)
            else:
                # log.info('Consumed data: %s', message.value())
                log.info(message.value() == YAML_DATA)
                if message.value() == YAML_DATA:
                    log.info('Data is already present and validated')
                    exit(0)
    finally:
        consumer.close()
    return True


In [121]:
from itertools import islice
import re

def replace_key(data, keys, index = 0):
    temp_data = {}
    for i, old_key in enumerate(data):
        list_data = list(data.values())
        temp_data[keys[i + index]] = list_data[i]

    return temp_data


def translate_keys(data):
    with open('avro_schema.avsc') as f:
      schema_str = f.read()

    schema_str = re.findall('(?<=\"name"\ : ")(.*?)(?=\")',schema_str)
    del schema_str[0]
    del schema_str[3]
    first_data = replace_key(dict(islice(data.items(), 2)), schema_str)
    second_data = replace_key(dict(islice(data.items(), 2, 3)), schema_str, 2)
    containers = list()
    for container in second_data["containers"]:

        container_object = replace_key(container, schema_str, 3)
        first_container = replace_key(dict(islice(container.items(), 0,10)), ['name', 'synonyms', 'description', 'technology', 'team', 'productOwner', 'applicationType', 'hostedAt', 'deploymentModel', 'dataConfidentiality'])
        first_container['dataConfidentiality'] = replace_key(first_container['dataConfidentiality'], ['containsPersonalData','containsFinancialData','publiclyExposed','restrictedAccess'])
        second_container = replace_key(dict(islice(container_object.items(), 10, 14)), ['missionCriticality', 'assignementGroup', 'operationalStatus', 'components'])

        for key in first_container.keys():
            container_object[key] = first_container[key]

        for key in second_container.keys():
            container_object[key] = second_container[key]
        print(second_container)
        print()
        print(container_object)
        
        fourth_data = replace_key(container_object["components"], ["name", "description", "exposedAPIs", "consumedAPIs"])
        fifth_data = list()

        for value in fourth_data["exposedAPIs"]:
            fifth_data.append(replace_key(value, ["name", "description", "type", "status"]))
        fourth_data["exposedAPIs"] = fifth_data

        sixth_data = list()

        for value in fourth_data["consumedAPIs"]:
            sixth_data.append(replace_key(value, ["name", "description", "status", "read", "write", "execute"]))

        fourth_data["consumedAPIs"] = sixth_data

        data = first_data 
        containers.append(container_object)
        
    data["containers"] = containers

    return data

import yaml

def parse_yaml() -> dict:
    with open("test.yml", mode='r', encoding='utf-8') as file:
        data = yaml.safe_load(file)
    return data
def parse_yaml2() -> dict:
    with open("test2.yml", mode='r', encoding='utf-8') as file:
        data = yaml.safe_load(file)
    return data

translate_keys(parse_yaml())


[{'name': 'poc-git-to-cmdb', 'synonyms': 'poc-git-to-kafka-cmdb-sync', 'description': 'POC', 'technology': 'tech', 'team': 'team', 'productOwner': 'owner', 'applicationType': 'Tool', 'hostedAt': 'Azure Cloud', 'deploymentModel': 'On-Premise', 'dataConfidentiality': {'containsPersonalData': False, 'containsFinancialData': False, 'publiclyExposed': False, 'restrictedAccess': True}, 'missionCriticality': 'Not business critical', 'assignementGroup': 'te', 'operationalStatus': 'Pipelined', 'components': {'name': 'Component name', 'description': 'what the system does', 'exposedAPIs': [{'name': 'Unique API name', 'description': 'What it can be used for', 'type': 'HTTP/JSON', 'status': 'TO_BE_IMPLEMENTED'}, {'name': 'Unique API name2', 'description': 'What it can be used for2', 'type': 'HTTP/JSON2', 'status': 'TO_BE_IMPLEMENTED2'}], 'consumedAPIs': [{'name': 'Unique API name', 'description': 'What is it used for', 'status': 'TO_BE_IMPLEMENTED', 'read': True, 'write': True, 'execute': False}, 

TypeError: unhashable type: 'list'

In [None]:
from itertools import islice
import re

def replace_key(data, keys, index = 0):
    temp_data = {}
    for i, old_key in enumerate(data):
        list_data = list(data.values())
        temp_data[keys[i + index]] = list_data[i]

    return temp_data


def translate_keys(data):
    with open('avro_schema.avsc') as f:
      schema_str = f.read()

    schema_str = re.findall('(?<=\"name"\ : ")(.*?)(?=\")',schema_str)
    del schema_str[0]
    del schema_str[3]
    first_data = replace_key(dict(islice(data.items(), 2)), schema_str)
    second_data = replace_key(dict(islice(data.items(), 2, 3)), schema_str, 2)
    containers = list()
    for container in second_data["containers"]:

        container_object = replace_key(container, schema_str, 3)
        first_container = replace_key(dict(islice(container.items(), 0,10)), ['name', 'synonyms', 'description', 'technology', 'team', 'productOwner', 'applicationType', 'hostedAt', 'deploymentModel', 'dataConfidentiality'])
        first_container['dataConfidentiality'] = replace_key(first_container['dataConfidentiality'], ['containsPersonalData','containsFinancialData','publiclyExposed','restrictedAccess'])
        second_container = replace_key(dict(islice(container_object.items(), 10, 14)), ['missionCriticality', 'assignementGroup', 'operationalStatus', 'components'])

        for key in first_container.keys():
            container_object[key] = first_container[key]

        for key in second_container.keys():
            container_object[key] = second_container[key]
        print(second_container)
        print()
        print(container_object)
        
        fourth_data = replace_key(container_object["components"], ["name", "description", "exposedAPIs", "consumedAPIs"])
        fifth_data = list()

        for value in fourth_data["exposedAPIs"]:
            fifth_data.append(replace_key(value, ["name", "description", "type", "status"]))
        fourth_data["exposedAPIs"] = fifth_data

        sixth_data = list()

        for value in fourth_data["consumedAPIs"]:
            sixth_data.append(replace_key(value, ["name", "description", "status", "read", "write", "execute"]))

        fourth_data["consumedAPIs"] = sixth_data

        data = first_data 
        containers.append(container_object)
        
    data["containers"] = containers

    return data

import yaml

def parse_yaml() -> dict:
    with open("test.yml", mode='r', encoding='utf-8') as file:
        data = yaml.safe_load(file)
    return data
def parse_yaml2() -> dict:
    with open("test2.yml", mode='r', encoding='utf-8') as file:
        data = yaml.safe_load(file)
    return data

translate_keys(parse_yaml())

In [98]:
import re
import os

def find_team():

    path = Path(os.getcwd() + '/CODEOWNERS')
    if not path.is_file():
        log.error("CODEOWNERS file could not be found, please manually fill in team")
        exit(1)

    with open(os.getcwd() + '/CODEOWNERS') as f:
        code = f.read()

    matches = re.findall(r"(CODEOWNERS|\*)[ \t]+(@RoyalAholdDelhaize\/)(.*)", code)

    return matches[0][-1]

find_team()

'team-sre-core'

In [2]:
from itertools import islice
import re

def replace_key(data, keys, index = 0):
    temp_data = {}
    for i, old_key in enumerate(data):
        list_data = list(data.values())
        temp_data[keys[i + index]] = list_data[i]

    return temp_data


def translate_keys(data):
    first_data = replace_key(dict(islice(data.items(), 2)), ['name', 'description'])
    containers = list()

    for container in data["containers"]:
        first_container = replace_key(dict(islice(container.items(), 0,10)), ['name', 'synonyms', 'description', 'technology', 'team', 'productOwner','githubURL','targetAudience', 'hostedAt', 'deploymentModel', 'dataConfidentiality'])
        first_container['dataConfidentiality'] = replace_key(first_container['dataConfidentiality'], ['containsPersonalData','containsFinancialData','publiclyExposed','restrictedAccess'])
        second_container = replace_key(dict(islice(container.items(), 10, 14)), ['missionCriticality', 'assignementGroup', 'operationalStatus', 'components'])
        
        component_list = list()
        for component in container['components']:

            component = replace_key(component, ["name", "description", "exposedAPIs", "consumedAPIs"])
        
            exposedAPI_list = list()
            
            for value in component["exposedAPIs"]:
                exposedAPI_list.append(replace_key(value, ["name", "description", "type", "status"]))

            component["exposedAPIs"] = exposedAPI_list
            
            consumedAPI_list = list()
            for value in component["consumedAPIs"]:
                consumedAPI_list.append(replace_key(value, ["name", "description", "status", "read", "write", "execute"]))

            component["consumedAPIs"] = consumedAPI_list
            component_list.append(component)
        
        second_container['components'] = component_list

        first_container.update(second_container)

        containers.append(first_container)
        
    first_data["containers"] = containers

    return first_data

import yaml

def parse_yaml() -> dict:
    with open("test.yml", mode='r', encoding='utf-8') as file:
        data = yaml.safe_load(file)
    return data

translate_keys(parse_yaml())

AttributeError: 'str' object has no attribute 'values'

In [1]:
import os
import yaml
import glob

files = glob.glob(os.getcwd() + '/persons/*.yml')

person_dict = {}

def read_yaml_file(filename):
    with open(filename, 'r') as stream:
        try:
            person = yaml.safe_load(stream)
            name = person['person']['name']
            name = name.replace(' ', '_')
            info = {'teams': person['person']['teams'], 'roles': person['person']['roles']}
            person_dict[name] = info
        except Exception as e:
            raise e

for file in files:
    read_yaml_file(file)

# with open('persons.yml', 'w') as outfile:
#     yaml.safe_dump(person_dict, outfile)
person_dict

{'Mark_Rutter': {'teams': ['team-mechanization'],
  'roles': ['test-engineer', 'opsgenie-standby']},
 'Pavel_Khudzik': {'teams': ['team-fulfillment-minsk',
   'team-fulfillment-europe'],
  'roles': ['database-developer', 'backend-developer', 'opsgenie-standby']},
 'Sven_Moller': {'teams': ['team-sre',
   'team-sre-incident-command',
   'team-sre-security'],
  'roles': ['incident-commander', 'security-engineer', 'opsgenie-standby']},
 'Reinier_van_Petegem': {'teams': ['team-management',
   'team-business-technology'],
  'roles': None},
 'Matthijs_Tolkamp': {'teams': ['team-cms-platform'],
  'roles': ['backend-developer', 'opsgenie-standby']},
 'Jeroen_Tusveld': {'teams': ['team-fulfillment-minsk',
   'team-fulfillment-europe'],
  'roles': ['database-developer', 'backend-developer', 'opsgenie-standby']},
 'Jaiwan_Bindraban': {'teams': ['team-loyalty-and-savings'],
  'roles': ['test-engineer']},
 'Marc_Buurman': {'teams': ['team-salesforce-platform', 'team-self-service'],
  'roles': ['pro

In [39]:
import os
import yaml
import glob

files = glob.glob(os.getcwd() + '/teams/*.yml')

team_dict = {}

def read_yaml_file(filename):
    with open(filename, 'r') as stream:
        try:
            team = yaml.safe_load(stream)
            name = team['team']['name']
            if 'area' in team['team'].keys():
                info = {'area': team['team']['area'], 'description': team['team']['description']}
            else:
                info = {'description': team['team']['description']}
            team_dict[name] = info
        except Exception as e:
            raise e

for file in files:
    read_yaml_file(file)


# print(team_dict)

with open('teams.yml', 'w') as outfile:
    yaml.safe_dump(team_dict, outfile)

In [135]:
def find_product_owner(role):
    global YAML_DATA
    with open("persons.yml", mode='r', encoding='utf-8') as file:
        data = yaml.safe_load(file)
    for k, v in data.items():
        if v['teams'] is None:
            continue
        if 'team-sre' in v['teams']:
            if role in v['roles']:
                return k
    return None

find_product_owner('')

In [52]:
import argparse
import json
import os
import logging
import yaml
from kafka import KafkaProducer
from schema import Schema, SchemaError, Optional, Hook, Or
from confluent_kafka import Producer, Consumer
from confluent_kafka.serialization import StringSerializer, SerializationContext, MessageField
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer
from confluent_kafka import Producer, Consumer, DeserializingConsumer
from confluent_kafka.serialization import StringSerializer, SerializationContext, MessageField, StringDeserializer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroSerializer, AvroDeserializer
from confluent_kafka import Message
from confluent_kafka import TopicPartition
import re
from collections import defaultdict
from collections.abc import Iterable
from itertools import islice
from pathlib import Path
import time
import uuid
from github import Github

DEFAULT_DATA_FILE = 'system.yml'
DEFAULT_CA_FILE = 'ca.crt'
DATA_FILE_ENV_VAR = 'DATA_FILE'

# Env variables
KAFKA_TOPIC_NAME_ENV_VAR = 'KAFKA_TOPIC_NAME'
KAFKA_BOOTSTRAP_ENV_VAR = 'KAFKA_BOOTSTRAP_SERVERS'
KAFKA_PASSWD_ENV_VAR = 'KAFKA_PASSWORD'
KAFKA_USERNAME_ENV_VAR = 'KAFKA_USERNAME'
KAFKA_CA_ENV_VAR = 'KAFKA_CA_CONTENT'
KAFKA_VALIDATION_CHECK_ENV_VAR ='KAFKA_VALIDATION_CHECK'
KAFKA_BYPASS_MODE_ENV_VAR = 'KAFKA_BYPASS_MODE_ENV_VAR'
TOKEN_GITHUB = 'TOKEN_GITHUB'

# Kafka settings
KAFKA_TOPIC_DEFAULT_KEY = 'topic2'
KAFKA_SECURITY_PROTOCOL = 'PLAINTEXT'
KAFKA_SASL_MECHANISM = 'SCRAM-SHA-512'

TOPIC_NAME = 'topic20'
BOOTSTRAP_SERVERS_URL = '10.152.183.52:9094'
SCHEMA_REGISTRY_URL = 'http://10.152.183.242:8081'
ORGANIZATION_NAME = 'RoyalAholdDelhaize'
TEAMS_AS_CODE_REPO_NAME = 'sre-teams-configuration'

EXIT_OKAY = 0
EXIT_ERORR = 1
EXIT_MISSING = 2

def parse_yaml() -> dict:
    with open("test.yml", mode='r', encoding='utf-8') as file:
        data = yaml.safe_load(file)
    return data
YAML_DATA = parse_yaml()

def send_to_kafka(settings: dict, data: dict):
    global YAML_DATA

    with open('avro_schema.avsc') as f:
      schema_str = f.read()

    schema_registry_client = SchemaRegistryClient({'url': SCHEMA_REGISTRY_URL})

    avro_serializer = AvroSerializer(schema_registry_client, schema_str)

    string_serializer = StringSerializer('utf_8')

    producer = Producer({'bootstrap.servers': BOOTSTRAP_SERVERS_URL})

    # producer.produce(topic=TOPIC_NAME, key=string_serializer(YAML_DATA['name'], None), value=avro_serializer(data, SerializationContext(TOPIC_NAME, MessageField.VALUE)))

    producer.flush()



send_to_kafka(None, YAML_DATA)

In [37]:
import requests

response = requests.get('https://api.github.com/repos/RoyalAholdDelhaize/ah-ctp-kafka-action/releases', headers={'Authorization': 'Bearer '})

data = response.content.decode().strip('][').split(', ')

response = requests.get('https://github.com/RoyalAholdDelhaize/ah-ctp-kafka-action/releases/download/latest/persons.yml', headers={'Authorization': 'Bearer '})

response.content.decode()


'Not Found'

In [157]:
from github import Github

github_client = Github('')
teams_as_code = github_client.get_organization('RoyalAholdDelhaize').get_repo('ah-ctp-kafka-action')
# temp = teams_as_code.get_latest_release().get_assets()
# temp.get_page(0)
url = teams_as_code.get_latest_release().get_assets().get_page(0)[0].url
print(url)
url = 'https://api.github.com/repos/RoyalAholdDelhaize/ah-ctp-kafka-action/releases/assets/88754179'
header = {'Authorization': 'Bearer ', 'Accept': 'application/octet-stream'}
response = requests.get(url,header,stream=True)
response.content


https://api.github.com/repos/RoyalAholdDelhaize/ah-ctp-kafka-action/releases/assets/88754179


b'{"message":"Not Found","documentation_url":"https://docs.github.com/rest/reference/repos#get-a-release-asset"}'

In [114]:
import zipfile
import re

url = 'https://api.github.com/repos/RoyalAholdDelhaize/ah-ctp-kafka-action/zipball/latest'
header = {'Authorization': 'Bearer '}
response = requests.get(url, headers= header, stream=True)
# if response.status_code == 200:
#     with open('testt.yml', 'wb') as f:
#         f.write(response.raw.read())
handle = open('testt.zip', "wb")
for chunk in response.iter_content(chunk_size=512):
    if chunk:
        handle.write(chunk)
handle.close()

with zipfile.ZipFile("testt.zip", 'r') as zip:
    for file in zip.filelist:
        if file.filename.__contains__('/persons.yml'):
            zip.extract(file.filename)

In [130]:
url = 'https://github.com/RoyalAholdDelhaize/ah-ctp-kafka-action/releases/download/latest/persons.yml'
header = {'Authorization': 'Bearer ', 'accept': 'application/octet-stream',''}
response = requests.get(url, headers= header, stream=True)
response.content

b'Not Found'

In [175]:
import requests
from github import Github
import yaml

github_client = Github('')
person_repository = github_client.get_organization('RoyalAholdDelhaize').get_repo('ah-ctp-kafka-action')

url = person_repository.get_latest_release().get_assets().get_page(0)[0].url
header = {'Authorization': 'Bearer ', 'Accept': 'application/octet-stream'}

response = requests.get(url,headers=header, allow_redirects=False, stream=True)
#If found get the location url from the Location header
if response.status_code == 302:
    print(response.headers['Location'])
    response = requests.get(response.headers['Location'], allow_redirects=False, stream=True)
test = yaml.safe_load(response.content.decode())
test

https://objects.githubusercontent.com/github-production-release-asset-2e65be/572089699/e311a069-c9b3-4506-b3d4-19f00eb5f118?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20221219%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20221219T145432Z&X-Amz-Expires=300&X-Amz-Signature=2ec4b4a4021aa2d20db5c4a3aa226afc93f0048adb842c67c512372a402bd1c6&X-Amz-SignedHeaders=host&actor_id=13902127&key_id=0&repo_id=572089699&response-content-disposition=attachment%3B%20filename%3Dpersons.yml&response-content-type=application%2Foctet-stream


{'Mark_Rutter': {'teams': ['team-mechanization'],
  'roles': ['test-engineer', 'opsgenie-standby']},
 'Pavel_Khudzik': {'teams': ['team-fulfillment-minsk',
   'team-fulfillment-europe'],
  'roles': ['database-developer', 'backend-developer', 'opsgenie-standby']},
 'Sven_Moller': {'teams': ['team-sre',
   'team-sre-incident-command',
   'team-sre-security'],
  'roles': ['incident-commander', 'security-engineer', 'opsgenie-standby']},
 'Reinier_van_Petegem': {'teams': ['team-management',
   'team-business-technology'],
  'roles': 'None'},
 'Matthijs_Tolkamp': {'teams': ['team-cms-platform'],
  'roles': ['backend-developer', 'opsgenie-standby']},
 'Jeroen_Tusveld': {'teams': ['team-fulfillment-minsk',
   'team-fulfillment-europe'],
  'roles': ['database-developer', 'backend-developer', 'opsgenie-standby']},
 'Jaiwan_Bindraban': {'teams': ['team-loyalty-and-savings'],
  'roles': ['test-engineer']},
 'Marc_Buurman': {'teams': ['team-salesforce-platform', 'team-self-service'],
  'roles': ['p

In [164]:
import requests

response = requests.get('https://github.com/RoyalAholdDelhaize/ah-ctp-kafka-action/releases/download/latest/persons.yml')
print(response.content.decode())

Not Found


In [163]:
import requests

response = requests.get('https://objects.githubusercontent.com/github-production-release-asset-2e65be/572089699/e311a069-c9b3-4506-b3d4-19f00eb5f118?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20221219%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20221219T130834Z&X-Amz-Expires=300&X-Amz-Signature=bda5612b111bbd0e00bbdb138ca79a81845377bc7daf1b759fa9f50a87e46ffe&X-Amz-SignedHeaders=host&actor_id=13902127&key_id=0&repo_id=572089699&response-content-disposition=attachment%3B%20filename%3Dpersons.yml&response-content-type=application%2Foctet-stream')
print(response.content.decode())


{'Mark_Rutter': {'teams': ['team-mechanization'], 'roles': ['test-engineer', 'opsgenie-standby']}, 'Pavel_Khudzik': {'teams': ['team-fulfillment-minsk', 'team-fulfillment-europe'], 'roles': ['database-developer', 'backend-developer', 'opsgenie-standby']}, 'Sven_Moller': {'teams': ['team-sre', 'team-sre-incident-command', 'team-sre-security'], 'roles': ['incident-commander', 'security-engineer', 'opsgenie-standby']}, 'Reinier_van_Petegem': {'teams': ['team-management', 'team-business-technology'], 'roles': None}, 'Matthijs_Tolkamp': {'teams': ['team-cms-platform'], 'roles': ['backend-developer', 'opsgenie-standby']}, 'Jeroen_Tusveld': {'teams': ['team-fulfillment-minsk', 'team-fulfillment-europe'], 'roles': ['database-developer', 'backend-developer', 'opsgenie-standby']}, 'Jaiwan_Bindraban': {'teams': ['team-loyalty-and-savings'], 'roles': ['test-engineer']}, 'Marc_Buurman': {'teams': ['team-salesforce-platform', 'team-self-service'], 'roles': ['product-owner']}, 'Milan_Vrskovy': {'team

In [None]:
re