# Introduction

In this notebook, I work through a number of files from Snyk and render outputs. The input files are either JSON or SARIF. I expect most of the work to happen with the JSON files.

In [10]:
import pandas as pd
import numpy as np
import json
from elasticsearch import Elasticsearch, helpers

Let's load up our files and start to examine them.  The JSON files come in heavy, and we may have to reduce or only load in a subset.


In [6]:
# Read in the json file into a data structure.  Let's start with plain old json format and then expand

import json
import pandas as pd

# Start from None so a failed load never leaves data from an earlier run behind.
json_elastic = None
json_todolist = None

# JSON text is UTF-8 encoded. The encoding is passed explicitly because open()
# otherwise falls back to the platform's locale encoding, which is not UTF-8
# on every system.

## Read in the file elastic.json as a json structure
with open('datafiles/elastic.json', encoding='utf-8') as f:
    json_elastic = json.load(f)

## Read in the file todolist-goof.json as a json structure.
with open('datafiles/todolist-goof.json', encoding='utf-8') as f:
    json_todolist = json.load(f)

This next part reads our environment variables to collect our API credentials.

In [3]:
import os

# Connection settings are taken from the environment so that no secret is
# stored in the notebook itself. A variable that is not set yields None.
ELASTIC_API_URL = os.getenv('ELASTIC_API_URL')
ELASTIC_API_KEY = os.getenv('ELASTIC_API_KEY')

# Requests authenticate with the API key, sent in the Authorization header.
headers = {'Authorization': f'ApiKey {ELASTIC_API_KEY}'}


In [4]:
# Show which endpoint was picked up from the environment (the URL only, not the API key).
print(ELASTIC_API_URL)


https://172.29.213.51:9200/


In [5]:
import requests

## Read the products from the Elastic Server.  This is a GET request to /products
def get_products(timeout=30):
    """Fetch the ``products`` resource from the Elastic server with a GET request.

    The request is sent to ``<ELASTIC_API_URL>/products`` using the
    notebook-level ``headers`` for authentication.

    Args:
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout ``requests`` waits indefinitely, which would
            block the notebook if the server is unreachable.

    Returns:
        The decoded JSON body of the response. The HTTP status code is not
        checked, so an error response is returned as-is if it is valid JSON.

    Raises:
        requests.exceptions.RequestException: If the request cannot be sent,
            the connection fails, or the timeout expires.
        ValueError: If the response body is not valid JSON.
    """
    # Drop a trailing slash from the base URL so the path is not built as "//products".
    base_url = (ELASTIC_API_URL or "").rstrip("/")
    url = f"{base_url}/products"

    # We specify verify=False to match curl's --insecure flag
    # (TLS certificate verification is skipped).
    response = requests.get(url, headers=headers, verify=False, timeout=timeout)
    return response.json()


# Query the server once and keep the decoded response so it can be inspected below.
products = get_products()
print (f'Your products are: \n{products}')


Your products are: 
{'products': {'aliases': {}, 'mappings': {'properties': {'id': {'type': 'long'}, 'in_stock': {'type': 'long'}, 'name': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'price': {'type': 'long'}, 'tages': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'tags': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}}}, 'settings': {'index': {'routing': {'allocation': {'include': {'_tier_preference': 'data_content'}}}, 'number_of_shards': '2', 'provided_name': 'products', 'creation_date': '1726425825891', 'number_of_replicas': '2', 'uuid': 'tPcO96JhRLqzI7bhDZSGXQ', 'version': {'created': '8512000'}}}}}




The general structure of the json file is:

```json
{
    "vulnerabilities": [],
    ...
    "summary" : "",
    "projectName" : "",
    "path" : "",
    "applications" : [
        {
            "projectName":"",
            "dependencyCount":"",
            "displayTargetFile":"",
            "targetFile":"",
            "path":"",
            "packageManager":"",
            "summary" : "",
            "vulnerabilities":[]
        }
    ]
}
```

I think the best we can do is run through all of the vulnerabilities and make note of them in a new DF.
Alternatively, we can run through all of the vulnerabilities and add them to something like Elastic for fun.

This means that for each of the JSON data structures, let's iterate through the applications, and also through each of the vulnerabilities.

In [14]:
# Initialize the Elasticsearch client and use the API key to log on

from elasticsearch import Elasticsearch
# verify_certs=False turns off TLS certificate verification for this client.
# NOTE(review): presumably the server uses a self-signed certificate — confirm
# before reusing this setting outside a lab environment.
es = Elasticsearch(ELASTIC_API_URL, api_key=ELASTIC_API_KEY, verify_certs=False)

  _transport = transport_class(


In [32]:
# Clean up indices to start clean.
# ignore_unavailable=True keeps this cell from failing with a "not found" error
# when an index does not exist yet (for example on the first run against a
# fresh cluster), so the notebook can be run top to bottom.
es.indices.delete(index='applications', ignore_unavailable=True)
es.indices.delete(index='vulnerabilities', ignore_unavailable=True)



ObjectApiResponse({'acknowledged': True})

In [33]:
# Create the Elasticsearch indices "applications" and "vulnerabilities" if they do not exist yet.

# A single shard without replicas for each index.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    }
}

# Both indices share the same settings, so one loop replaces the repeated
# exists/create pair.
for index_name in ("applications", "vulnerabilities"):
    if not es.indices.exists(index=index_name):
        # Hand the settings to the client's explicit `settings` keyword
        # instead of wrapping them in a raw request body.
        response = es.indices.create(index=index_name, settings=index_settings["settings"])




In [35]:
def iterate_through_json(jsonfile, client=None):
    """Index the applications and vulnerabilities of one parsed JSON report.

    Walks the two top-level lists ``applications`` and ``vulnerabilities`` of
    ``jsonfile`` (they are independent and sit at the same level) and stores a
    reduced copy of every entry as a document in the Elasticsearch index of
    the same name. Afterwards the number of entries of each kind is printed.

    Args:
        jsonfile: Parsed report as a dict. A missing or empty
            ``applications`` / ``vulnerabilities`` list is treated as
            "no entries" instead of raising ``KeyError``.
        client: Object exposing ``index(index=..., document=...)``. Defaults
            to the notebook-level ``es`` client, so existing calls keep working.

    Returns:
        None. The results are the indexed documents and the printed counts.
    """
    if client is None:
        # Fall back to the client created earlier in the notebook.
        client = es

    # TODO: There is enough variation in the records that the way they are
    # stored needs adjusting (for example, upgradePath is empty or contains
    # values). For now only a subset of each record is stored. Fields missing
    # from a record are stored as None rather than aborting the whole run.
    app_count = 0
    for app in jsonfile.get('applications') or []:
        newapp = {
            "projectName": app.get('projectName'),
            # Key was previously misspelled as "targgetFile".
            "targetFile": app.get('targetFile'),
            "displayTargetFile": app.get('displayTargetFile'),
            # Running position of the application within this report.
            "id": app_count,
        }
        client.index(index="applications", document=newapp)
        app_count += 1
    print(f'There are {app_count} applications')

    # Fields copied verbatim from each vulnerability record.
    vuln_fields = ("id", "CVSSv3", "severity", "cvssScore", "description", "packageName")
    vuln_count = 0
    for vuln in jsonfile.get('vulnerabilities') or []:
        newvuln = {field: vuln.get(field) for field in vuln_fields}
        client.index(index="vulnerabilities", document=newvuln)
        vuln_count += 1
    print(f'There are {vuln_count} vulnerabilities')

    
# Index the contents of elastic.json; the counts printed below come from this call.
iterate_through_json(jsonfile=json_elastic)   
    




There are 94 applications
There are 104 vulnerabilities


