In [12]:
import requests
import xml.etree.ElementTree as ET
from urllib.parse import quote_plus

def _pom_child_text(element, tag, prefix):
    """Return the stripped text of ``element``'s direct child ``tag``.

    ``prefix`` is the ``{namespace-uri}`` qualifier (or ``''``) to prepend to
    ``tag``. Returns None when the child is absent, empty, or whitespace-only.
    """
    child = element.find(prefix + tag)
    if child is None or child.text is None:
        return None
    return child.text.strip() or None


def parse_pom_dependencies_from_string(pom_content):
    """Extract ``groupId:artifactId:version`` coordinates from pom.xml text.

    Args:
        pom_content: The full text of a pom.xml document.

    Returns:
        A list of ``"groupId:artifactId:version"`` strings, one per
        ``<dependency>`` found under any ``<dependencies>`` element, in
        document order. A version written as ``${name}`` is looked up in the
        top-level ``<properties>`` section. Placeholders are used when a
        value cannot be determined:

        * ``VERSION NOT SPECIFIED`` - no ``<version>`` element, or it is empty.
        * ``VERSION PROPERTY NOT FOUND`` - the referenced property is missing
          or empty.
        * ``GROUPID NOT SPECIFIED`` / ``ARTIFACTID NOT SPECIFIED`` - the
          corresponding element is missing or empty.

    Raises:
        xml.etree.ElementTree.ParseError: If ``pom_content`` is not
            well-formed XML.
    """
    root = ET.fromstring(pom_content)

    # Qualify every lookup with the namespace the root element actually
    # carries. POMs usually declare http://maven.apache.org/POM/4.0.0, but a
    # file without the xmlns declaration would otherwise match nothing.
    prefix = root.tag[:root.tag.index('}') + 1] if root.tag.startswith('{') else ''

    # Collect the top-level <properties> used to resolve ${...} versions
    properties_dict = {}
    properties = root.find(prefix + 'properties')
    if properties is not None:
        for child in properties:
            tag = child.tag.split('}', 1)[-1]  # Strip namespace
            # An empty property element has text None; store '' instead
            properties_dict[tag] = (child.text or '').strip()

    # ".//" matches <dependencies> at any depth, so entries nested deeper in
    # the document are reported alongside the project's direct dependencies.
    dependency_list = []
    for dependency in root.findall(f".//{prefix}dependencies/{prefix}dependency"):
        groupId = _pom_child_text(dependency, 'groupId', prefix) or 'GROUPID NOT SPECIFIED'
        artifactId = _pom_child_text(dependency, 'artifactId', prefix) or 'ARTIFACTID NOT SPECIFIED'

        version = _pom_child_text(dependency, 'version', prefix)
        if version is None:
            version = 'VERSION NOT SPECIFIED'
        elif version.startswith('${') and version.endswith('}'):
            # Resolve the version from properties
            property_name = version[2:-1].strip()
            version = properties_dict.get(property_name) or 'VERSION PROPERTY NOT FOUND'

        dependency_list.append(f"{groupId}:{artifactId}:{version}")

    return dependency_list

# Your credentials and endpoint
# BASE_URL, WORKSPACE, USERNAME and PASSWORD are read below but are not
# assigned in this cell; they must already exist in the kernel. Load them from
# the environment or a prompt rather than hard-coding them in the notebook.


# The search query
SEARCH_QUERY = "pom.xml"
# Seconds to wait on each HTTP call; without a timeout a stalled connection
# would hang the cell indefinitely.
REQUEST_TIMEOUT = 30
# Initialize variables
files = []

# Properly encode the search query to be used in a URL
encoded_query = quote_plus(SEARCH_QUERY)
next_page = f"{BASE_URL}/{WORKSPACE}/search/code?search_query={encoded_query}"

# Loop through all pages of search results
while next_page:
    response = requests.get(next_page, auth=(USERNAME, PASSWORD), timeout=REQUEST_TIMEOUT)

    # Check if the request was successful
    if response.status_code == 200:
        search_results = response.json()
        for result in search_results['values']:
            path = result['file']['links']['self']['href']
            libraries = []
            # only check if file is a pom.xml
            if SEARCH_QUERY in result['file']['path']:
                file_response = requests.get(path, auth=(USERNAME, PASSWORD), timeout=REQUEST_TIMEOUT)
                if file_response.status_code == 200:
                    try:
                        libraries = parse_pom_dependencies_from_string(file_response.text)
                    except ET.ParseError as error:
                        # One malformed pom.xml must not abort the whole crawl;
                        # record the file with no dependencies and carry on.
                        print("Failed to parse pom.xml:", path, error)
                else:
                    # Do not feed an error body to the XML parser
                    print("Failed to fetch file:", path, file_response.status_code)

            file = {
                'name': result['file']['path'],
                'path': path,
                'content': libraries
            }
            files.append(file)

        # Get the next page URL, if it exists
        next_page = search_results.get('next', None)
    else:
        print("Failed to search the repository:", response.status_code, response.text)
        next_page = None  # Stop if there's an error

print(len(files))

192


In [13]:
# Print one banner-framed section per search hit, then its dependencies.
# Loop variable names are kept so the kernel namespace ends up unchanged.
SEPARATOR = '--------------------------------------------------------------------------------------------'
for file in files:
    # Header: rule, name, path, rule - each on its own line
    print(
        SEPARATOR,
        'File Name = ' + file['name'],
        'File Path = ' + file['path'],
        SEPARATOR,
        sep='\n',
    )
    # One "groupId:artifactId:version" entry per line (nothing for an empty list)
    for library in file['content']:
        print(library)
    # Blank line between sections
    print()

--------------------------------------------------------------------------------------------
File Name = app/the-mummy-aws/pom.xml
File Path = https://api.bitbucket.org/2.0/repositories/articledev/themummy/src/57040a7e23ee53e1fcc930c82e8230e7126176ed/app/the-mummy-aws/pom.xml
--------------------------------------------------------------------------------------------
junit:junit:VERSION NOT SPECIFIED
com.amazonaws:aws-java-sdk-sqs:1.11.490
com.google.code.gson:gson:2.8.5
org.apache.commons:commons-lang3:3.0

--------------------------------------------------------------------------------------------
File Name = pom.xml
File Path = https://api.bitbucket.org/2.0/repositories/articledev/kafka-connect-simple-key/src/ed811a405628414fcb4001787e9482d30b67c28d/pom.xml
--------------------------------------------------------------------------------------------
org.apache.kafka:connect-api:1.1.0

--------------------------------------------------------------------------------------------
File Na