In [1]:
# Module 4 
# Reference: https://docs.python.org/3/library/datetime.html
from datetime import datetime as dt
from urllib.parse import quote_plus

import requests as requests
from bs4 import BeautifulSoup

# Webpage that we will scrape
# https://nvd.nist.gov/vuln/search 

In [2]:
# Collect the search parameters and validate them immediately, so that a typo
# fails here with a clear message instead of part-way through the scraping.
# All four values are kept as (whitespace-stripped) strings.

# Ask user to input a search term, e.g. "splunk"
search_term = input('enter a search term: ').strip()

# Ask user to type in a minimum severity, e.g. "7.4"
min_severity = input('enter a minimum severity (1-10): ').strip()

# Ask user to type in a start date in a predefined format, e.g. "10-02-2017"
start_date = input('enter a start date (MM-DD-YYYY): ').strip()

# Ask user to type in an end date in a predefined format, e.g. "12-31-2018"
end_date = input('enter an end date (MM-DD-YYYY): ').strip()

# The severity must be a number on the 0-10 scale; NaN fails the range check too.
try:
    _severity_value = float(min_severity)
except ValueError:
    raise ValueError(f'minimum severity must be a number, got "{min_severity}"') from None
if not 0 <= _severity_value <= 10:
    raise ValueError(f'minimum severity must be between 0 and 10, got "{min_severity}"')

# Both dates must match MM-DD-YYYY, and the range must not be reversed.
try:
    _start = dt.strptime(start_date, "%m-%d-%Y")
    _end = dt.strptime(end_date, "%m-%d-%Y")
except ValueError as exc:
    raise ValueError(f'dates must be in MM-DD-YYYY format: {exc}') from None
if _start > _end:
    raise ValueError(f'start date "{start_date}" is after end date "{end_date}"')

enter a search term: python
enter a minimum severity (1-10): 8.5
enter a start date (MM-DD-YYYY): 10-13-2012
enter an end date (MM-DD-YYYY): 11-30-2015


In [3]:
# Explore the result based on the query (v2)

# The query string carries dates as MM/DD/YYYY with each slash percent-encoded.
pub_start_date = start_date.replace("-", "%2F")  # 10-13-2012 -> 10%2F13%2F2012
pub_end_date = end_date.replace("-", "%2F")      # 11-30-2015 -> 11%2F30%2F2015

# The search term is free text typed by the user: encode it so that spaces,
# "&", "#", "=" and similar characters cannot break or alter the query string.
url = (
    'https://nvd.nist.gov/vuln/search/results'
    '?form_type=Advanced&results_type=overview'
    f'&query={quote_plus(search_term)}'
    '&search_type=all&cvss_version=2'
    f'&pub_start_date={pub_start_date}'
    f'&pub_end_date={pub_end_date}'
    '&startIndex=0'
)

# Request content from web page.
# A timeout keeps the notebook from hanging on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
content = response.content

soup = BeautifulSoup(content, 'lxml')


# Observe the content of soup 
# print(soup)

In [4]:
import math

# Find the total number of results.
# soup.find returns None when the counter element is missing from the page;
# treat that as zero results instead of failing with AttributeError.
RESULTS_PER_PAGE = 20

count_tag = soup.find('strong', {"data-testid": "vuln-matching-records-count"})
if count_tag is None:
    total = 0
else:
    # Drop any thousands separator (e.g. "1,234") before converting.
    total = int(count_tag.text.strip().replace(",", ""))

# Find the number of pages (20 results per page)
pages = math.ceil(total / RESULTS_PER_PAGE)

print(f'The total number of results returned were "{total}". There are "{pages}" pages.')


The total number of results returned were "93". There are "5" pages.


In [5]:
# Containers for the scraped query results, one fresh list per field:
#   vul_IDs        - vulnerability IDs
#   summaries      - vulnerability summaries
#   severities     - severity levels
#   publish_dates  - publish dates
#   urls           - URLs of the individual vulnerability description pages
#                    (not visible from the survey page yet)
vul_IDs, summaries, severities, publish_dates, urls = [], [], [], [], []

In [6]:
# Scrape every result page and keep the rows whose severity is at or above the
# requested minimum. Each kept row adds exactly one entry to each of the five
# result lists, so position i in every list describes the same vulnerability.

url_prefix = "https://nvd.nist.gov"
RESULTS_PER_PAGE = 20   # must match the page size used to compute `pages`
REQUEST_TIMEOUT = 30    # seconds; a stalled connection should not hang the notebook
threshold = float(min_severity)

# Start from empty lists so that re-running this cell does not duplicate rows.
for collected in (vul_IDs, summaries, severities, publish_dates, urls):
    collected.clear()

# The first element carrying an "action" attribute holds the site-relative query
# for the current results page. BeautifulSoup returns the attribute value with
# HTML entities already decoded, so no manual "&amp;" handling is needed.
form = soup.find(action=True)
if form is None and pages > 0:
    raise RuntimeError("could not find the search form (action attribute) on the results page")
# Cut the query just before "Index=<n>" so each page's own offset can be appended.
query_base = form["action"].split("Index=")[0] if form is not None else ""

for page in range(pages):

    # Calculate the appropriate index for the page and build its URL
    index = page * RESULTS_PER_PAGE
    url = f'{url_prefix}{query_base}Index={index}'

    # Output the page and url query for the page
    print(f'We are on page #{page+1}.')
    print(url)

    # Get the response, content and soup for the page
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    content = response.content
    soup = BeautifulSoup(content, 'lxml')

    # Find the table on the page and then get the rows
    table = soup.find('table', {"data-testid": "vuln-results-table"})
    if table is None:
        print(f'No results table found on page #{page+1}; skipping it.')
        continue
    rows = table.find_all('tr')

    # In each row, skipping the header row
    for tr in rows[1:]:

        severity_cell = tr.find("td", {"nowrap": "nowrap"})
        id_cell = tr.find("th", {"nowrap": "nowrap"})
        link = id_cell.find("a") if id_cell is not None else None
        if severity_cell is None or link is None:
            # Not a regular result row: nothing to identify or score.
            continue

        # Use the V2 score wherever it appears among the spans. Rows without a
        # V2 score keep the 10.0 fallback, so they are never filtered out.
        # `sev` is reset for every row and cannot leak from the previous one.
        sev = 10.0
        for span in severity_cell.find_all("span"):
            if "V2" in span.text:
                sev = float(span.text.split()[1])
                break

        if sev < threshold:
            continue

        # The first <td> of the row holds the summary paragraph and the date span
        details_cell = tr.find("td")
        summary_tag = details_cell.find("p")
        date_tag = details_cell.find("span")

        # Store one value per list for this row; use "" when a piece is missing
        severities.append(sev)
        urls.append(url_prefix + link["href"])
        vul_IDs.append(link.text)
        summaries.append(summary_tag.text if summary_tag is not None else "")
        # Keep only the part before the "-0..." timezone offset
        publish_dates.append(date_tag.text.split("-0")[0] if date_tag is not None else "")

We are on page #1.
https://nvd.nist.gov/vuln/search/results?form_type=Advanced&results_type=overview&query=python&search_type=all&cvss_version=2&pub_start_date=10%2f13%2f2012&pub_end_date=11%2f30%2f2015&startIndex=0
We are on page #2.
https://nvd.nist.gov/vuln/search/results?form_type=Advanced&results_type=overview&query=python&search_type=all&cvss_version=2&pub_start_date=10%2f13%2f2012&pub_end_date=11%2f30%2f2015&startIndex=20
We are on page #3.
https://nvd.nist.gov/vuln/search/results?form_type=Advanced&results_type=overview&query=python&search_type=all&cvss_version=2&pub_start_date=10%2f13%2f2012&pub_end_date=11%2f30%2f2015&startIndex=40
We are on page #4.
https://nvd.nist.gov/vuln/search/results?form_type=Advanced&results_type=overview&query=python&search_type=all&cvss_version=2&pub_start_date=10%2f13%2f2012&pub_end_date=11%2f30%2f2015&startIndex=60
We are on page #5.
https://nvd.nist.gov/vuln/search/results?form_type=Advanced&results_type=overview&query=python&search_type=all&cvs

In [17]:
# Print one report entry per collected vulnerability, numbered from 1.
# The ID, severity, publish date and detail URL are read from the same
# position of their respective result lists.
for position in range(len(vul_IDs)):
    vul_id = vul_IDs[position]
    print(f'No. {position + 1}')
    print(f'Vul_ID: {vul_id}')
    print(f'Severity: {severities[position]}')
    print(f'Publish Date: {publish_dates[position]}')
    print(f'For more information, visit: {urls[position]}')
    print("----------------------------------------------------------")

No. 1
Vul_ID: CVE-2015-7828
Severity: 10.0
Publish Date: November 10, 2015; 12:59:05 PM 
For more information, visit: https://nvd.nist.gov/vuln/detail/CVE-2015-7828
----------------------------------------------------------
No. 2
Vul_ID: CVE-2014-2331
Severity: 8.5
Publish Date: August 31, 2015; 02:59:03 PM 
For more information, visit: https://nvd.nist.gov/vuln/detail/CVE-2014-2331
----------------------------------------------------------
No. 3
Vul_ID: CVE-2015-3446
Severity: 9.3
Publish Date: May 01, 2015; 11:59:08 AM 
For more information, visit: https://nvd.nist.gov/vuln/detail/CVE-2015-3446
----------------------------------------------------------
No. 4
Vul_ID: CVE-2014-8165
Severity: 10.0
Publish Date: February 19, 2015; 10:59:05 AM 
For more information, visit: https://nvd.nist.gov/vuln/detail/CVE-2014-8165
----------------------------------------------------------
No. 5
Vul_ID: CVE-2012-5493
Severity: 8.5
Publish Date: September 30, 2014; 10:55:06 AM 
For more information, vi