In [7]:
import numpy as np
import scrapy
import pandas as pd

In [4]:
class BusinessSpider(scrapy.Spider):
    """Scrape business records and save their agents and owners to a CSV file.

    For every business whose title starts with ``X`` the spider collects the
    "Commercial Registered Agent", "Registered Agent" and "Owner" details and
    writes one row per business to ``businesses.csv``.

    NOTE(review): the page structure is not visible from this notebook. The
    class names used below (``title-box``, ``details-list``, ``detail``,
    ``label``, ``value``) are taken from the original selectors and must be
    confirmed against the real response. The URL path (``/api/...``) suggests
    the endpoint may return JSON rather than HTML; if it does, the selectors
    here will match nothing and the response should be parsed as JSON
    instead -- TODO confirm.
    """

    name = "business"

    # Output column name -> label text of the detail the column is read from.
    _DETAIL_COLUMNS = {
        'Commercial Registered Agent': 'Commercial Registered Agent',
        'Registered Agent': 'Registered Agent',
        'Owners': 'Owner',
    }

    # First element after a title box whose class list contains "details-list".
    # NOTE(review): assumes each business's details block follows its title
    # box in document order -- confirm against the real markup.
    _DETAILS_XPATH = (
        'following::*[contains(concat(" ", normalize-space(@class), " "), '
        '" details-list ")][1]'
    )

    def start_requests(self):
        """Yield the initial request(s); every response is handled by ``parse``."""
        urls = [
            'https://firststop.sos.nd.gov/api/Records/businesssearch'
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Extract agents/owners for businesses starting with ``X`` and save a CSV.

        Args:
            response: the Scrapy response for the business search page.

        Side effects:
            Writes ``businesses.csv`` in the current working directory (only a
            header row when no business matches) and logs the file name.
        """
        rows = []
        for title_box in response.css('.title-box'):
            # normalize-space(.) flattens the element's text and trims it.
            title = title_box.xpath('normalize-space(.)').get(default='')
            if not title.startswith('X'):
                continue

            # Look only inside the details block that belongs to this business.
            details = self._details_by_label(title_box.xpath(self._DETAILS_XPATH))

            row = {'Business': title}
            for column, label in self._DETAIL_COLUMNS.items():
                # Missing details stay None, which becomes an empty CSV cell.
                row[column] = details.get(label)
            rows.append(row)

        # Passing columns explicitly keeps the header stable even with no rows.
        table = pd.DataFrame(rows, columns=['Business', *self._DETAIL_COLUMNS])
        filename = 'businesses.csv'
        table.to_csv(filename, index=False)
        self.log('Saved file %s' % filename)

    @staticmethod
    def _details_by_label(details_list):
        """Map each detail label inside ``details_list`` to its value text.

        Args:
            details_list: selector (list) for a business's details block; may
                be empty, in which case an empty dict is returned.

        Returns:
            dict mapping label text to value text. When a label occurs more
            than once (e.g. several owners), the values are joined with "; ".
        """
        values_by_label = {}
        for detail in details_list.css('.detail'):
            label = detail.css('.label').xpath('normalize-space(.)').get(default='')
            value = detail.css('.value').xpath('normalize-space(.)').get(default='')
            if label and value:
                values_by_label.setdefault(label, []).append(value)
        return {label: '; '.join(values) for label, values in values_by_label.items()}

In [8]:
# The network cannot be plotted until the scraped data is available.
# Planned approach: plot each company as a node, and connect two companies
# with an edge whenever they share the same owner or registered agent.
# Ideally, edges would be drawn in different colors.