In [1]:
import requests
import json
import pandas as pd 
import time
from bs4 import BeautifulSoup as bs
from requests.exceptions import MissingSchema

In [2]:
# Wall-clock timestamp taken before any scraping starts; the timing cell near the
# end of the notebook subtracts it from time.time() to report the total run time.
start_time = time.time()

In [3]:
# Site root of the directory being scraped.
base_url = "https://www.adapt.io/"

# First page of the telecommunications industry listing (companies starting with "A").
url = "https://www.adapt.io/directory/industry/telecommunications/A-1"

In [4]:
# HTML parser: download a page and parse it with BeautifulSoup

def make_soup(url):
    """Download ``url`` with ``requests.get`` and return the response body
    parsed by BeautifulSoup using the built-in ``html.parser``."""
    response = requests.get(url)
    return bs(response.content, 'html.parser')

In [5]:
# List of Industry companies
# For company_index.json

def company_index(url):
    """Collect the company names and profile links listed on one directory page.

    Every anchor with an ``href`` inside the directory list container is
    printed, recorded in the returned lists and appended as a row to the
    module-level ``company_index_df``.

    Anchors without an ``href`` are skipped entirely so that the two returned
    lists (and the DataFrame) always stay aligned index-for-index; callers
    pair ``headings[i]`` with ``links[i]``.

    Args:
        url: Address of the directory page to scrape.

    Returns:
        A ``(headings, links)`` tuple of equally long lists.

    Raises:
        AttributeError: if the directory list container is not on the page.
    """
    comp_soup = make_soup(url)
    container = comp_soup.find("div", attrs={"class": "DirectoryList_seoDirectoryList__aMaj8"})

    links = []
    headings = []

    for row in container.find_all('a'):
        link = row.attrs.get('href')
        if link is None:
            # No target to scrape later; recording the name alone would
            # desynchronise the names/links lists.
            continue

        heading = row.text
        print(f"{heading} - {link}")
        headings.append(heading)
        links.append(link)

        # Appending to DF
        company_index_df.loc[company_index_df.shape[0]] = [heading, link]

    return headings, links

In [6]:
# Go to the next page

def next_page(url):
    """Return the ``href`` of the next-page link found on the page at ``url``.

    If an ``AttributeError`` occurs while looking the link up (for instance
    because the pager element is missing), "End of Pages" is printed and
    ``None`` is returned.
    """
    try:
        print("New Page")
        pager = make_soup(url).find("div", attrs={"class": "DirectoryList_actionBtnLink__Seqhh undefined"})
        return pager.find('a').get('href')
    except AttributeError:
        print("End of Pages")
        return None

In [7]:
# Basic company information such as revenue, head count, industry and location

def _company_field(section, label):
    """Return the value displayed next to ``label`` inside ``section``.

    The value is read from the second ``<span>`` under the label's
    grandparent element; "Null" is returned when the label text is absent.
    """
    label_node = section.find(string=label)
    if label_node is None:
        return "Null"
    return label_node.parent.parent.find_all("span")[1].text


def _company_webdomain(website):
    """Strip a leading ``http://`` or ``https://`` and a leading ``www.``."""
    domain = website.strip()
    for scheme in ("http://", "https://"):
        if domain.startswith(scheme):
            domain = domain[len(scheme):]
            break
    if domain.startswith("www."):
        domain = domain[len("www."):]
    return domain


def _scrape_company_row(company_soup, name):
    """Build one ``company_df`` row from a parsed company page."""
    top_info = company_soup.find("div", attrs={"class": "CompanyTopInfo_infoWrapper__12xGT"})
    website = top_info.find("div", attrs={"class": "CompanyTopInfo_websiteUrl__13kpn"}).text
    return [
        name,
        website,
        _company_webdomain(website),
        _company_field(top_info, "Revenue"),
        _company_field(top_info, "Head Count"),
        _company_field(top_info, "Industry"),
        _company_field(top_info, "Location"),
    ]


def _scrape_similar_names(company_soup):
    """Return the similar-company names on the page, or ["Null"] if the list is absent."""
    container = company_soup.find("div", attrs={"class": "SimilarCompanies_similarCompanyList__247e7"})
    if container is None:
        return ["Null"]
    cards = container.find_all("div", attrs={"class": "SimilarCompanies_roundedBorder__NAU02 undefined"})
    return [card.find("a").text for card in cards]


def company_info(url, name, max_retries=10):
    """Scrape a company page into ``company_df`` and ``similar_company_df``.

    The page is fetched and parsed completely before anything is written to
    the module-level DataFrames, so a failure half-way through (followed by a
    retry) can no longer leave duplicate rows behind.

    Args:
        url: Address of the company profile page.
        name: Company name stored in the ``company_name`` column.
        max_retries: How many times to retry after an ``AttributeError``
            (maintenance page or missing element) before giving up on this
            company. ``None`` retries forever.

    Returns:
        None. On success one row is appended to ``company_df`` and one row
        per similar company (or a single "Null" row) to ``similar_company_df``.
        When retries are exhausted nothing is appended.
    """
    failures = 0
    while True:
        try:
            print("Company Info Extracting...")
            company_soup = make_soup(url)

            if (company_soup.find("title").text == "Sorry, we're down for maintenance! | Adapt"):
                raise AttributeError

            company_row = _scrape_company_row(company_soup, name)

            # Similar companies
            print("Similar Companies Extracting...")
            similar_names = _scrape_similar_names(company_soup)

        except AttributeError:
            failures += 1
            if max_retries is not None and failures > max_retries:
                print(f"Giving up on {url} after {max_retries} retries")
                return
            print("Retrying in 2 seconds...")
            time.sleep(2)
            continue

        # Everything parsed cleanly: only now touch the shared DataFrames.
        company_df.loc[company_df.shape[0]] = company_row
        for similar_company in similar_names:
            similar_company_df.loc[similar_company_df.shape[0]] = [name, similar_company]
        return

In [8]:
# Company contact details

def _contact_row(card, company_name):
    """Build one ``company_contacts_df`` row from a single contact card.

    The contact's own profile page (linked from the card) is fetched to read
    the "Department" value; "Null" is used when it is not shown. The e-mail
    domain is the part after the first "@" of the e-mail button text, or
    "Null" when the text contains no "@".
    """
    contact_name = card.find("div", attrs={"class": "TopContacts_contactName__3N-_e"}).text  # Name of Employee
    job_title = card.find("p", attrs={"class": "TopContacts_jobTitle__3M7A2"}).text  # Job Title
    email_text = card.find("button", attrs={"class": "simpleButton mailPhoneBtn emailBtn"}).text
    email_parts = email_text.split("@")
    email_domain = email_parts[1] if len(email_parts) > 1 else "Null"

    # Follow THIS card's link (not the first card's) to find the department.
    contact_soup = make_soup(card.find("a").get('href'))
    department_label = contact_soup.find(string="Department")
    if department_label is None:
        contact_department = "Null"
    else:
        contact_department = department_label.parent.parent.find_all("span")[1].text

    return [company_name, contact_name, job_title, email_domain, contact_department]


def company_contact(url, company_name, max_retries=10):
    """Scrape the top contacts of a company page into ``company_contacts_df``.

    All contact cards are parsed before any row is written to the
    module-level DataFrame, so a retry after a partial failure cannot
    duplicate contacts that were already processed.

    Args:
        url: Address of the company profile page.
        company_name: Company name stored in the ``company_name`` column.
        max_retries: How many times to retry after an ``AttributeError``
            (maintenance page or missing element) before giving up on this
            company. ``None`` retries forever.

    Returns:
        None. On success one row per contact is appended to
        ``company_contacts_df``; when retries are exhausted nothing is appended.
    """
    failures = 0
    while True:
        try:
            print("Company Contacts Extracting...")
            company_soup = make_soup(url)

            if (company_soup.find("title").text == "Sorry, we're down for maintenance! | Adapt"):
                raise AttributeError

            contact_cards = company_soup.find_all("div", attrs={"class": "TopContacts_roundedBorder__1a3yB undefined"})
            contact_rows = [_contact_row(card, company_name) for card in contact_cards]

        except AttributeError:
            failures += 1
            if max_retries is not None and failures > max_retries:
                print(f"Giving up on contacts of {url} after {max_retries} retries")
                return
            print("Retrying in 2 seconds...")
            time.sleep(2)
            continue

        # Appending to DF only once the whole page parsed cleanly
        for contact_row in contact_rows:
            company_contacts_df.loc[company_contacts_df.shape[0]] = contact_row
        return

In [9]:
# Company Profile

def company_profiles(url, name):
    """Run both scrapers for one company: first the basic details
    (``company_info``), then the contact list (``company_contact``)."""
    for scrape in (company_info, company_contact):
        scrape(url, name)

In [10]:
# Company profile dataframes

# Empty result tables. The scraping functions append rows to these
# module-level frames, so they must exist before any scraper is called.

# One row per company found in the directory listing.
company_index_df = pd.DataFrame(
    columns=[
        'company_name',
        'source_url',
    ]
)

# One row per company profile page.
company_df = pd.DataFrame(
    columns=[
        'company_name',
        'company_website',
        'company_webdomain',
        'company_revenue',
        'company_employee_size',
        'company_industry',
        'company_location',
    ]
)

# One row per contact listed on a company page.
company_contacts_df = pd.DataFrame(
    columns=[
        'company_name',
        'contact_name',
        'contact_jobtitle',
        'contact_email_domain',
        'contact_department',
    ]
)

# One row per (company, similar company) pair.
similar_company_df = pd.DataFrame(
    columns=[
        'company_name',
        'similar_company',
    ]
)

In [11]:
# Parallel accumulators filled by the pagination loop: company_links[i] is the
# profile URL of the company named company_names[i].
company_links, company_names = [], []

In [12]:
# Walk the directory page by page, collecting every company name and link.
# A separate cursor is used so the starting `url` is left intact and this cell
# can be re-run; the loop stops explicitly when next_page() reports that there
# is no further page instead of relying on requests rejecting a None URL.
page_url = url
while page_url:
    try:
        page_names, page_links = company_index(page_url)
        company_names.extend(page_names)
        company_links.extend(page_links)
        page_url = next_page(page_url)
    except requests.exceptions.RequestException as e:
        # A real network/HTTP problem: say so instead of pretending the
        # listing simply ended.
        print(f"Stopping pagination, request for {page_url} failed: {e}")
        break
else:
    # Reached only when the loop ended without `break`, i.e. no next page.
    print("End")

A + Communications and Security - https://www.adapt.io/company/a--communications-and-security
A&A Technology Group - https://www.adapt.io/company/a-a-technology-group
A Better Answer - https://www.adapt.io/company/a-better-answer-4
A Cheerful Giver - https://www.adapt.io/company/a-cheerful-giver-inc-1
A-CTI - https://www.adapt.io/company/a-cti-1
A P G Inc - https://www.adapt.io/company/a-p-g-inc
Crexendo Business Solutions - https://www.adapt.io/company/a-r-management-solutions--llc
A.V. Lauttamus Communications, Inc. - https://www.adapt.io/company/a-v--lauttamus-communications--inc-
A-V Services, Inc. - https://www.adapt.io/company/a-v-services--inc-
A1 Teletronics - https://www.adapt.io/company/a1-teletronics-inc-
Abacus Group, Inc. - https://www.adapt.io/company/abacus-group--inc--1
Abadi Group - https://www.adapt.io/company/abadi-group-1
ABcom - https://www.adapt.io/company/abcom-llc
Abilita - https://www.adapt.io/company/abilita-5
ABIS - https://www.adapt.io/company/abis-3
Abmnus 

Akuvox - https://www.adapt.io/company/akuvox-networks
Alamon - https://www.adapt.io/company/alamon--inc
Alaska Communications - https://www.adapt.io/company/alaska-communications-3
Alepo - https://www.adapt.io/company/alepo-1
ATS - https://www.adapt.io/company/algerie-telecom-satellite
Algérie Télécom - https://www.adapt.io/company/algerietelecom
PT. Aliansi Sakti - https://www.adapt.io/company/aliansi-sakti-
Alianza - https://www.adapt.io/company/alianza--inc-
Alien Inc - https://www.adapt.io/company/alien-inc-1
Alive Telecom - https://www.adapt.io/company/alive-telecom
Alkan CIT - https://www.adapt.io/company/alkan-cit
Alkan Telecom - https://www.adapt.io/company/alkan-telecom-1
All Access Communications - https://www.adapt.io/company/all-access-communications
All Business Communications - https://www.adapt.io/company/all-business-communications
All-Mode Communications - https://www.adapt.io/company/all-mode-communications
All West Communications - https://www.adapt.io/company/all-we

Arctic Slope Telephone Association Cooperative (ASTAC) - https://www.adapt.io/company/arctic-slope-telephone-association-cooperative--astac-
Ardmore Telephone - https://www.adapt.io/company/ardmore-telephone-co
Suddenlink - https://www.adapt.io/company/ares-energy--ltd-
Aria Technologies - https://www.adapt.io/company/aria-technologies
ARIA Technologies - https://www.adapt.io/company/aria-technologies--inc--2
Ariss Enterprises Inc - https://www.adapt.io/company/ariss-enterprises-inc
Arkadin Collaboration Services - https://www.adapt.io/company/arkadin-1
Arkwest Communications - https://www.adapt.io/company/arkwest-communications
Armstrong Utilities, Inc. - https://www.adapt.io/company/armstrong-utilities--inc--1
ARPU Telecommunication Services (ARPU+) - https://www.adapt.io/company/arpu-telecommunication-services--arpu--
Array Networks, Inc. - https://www.adapt.io/company/array-networks--inc-
ArrayComm - https://www.adapt.io/company/arraycomm
Arrow Voice & Data - https://www.adapt.io/c

In [13]:
# Convert to json
# company_index.json

# Deliberately NOT named `company_index`: that name belongs to the scraping
# function, and rebinding it to a list makes the pagination cell fail with
# "'list' object is not callable" when it is run again in the same kernel.
company_index_records = company_index_df.to_dict('records')

json_object = json.dumps(company_index_records, indent = 4)

with open('company_index.json', 'w') as f:
    f.write(json_object)

In [14]:
# Scrape the profile and contacts of every company collected from the directory.
for profile_link, profile_name in zip(company_links, company_names):
    print(profile_link)
    company_profiles(profile_link, profile_name)

https://www.adapt.io/company/a--communications-and-security
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/a-a-technology-group
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/a-better-answer-4
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/a-cheerful-giver-inc-1
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/a-cti-1
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/a-p-g-inc
Company Info Extracting...
Retrying in 2 seconds...
Company Info Extracting...
Similar Companies 

Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/acs-86
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/actel
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/actelis-networks
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/actify--llc--subsidiary-of-brightpoint-
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/actiontec-electronics
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/active-voice
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retryi

Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/advancetec-industries--inc--1
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/advantage-360
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/advantage-communications-group--llc
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/advantech-b-b-smartworx
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/advantel-networks
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
htt

Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/airvana-4
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/airvana-inc
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/airwave-networks
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/airway-technologies-1
Company Info Extracting...
Retrying in 2 seconds...
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/ais-engineering--inc-
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/ajm-framers-inc
Company Info Extracting...
Similar Companies Extracting..

https://www.adapt.io/company/alphion-in
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/alpine-power-systems
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/alpine-wireless
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/altai-technologies-ltd
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/altaworx--llc--1
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/altice-business-usa-2
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://

Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/amgoo-telecom-co---ltd-
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/amimon
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/amirit-technologies--inc--1
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/ammacore--inc
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/amp-communications--llc
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/amphenol-antenna-solutions--inc--1
Company Info Extracting...
Similar Companies Extracting...
Company Co

Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/applied-global-technologies--agt--1
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/applied-optoelectronics--inc--1
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/appota
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/aptilo-networks-ab
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/apwireless-infrastructure-partners--llc-1
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/apx-net--inc-1
Company Info Extracting...
Similar Companies Extracting..

Retrying in 2 seconds...
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/assistt
Company Info Extracting...
Retrying in 2 seconds...
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/assraljawal
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/astellia
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/astound-broadband
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/at-conference---an-arkadin-company
Company Info Extractin

Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/averistar-com
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/aviat-networks
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/aviat-networks-1
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
Retrying in 2 seconds...
Company Contacts Extracting...
https://www.adapt.io/company/avid-communications--llc
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/avispl-ca
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/avotus-1
Company Info Extracting...
Similar Companies Extracting...
Company Contacts Extracting...
https://www.adapt.io/company/avst
Company Info Extracting...
Similar Companies Extra

In [15]:
# Report the wall-clock time elapsed since start_time was recorded.
elapsed_seconds = time.time() - start_time
print(f"--- {elapsed_seconds} seconds ---")

--- 6701.124981641769 seconds ---


In [21]:
# Write each result table to its own CSV file, without the index column.
for frame, csv_path in (
    (company_index_df, "company_index_df.csv"),
    (company_df, "company_df.csv"),
    (company_contacts_df, "company_contacts_df.csv"),
    (similar_company_df, "similar_company_df.csv"),
):
    frame.to_csv(csv_path, index=False)