# Zendesk Crawling for client info

- [Zendesk Customer Page](https://www.zendesk.com/why-zendesk/customers/)

In [142]:
# Configs
# Show the value of every expression statement in a cell, not only the last
# one; the cells below that display several f-strings at once rely on this.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [143]:
# imports
import requests
from bs4 import BeautifulSoup
import csv

In [57]:
# Download the Zendesk customers page and parse it into a BeautifulSoup tree.
site_url = 'https://www.zendesk.com/why-zendesk/customers/'

# The timeout keeps a stalled connection from hanging the notebook, and
# raise_for_status() stops here on an HTTP error instead of letting an error
# page be parsed as if it were the customer list.
response = requests.get(site_url, timeout=30)
response.raise_for_status()
site_content = response.content

# Name the parser explicitly so the resulting tree does not depend on which
# optional parser libraries happen to be installed.
soup = BeautifulSoup(site_content, 'html.parser')

In [1]:
# print(soup.prettify())

In [58]:
# Sanity-check the parsed document by looking at its <title> element,
# first as a tag and then as the text it contains.
page_title = soup.title

f'title tag - { page_title }'

f'title string - { page_title.string }'

'title tag - <title>Customers - What Companies Use to Service You | Zendesk</title>'

'title string - Customers - What Companies Use to Service You | Zendesk'

In [117]:
# Collect every element carrying the 'customer-tile' class; each one holds
# the information for a single customer.
customer_tiles = soup.find_all(class_='customer-tile')

tile_count = len(customer_tiles)
f'Number of companies found: { tile_count }'

'Number of companies found: 166'

In [114]:
# Inspect the first tile on its own to see how a customer is marked up.
company_1 = customer_tiles[0]

f'Company name - { company_1.h4.string }'

# The filter labels are the elements with class 'filter' inside the tile's
# 'tags' container.
tag_links = company_1.find(class_='tags').find_all(class_='filter')
company_1_tags = [link.string for link in tag_links]

f'company tags - { company_1_tags }'

'Customer html -'
print(company_1.prettify())


'Company name - Squarespace'

"company tags - ['Americas', 'Asia Pacific', 'Chat', 'Europe', 'Guide', 'Internet & Mobile', 'Software & Technology', 'Support', 'Talk']"

'Customer html -'

<div class="customer-tile americas asia-pacific product-chat europe product-guide internet-mobile software-technology product-support product-talk">
 <a class="goto-read-story" href="https://www.zendesk.com/customer/squarespace/">
  <div class="customer-background" style="background-image: url('https://d26a57ydsghvgx.cloudfront.net/product/Customer%20Story%20Images/Squarespace6.jpg'); background-size: cover;">
  </div>
 </a>
 <div class="headline">
  <a class="goto-read-story" data-sl-variant="squarespace" href="https://www.zendesk.com/customer/squarespace/">
   <h4>
    Squarespace
   </h4>
  </a>
  <div class="tags">
   <a class="filter" href="#americas">
    Americas
   </a>
   ,
   <a class="filter" href="#asia-pacific">
    Asia Pacific
   </a>
   ,
   <a class="filter" href="#product-chat">
    Chat
   </a>
   ,
   <a class="filter" href="#europe">
    Europe
   </a>
   ,
   <a class="filter" href="#product-guide">
    Guide
   </a>
   ,
   <a class="filter" href="#internet-mobil

In [212]:
# Known filter labels, grouped by kind. The record-building cell uses these
# lists to sort each company's tags into products, regions and industries.

# Zendesk products
products_list = [
    'Chat',
    'Guide',
    'Support',
    'Talk',
    'Explore',
    'Connect',
]

# Geographic regions
regions_list = [
    'Americas',
    'Asia Pacific',
    'Europe',
]

# Industry verticals
industries_list = [
    'Internet & Mobile', 'Software & Technology', 'Retail & eCommerce',
    'Travel & Hospitality', 'Financial Services', 'Healthcare',
    'Media & Entertainment', 'Telecommunications', 'Energy',
    'Non-Profit', 'Government', 'Education',
]

In [215]:
# get info about all companies
def _join_matching(tags, allowed):
    """Return the entries of `tags` that appear in `allowed`, joined by ', '.

    The order of `tags` is kept, and an empty string is returned when nothing
    matches. Joining the strings directly (rather than editing the repr of a
    list) leaves apostrophes and brackets inside a label untouched.
    """
    return ', '.join(tag for tag in tags if tag in allowed)


output = []
for company in customer_tiles:
    # All filter labels attached to this company's tile.
    tags = [tag.string for tag in company.find(class_='tags').find_all(class_='filter')]

    # One flat record per company; each multi-valued field is stored as
    # comma-separated text so it occupies a single column when exported.
    output.append({
        'company_name': company.h4.string,
        'products': _join_matching(tags, products_list),
        'regions': _join_matching(tags, regions_list),
        'industries': _join_matching(tags, industries_list),
    })

# output

In [216]:
%%capture
# capture will capture all output and not show it 

# generate a csv document with output
with open('Documents/Notebooks/zendesk_clients.csv', 'w', newline='') as csv_file:
    field_names = ['company_name', 'products', 'regions', 'industries']
    writer = csv.DictWriter(csv_file, fieldnames=field_names)
    
    writer.writeheader()
    
    for item in output:
        writer.writerow(item)