## Import Packages and Libraries

In [1]:
import requests
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup

### Send a browser-like `User-Agent` header, since the site may respond with 403 (Forbidden) without one

In [2]:
# Browser-like User-Agent sent with the request so the site is less likely
# to reject it with a 403 response.
headers={'User-Agent':'Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36'}
url='https://www.ambitionbox.com/list-of-companies?page=1'

# requests has no default timeout, so an unresponsive server would block this
# cell forever; raise_for_status() surfaces an HTTP error (e.g. 403) right here
# instead of letting the following cells parse an error page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
webpage = response.text

### Convert the webpage to a BeautifulSoup object

In [3]:
# Parse the downloaded HTML with the lxml parser.
soup = BeautifulSoup(webpage, features='lxml')

In [4]:
# Sanity check: show the text of the first <h1> on the page.
soup.find_all('h1')[0].get_text()

'List of companies in India'

In [5]:
# Every company card on the page is a <div class="company-content-wrapper">.
company = soup.find_all('div', attrs={'class': 'company-content-wrapper'})
len(company)

30

### Collecting data from the first page

In [6]:
# Parallel per-field lists: entry k of each list describes the k-th company card.
name, rating, reviews = [], [], []
ctype, hq, how_old, no_of_employee = [], [], [], []

for i in company:
    # The 'infoEntity' paragraphs are read by position:
    # 0 -> company type, 1 -> headquarters, 2 -> age, 3 -> employee count.
    info = i.find_all('p', class_='infoEntity')

    name.append(i.find('h2').text.strip())
    rating.append(i.find('p', class_='rating').text.strip())
    reviews.append(i.find('a', class_='review-count').text.strip())
    ctype.append(info[0].text.strip())
    hq.append(info[1].text.strip())
    how_old.append(info[2].text.strip())
    no_of_employee.append(info[3].text.strip())

# One row per company card, one column per scraped field.
first_page_columns = {
    'name': name,
    'rating': rating,
    'reviews': reviews,
    'company_type': ctype,
    'Head_Quarters': hq,
    'Company_Age': how_old,
    'No_of_Employee': no_of_employee,
}
df = pd.DataFrame(first_page_columns)
  

In [7]:
# Preview the first five scraped companies.
df.head(n=5)

Unnamed: 0,name,rating,reviews,company_type,Head_Quarters,Company_Age,No_of_Employee
0,TCS,3.9,(44k Reviews),Public,"Mumbai,Maharashtra + 257 more",54 years old,1 Lakh+ Employees (India)
1,Accenture,4.2,(30k Reviews),Public,Dublin + 137 more,33 years old,1 Lakh+ Employees (India)
2,Cognizant,4.0,(26.8k Reviews),Private,Teaneck. New Jersey. + 89 more,28 years old,1 Lakh+ Employees (India)
3,ICICI Bank,4.0,(32.5k Reviews),Public,"Mumbai,Maharashtra + 1024 more",28 years old,1 Lakh+ Employees (India)
4,Wipro,3.9,(25.5k Reviews),Public,"Bangalore/Bengaluru,Karnataka + 261 more",77 years old,1 Lakh+ Employees (India)


## Running code to extract data from all the pages of the website

In [8]:
# Scrape configuration. The site has about 1000 listing pages; set LAST_PAGE
# to 1000 for a full scrape. It is kept at 2 here to simulate on two pages only.
LAST_PAGE = 2
PAGE_URL = 'https://www.ambitionbox.com/list-of-companies?page={}'
REQUEST_TIMEOUT = 30  # seconds; requests would otherwise wait indefinitely


def _text_or_nan(tag):
    """Return the stripped text of ``tag``, or ``np.nan`` when ``tag`` is None."""
    if tag is None:
        return np.nan
    return tag.text.strip()


def _nth_or_none(tags, position):
    """Return ``tags[position]``, or None when the list is too short."""
    if position < len(tags):
        return tags[position]
    return None


# One DataFrame per page, concatenated once after the loop.
# (DataFrame.append was removed in pandas 2.0 and re-copied the frame each time.)
page_frames = []

for j in range(1, LAST_PAGE + 1):
    # Send the same browser-like headers as the single-page request above,
    # so these requests are not more likely to be rejected with 403.
    webpage = requests.get(PAGE_URL.format(j), headers=headers, timeout=REQUEST_TIMEOUT).text
    soup = BeautifulSoup(webpage, 'lxml')
    company = soup.find_all('div', class_='company-content-wrapper')

    name = []
    rating = []
    reviews = []
    ctype = []
    hq = []
    how_old = []
    no_of_employee = []

    for i in company:
        # The 'infoEntity' paragraphs are read by position:
        # 0 -> company type, 1 -> headquarters, 2 -> age, 3 -> employee count.
        info = i.find_all('p', class_='infoEntity')

        # A missing element becomes NaN, so all lists stay the same length
        # and the row is still recorded.
        name.append(_text_or_nan(i.find('h2')))
        rating.append(_text_or_nan(i.find('p', class_='rating')))
        reviews.append(_text_or_nan(i.find('a', class_='review-count')))
        ctype.append(_text_or_nan(_nth_or_none(info, 0)))
        hq.append(_text_or_nan(_nth_or_none(info, 1)))
        how_old.append(_text_or_nan(_nth_or_none(info, 2)))
        no_of_employee.append(_text_or_nan(_nth_or_none(info, 3)))

    df = pd.DataFrame({'name': name,
                       'rating': rating,
                       'reviews': reviews,
                       'company_type': ctype,
                       'Head_Quarters': hq,
                       'Company_Age': how_old,
                       'No_of_Employee': no_of_employee,
                       })

    page_frames.append(df)

final = pd.concat(page_frames, ignore_index=True)

In [9]:
# Preview the first five rows of the combined multi-page result.
final.head(n=5)

Unnamed: 0,name,rating,reviews,company_type,Head_Quarters,Company_Age,No_of_Employee
0,TCS,3.9,(44k Reviews),Public,"Mumbai,Maharashtra + 257 more",54 years old,1 Lakh+ Employees (India)
1,Accenture,4.2,(30k Reviews),Public,Dublin + 137 more,33 years old,1 Lakh+ Employees (India)
2,Cognizant,4.0,(26.8k Reviews),Private,Teaneck. New Jersey. + 89 more,28 years old,1 Lakh+ Employees (India)
3,ICICI Bank,4.0,(32.5k Reviews),Public,"Mumbai,Maharashtra + 1024 more",28 years old,1 Lakh+ Employees (India)
4,Wipro,3.9,(25.5k Reviews),Public,"Bangalore/Bengaluru,Karnataka + 261 more",77 years old,1 Lakh+ Employees (India)
