## Step1: Import Required Libraries

In [159]:
import pandas as pd
import numpy as np
from selenium import webdriver
from bs4 import BeautifulSoup
import requests
import os
from tqdm import tqdm
import time

### step2: Copy the url from required page source where the tables need to scrap and assign it to variable.

In [2]:
url = 'https://devtomanager.com/interviews/'

### Step 3:- Set the path to chrome driver to configure web driver to use Chrome browser.

In [3]:
browser = webdriver.Chrome()

### Step4:- Code to open the URL

In [4]:
browser.get(url)

### Step5:- Read the HTML from page source

In [5]:
html = browser.page_source
html

'<html lang="en-US"><head>\n        <title>\nInterviews with Software Managers | Developer to Manager\n</title>\n\n        <meta charset="utf-8">\n        <meta name="viewport" content="width=device-width, initial-scale=1.0, shrink-to-fit=no">\n\n        <meta name="author" content="Siddhant Goel">\n        <meta name="keywords" content="career, development, engineering-management, knowledge, leadership, management, platform, software">\n\n        \n<meta name="description" content="At Developer to Manager, we regularly interview software engineering managers on how they approach management and leadership.">\n\n\n        \n<meta name="twitter:card" content="summary">\n<meta name="twitter:title" content="Interviews with Software Managers | Developer to Manager">\n<meta name="twitter:description" content="About transitioning from development to management">\n<meta name="twitter:site" content="@devtomanager">\n<meta name="twitter:image" content="https://devtomanager.com/static/images/logo

### Step 6: Parse HTML code and grab tables with Beautiful Soup

In [6]:
soup = BeautifulSoup(html,'html.parser')
soup

<html lang="en-US"><head>
<title>
Interviews with Software Managers | Developer to Manager
</title>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1.0, shrink-to-fit=no" name="viewport"/>
<meta content="Siddhant Goel" name="author"/>
<meta content="career, development, engineering-management, knowledge, leadership, management, platform, software" name="keywords"/>
<meta content="At Developer to Manager, we regularly interview software engineering managers on how they approach management and leadership." name="description"/>
<meta content="summary" name="twitter:card"/>
<meta content="Interviews with Software Managers | Developer to Manager" name="twitter:title"/>
<meta content="About transitioning from development to management" name="twitter:description"/>
<meta content="@devtomanager" name="twitter:site"/>
<meta content="https://devtomanager.com/static/images/logo-wide.png" name="twitter:image"/>
<meta content="https://devtomanager.com/interviews/page/1/" pr

In [7]:
soup.title.text.strip()

'Interviews with Software Managers | Developer to Manager'

### Step7:- Extracting the Eployee Names from the Page using select method

In [8]:
emp_name = [i.text.strip().split(',')[0] for i in soup.select('h5.card-title')]
emp_name

['Aviv Ben-Yosef', 'Anand Safi', 'Shawn Axsom', 'Kevin Doyle', 'Arnab Sen']

### Step8:- Extracting the Eployee job role from the Page using select method

In [31]:
lst = [i.text.strip().split(',')[1] for i in soup.select('h5.card-title')]
lst

[' Tech Executive Consultant',
 ' Engineering Manager at Mark43',
 ' Senior Engineering Manager at Docker',
 ' CTO at patientMpower',
 ' Technical Delivery Manager at AKQA']

In [36]:
job_position = []
for i in lst:
    job_position.append(i.split('at')[0].strip(' '))

In [37]:
job_position

['Tech Executive Consultant',
 'Engineering Manager',
 'Senior Engineering Manager',
 'CTO',
 'Technical Delivery Manager']

### Step9:- Extracting the Eployee working Company from the Page

In [76]:
working = []
for i in lst:
    try:
        working.append(i.split('at ')[1].strip(' '))
    except:
        working.append(np.nan)

In [77]:
working

[nan, 'Mark43', 'Docker', 'patientMpower', 'AKQA']

### Step10:- Extracting the Quotes,date and tags from the Page using select method

In [116]:
quote = [i.text.strip() for i in soup.select('p.card-text')][0::2]
quote

['“I want to set up managers to succeed, no matter how much background they’ve already got.”',
 '“I very well understand where my interests lie, which is being an enabler first and then a creator rather than being a creator/ maker 100%.”',
 '“Your focus (and challenges) grows from your direct reports, to teams, to departments, executives, and then external customers and partnerships.”',
 '“If I can provide enough direction to allow people to focus on the jobs they’ll do better than I ever could, everybody wins.”',
 '“The switch to management can be highly rewarding and provide a level of independence, authority, and interactivity that a pure software engineering role may not offer.”']

In [126]:
 date = [i.text.strip().split('\n')[0] for i in soup.select('p.card-text')][1::2]
 date   

['August 16, 2021',
 'July 19, 2021',
 'July 05, 2021',
 'June 21, 2021',
 'May 10, 2021']

In [154]:
tags = [i.text.strip().split()[4:] for i in soup.select('p.card-text')][1::2]
tags

[['#coaching', '#consulting'],
 ['#coaching', '#frontend', '#full-stack', '#public-safety'],
 ['#backend', '#coaching', '#information-systems', '#web'],
 ['#full-stack', '#health-tech'],
 ['#consulting', '#digital-marketing', '#e-commerce', '#product']]

### Step11: Reusable Code for single page to extract the data

In [155]:
def Interviews():
    emp_name = [i.text.strip().split(',')[0] for i in soup.select('h5.card-title')]
    lst = [i.text.strip().split(',')[1] for i in soup.select('h5.card-title')]
    job_position = []
    for i in lst:
        job_position.append(i.split('at')[0].strip(' '))
    working = []
    for i in lst:
        try:
            working.append(i.split('at ')[1].strip(' '))
        except:
            working.append(np.nan)
    quote = [i.text.strip() for i in soup.select('p.card-text')][0::2]
    date = [i.text.strip().split('\n')[0] for i in soup.select('p.card-text')][1::2]
    tags = [i.text.strip().split()[4:] for i in soup.select('p.card-text')][1::2]
    return pd.DataFrame({'Employee Name':emp_name,'Job Position':job_position,'Working':working,'Interview quote':quote,'Date':date,'Tags':tags})    

### Step11: Reusable Code for multiple pages by using single page reusable code to extract the data

In [162]:
tables=[]
for i in tqdm(range(1,6)):
    url_pages=url.format(i)
    browser.get(url_pages)
    time.sleep(5)
    html = browser.page_source
    soup = BeautifulSoup(html,'html.parser')
    tables.append(Interviews())

100%|██████████| 5/5 [00:30<00:00,  6.20s/it]


### Step12: Concating the tables data which extracted from multiple pages 

In [166]:
interview = pd.concat(tables,axis=0)
interview

Unnamed: 0,Employee Name,Job Position,Working,Interview quote,Date,Tags
0,Aviv Ben-Yosef,Tech Executive Consultant,,"“I want to set up managers to succeed, no matt...","August 16, 2021","[#coaching, #consulting]"
1,Anand Safi,Engineering Manager,Mark43,“I very well understand where my interests lie...,"July 19, 2021","[#coaching, #frontend, #full-stack, #public-sa..."
2,Shawn Axsom,Senior Engineering Manager,Docker,“Your focus (and challenges) grows from your d...,"July 05, 2021","[#backend, #coaching, #information-systems, #web]"
3,Kevin Doyle,CTO,patientMpower,“If I can provide enough direction to allow pe...,"June 21, 2021","[#full-stack, #health-tech]"
4,Arnab Sen,Technical Delivery Manager,AKQA,“The switch to management can be highly reward...,"May 10, 2021","[#consulting, #digital-marketing, #e-commerce,..."
0,Aviv Ben-Yosef,Tech Executive Consultant,,"“I want to set up managers to succeed, no matt...","August 16, 2021","[#coaching, #consulting]"
1,Anand Safi,Engineering Manager,Mark43,“I very well understand where my interests lie...,"July 19, 2021","[#coaching, #frontend, #full-stack, #public-sa..."
2,Shawn Axsom,Senior Engineering Manager,Docker,“Your focus (and challenges) grows from your d...,"July 05, 2021","[#backend, #coaching, #information-systems, #web]"
3,Kevin Doyle,CTO,patientMpower,“If I can provide enough direction to allow pe...,"June 21, 2021","[#full-stack, #health-tech]"
4,Arnab Sen,Technical Delivery Manager,AKQA,“The switch to management can be highly reward...,"May 10, 2021","[#consulting, #digital-marketing, #e-commerce,..."


### Step13: Converting and saving the extracted table in to csv_file

In [167]:
interview.to_csv('interview.csv')

### Step14: Checking the saved csv_file, filtering and modifing it to proper format

In [170]:
df = pd.read_csv('interview.csv')
df

Unnamed: 0.1,Unnamed: 0,Employee Name,Job Position,Working,Interview quote,Date,Tags
0,0,Aviv Ben-Yosef,Tech Executive Consultant,,"“I want to set up managers to succeed, no matt...","August 16, 2021","['#coaching', '#consulting']"
1,1,Anand Safi,Engineering Manager,Mark43,“I very well understand where my interests lie...,"July 19, 2021","['#coaching', '#frontend', '#full-stack', '#pu..."
2,2,Shawn Axsom,Senior Engineering Manager,Docker,“Your focus (and challenges) grows from your d...,"July 05, 2021","['#backend', '#coaching', '#information-system..."
3,3,Kevin Doyle,CTO,patientMpower,“If I can provide enough direction to allow pe...,"June 21, 2021","['#full-stack', '#health-tech']"
4,4,Arnab Sen,Technical Delivery Manager,AKQA,“The switch to management can be highly reward...,"May 10, 2021","['#consulting', '#digital-marketing', '#e-comm..."
5,0,Aviv Ben-Yosef,Tech Executive Consultant,,"“I want to set up managers to succeed, no matt...","August 16, 2021","['#coaching', '#consulting']"
6,1,Anand Safi,Engineering Manager,Mark43,“I very well understand where my interests lie...,"July 19, 2021","['#coaching', '#frontend', '#full-stack', '#pu..."
7,2,Shawn Axsom,Senior Engineering Manager,Docker,“Your focus (and challenges) grows from your d...,"July 05, 2021","['#backend', '#coaching', '#information-system..."
8,3,Kevin Doyle,CTO,patientMpower,“If I can provide enough direction to allow pe...,"June 21, 2021","['#full-stack', '#health-tech']"
9,4,Arnab Sen,Technical Delivery Manager,AKQA,“The switch to management can be highly reward...,"May 10, 2021","['#consulting', '#digital-marketing', '#e-comm..."


In [171]:
df.drop(columns=['Unnamed: 0'],inplace=True)

In [172]:
df

Unnamed: 0,Employee Name,Job Position,Working,Interview quote,Date,Tags
0,Aviv Ben-Yosef,Tech Executive Consultant,,"“I want to set up managers to succeed, no matt...","August 16, 2021","['#coaching', '#consulting']"
1,Anand Safi,Engineering Manager,Mark43,“I very well understand where my interests lie...,"July 19, 2021","['#coaching', '#frontend', '#full-stack', '#pu..."
2,Shawn Axsom,Senior Engineering Manager,Docker,“Your focus (and challenges) grows from your d...,"July 05, 2021","['#backend', '#coaching', '#information-system..."
3,Kevin Doyle,CTO,patientMpower,“If I can provide enough direction to allow pe...,"June 21, 2021","['#full-stack', '#health-tech']"
4,Arnab Sen,Technical Delivery Manager,AKQA,“The switch to management can be highly reward...,"May 10, 2021","['#consulting', '#digital-marketing', '#e-comm..."
5,Aviv Ben-Yosef,Tech Executive Consultant,,"“I want to set up managers to succeed, no matt...","August 16, 2021","['#coaching', '#consulting']"
6,Anand Safi,Engineering Manager,Mark43,“I very well understand where my interests lie...,"July 19, 2021","['#coaching', '#frontend', '#full-stack', '#pu..."
7,Shawn Axsom,Senior Engineering Manager,Docker,“Your focus (and challenges) grows from your d...,"July 05, 2021","['#backend', '#coaching', '#information-system..."
8,Kevin Doyle,CTO,patientMpower,“If I can provide enough direction to allow pe...,"June 21, 2021","['#full-stack', '#health-tech']"
9,Arnab Sen,Technical Delivery Manager,AKQA,“The switch to management can be highly reward...,"May 10, 2021","['#consulting', '#digital-marketing', '#e-comm..."


In [174]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Employee Name    25 non-null     object
 1   Job Position     25 non-null     object
 2   Working          20 non-null     object
 3   Interview quote  25 non-null     object
 4   Date             25 non-null     object
 5   Tags             25 non-null     object
dtypes: object(6)
memory usage: 1.3+ KB


In [177]:
df['Date']=pd.to_datetime(df['Date'])
df

Unnamed: 0,Employee Name,Job Position,Working,Interview quote,Date,Tags
0,Aviv Ben-Yosef,Tech Executive Consultant,,"“I want to set up managers to succeed, no matt...",2021-08-16,"['#coaching', '#consulting']"
1,Anand Safi,Engineering Manager,Mark43,“I very well understand where my interests lie...,2021-07-19,"['#coaching', '#frontend', '#full-stack', '#pu..."
2,Shawn Axsom,Senior Engineering Manager,Docker,“Your focus (and challenges) grows from your d...,2021-07-05,"['#backend', '#coaching', '#information-system..."
3,Kevin Doyle,CTO,patientMpower,“If I can provide enough direction to allow pe...,2021-06-21,"['#full-stack', '#health-tech']"
4,Arnab Sen,Technical Delivery Manager,AKQA,“The switch to management can be highly reward...,2021-05-10,"['#consulting', '#digital-marketing', '#e-comm..."
5,Aviv Ben-Yosef,Tech Executive Consultant,,"“I want to set up managers to succeed, no matt...",2021-08-16,"['#coaching', '#consulting']"
6,Anand Safi,Engineering Manager,Mark43,“I very well understand where my interests lie...,2021-07-19,"['#coaching', '#frontend', '#full-stack', '#pu..."
7,Shawn Axsom,Senior Engineering Manager,Docker,“Your focus (and challenges) grows from your d...,2021-07-05,"['#backend', '#coaching', '#information-system..."
8,Kevin Doyle,CTO,patientMpower,“If I can provide enough direction to allow pe...,2021-06-21,"['#full-stack', '#health-tech']"
9,Arnab Sen,Technical Delivery Manager,AKQA,“The switch to management can be highly reward...,2021-05-10,"['#consulting', '#digital-marketing', '#e-comm..."


In [178]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   Employee Name    25 non-null     object        
 1   Job Position     25 non-null     object        
 2   Working          20 non-null     object        
 3   Interview quote  25 non-null     object        
 4   Date             25 non-null     datetime64[ns]
 5   Tags             25 non-null     object        
dtypes: datetime64[ns](1), object(5)
memory usage: 1.3+ KB


In [179]:
df['Tags']=df['Tags'].astype('category')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   Employee Name    25 non-null     object        
 1   Job Position     25 non-null     object        
 2   Working          20 non-null     object        
 3   Interview quote  25 non-null     object        
 4   Date             25 non-null     datetime64[ns]
 5   Tags             25 non-null     category      
dtypes: category(1), datetime64[ns](1), object(4)
memory usage: 1.3+ KB


### Step15:- Saving the updated table as csv_file

In [182]:
df.to_csv('Interviews.csv',index=False)

In [183]:
df1=pd.read_csv('Interviews.csv')
df1

Unnamed: 0,Employee Name,Job Position,Working,Interview quote,Date,Tags
0,Aviv Ben-Yosef,Tech Executive Consultant,,"“I want to set up managers to succeed, no matt...",2021-08-16,"['#coaching', '#consulting']"
1,Anand Safi,Engineering Manager,Mark43,“I very well understand where my interests lie...,2021-07-19,"['#coaching', '#frontend', '#full-stack', '#pu..."
2,Shawn Axsom,Senior Engineering Manager,Docker,“Your focus (and challenges) grows from your d...,2021-07-05,"['#backend', '#coaching', '#information-system..."
3,Kevin Doyle,CTO,patientMpower,“If I can provide enough direction to allow pe...,2021-06-21,"['#full-stack', '#health-tech']"
4,Arnab Sen,Technical Delivery Manager,AKQA,“The switch to management can be highly reward...,2021-05-10,"['#consulting', '#digital-marketing', '#e-comm..."
5,Aviv Ben-Yosef,Tech Executive Consultant,,"“I want to set up managers to succeed, no matt...",2021-08-16,"['#coaching', '#consulting']"
6,Anand Safi,Engineering Manager,Mark43,“I very well understand where my interests lie...,2021-07-19,"['#coaching', '#frontend', '#full-stack', '#pu..."
7,Shawn Axsom,Senior Engineering Manager,Docker,“Your focus (and challenges) grows from your d...,2021-07-05,"['#backend', '#coaching', '#information-system..."
8,Kevin Doyle,CTO,patientMpower,“If I can provide enough direction to allow pe...,2021-06-21,"['#full-stack', '#health-tech']"
9,Arnab Sen,Technical Delivery Manager,AKQA,“The switch to management can be highly reward...,2021-05-10,"['#consulting', '#digital-marketing', '#e-comm..."
