In [9]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [30]:
instructors_page = 'https://www.datacamp.com/instructors?all=true'
instructor_link_selector = '.instructor-block__description .instructor-block__link' # CSS class of the link
instructor_name_selector = '.instructor-block__name'  # CSS class of the name

instructor_resp = requests.get(instructors_page)
soup = BeautifulSoup(instructor_resp.text, 'lxml')

instructor_urls = [url['href'] for url in soup.select(instructor_link_selector)]
instructor_names = [name.text.strip() for name in soup.select(instructor_name_selector)]
instructor_urls = ['https://www.datacamp.com' + url for url in instructor_urls]

In [31]:
instructor_df = pd.DataFrame({'name': instructor_names,'url': instructor_urls})
print(instructor_df.shape)
instructor_df.head()

(136, 2)


Unnamed: 0,name,url
0,Filip Schouwenaars,https://www.datacamp.com/instructors/filipsch
1,Jonathan Cornelissen,https://www.datacamp.com/instructors/jonathana...
2,Hugo Bowne-Anderson,https://www.datacamp.com/instructors/hugobowne
3,Nick Carchedi,https://www.datacamp.com/instructors/nickyc
4,Dhavide Aruliah,https://www.datacamp.com/instructors/dhavide


In [38]:
def generate_keywords(topics, keywords, match_types=['Exact', 'Phrase'],campaign='SEM_Campaign'):
    col_names = ['Campaign', 'Ad Group', 'Keyword', 'Criterion Type']
    campaign_keywords = []
    
    for topic in topics:
        for word in keywords:
            for match in match_types:
                keyword = topic.lower() + ' ' + word
                row = [campaign, topic, keyword, match] 
                campaign_keywords.append(row)
    for topic in topics:
        for word in keywords:
            for match in match_types:
                keyword = word + ' ' + topic.lower() 
                row = [campaign, topic,keyword,match]
                campaign_keywords.append(row)

    return pd.DataFrame.from_records(campaign_keywords, columns=col_names)
   


In [39]:
topics = ['Data Science', 'Machine Learning']
keywords = ['course', 'tutorial']
generate_keywords(topics, keywords).head(10)

Unnamed: 0,Campaign,Ad Group,Keyword,Criterion Type
0,SEM_Campaign,Data Science,data science course,Exact
1,SEM_Campaign,Data Science,data science course,Phrase
2,SEM_Campaign,Data Science,data science tutorial,Exact
3,SEM_Campaign,Data Science,data science tutorial,Phrase
4,SEM_Campaign,Machine Learning,machine learning course,Exact
5,SEM_Campaign,Machine Learning,machine learning course,Phrase
6,SEM_Campaign,Machine Learning,machine learning tutorial,Exact
7,SEM_Campaign,Machine Learning,machine learning tutorial,Phrase
8,SEM_Campaign,Data Science,course data science,Exact
9,SEM_Campaign,Data Science,course data science,Phrase


In [40]:
topics = ['R', 'Python', 'SQL', 'Git', 'Shell']
keywords = ['data science', 'programming', 'analytics', 'data analysis', 'machine learning',
            'deep learning', 'financial analysis', 'data viz', 'visualization', 'data visualization',
            'learn', 'course', 'courses', 'education', 'data import', 'data cleaning', 
            'data manipulation', 'probability', 'stats', 'statistics', 'course', 'courses',
           'learn', 'education', 'tutorial'] 
tech_keywords = generate_keywords(topics, keywords, campaign='SEM_Technologies')
print('total keywords:', tech_keywords.shape[0])
tech_keywords.head()

total keywords: 500


Unnamed: 0,Campaign,Ad Group,Keyword,Criterion Type
0,SEM_Technologies,R,r data science,Exact
1,SEM_Technologies,R,r data science,Phrase
2,SEM_Technologies,R,r programming,Exact
3,SEM_Technologies,R,r programming,Phrase
4,SEM_Technologies,R,r analytics,Exact


In [41]:
courses_page = 'https://www.datacamp.com/courses/all'
course_link_selector = '.courses__explore-list .course-block'

course_resp = requests.get(courses_page)
soup = BeautifulSoup(course_resp.text, 'lxml')

course_urls = [link.contents[1]['href'] for link in soup.select(course_link_selector)] 
course_urls = ['https://www.datacamp.com' + url for url in course_urls]
course_names = [link.h4.text for link in soup.select(course_link_selector)]

In [42]:
course_df = pd.DataFrame({'name': course_names,'url': course_urls})
course_df['name_clean'] = course_df.name.str.replace('\(.*\)', '').str.strip()
print('total keywords:', course_df.shape[0])
course_df.head()

total keywords: 174


Unnamed: 0,name,url,name_clean
0,Introduction to R,https://www.datacamp.com/courses/free-introduc...,Introduction to R
1,"Data Analysis in R, the data.table Way",https://www.datacamp.com/courses/data-table-da...,"Data Analysis in R, the data.table Way"
2,Data Manipulation in R with dplyr,https://www.datacamp.com/courses/dplyr-data-ma...,Data Manipulation in R with dplyr
3,Data Visualization in R with ggvis,https://www.datacamp.com/courses/ggvis-data-vi...,Data Visualization in R with ggvis
4,Reporting with R Markdown,https://www.datacamp.com/courses/reporting-wit...,Reporting with R Markdown


In [43]:
keywords = ['', 'learn', 'course', 'courses', 'tutorial', 'education']
course_keywords = generate_keywords(course_df['name_clean'], keywords, campaign='SEM_Courses')
print('total keywords:', course_keywords.shape[0])
course_keywords.head(10)

total keywords: 4176


Unnamed: 0,Campaign,Ad Group,Keyword,Criterion Type
0,SEM_Courses,Introduction to R,introduction to r,Exact
1,SEM_Courses,Introduction to R,introduction to r,Phrase
2,SEM_Courses,Introduction to R,introduction to r learn,Exact
3,SEM_Courses,Introduction to R,introduction to r learn,Phrase
4,SEM_Courses,Introduction to R,introduction to r course,Exact
5,SEM_Courses,Introduction to R,introduction to r course,Phrase
6,SEM_Courses,Introduction to R,introduction to r courses,Exact
7,SEM_Courses,Introduction to R,introduction to r courses,Phrase
8,SEM_Courses,Introduction to R,introduction to r tutorial,Exact
9,SEM_Courses,Introduction to R,introduction to r tutorial,Phrase


In [62]:
skills_page = 'https://www.datacamp.com/tracks/skill'
skills_link_selector = '.shim'
print(skills_link_selector)

skills_resp = requests.get(skills_page)
skill_soup = BeautifulSoup(skills_resp.text, 'lxml')

skills_urls = [link['href'] for link in skill_soup.select(skills_link_selector)] 
skills_names = [skill.replace('/tracks/', '').replace('-', ' ') for skill in skills_urls]
skills_urls = ['https://www.datacamp.com' + url for url in skills_urls]

.shim


In [63]:
career_page = 'https://www.datacamp.com/tracks/career'
career_link_selector = '.shim'

career_resp = requests.get(career_page)
career_soup = BeautifulSoup(career_resp.text, 'lxml')

career_urls = [link['href'] for link in career_soup.select(career_link_selector)] 

career_names = [career.replace('/tracks/', '').replace('-', ' ') for career in career_urls]
career_urls = ['https://www.datacamp.com' + url for url in career_urls]

In [67]:
tracks_df = pd.DataFrame({'name': skills_names + career_names,'url': skills_urls + career_urls})
tracks_df['name'] = [x.title() for x in tracks_df['name']]
tracks_df.drop(tracks_df.index[:1], inplace=True)
tracks_df.head()

Unnamed: 0,name,url
1,R Programming,https://www.datacamp.com/tracks/r-programming
2,Importing Cleaning Data With R,https://www.datacamp.com/tracks/importing-clea...
3,Data Manipulation With R,https://www.datacamp.com/tracks/data-manipulat...
4,Python Programming,https://www.datacamp.com/tracks/python-program...
5,Importing Cleaning Data With Python,https://www.datacamp.com/tracks/importing-clea...


In [68]:
tracks_keywords = generate_keywords(tracks_df['name'], keywords, campaign='SEM_Tracks')
print('total keywords:', tracks_keywords.shape[0])
tracks_keywords.head()


total keywords: 1224


Unnamed: 0,Campaign,Ad Group,Keyword,Criterion Type
0,SEM_Tracks,R Programming,r programming,Exact
1,SEM_Tracks,R Programming,r programming,Phrase
2,SEM_Tracks,R Programming,r programming learn,Exact
3,SEM_Tracks,R Programming,r programming learn,Phrase
4,SEM_Tracks,R Programming,r programming course,Exact


In [69]:
full_keywords_df = pd.concat([instructor_keywords_df, tech_keywords, course_keywords, tracks_keywords])
full_keywords_df.to_csv('keywords.csv', index=False)
full_keywords_df.head()


Unnamed: 0,Campaign,Ad Group,Keyword,Criterion Type
0,SEM_Instructors,Filip Schouwenaars,filip schouwenaars course,Exact
1,SEM_Instructors,Filip Schouwenaars,filip schouwenaars course,Phrase
2,SEM_Instructors,Filip Schouwenaars,filip schouwenaars courses,Exact
3,SEM_Instructors,Filip Schouwenaars,filip schouwenaars courses,Phrase
4,SEM_Instructors,Filip Schouwenaars,filip schouwenaars learn,Exact


## Generating Ads

In [70]:
def split_string(string, splits=2, max_len=60):
    """Split `string` into `splits` words, each shorter than `max_len` / `splits`"""
    if len(string) < max_len / splits:
        return string, ''
    str_words = string.split(' ')
    result = ''
    for i, word in enumerate(str_words):
        if len(result + ' ' + word) <= max_len / splits:
            result += word + ' '
        else:
            break
    spaces = result.strip().count(' ')
    result2 = string[string[len(result):].index(word) + len(result):]
    return result.strip(), result2

In [71]:
print(split_string('this is a very long course name that needs splitting', 2, 60))
print(split_string('short course name', 2, 60))

('this is a very long course', 'name that needs splitting')
('short course name', '')


In [72]:
full_keywords_df.Campaign.unique()

array(['SEM_Instructors', 'SEM_Technologies', 'SEM_Courses', 'SEM_Tracks'],
      dtype=object)

In [73]:
course_df['Campaign'] = 'SEM_Courses'
course_df = course_df.rename(columns={'name_clean': 'name', 'name': 'old_name'})
course_df.head()

Unnamed: 0,old_name,url,name,Campaign
0,Introduction to R,https://www.datacamp.com/courses/free-introduc...,Introduction to R,SEM_Courses
1,"Data Analysis in R, the data.table Way",https://www.datacamp.com/courses/data-table-da...,"Data Analysis in R, the data.table Way",SEM_Courses
2,Data Manipulation in R with dplyr,https://www.datacamp.com/courses/dplyr-data-ma...,Data Manipulation in R with dplyr,SEM_Courses
3,Data Visualization in R with ggvis,https://www.datacamp.com/courses/ggvis-data-vi...,Data Visualization in R with ggvis,SEM_Courses
4,Reporting with R Markdown,https://www.datacamp.com/courses/reporting-wit...,Reporting with R Markdown,SEM_Courses


In [74]:
instructor_df['Campaign'] = 'SEM_Instructors'
instructor_df.head()

Unnamed: 0,name,url,Campaign
0,Filip Schouwenaars,https://www.datacamp.com/instructors/filipsch,SEM_Instructors
1,Jonathan Cornelissen,https://www.datacamp.com/instructors/jonathana...,SEM_Instructors
2,Hugo Bowne-Anderson,https://www.datacamp.com/instructors/hugobowne,SEM_Instructors
3,Nick Carchedi,https://www.datacamp.com/instructors/nickyc,SEM_Instructors
4,Dhavide Aruliah,https://www.datacamp.com/instructors/dhavide,SEM_Instructors


In [75]:
tracks_df['Campaign'] = 'SEM_Tracks'
tracks_df.head()

Unnamed: 0,name,url,Campaign
1,R Programming,https://www.datacamp.com/tracks/r-programming,SEM_Tracks
2,Importing Cleaning Data With R,https://www.datacamp.com/tracks/importing-clea...,SEM_Tracks
3,Data Manipulation With R,https://www.datacamp.com/tracks/data-manipulat...,SEM_Tracks
4,Python Programming,https://www.datacamp.com/tracks/python-program...,SEM_Tracks
5,Importing Cleaning Data With Python,https://www.datacamp.com/tracks/importing-clea...,SEM_Tracks


In [76]:
tech_domain = 'https://www.datacamp.com/courses/tech:'
tech_domain_list = []
for tech in ['R', 'Python', 'SQL', 'Git', 'Shell']:
    tech_domain_list.append((tech, tech_domain + tech))
tech_df = pd.DataFrame.from_records(tech_domain_list, columns=['name', 'url'])
tech_df['Campaign'] = 'SEM_Technologies'
tech_df

Unnamed: 0,name,url,Campaign
0,R,https://www.datacamp.com/courses/tech:R,SEM_Technologies
1,Python,https://www.datacamp.com/courses/tech:Python,SEM_Technologies
2,SQL,https://www.datacamp.com/courses/tech:SQL,SEM_Technologies
3,Git,https://www.datacamp.com/courses/tech:Git,SEM_Technologies
4,Shell,https://www.datacamp.com/courses/tech:Shell,SEM_Technologies


In [83]:
full_ads_df = pd.concat([course_df[['Campaign', 'name', 'url']],
                        instructor_df,
                        tracks_df,
                        tech_df], ignore_index=True)
full_ads_df = full_ads_df.rename(columns={'name': 'Ad Group', 'url': 'Final URL'})
print('total rows:', full_ads_df.shape[0])
n_adgroups = full_ads_df.shape[0]
full_ads_df.head()

total rows: 366


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  after removing the cwd from sys.path.


Unnamed: 0,Campaign,Ad Group,Final URL
0,SEM_Courses,Introduction to R,https://www.datacamp.com/courses/free-introduc...
1,SEM_Courses,"Data Analysis in R, the data.table Way",https://www.datacamp.com/courses/data-table-da...
2,SEM_Courses,Data Manipulation in R with dplyr,https://www.datacamp.com/courses/dplyr-data-ma...
3,SEM_Courses,Data Visualization in R with ggvis,https://www.datacamp.com/courses/ggvis-data-vi...
4,SEM_Courses,Reporting with R Markdown,https://www.datacamp.com/courses/reporting-wit...


In [78]:
full_ads_df = full_ads_df.iloc[[x  for x in range(n_adgroups) for i in range(3)], :] 
print('total rows:', full_ads_df.shape[0])
full_ads_df.head(9)

total rows: 1098


Unnamed: 0,Campaign,Ad Group,Final URL
0,SEM_Courses,Introduction to R,https://www.datacamp.com/courses/free-introduc...
0,SEM_Courses,Introduction to R,https://www.datacamp.com/courses/free-introduc...
0,SEM_Courses,Introduction to R,https://www.datacamp.com/courses/free-introduc...
1,SEM_Courses,"Data Analysis in R, the data.table Way",https://www.datacamp.com/courses/data-table-da...
1,SEM_Courses,"Data Analysis in R, the data.table Way",https://www.datacamp.com/courses/data-table-da...
1,SEM_Courses,"Data Analysis in R, the data.table Way",https://www.datacamp.com/courses/data-table-da...
2,SEM_Courses,Data Manipulation in R with dplyr,https://www.datacamp.com/courses/dplyr-data-ma...
2,SEM_Courses,Data Manipulation in R with dplyr,https://www.datacamp.com/courses/dplyr-data-ma...
2,SEM_Courses,Data Manipulation in R with dplyr,https://www.datacamp.com/courses/dplyr-data-ma...


In [79]:
Description = [
    'Learn Directly From the Top Experts in the Field. 20% off Annual Subcriptions',
    'Be Ahead of the Curve, Master Data Science Skills. $29 / Month. Cancel Anytime',
    'Choose From a Wide Variety of Topics Tuaght by the Best in the World. Start Now'   
]
Description = [x for i in range(n_adgroups) for x in Description ]
full_ads_df['Description'] = Description
full_ads_df.head()

Unnamed: 0,Campaign,Ad Group,Final URL,Description
0,SEM_Courses,Introduction to R,https://www.datacamp.com/courses/free-introduc...,Learn Directly From the Top Experts in the Fie...
0,SEM_Courses,Introduction to R,https://www.datacamp.com/courses/free-introduc...,"Be Ahead of the Curve, Master Data Science Ski..."
0,SEM_Courses,Introduction to R,https://www.datacamp.com/courses/free-introduc...,Choose From a Wide Variety of Topics Tuaght by...
1,SEM_Courses,"Data Analysis in R, the data.table Way",https://www.datacamp.com/courses/data-table-da...,Learn Directly From the Top Experts in the Fie...
1,SEM_Courses,"Data Analysis in R, the data.table Way",https://www.datacamp.com/courses/data-table-da...,"Be Ahead of the Curve, Master Data Science Ski..."


In [80]:
benefits = [
    'Boost Your Data Science Career',
    'Stand Out From the Crowd',
    'Tackle Complex Questions'    
]

In [81]:
benefits = [x for i in range(n_adgroups) for x in benefits]
headlines = [split_string(x) for x in full_ads_df['Ad Group']]
full_ads_df['Headline 1'] = [x[0] for x in headlines]
full_ads_df['Headline 2'] = [x[1] if x[1] else benefits[i] for i, x in enumerate(headlines)]
print('total ads:', full_ads_df.shape[0])
full_ads_df.head(9)

total ads: 1098


Unnamed: 0,Campaign,Ad Group,Final URL,Description,Headline 1,Headline 2
0,SEM_Courses,Introduction to R,https://www.datacamp.com/courses/free-introduc...,Learn Directly From the Top Experts in the Fie...,Introduction to R,Boost Your Data Science Career
0,SEM_Courses,Introduction to R,https://www.datacamp.com/courses/free-introduc...,"Be Ahead of the Curve, Master Data Science Ski...",Introduction to R,Stand Out From the Crowd
0,SEM_Courses,Introduction to R,https://www.datacamp.com/courses/free-introduc...,Choose From a Wide Variety of Topics Tuaght by...,Introduction to R,Tackle Complex Questions
1,SEM_Courses,"Data Analysis in R, the data.table Way",https://www.datacamp.com/courses/data-table-da...,Learn Directly From the Top Experts in the Fie...,"Data Analysis in R, the",data.table Way
1,SEM_Courses,"Data Analysis in R, the data.table Way",https://www.datacamp.com/courses/data-table-da...,"Be Ahead of the Curve, Master Data Science Ski...","Data Analysis in R, the",data.table Way
1,SEM_Courses,"Data Analysis in R, the data.table Way",https://www.datacamp.com/courses/data-table-da...,Choose From a Wide Variety of Topics Tuaght by...,"Data Analysis in R, the",data.table Way
2,SEM_Courses,Data Manipulation in R with dplyr,https://www.datacamp.com/courses/dplyr-data-ma...,Learn Directly From the Top Experts in the Fie...,Data Manipulation in R with,dplyr
2,SEM_Courses,Data Manipulation in R with dplyr,https://www.datacamp.com/courses/dplyr-data-ma...,"Be Ahead of the Curve, Master Data Science Ski...",Data Manipulation in R with,dplyr
2,SEM_Courses,Data Manipulation in R with dplyr,https://www.datacamp.com/courses/dplyr-data-ma...,Choose From a Wide Variety of Topics Tuaght by...,Data Manipulation in R with,dplyr


In [82]:
full_ads_df.to_csv('ads.csv', index=False)