-
Notifications
You must be signed in to change notification settings - Fork 0
/
emails.py
52 lines (40 loc) · 1.36 KB
/
emails.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import csv
import requests
import time
from bs4 import BeautifulSoup
def getData(link, retries=3, delay=5, timeout=30):
    """Fetch one society page and record its name and email in ``myData``.

    Args:
        link: Anchor element whose ``href`` is the society page's path,
            appended to the module-level ``base_url``.
        retries: Maximum number of attempts at fetching the page.
        delay: Seconds to wait between failed attempts.
        timeout: Seconds to wait for the server on each attempt.

    Returns:
        None. On success a ``[name, email]`` row is appended to the
        module-level ``myData`` list. Pages that cannot be fetched, or that
        carry no email link, are skipped.
    """
    href = link.get('href')
    if not href:
        # Retrying cannot make a missing href appear, so skip right away.
        return

    # Deals with occasional requests errors: retry a bounded number of
    # times instead of recursing without limit.
    socPage = None
    for attempt in range(retries):
        try:
            socPage = requests.get(base_url + href, timeout=timeout)
            break
        except requests.RequestException:
            if attempt < retries - 1:
                time.sleep(delay)
    if socPage is None:
        # Every attempt failed; skip this society rather than abort the run.
        return

    tempSoup = BeautifulSoup(socPage.text, 'html.parser')
    emailLink = tempSoup.find("a", class_="msl_email")
    # Not all society pages have an email
    mailTo = emailLink.get('href') if emailLink is not None else None
    if not mailTo:
        return

    # Eliminate the 'mailto:' part before the actual email; an href with no
    # colon is kept whole instead of raising IndexError.
    parts = mailTo.split(':')
    email = parts[1] if len(parts) > 1 else mailTo

    title = tempSoup.find('li', class_="current-page")
    # A page without the breadcrumb element still yields its email.
    name = title.get_text() if title is not None else ''
    myData.append([name, email])
# Site root; getData prepends it to each society link's href
base_url = "https://www.warwicksu.com"
# Contains the list of all the societies
suPage = "https://www.warwicksu.com/societies-sports/societies/"
# A timeout keeps an unresponsive server from hanging the script forever
source = requests.get(suPage, timeout=30)
soup = BeautifulSoup(source.text, 'html.parser')
# Rows for the CSV, starting with the header; getData appends to this list
myData = [["Name", "Email"]]
for links in soup.find_all('a', class_="msl-gl-link"):
    # links is the anchor element pointing at one society's page
    getData(links)
# Writes myData to a csv file using the csv library. The explicit encoding
# keeps the output identical across platforms and avoids encode errors on
# non-ASCII society names.
with open('emails.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerows(myData)