-
Notifications
You must be signed in to change notification settings - Fork 0
/
superphone_scrape_contacts1.py
170 lines (141 loc) · 6.58 KB
/
superphone_scrape_contacts1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait # available since 2.4.0
from selenium.webdriver.support import expected_conditions as EC # available since 2.26.0
from selenium.webdriver.common.by import By
import pandas as pd
import time
import sys
from gspread_pandas import upload_with_pd
# --- Site endpoints -------------------------------------------------------
SITE_URL = 'https://app.superphone.io/'
SITE_LOGIN_URL = f'{SITE_URL}login/'
SITE_MESSAGE_URL = f'{SITE_URL}messages'
SITE_CONTACT_URL = f'{SITE_URL}contacts'

# --- Account and local tooling --------------------------------------------
# NOTE(review): the login credentials below are stored in plain text in the
# source file; consider loading them from the environment instead.
USER_EMAIL = 'lancecoleman91@gmail.com'
USER_PASSWORD = 'wecandoit!'
CHROMEDRIVER_PATH = 'chromedriver.exe'
PUBLIC_KEY = '4a5239f51e126a269795f768cb60be78fb6b3b2e2d3217fa97fad4392543721c'

# Shared Selenium driver; stays None until run_chrome() creates it.
driver = None

# Column headers of the scraped contact table, in output order.
_CONTACT_COLUMNS = [
    'Name',
    'Gender',
    'photo link',
    'City',
    'State',
    'Tags',
    'LAST CONTACTED',
    '$ SPENT',
    'Email',
    'Messaging',
    'Mobile',
    'Assigned',
    'Address',
    'Instagram',
    'Twitter',
    'Birthday',
    'Industry',
    'Notes',
    'Number of Messages Incoming',
    'Number of Messages Outgoing',
    'Etc',
]

# Empty table that run_chrome() fills with one row per contact.
df = pd.DataFrame(columns=_CONTACT_COLUMNS)
def get_info(path, attr=None):
    """Read the text (or one attribute) of every element matching an XPath.

    Args:
        path: XPath expression looked up through the module-level ``driver``.
        attr: Name of the element attribute to read. When ``None`` the
            element's ``text`` is read instead.

    Returns:
        ``""`` when nothing matches or the lookup raises; the single value
        when exactly one element matches; otherwise a list with one value
        per matching element. Callers must be ready for all three shapes.
    """
    try:
        # find_elements(By.XPATH, ...) is available in Selenium 3 and 4;
        # the find_elements_by_xpath shortcut was removed in Selenium 4.3.
        elements = driver.find_elements(By.XPATH, path)
        values = [
            elm.text if attr is None else elm.get_attribute(attr)
            for elm in elements
        ]
    except Exception as e:
        # Best-effort scrape: report the problem and return the empty marker.
        print(e)
        return ""
    if not values:
        return ""
    return values[0] if len(values) == 1 else values
# XPath of the value cell of the detail row at a given 1-based position.
_DETAIL_VALUE_XPATH = "//div[@class='sc-fAJaQT gNIPBu'][{}]/div[2]"

# Lower-cased detail label -> (result key, True when the value is read from
# the <a> inside the value cell rather than from the cell itself).
_DETAIL_FIELDS = {
    "email": ("email", True),
    "mobile": ("mobile", True),
    "assigned": ("assigned", False),
    "instagram": ("instagram", True),
    "twitter": ("twitter", True),
    "birthday": ("birthday", False),
    "industry": ("industry", False),
    "job title": ("notes", False),
    "gender": ("gender", False),
}


def _as_list(value):
    """Normalise a get_info() result ("" / single value / list) to a list."""
    if isinstance(value, list):
        return value
    return [value] if value else []


def _split_address(raw):
    """Return (address, city, state) parsed from the raw address value.

    Only the first line of the text is kept as the address. City and state
    are the first two comma-separated parts, stripped of surrounding
    whitespace; state is None when the address has no comma.
    """
    if isinstance(raw, list):
        raw = raw[0] if raw else ""
    # The original code split on the two-character sequence backslash + "n";
    # keep honouring it, but also split on real line breaks.
    lines = raw.replace('\\n', '\n').splitlines()
    address = lines[0].strip() if lines else ""
    parts = [part.strip() for part in address.split(',')]
    city = parts[0]
    state = parts[1] if len(parts) > 1 else None
    return address, city, state


def _read_contact_details():
    """Return a dict of the labelled detail fields found on the page."""
    details = {}
    # A single matching label comes back from get_info() as a plain string;
    # wrap it so the loop never iterates over its characters.
    labels = _as_list(get_info("//div[@class='sc-cNnxps cCzEKX']"))
    for position, label in enumerate(labels, start=1):
        value_xpath = _DETAIL_VALUE_XPATH.format(position)
        try:
            field = label.lower()
            if field == "address":
                address, city, state = _split_address(get_info(value_xpath))
                details["address"] = address
                details["city"] = city
                details["state"] = state
            elif field in _DETAIL_FIELDS:
                key, in_link = _DETAIL_FIELDS[field]
                details[key] = get_info(value_xpath + "/a" if in_link else value_xpath)
        except Exception as e:
            # One unreadable field must not discard the rest of the contact.
            print(e)
    return details


def _login():
    """Open the login page and submit USER_EMAIL / USER_PASSWORD."""
    driver.get(SITE_LOGIN_URL)
    time.sleep(10)
    driver.find_element(By.XPATH, "//input[@id='email']").send_keys(USER_EMAIL)
    time.sleep(1)
    driver.find_element(By.XPATH, "//input[@id='password']").send_keys(USER_PASSWORD)
    time.sleep(2)
    driver.find_element(By.XPATH, "//button[@type='submit']").click()
    time.sleep(10)


def _read_contact_row():
    """Return one ``df`` row (in column order) for the contact just clicked."""
    name = get_info("//span[@class='SpContactName sc-gmeYpB bKDGrO']")
    photo_link = get_info("//div[@class='thumbnail-wrapper circular lg']/img", "src")
    tags = get_info("//div[@class='tags-list']/button")
    last_contacted = get_info("//div[@class='stat'][1]/h3")
    spent = get_info("//div[@class='stat'][2]/h3")
    details = _read_contact_details()
    return [
        name,
        details.get("gender"),
        photo_link,
        details.get("city"),
        details.get("state"),
        tags,
        last_contacted,
        spent,
        details.get("email"),
        None,  # Messaging: not scraped
        details.get("mobile"),
        details.get("assigned"),
        details.get("address"),
        details.get("instagram"),
        details.get("twitter"),
        details.get("birthday"),
        details.get("industry"),
        details.get("notes"),
        None,  # Number of Messages Incoming: not scraped
        None,  # Number of Messages Outgoing: not scraped
        None,  # Etc: not scraped
    ]


def run_chrome():
    """Log in with Chrome and scrape every contact into the module-level ``df``.

    Starts Chrome through CHROMEDRIVER_PATH, logs in, then requests the
    contacts list page by page (500 per request) until a page shows no
    contact rows. Each row is clicked and its fields are appended to ``df``
    under labels 1, 2, 3, ...

    Any exception is printed rather than raised, so rows collected before a
    failure stay in ``df``. The browser is always closed, also on failure.
    """
    global driver
    try:
        op = webdriver.ChromeOptions()
        # To run without a visible window, add '--headless' and
        # '--disable-gpu' to ``op`` here.
        driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=op)
        try:
            driver.maximize_window()
            _login()

            page = 0
            limit = 500
            row = 1
            while True:
                driver.get(SITE_CONTACT_URL + "?&page={0}&limit={1}".format(page, limit))
                time.sleep(10)
                trs = driver.find_elements(By.XPATH, "//tr[@class='with-highlight']")
                if not trs:
                    break
                for tr in trs:
                    tr.click()
                    time.sleep(5)
                    df.loc[row] = _read_contact_row()
                    row += 1
                page += 1
            time.sleep(10)
        finally:
            # Close the browser even when the scrape fails part-way through.
            driver.quit()
    except Exception as e:
        print(e)
def main():
    """Scrape all contacts, then upload the collected table.

    Calls run_chrome() to fill the module-level ``df`` and passes it to
    upload_with_pd() together with the string "Contacts" (presumably the
    destination sheet name; confirm against gspread_pandas). Any exception
    is printed instead of being propagated.
    """
    try:
        run_chrome()
        upload_with_pd(df, "Contacts")
    except Exception as err:
        print(err)


# Start the scrape only when this file is executed as a script.
if __name__ == "__main__":
    main()