### Set up
Goal: extract every phone number and email address from this document and copy them to the clipboard: https://automatetheboringstuff.com/files/examplePhoneEmailDirectory.pdf

1. create regex for phone numbers
2. create regex for email addresses
3. get text from document that has been copied to clipboard
4. extract emails and phone numbers from text
5. copy the results to clipboard

In [13]:
import re          #for regular expressions
import pyperclip   #for copying and pasting

In [14]:
# 1. create regex for phone nums
# Group 1 is always the complete phone number, so item [0] of every tuple
# returned by findall() is the full match. The capture groups keep the same
# numbering as before (10 groups); only non-capturing groups were added.
phoneRegex = re.compile(r'''

(?<!\d)                     #never start in the middle of a longer run of digits
(                           #put everything inside one big group...
                            #so that when use findall() get full
                            #number as the first item of each tuple
(?:
((\d\d\d)|(\(\d\d\d\)))     #area code w or w/o parentheses
(\s|-)                      #separator could be space or dash
)?                          #area code and its separator are optional together,
                            #so a 7-digit number is not captured with a
                            #leading space or dash
\d\d\d                      #first 3 digits
-                           #separator
\d\d\d\d                    #last 4 digits
(?:[ \t]*                   #optional blanks before the extension (same line only)
(((ext(\.)?\s)|x)           #optional extension word part
(\d{2,5})))?                #optional extension number part
)

''', re.VERBOSE)

In [15]:
# 2. create regex for emails
# The letter range must be A-Z: the range A-z also covers the ASCII
# punctuation between 'Z' and 'a' ([ \ ] ^ _ `), which let brackets leak into
# matches. '-' is allowed so hyphenated names and domains are not cut short.
emailRegex = re.compile(r'''

[a-zA-Z0-9_.+-]+   #name part - 1+ of upper/lower case letters, numbers, and _ . + -
@                  #@ symbol (@)
[a-zA-Z0-9_.+-]+   #domain name part (gmail.com)

''', re.VERBOSE)

In [20]:
# 3. get text from clipboard
# The document text must already have been copied to the clipboard before
# this cell runs; whatever is on the clipboard becomes the search text.

text = pyperclip.paste()

In [21]:
# 4. extract emails and phone numbers from text
# phoneRegex contains capture groups, so findall() returns one tuple of groups
# per match (the full number is the first item of each tuple). emailRegex has
# no groups, so findall() returns the matched strings directly.

extractedPhone = phoneRegex.findall(text)
extractedEmail = emailRegex.findall(text)

In [25]:
#keep only the first item of each findall() tuple, which is the full phone number
allPhoneNumbers = [groups[0] for groups in extractedPhone]

In [28]:
#see if worked: show the list of full phone numbers, then the list of emails

print(allPhoneNumbers)
print(extractedEmail)

['479-205-4874', '678-560-3485', '724-900-2986', '242-391-3183', '604-720-6426', '651-807-8065', '209-754-9111', '641-433-6698', '701-528-9851', '304-491-9583', '863-583-8107', '507-948-3980', '546-367-3454', '321-854-5616', '904-896-2920', '309-387-1990', '605-373-2329', '573-454-1209', '252-822-2439', '586-481-1805', '615-716-5379', '903-995-3368', '205-868-3935', '881-376-2173', '936-631-8841', '307-368-4710', '631-957-9402', '336-402-2815', '511-768-9073', '862-579-2515', '678-439-5117', '949-328-4768', '764-582-6489', '662-882-4349', '323-686-4356', '321-641-1192', '980-511-2211', '931-381-2749', '557-314-1719', '641-845-9700', '571-248-3160', '611-848-3013', '724-392-9051', '303-606-9242', '419-691-5429', '740-228-1291', '479-529-9642', '308-702-9334', '704-481-3176', '270-245-5606', '559-639-2831', '506-203-1818', '716-387-4756', '501-919-6026', '351-796-1964', '809-948-1893', '984-578-4176', '765-298-6852', '309-531-8927', '561-405-2390', '423-694-1512', '561-365-7342', '717-61

In [36]:
#want to have one phone number and one email per line
# Join both lists in a single call: joining them separately and concatenating
# the two strings left no newline between the last phone number and the first
# email, so those two ended up on the same line.

results = '\n'.join(allPhoneNumbers + extractedEmail)
print(results)

479-205-4874
678-560-3485
724-900-2986
242-391-3183
604-720-6426
651-807-8065
209-754-9111
641-433-6698
701-528-9851
304-491-9583
863-583-8107
507-948-3980
546-367-3454
321-854-5616
904-896-2920
309-387-1990
605-373-2329
573-454-1209
252-822-2439
586-481-1805
615-716-5379
903-995-3368
205-868-3935
881-376-2173
936-631-8841
307-368-4710
631-957-9402
336-402-2815
511-768-9073
862-579-2515
678-439-5117
949-328-4768
764-582-6489
662-882-4349
323-686-4356
321-641-1192
980-511-2211
931-381-2749
557-314-1719
641-845-9700
571-248-3160
611-848-3013
724-392-9051
303-606-9242
419-691-5429
740-228-1291
479-529-9642
308-702-9334
704-481-3176
270-245-5606
559-639-2831
506-203-1818
716-387-4756
501-919-6026
351-796-1964
809-948-1893
984-578-4176
765-298-6852
309-531-8927
561-405-2390
423-694-1512
561-365-7342
717-616-6054
517-593-3243
971-374-3441
313-758-7914
713-418-9707
811-557-8092
601-247-7920
405-866-8158
940-998-9912
904-383-5407
772-773-7846
312-773-6768
814-960-3437
703-767-4323
403-212-2346

In [37]:
# 5. copy extractions to clipboard
# results is the newline-separated string built above; after this call it can
# be pasted anywhere.

pyperclip.copy(results)