-
Notifications
You must be signed in to change notification settings - Fork 2
/
UrlExtraction.py
42 lines (37 loc) · 1.51 KB
/
UrlExtraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
## @file UrlExtraction.py
#
# @brief This file contains the UrlExtraction class.
#
# @author JunHao
#
# @section No imports.
## Documentation for the UrlExtraction class.
# UrlExtraction takes the link passed in by the user and identifies the site name and the user's unique ID on social platforms.
class UrlExtraction:
    """! Identify the site name and the user's unique ID from a profile URL.

    All work is plain string handling on the URL text that is passed in.
    """

    @staticmethod
    def _splitHostAndPath(UserUrl):
        """! Split a URL into its lower-cased host and its path.
        @param UserUrl Url link provided by user.
        @return (host, path) tuple; path has no leading slash, query or fragment.
        """
        trimmed = UserUrl.strip()
        # Cut the query/fragment first so a "://" inside them is never
        # mistaken for the scheme separator.
        for marker in ("?", "#"):
            trimmed = trimmed.split(marker, 1)[0]
        _scheme, separator, remainder = trimmed.partition("://")
        if not separator:
            # No scheme present: the whole text is "host/path".
            remainder = trimmed
        host, _slash, path = remainder.partition("/")
        # Host names are case-insensitive; the path (which holds the ID) is not.
        return host.lower(), path

    # Get UniqueID based on social media sites.
    def getUniqueID(self, UserUrl):
        """! Extract Unique User ID based on site and return said ID.
        @param UserUrl Url link provided by user.
        @return uniqueID for YouTube (/user/<id> or /channel/<id>) and Twitter
                links; the unchanged Url for a YouTube link of any other shape;
                None when the link belongs to neither site.
        """
        host, path = self._splitHostAndPath(UserUrl)
        # Decide the site from the host labels only, so a Twitter handle such as
        # "youtube" or a channel ID containing "user" cannot pick the wrong branch.
        hostLabels = host.split(".")
        segments = [segment for segment in path.split("/") if segment]

        if "youtube" in hostLabels:
            if len(segments) >= 2 and segments[0] in ("user", "channel"):
                # The ID is the segment right after /user/ or /channel/;
                # anything further (e.g. /videos) is not part of it.
                return segments[1]
            # Unrecognised YouTube link shape: hand the Url back untouched.
            return UserUrl
        if "twitter" in hostLabels:
            # The handle is the first path segment; empty when there is none.
            return segments[0] if segments else ""
        return None

    # Get Sitename based on Url provided by user.
    def getSiteName(self, UserUrl):
        """! Extract sitename based on Url provided by user.
        @param UserUrl Url link provided by user.
        @return sitename, i.e. the first host label after an optional "www."
                (lower-cased), e.g. "youtube" or "twitter".
        """
        host, _path = self._splitHostAndPath(UserUrl)
        if host.startswith("www."):
            host = host[len("www."):]
        # Split sitename from .com/... onwards in link.
        return host.split(".", 1)[0]