Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding YouTube capability back to GHunt #437

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion ghunt/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ def parse_and_run():
parser_drive = subparsers.add_parser('drive', help="Get information on a Drive file or folder.")
parser_drive.add_argument("file_id", help="Example: 1N__vVu4c9fCt4EHxfthUNzVOs_tp8l6tHcMBnpOZv_M")
parser_drive.add_argument('--json', type=str, help="File to write the JSON output to.")

### YouTube module
parser_youtube = subparsers.add_parser('youtube', help="Get information on a YouTube channel (doesn't work with channels created after Google removed IDs from the page source, and relies on the page having been archived by Wayback Machine.")
parser_youtube.add_argument("channel_url", help="Example: https://www.youtube.com/@YouTube")
parser_youtube.add_argument('--json', type=str, help="File to write the JSON output to.")

### Parsing
args = parser.parse_args(args=None if sys.argv[1:] else ['--help'])
Expand All @@ -44,4 +49,7 @@ def process_args(args: argparse.Namespace):
trio.run(gaia.hunt, None, args.gaia_id, args.json)
case "drive":
from ghunt.modules import drive
trio.run(drive.hunt, None, args.file_id, args.json)
trio.run(drive.hunt, None, args.file_id, args.json)
case "youtube":
from ghunt.modules import youtube
trio.run(youtube.hunt, None, args.channel_url, args.json)
24 changes: 24 additions & 0 deletions ghunt/modules/youtube.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from ghunt.helpers.utils import get_httpx_client
from ghunt import globals as gb

import requests, re, waybackpy, argparse, trio, httpx

async def hunt(as_client: httpx.AsyncClient, channel_url: str, json_file: bool=None):
    """Resolve a YouTube channel to a Gaia ID and run the Gaia module on it.

    Steps:
      1. Download ``channel_url`` and extract the first
         ``youtube.com/channel/<id>`` URL found in the page.
      2. Ask the Wayback Machine (through ``waybackpy``) for the snapshot of
         that URL nearest to 2019.
      3. Download the snapshot and extract the first
         ``plus.google.com/<digits>`` link; the digits are used as the
         Gaia ID.
      4. Hand the Gaia ID to ``gaia.hunt``.

    Args:
        as_client: Unused by this function; ``gaia.hunt`` is invoked with
            ``None`` in its place.
        channel_url: URL of the YouTube channel page to inspect.
        json_file: Forwarded unchanged to ``gaia.hunt``.

    Raises:
        ValueError: If no channel-ID URL is found in the channel page, or no
            Google+ profile link is found in the archived page.
        requests.HTTPError: If either page is answered with an HTTP error
            status.
    """
    # TODO: make the User-Agent configurable.
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:103.0) Gecko/20100101 Firefox/103.0"
    # A bound on every request so a stalled server cannot hang the CLI forever.
    request_timeout = 30

    # NOTE: `requests` is synchronous, so these calls block the trio event
    # loop while they run.
    channel_page = requests.get(channel_url, timeout=request_timeout)
    channel_page.raise_for_status()

    # TODO: maybe let the user confirm the channel ID, iterating over every
    # candidate found in the page when the first one looks wrong.
    channel_match = re.search(
        r"https?://(?:www\.)?youtube\.com/channel/[\w-]+", channel_page.text
    )
    if channel_match is None:
        raise ValueError(
            f"No YouTube channel ID URL found in the page at {channel_url!r}."
        )
    channel_id_url = channel_match.group(0)

    # TODO: switch to the Memento API for access to more archives?
    # TODO: check whether any archive exists at all, and whether one predates
    # the removal of Google+ IDs from channel pages.
    wayback_url = waybackpy.Url(channel_id_url, user_agent)
    nearest_archive_url = wayback_url.near(year=2019)

    archived_page = requests.get(nearest_archive_url, timeout=request_timeout)
    archived_page.raise_for_status()

    # Dots are escaped so that only the literal plus.google.com host matches.
    gaia_match = re.search(
        r"https?://plus\.google\.com/([0-9]+)", archived_page.text
    )
    if gaia_match is None:
        raise ValueError(
            f"No Google+ profile link found in the archived page for {channel_id_url!r}."
        )
    gaia_id = gaia_match.group(1)

    # Imported here, as in the original, rather than at module level.
    from ghunt.modules import gaia
    await gaia.hunt(None, gaia_id, json_file)