### Web Crawler using SERP API

In [1]:
import csv
import json
import os

from pytube import YouTube
from serpapi import GoogleSearch

# Read the SerpApi key from the environment so the secret never lives in the
# notebook or its version history.
# NOTE(review): a key was previously hardcoded in this cell; treat it as
# leaked and rotate it.
api_key = os.environ.get("SERPAPI_API_KEY")
if not api_key:
    raise RuntimeError(
        "SERPAPI_API_KEY is not set; export it before running this cell."
    )

# Query parameters passed to GoogleSearch: restrict the search to youtube.com
# pages that mention openinapp.co.
params = {
    "q": "site:youtube.com openinapp.co",
    "location": "California, United States",
    "hl": "en",
    "gl": "us",
    # NOTE(review): the search backend presumably caps results per request
    # well below this value — confirm against the SerpApi documentation.
    "num": 10000,
    "google_domain": "google.com",
    "api_key": api_key,
}

search = GoogleSearch(params)
results = search.get_dict()

# Make failures visible instead of silently writing an empty results file.
# NOTE(review): presumably SerpApi reports failures under an 'error' key —
# confirm against the API documentation.
if 'error' in results:
    print(f"SerpApi reported an error: {results['error']}")

# Collect every organic result whose URL points at YouTube (videos, channels,
# hashtag pages, ...). Results without a 'link' field are skipped rather than
# raising KeyError.
youtube_links = [
    result['link']
    for result in (results.get('organic_results') or [])
    if 'youtube.com' in result.get('link', '')
]

# Save the results in JSON format
with open('results.json', 'w', encoding='utf-8') as file:
    json.dump(youtube_links, file, indent=4)

print('Scraping complete!')

# Load the youtube_links from the JSON file written by the search step
with open('results.json', 'r', encoding='utf-8') as file:
    youtube_links = json.load(file)

# URL fragments that mark a link as already being a channel page. Besides the
# '/c/' and '/channel/' forms, YouTube also serves channels under handle
# ('/@name') and legacy '/user/' URLs; without these markers such links are
# handed to pytube, fail its video-id lookup, and are dropped from the CSV.
CHANNEL_URL_MARKERS = ('/c/', '/channel/', '/user/', '/@')

# One row per link: the original link and the channel it belongs to
data = []

for link in youtube_links:
    # A channel link is kept as-is in both columns
    if any(marker in link for marker in CHANNEL_URL_MARKERS):
        data.append({'Video Link': link, 'Channel Link': link})
        continue

    # Otherwise treat the link as a video and ask pytube for its channel.
    # Links pytube cannot handle (e.g. hashtag pages) are reported and
    # skipped so one bad link does not abort the whole run.
    try:
        channel_link = YouTube(link).channel_url
    except Exception as e:
        print(f"Error processing link: {link}")
        print(f"Error message: {str(e)}")
        continue

    data.append({'Video Link': link, 'Channel Link': channel_link})

# Save the data to a CSV file
csv_file = 'youtube_links.csv'
fields = ['Video Link', 'Channel Link']
with open(csv_file, 'w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=fields)
    writer.writeheader()
    writer.writerows(data)

print('CSV file created successfully!')

Scraping complete!
Error processing link: https://www.youtube.com/hashtag/openinapp
Error message: regex_search: could not find match for (?:v=|\/)([0-9A-Za-z_-]{11}).*
CSV file created successfully!


#### Used SerpApi for web crawling and pytube for YouTube channel link extraction

In [4]:
import pandas as pd

In [5]:
# Preview the CSV written by the crawler cell; the bare expression is rendered as the cell output
pd.read_csv(filepath_or_buffer='youtube_links.csv')

Unnamed: 0,Video Link,Channel Link
0,https://www.youtube.com/c/OpeninApp,https://www.youtube.com/c/OpeninApp
1,https://www.youtube.com/channel/UCIGDcUqL2kKMd...,https://www.youtube.com/channel/UCIGDcUqL2kKMd...
2,https://www.youtube.com/watch?v=J_A5UE2_6Zs,https://www.youtube.com/channel/UCIGDcUqL2kKMd...
3,https://www.youtube.com/watch?v=vOo-VyPIiW0,https://www.youtube.com/channel/UC3gnV5Yt4crST...
4,https://www.youtube.com/watch?v=O6lup3SN4Bw,https://www.youtube.com/channel/UCIGDcUqL2kKMd...
...,...,...
94,https://www.youtube.com/watch?v=TgvaaJwpstM,https://www.youtube.com/channel/UCjAN96n1KcKIK...
95,https://www.youtube.com/watch?v=JLUxTgpK3DY,https://www.youtube.com/channel/UCzHCLS-7yPauT...
96,https://www.youtube.com/watch?v=imA7FYM7iW8,https://www.youtube.com/channel/UCjAN96n1KcKIK...
97,https://www.youtube.com/watch?v=CFbyjoBoWTw,https://www.youtube.com/channel/UCdX_KVt0S-B0w...
