In [14]:
import os
import json
import time
import requests
import random
import ipinfo
import numpy as np
import pandas as pd
from tqdm import tqdm
from bs4 import BeautifulSoup

import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.colors import BoundaryNorm
from matplotlib.colors import ListedColormap

from seleniumwire import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Use seaborn's 'colorblind' palette as the default colour cycle for plots.
sns.set_palette('colorblind')

# Display settings: show every column (no truncation) and allow wide rows.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

In [32]:
def get_location_from_ip(access_token, ip_address):
    """Return the ``(region, country)`` that ipinfo reports for an IP address.

    Args:
        access_token: ipinfo API token used to create the lookup handler.
        ip_address: IP address string; surrounding whitespace is stripped
            before the lookup.

    Returns:
        Tuple ``(region, country)`` taken from the ``'region'`` and
        ``'country'`` entries of the lookup result.

    Raises:
        KeyError: if the lookup result has no ``'region'`` or ``'country'`` entry.
    """
    handler = ipinfo.getHandler(access_token)
    info = handler.getDetails(ip_address.strip()).all
    return info['region'], info['country']


def create_selenium_driver(proxy_dict):
    """Start a selenium-wire Chrome driver that routes traffic through a proxy.

    Args:
        proxy_dict: Either a bare proxy mapping such as
            ``{'https': 'http://user:pass@host:port'}`` or a complete
            selenium-wire options dict (one that is not made up solely of
            ``'http'`` / ``'https'`` / ``'no_proxy'`` keys). ``None`` or an
            empty dict starts the driver without an upstream proxy.

    Returns:
        Tuple ``(driver, wait)`` where ``wait`` is a ``WebDriverWait`` bound to
        the driver with a randomly chosen timeout of 7-10 seconds.
    """
    # selenium-wire only reads upstream proxy settings from the 'proxy' key of
    # its options. A bare {'https': ...} mapping passed at the top level is
    # ignored, which would leave the browser on the local connection, so wrap
    # bare scheme mappings here. Anything else is passed through untouched.
    proxy_scheme_keys = {'http', 'https', 'no_proxy'}
    if proxy_dict and set(proxy_dict) <= proxy_scheme_keys:
        selenium_options = {'proxy': dict(proxy_dict)}
    else:
        selenium_options = proxy_dict

    chrome_options = Options()
    chrome_options.add_argument("--no-sandbox")
    # chrome_options.add_argument("--headless")

    driver = webdriver.Chrome(
        seleniumwire_options=selenium_options,
        options=chrome_options
    )
    # Randomised timeout so page waits do not all take exactly the same time.
    wait = WebDriverWait(driver, random.randint(7, 10))

    return driver, wait


def decline_optional_cookies(wait):
    """Click the 'Decline optional cookies' button if it shows up.

    Best-effort: any failure (button missing, timeout, click error) is
    reported with a printed message and otherwise ignored.

    Args:
        wait: ``WebDriverWait`` used to wait for the button to be clickable.
    """
    try:
        button_locator = (
            By.XPATH,
            "//button[normalize-space()='Decline optional cookies']",
        )
        wait.until(EC.element_to_be_clickable(button_locator)).click()
    except Exception:
        print("No 'Decline optional cookies' button found or it was not clickable.")


def close_login_popup(wait):
    """Dismiss the login popup by clicking its 'Close' icon if it shows up.

    Best-effort: any failure (popup missing, timeout, click error) is
    reported with a printed message and otherwise ignored.

    Args:
        wait: ``WebDriverWait`` used to wait for the icon to be clickable.
    """
    try:
        close_locator = (By.CSS_SELECTOR, "svg[aria-label='Close']")
        wait.until(EC.element_to_be_clickable(close_locator)).click()
    except Exception:
        print("No login popup found or it was not clickable.")

### Check Proxy Locations

In [4]:
# Proxy list file: one proxy per line, with the IP as the first ':'-separated field.
proxies_path = '/Users/brahmaninutakki/saarland/docs/proxies/2024-05-27 06 53 59.txt'

with open(proxies_path, 'r') as f:
    proxies = f.readlines()

# Strip whitespace and skip blank lines so a trailing newline in the file does
# not turn into an empty "IP" that would later be sent to the location lookup.
ips = [proxy.split(':')[0].strip() for proxy in proxies if proxy.strip()]

In [5]:
# Preview the first five parsed proxy IPs.
ips[:5]

['31.131.11.187',
 '172.81.23.200',
 '107.166.116.67',
 '31.131.9.22',
 '172.81.21.41']

In [6]:
# get ip inferred locations
# The ipinfo API token is read from the IPINFO_TOKEN environment variable so
# the secret is not stored in the notebook; a missing variable raises KeyError.
ipinfo_token = os.environ['IPINFO_TOKEN']

ip_locations = {}
for ip in tqdm(ips):
    region, country = get_location_from_ip(ipinfo_token, ip)
    ip_locations[ip] = {'region': region, 'country': country}

100%|██████████| 111/111 [00:22<00:00,  4.83it/s]


In [7]:
# Display the full mapping of proxy IP -> {'region': ..., 'country': ...}.
ip_locations

{'31.131.11.187': {'region': 'Virginia', 'country': 'US'},
 '172.81.23.200': {'region': 'New York', 'country': 'US'},
 '107.166.116.67': {'region': 'New York', 'country': 'US'},
 '31.131.9.22': {'region': 'Virginia', 'country': 'US'},
 '172.81.21.41': {'region': 'New York', 'country': 'US'},
 '172.81.20.69': {'region': 'New York', 'country': 'US'},
 '162.218.13.126': {'region': 'Texas', 'country': 'US'},
 '52.128.216.28': {'region': 'New York', 'country': 'US'},
 '31.131.8.161': {'region': 'Virginia', 'country': 'US'},
 '23.226.24.128': {'region': 'New Jersey', 'country': 'US'},
 '172.81.22.204': {'region': 'New York', 'country': 'US'},
 '31.131.10.143': {'region': 'Virginia', 'country': 'US'},
 '31.131.11.55': {'region': 'Virginia', 'country': 'US'},
 '172.81.23.163': {'region': 'New York', 'country': 'US'},
 '107.166.116.224': {'region': 'New York', 'country': 'US'},
 '31.131.9.147': {'region': 'Virginia', 'country': 'US'},
 '172.81.21.77': {'region': 'New York', 'country': 'US'},
 '

In [8]:
# Distinct locations covered by the proxies. Despite the variable name, the
# values collected here are the 'region' entries, not cities.
unique_cities = {location['region'] for location in ip_locations.values()}

unique_cities

{'Lower Saxony', 'New Jersey', 'New York', 'Texas', 'Virginia'}

### Setup crawls

In [55]:
# Instagram post/reel URLs to crawl, ten per news outlet. The second-to-last
# path segment of each URL is the post id used later to name the saved files.
msnbc_urls = [
"https://www.instagram.com/reel/DRYGzcyEsGL/",
"https://www.instagram.com/reel/DRX71AeCgTh/",
"https://www.instagram.com/reel/DRX7mPtkhbo/",
"https://www.instagram.com/reel/DRXyanzDw2z/",
"https://www.instagram.com/reel/DRXeikNDutF/",
"https://www.instagram.com/reel/DRXVJ4LkQVH/",
"https://www.instagram.com/reel/DRWNKQPjO_h/",
"https://www.instagram.com/reel/DRWCrncjAPT/",
"https://www.instagram.com/reel/DRV4jnhkf2v/",
"https://www.instagram.com/p/DRV0xJXETpL/",
]

huffpost_urls = [
"https://www.instagram.com/reel/DRVaQ0JE5AC/",
"https://www.instagram.com/p/DRVxaKSDOM_/",
"https://www.instagram.com/p/DRVmv2RATHe/",
"https://www.instagram.com/reel/DRVXN3TAbH-/",
"https://www.instagram.com/p/DRSumE-E5yK/",
"https://www.instagram.com/p/DRSlVEnEyjU/",
"https://www.instagram.com/reel/DRShUjTAcwn/",
"https://www.instagram.com/p/DRSaTqqkya4/",
"https://www.instagram.com/p/DRSMqiQDPBR/",
"https://www.instagram.com/p/DRQry8hjLUz/",
]

cnn_urls = [
"https://www.instagram.com/p/DRYdbIKj3Zo/",
"https://www.instagram.com/p/DRYTefyEV_e/",
"https://www.instagram.com/p/DRYCLfXCP2V/",
"https://www.instagram.com/p/DRX4Cm9AaaX/",
"https://www.instagram.com/p/DRXc5ZsDe7T/",
"https://www.instagram.com/p/DRXVsTUCpG8/",
"https://www.instagram.com/p/DRXIAi2jsGK/",
"https://www.instagram.com/p/DRXD15DkpPm/",
"https://www.instagram.com/reel/DRXBM5Ojq4P/",
"https://www.instagram.com/reel/DRW1V1nE_nd/",
]

washingtonpost_urls = [
"https://www.instagram.com/p/DRYMvrrFks8/",
"https://www.instagram.com/p/DRX-31ODa_2/",
"https://www.instagram.com/p/DRX4CI7DjVE/",
"https://www.instagram.com/p/DRXw_eDjr9l/",
"https://www.instagram.com/p/DRXf-IGj2TH/",
"https://www.instagram.com/reel/DRXO1Q9Co1Y/",
"https://www.instagram.com/reel/DRV4-1ck1EG/",
"https://www.instagram.com/p/DRVzwuyjFMm/",
"https://www.instagram.com/p/DRVusgLF-KT/",
"https://www.instagram.com/p/DRVn1eJjQm2/",
]

forbes_urls = [
"https://www.instagram.com/p/DRYi49Elhfc/",
"https://www.instagram.com/p/DRYFwOWElfS/",
"https://www.instagram.com/p/DRYCRP5gUZ3/",
"https://www.instagram.com/p/DRXqPpGEvzD/",
"https://www.instagram.com/p/DRXXXBBgStO/",
"https://www.instagram.com/p/DRXIfJ-kt1B/",
"https://www.instagram.com/p/DRXG_higQPW/",
"https://www.instagram.com/p/DRVg9Zalp_S/",
"https://www.instagram.com/p/DRVQXcWAaaw/",
"https://www.instagram.com/p/DRVAqK1AYIk/",
]

thehill_urls = [
"https://www.instagram.com/reel/DRVZKhOkeKw/",
"https://www.instagram.com/p/DRR-6TIDQv2/",
"https://www.instagram.com/p/DRP2kfKE_Z1/",
"https://www.instagram.com/p/DRPl89HEnWP/",
"https://www.instagram.com/p/DRNeXQakhum/",
"https://www.instagram.com/reel/DRNOKZMkZv9/",
"https://www.instagram.com/reel/DRNI1TVEZ90/",
"https://www.instagram.com/reel/DRNAF39ESYr/",
"https://www.instagram.com/p/DRM2gqLkeys/",
"https://www.instagram.com/reel/DRK7-xAEaGg/",
]

washingtontimes_urls = [
"https://www.instagram.com/p/DRVp8_bErI7/",
"https://www.instagram.com/p/DRViLV5E_8S/",
"https://www.instagram.com/p/DRVa9uoE3IN/",
"https://www.instagram.com/p/DRVSYiAkzb1/",
"https://www.instagram.com/p/DRVKQFeE7hq/",
"https://www.instagram.com/p/DRVCly3k6Nf/",
"https://www.instagram.com/p/DRU66sgE9tl/",
"https://www.instagram.com/p/DRUz9F7EnXy/",
"https://www.instagram.com/p/DRUrAuGkh76/",
"https://www.instagram.com/p/DRUjQ89EgPE/",
]

nypost_urls = [
"https://www.instagram.com/p/DRYdxYRDMFJ/",
"https://www.instagram.com/p/DRYaVhZAaW0/",
"https://www.instagram.com/p/DRYW5g9gXCr/",
"https://www.instagram.com/p/DRYTlB7gZYt/",
"https://www.instagram.com/p/DRYRxGYAUPl/",
"https://www.instagram.com/p/DRYQC6dATyk/",
"https://www.instagram.com/p/DRYMnK-Abqo/",
"https://www.instagram.com/p/DRYJLKLAXq0/",
"https://www.instagram.com/p/DRYF1uggWJ4/",
"https://www.instagram.com/p/DRYCYdKEtLe/",
]

foxnews_urls = [
"https://www.instagram.com/reel/DRYg5_ZlC-O/",
"https://www.instagram.com/reel/DRYeCC9DP9C/",
"https://www.instagram.com/p/DRYdTZHjT-E/",
"https://www.instagram.com/reel/DRYaF2vj-hm/",
"https://www.instagram.com/reel/DRYXH9GlIcS/",
"https://www.instagram.com/reel/DRYOJ1tkVa-/",
"https://www.instagram.com/p/DRYLB8mjbCV/",
"https://www.instagram.com/p/DRYKy0ajKVp/",
"https://www.instagram.com/reel/DRYExbVkauI/",
"https://www.instagram.com/p/DRX-wXoksdM/",
]

breitbart_urls = [
"https://www.instagram.com/p/DRZPF4PDjqw/",
"https://www.instagram.com/p/DRYfrKnDJXa/",
"https://www.instagram.com/p/DRYHN6eDcGi/",
"https://www.instagram.com/p/DRXyZHHEsO9/",
"https://www.instagram.com/p/DRXUNHfDYvW/",
"https://www.instagram.com/reel/DRXIibxjFKK/",
"https://www.instagram.com/p/DRWhnmjDo-i/",
"https://www.instagram.com/p/DRWXFT9DRtK/",
"https://www.instagram.com/reel/DRWBiQXDG0H/",
"https://www.instagram.com/reel/DRV5wtFDGsb/",
]

# Outlet name -> list of post URLs. The outlet name doubles as the folder name
# for saved results, and crawls iterate in the order written here.
all_urls = dict(
    msnbc=msnbc_urls,
    huffpost=huffpost_urls,
    cnn=cnn_urls,
    washingtonpost=washingtonpost_urls,
    forbes=forbes_urls,
    thehill=thehill_urls,
    washingtontimes=washingtontimes_urls,
    nypost=nypost_urls,
    foxnews=foxnews_urls,
    breitbart=breitbart_urls,
)

#### No-persona baselines

In [None]:
# Baseline crawl: visit every post without logging in, through one proxy, and
# save the visible comments and their timestamps as JSON files per post.
proxy_map = {'ny_1': '172.81.22.22',
             'texas_1': '162.218.13.134'}

proxy_name = 'texas_1'
proxy = proxy_map[proxy_name]
# Proxy credentials are read from the PROXY_USER / PROXY_PASS environment
# variables so they are not stored in the notebook (KeyError if unset).
proxy_user = os.environ['PROXY_USER']
proxy_pass = os.environ['PROXY_PASS']
proxy_dict = {'https': f'http://{proxy_user}:{proxy_pass}@{proxy}:29842'}

# CSS selectors built from the class lists Instagram puts on comment <span>
# and <time> elements. These class names are generated by the site, so they
# have to be refreshed here whenever the page markup changes.
comment_classes = "x1lliihq x1plvlek xryxfnj x1n2onr6 xyejjpt x15dsfln x193iq5w xeuugli x1fj9vlw x13faqbe x1vvkbs x1s928wv xhkezso x1gmr53x x1cpjm7i x1fgarty x1943h6x x1i0vuye xvs91rp xo1l8bm x5n08af x10wh9bi xpm28yp x8viiok x1o7cslx"
comment_selector = "span." + ".".join(comment_classes.split())
timestamp_classes = "x1ejq31n x18oe1m7 x1sy0etr xstzfhl x1roi4f4 xexx8yu xyri2b x18d9i69 x1c1uobl x1n2onr6"
timestamp_selector = "time." + ".".join(timestamp_classes.split())

for name, urls in tqdm(all_urls.items()):

    save_path = f'/Users/brahmaninutakki/saarland/insta-comments/saved_data/new/{proxy_name}/{name}'
    os.makedirs(save_path, exist_ok=True)

    # One fresh browser per outlet.
    driver, wait = create_selenium_driver(proxy_dict)
    try:
        for url in urls:
            url_id = url.split('/')[-2]
            comments_file = f'{save_path}/{url_id}_comments.json'
            timestamps_file = f'{save_path}/{url_id}_timestamps.json'

            # Skip only when BOTH files exist. A post whose timestamp scrape
            # failed earlier has just the comments file and must be retried.
            if os.path.exists(comments_file) and os.path.exists(timestamps_file):
                print("Skipping...")
                continue

            driver.get(url)
            decline_optional_cookies(wait)
            close_login_popup(wait)

            try:
                content = wait.until(
                    EC.presence_of_all_elements_located((By.CSS_SELECTOR, comment_selector))
                )
                content = [comment.text for comment in content]

                # Comments are saved as soon as they are scraped so they are
                # kept even if the timestamp lookup below fails.
                with open(comments_file, 'w') as f:
                    json.dump(content, f)

                timestamp = wait.until(
                    EC.presence_of_all_elements_located((By.CSS_SELECTOR, timestamp_selector))
                )
                timestamp = [t.get_attribute('datetime') for t in timestamp]

                with open(timestamps_file, 'w') as f:
                    json.dump(timestamp, f)

            except Exception as e:
                print(f"Error extracting comments for {name}:{url}", e)

            time.sleep(random.randint(2, 5))
    finally:
        # quit() (not close()) ends the whole browser session and its driver
        # process; close() only closes the window and would leave one driver
        # process running per outlet.
        driver.quit()

    time.sleep(random.randint(5, 10))

In [59]:
# Sanity check: number of saved files in each outlet folder of this crawl.
# Every entry under `pathfile` is listed as a directory.
pathfile = '/Users/brahmaninutakki/saarland/insta-comments/saved_data/new/texas_1'
for outlet in os.listdir(pathfile):
    saved_files = os.listdir(os.path.join(pathfile, outlet))
    print(outlet, len(saved_files))


foxnews 20
forbes 20
huffpost 20
breitbart 20
washingtonpost 20
nypost 20
cnn 18
msnbc 20
thehill 20
washingtontimes 15


#### Persona Crawls 

In [14]:
# Spot-check the inferred location of a single proxy IP.
# NOTE(review): the crawl cells map 'texas_1' to 162.218.13.134, not this
# address — confirm which IP was meant to be checked.
ip_locations['162.218.13.80']

{'region': 'Texas', 'country': 'US'}

In [84]:
# Persona crawl: log in to Instagram as one persona account through a proxy,
# then open every post in its own tab and save its comments and timestamps.
proxy_map = {'ny_1': '172.81.22.22',
             'texas_1': '162.218.13.134'}

proxy_name = 'texas_1'
proxy = proxy_map[proxy_name]
# Proxy credentials are read from the PROXY_USER / PROXY_PASS environment
# variables so they are not stored in the notebook (KeyError if unset).
proxy_user = os.environ['PROXY_USER']
proxy_pass = os.environ['PROXY_PASS']
proxy_dict = {'https': f'http://{proxy_user}:{proxy_pass}@{proxy}:29842'}

login_url = 'https://www.instagram.com/accounts/login/'

# Persona to crawl as: 'male_dem', 'female_rep', 'male_rep' or 'female_dem'.
# It also names the output folder.
dir_name = 'male_dem'
# Account logins are read from IG_<PERSONA>_USER / IG_<PERSONA>_PASS (for
# example IG_MALE_DEM_USER) instead of being hardcoded. A missing variable
# raises KeyError rather than attempting a login with empty credentials.
# The username is stripped so stray whitespace cannot break the login.
uname = os.environ[f'IG_{dir_name.upper()}_USER'].strip()
pwd = os.environ[f'IG_{dir_name.upper()}_PASS']

# CSS selectors built from the class lists Instagram puts on comment <span>
# and <time> elements. These class names are generated by the site, so they
# have to be refreshed here whenever the page markup changes.
comment_classes = "x1lliihq x1plvlek xryxfnj x1n2onr6 xyejjpt x15dsfln x193iq5w xeuugli x1fj9vlw x13faqbe x1vvkbs x1s928wv xhkezso x1gmr53x x1cpjm7i x1fgarty x1943h6x x1i0vuye xvs91rp xo1l8bm x5n08af x10wh9bi xpm28yp x8viiok x1o7cslx"
comment_selector = "span." + ".".join(comment_classes.split())
timestamp_classes = "x1ejq31n x18oe1m7 x1sy0etr xstzfhl x1roi4f4 xexx8yu xyri2b x18d9i69 x1c1uobl x1n2onr6"
timestamp_selector = "time." + ".".join(timestamp_classes.split())

# Known ways to reach the login form; each one is tried in turn.
login_entry_locators = (
    (By.XPATH, "//div[@role='button' and normalize-space()='Log In']"),
    (By.XPATH, "/html/body/div[1]/div/div/div[2]/div/div/div[1]/div[1]/div[1]/section/main/div[1]/div[2]/div/div/div/div/div[2]/div/div[2]/div[2]/div/a"),
    # Keep the brackets in this XPath balanced: a stray trailing ']' makes the
    # expression invalid, so the lookup can never match.
    (By.XPATH, "/html/body/div[1]/div/div/div[2]/div/div/div[1]/div[1]/div[1]/section/div/div/div[2]/div/div/div/div[1]/a"),
)


def _click_when_clickable(wait, locator, label):
    """Click the element at `locator` once clickable; report and carry on otherwise."""
    try:
        wait.until(EC.element_to_be_clickable(locator)).click()
    except Exception:
        print(f"No '{label}' button found or it was not clickable.")


def _submit_login_form(wait, uname, pwd):
    """Type the username and password into the login form and press submit once."""
    try:
        username = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, 'input[aria-label="Phone number, username or email address"]')
        ))
        username.clear()
        username.send_keys(uname)
    except Exception as e:
        print("username error", e)

    try:
        password = wait.until(EC.element_to_be_clickable(
            (By.CSS_SELECTOR, 'input[aria-label="Password"]')
        ))
        password.clear()
        password.send_keys(pwd)
    except Exception:
        print("pwd error")

    time.sleep(random.randint(2, 5))
    _click_when_clickable(
        wait,
        (By.XPATH, "//button[@type='submit' and .//div[normalize-space()='Log in'] and not(@disabled)]"),
        'Log In',
    )


driver, wait = create_selenium_driver(proxy_dict)
try:
    driver.get(login_url)
    decline_optional_cookies(wait)

    time.sleep(random.randint(2, 3))
    for locator in login_entry_locators:
        _click_when_clickable(wait, locator, 'Log In')

    # The form is filled in and submitted twice.
    # NOTE(review): confirm the second pass is still needed when the first
    # submit already succeeds.
    for _ in range(2):
        time.sleep(random.randint(2, 5))
        _submit_login_form(wait, uname, pwd)

    decline_optional_cookies(wait)

    time.sleep(random.randint(2, 5))
    _click_when_clickable(
        wait,
        (By.XPATH, "//div[@role='button' and normalize-space()='Not now']"),
        'Not now',
    )

    decline_optional_cookies(wait)

    time.sleep(random.randint(15, 25))

    for name, urls in tqdm(all_urls.items()):
        save_path = f'/Users/brahmaninutakki/saarland/insta-comments/saved_data/new/{dir_name}/{proxy_name}/{name}'

        for url in urls:
            url_id = url.split('/')[-2]
            comments_file = f'{save_path}/{url_id}_comments.json'
            timestamps_file = f'{save_path}/{url_id}_timestamps.json'

            # Skip only when BOTH files exist. A post whose timestamp scrape
            # failed earlier has just the comments file and must be retried.
            if os.path.exists(comments_file) and os.path.exists(timestamps_file):
                print("Skipping...")
                continue

            # Each post is loaded in its own tab, which is closed afterwards.
            driver.switch_to.new_window('tab')
            try:
                driver.get(url)
                decline_optional_cookies(wait)
                _click_when_clickable(wait, login_entry_locators[0], 'Log In')

                try:
                    content = wait.until(
                        EC.presence_of_all_elements_located((By.CSS_SELECTOR, comment_selector))
                    )
                    content = [comment.text for comment in content]

                    os.makedirs(save_path, exist_ok=True)

                    # Comments are saved as soon as they are scraped so they
                    # are kept even if the timestamp lookup below fails.
                    with open(comments_file, 'w') as f:
                        json.dump(content, f)

                    timestamp = wait.until(
                        EC.presence_of_all_elements_located((By.CSS_SELECTOR, timestamp_selector))
                    )
                    timestamp = [t.get_attribute('datetime') for t in timestamp]

                    with open(timestamps_file, 'w') as f:
                        json.dump(timestamp, f)

                except Exception as e:
                    print(f"Error extracting comments for {name}:{url}", e)
            finally:
                # Always close the post tab and return to the first window,
                # even if loading the page raised.
                driver.close()

                if driver.window_handles:
                    driver.switch_to.window(driver.window_handles[0])

            time.sleep(random.randint(2, 5))

        time.sleep(random.randint(10, 15))
finally:
    # End the browser session even when the crawl is interrupted or fails.
    driver.quit()

No 'Log In' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.


  0%|          | 0/10 [00:00<?, ?it/s]

No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not

 10%|█         | 1/10 [04:47<43:05, 287.26s/it]

No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not

 20%|██        | 2/10 [09:38<38:38, 289.80s/it]

No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not

 30%|███       | 3/10 [14:29<33:51, 290.16s/it]

No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not

 40%|████      | 4/10 [19:18<28:57, 289.56s/it]

No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not

 50%|█████     | 5/10 [24:08<24:10, 290.01s/it]

No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not

 60%|██████    | 6/10 [28:58<19:19, 289.93s/it]

No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
Error extracting comments for washingtontimes:https://www.instagram.com/p/DRVp8_bErI7/ Message: 

No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
Error extracting comments for washingtontimes:https://www.instagram.com/p/DRViLV5E_8S/ Message: 

No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
Error extracting comments for washingtontimes:https://www.instagram.com/p/DRVa9uoE3IN/ Message: 

No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
Error extracting comments for washingtontimes:https://www.instagram.com/p/DRVSYiAkzb1/ Message: 

No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline option

 70%|███████   | 7/10 [34:43<15:23, 308.00s/it]

No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not

 80%|████████  | 8/10 [39:18<09:54, 297.42s/it]

No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not

 90%|█████████ | 9/10 [44:06<04:54, 294.53s/it]

No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not clickable.
No 'Log In' button found or it was not clickable.
No 'Decline optional cookies' button found or it was not

100%|██████████| 10/10 [48:52<00:00, 293.25s/it]


In [75]:
# End the current browser session by hand.
# NOTE(review): presumably kept for cleaning up after an interrupted crawl — confirm.
driver.quit()

In [83]:
# Sanity check: number of saved files in each outlet folder of this crawl.
# Every entry under `pathfile` is listed as a directory.
pathfile = '/Users/brahmaninutakki/saarland/insta-comments/saved_data/new/male_rep/ny_1'
for outlet in os.listdir(pathfile):
    saved_files = os.listdir(os.path.join(pathfile, outlet))
    print(outlet, len(saved_files))

foxnews 20
forbes 20
huffpost 20
breitbart 20
washingtonpost 20
nypost 20
cnn 20
msnbc 20
thehill 20
washingtontimes 15
