In [1]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import re
import time
import pandas as pd
import numpy as np
from requests_html import HTMLSession, AsyncHTMLSession
import asyncio
import ast

MAX_CONCURRENT_CHROME = 3  # Set the maximum number of concurrent Chrome instances
semaphore = asyncio.Semaphore(MAX_CONCURRENT_CHROME)

In [3]:
# URL to scrape
base_url = "https://www.vogue.com/fashion-shows/seasons"

In [4]:
# Function to get all the fashion show and slideshow URLs under /seasons
def get_fashion_season_urls(base_url):
    # Request the main seasons page
    response = requests.get(base_url)
    if response.status_code != 200:
        print(f"Failed to retrieve the page: {response.status_code}")
        return []

    # Parse the page content
    soup = BeautifulSoup(response.content, "html.parser")

    # Find all 'a' tags that contain fashion show URLs
    show_urls = []
    for a_tag in soup.find_all('a', href=True):
        href = a_tag['href']
        # Filter URLs that contain "/fashion-shows/" and are not general category pages
        if "/fashion-shows/" in href and not href.endswith(("seasons", "latest-shows", "designers", "featured", "image-archive")):
            # Create full URL by joining base URL with relative paths
            full_url = urljoin(base_url, href)
            # Append the full URL to the list
            show_urls.append(full_url)

    # Remove duplicates
    show_urls = list(set(show_urls))

    return show_urls

    
# Function to get all slideshow URLs from a single fashion show page
def get_show_urls(show_url, verbose=False):
    slideshow_links = []
    show_name = re.sub("^.*fashion-shows/", '', show_url)
    if verbose: print(show_name)
    
    # Find slideshow links (ending with #1, #2, etc.)
    pattern = re.compile(r'.*SummaryItem.*')

    for _ in range(5):
        response = requests.get(show_url)
        if response.status_code != 200:
            print(f"Failed to retrieve the show page: {response.status_code}")
            return []

        soup = BeautifulSoup(response.content, "html.parser")

        for a_tag in soup.find_all('a', {'class': pattern}):
            href = a_tag.get('href')
            if show_name in href and re.match(r'^/fashion-shows.*', href):
                full_url = urljoin(show_url, href)
                full_url += "/slideshow/collection"
                slideshow_links.append(full_url)
        
        if len(slideshow_links) > 0: break
    
    # Remove duplicates and return slideshow URLs
    if len(slideshow_links) > 0: return list(set(slideshow_links))
    else: print("WARNING:", show_name, "NOT CONNECTED")


In [5]:
# Call the function and print the result
fashion_season_urls = get_fashion_season_urls(base_url)

shows_urls = list(map(lambda item: get_show_urls(item, verbose=True), fashion_season_urls))
# print(shows_urls)

fall-2006-ready-to-wear
australia-resort-2019
fall-1992-ready-to-wear
spring-2009-ready-to-wear
sao-paulo-resort-2017
seoul-fall-2017
berlin-fall-2016
fall-1991-ready-to-wear
resort-2020
fall-2003-couture
fall-2015-couture
resort-2007
copenhagen-fall-2023
spring-2013-menswear
australia-resort-2025
fall-2011-couture
resort-2021-menswear
shanghai-fall-2023
berlin-fall-2021
fall-2001-ready-to-wear
spring-2020-couture
stockholm-fall-2017
stockholm-fall-2022
spring-2018-couture
pre-fall-2021
fall-2017-menswear
ukraine-fall-2016
spring-2007-couture
spring-2021-ready-to-wear
pre-fall-2008
spring-1996-couture
fall-2024-menswear
russia-spring-2016
resort-2024-menswear
spring-1996-ready-to-wear
stockholm-spring-2018
Failed to retrieve the show page: 502
seoul-fall-2015
spring-2010-ready-to-wear
fall-1999-couture
bridal-fall-2019
copenhagen-fall-2019
bridal-spring-2019
madrid-fall-2020
spring-2008-couture
pre-fall-2016
tbilisi-spring-2017
copenhagen-fall-2021
spring-2013-couture
fall-1994-ready-t

In [7]:
for i in range(len(shows_urls)):
    if len(shows_urls[i]) == 0:
        print(i)
        shows_urls[i] = get_show_urls(fashion_season_urls[i])

In [8]:
df_shows = {'year': [re.search(r'\b(19|20)\d{2}\b', url).group() for url in fashion_season_urls],
            'season_url': fashion_season_urls,
            'show_url': shows_urls}

df_shows = pd.../secretsFrame(df_shows)
df_shows.to_csv('../secrets/shows_season.csv', index_label='seasonID')
df_shows

Unnamed: 0,year,season_url,show_url
0,2006,https://www.vogue.com/fashion-shows/fall-2006-...,[https://www.vogue.com/fashion-shows/fall-2006...
1,2019,https://www.vogue.com/fashion-shows/australia-...,[https://www.vogue.com/fashion-shows/australia...
2,1992,https://www.vogue.com/fashion-shows/fall-1992-...,[https://www.vogue.com/fashion-shows/fall-1992...
3,2009,https://www.vogue.com/fashion-shows/spring-200...,[https://www.vogue.com/fashion-shows/spring-20...
4,2017,https://www.vogue.com/fashion-shows/sao-paulo-...,[https://www.vogue.com/fashion-shows/sao-paulo...
...,...,...,...
375,2021,https://www.vogue.com/fashion-shows/resort-2021,[https://www.vogue.com/fashion-shows/resort-20...
376,2000,https://www.vogue.com/fashion-shows/spring-200...,[https://www.vogue.com/fashion-shows/spring-20...
377,1996,https://www.vogue.com/fashion-shows/fall-1996-...,[https://www.vogue.com/fashion-shows/fall-1996...
378,2024,https://www.vogue.com/fashion-shows/berlin-spr...,[https://www.vogue.com/fashion-shows/berlin-sp...


In [9]:
df_long = df_shows.explode('show_url').reset_index(drop=True)
df_long.to_csv('../secrets/shows_season_long.csv', index_label='showID')
df_long

Unnamed: 0,year,season_url,show_url
0,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...
1,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...
2,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...
3,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...
4,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...
...,...,...,...
4217,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...
4218,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...
4219,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...
4220,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...


In [2]:
# Initial npics

df_long = pd.read_csv('../secrets/shows_season_long.csv')

div_pattern = re.compile(r'RunwayGalleryLookNumberText.*')

In [3]:
async def get_npics(url, max_retries=5):
    async with semaphore:
        retries = 0
        session = AsyncHTMLSession()
        while retries < max_retries:
            try:
                # Send a GET request to the URL
                response = await session.get(url)
                
                # Render the JavaScript on the page (if needed)
                await response.html.arender()
                
                # Use BeautifulSoup to parse the HTML
                soup = BeautifulSoup(response.html.html, 'html.parser')
                
                # Extract desired data; for example, all headings
                div = soup.find('div', {'class': div_pattern})
                
                if div:
                    if response.html.browser:
                        await session.close()
                    npics = int(div.text.strip().split()[-1])
                    return (url, npics)
                else:
                    retries += 1
                    print("Retry", url, "retries:", retries)
                    time.sleep(0.5)
            
            except Exception as e:
                print(f"Error loading {url}")
                retries += 1
                time.sleep(0.5)
                
        await session.close()
        del session
        return None


async def run_tasks(s):
    tasks = [get_npics(url) for url in s]
    results = await asyncio.gather(*tasks)
    return results

In [28]:
epoch = 7
GAP = 500
start = 4000
end = -1
temp_result = await run_tasks(df_long['show_url'][start:end])

Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error (Target.sendMessageToTarget): No session with given id')>
pyppeteer.errors.NetworkError: Protocol error (Target.sendMessageToTarget): No session with given id
Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error (Target.sendMessageToTarget): No session with given id')>
pyppeteer.errors.NetworkError: Protocol error (Target.sendMessageToTarget): No session with given id
Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error (Target.sendMessageToTarget): No session with given id')>
pyppeteer.errors.NetworkError: Protocol error (Target.sendMessageToTarget): No session with given id
Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error (Target.sendMessageToTarget): No session with given id')>
pyppeteer.errors.NetworkError: Protocol error (Target.sendMes

Retry https://www.vogue.com/fashion-shows/resort-2021/alexander-mcqueen/slideshow/collection retries: 1
Retry https://www.vogue.com/fashion-shows/resort-2021/alexander-mcqueen/slideshow/collection retries: 2
Retry https://www.vogue.com/fashion-shows/resort-2021/alexander-mcqueen/slideshow/collection retries: 3
Retry https://www.vogue.com/fashion-shows/resort-2021/alexander-mcqueen/slideshow/collection retries: 4
Retry https://www.vogue.com/fashion-shows/resort-2021/alexander-mcqueen/slideshow/collection retries: 5


Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error (Target.sendMessageToTarget): No session with given id')>
pyppeteer.errors.NetworkError: Protocol error (Target.sendMessageToTarget): No session with given id


Retry https://www.vogue.com/fashion-shows/resort-2021/mugler/slideshow/collection retries: 1
Retry https://www.vogue.com/fashion-shows/resort-2021/mugler/slideshow/collection retries: 2


In [29]:
# Writing list to a file
with open('../secrets/npics_temp.txt', 'a') as file:
    file.write('Start: ' + str(start) + " End: " + str(end) + "\n")
    for item in temp_result:
        file.write(str(item) + '\n')

In [16]:
get_show_urls("https://www.vogue.com/fashion-shows/fall-2015-couture")

/fashion-shows/fall-2015-couture/valentino
/fashion-shows/fall-2015-couture/maison-martin-margiela
/fashion-shows/fall-2015-couture/armani-prive
/fashion-shows/fall-2015-couture/chanel
/fashion-shows/fall-2015-couture/giambattista-valli
/fashion-shows/fall-2015-couture/christian-dior
/fashion-shows/fall-2015-couture/schiaparelli
/fashion-shows/fall-2015-couture/atelier-versace


['https://www.vogue.com/fashion-shows/fall-2015-couture/schiaparelli/slideshow/collection#1',
 'https://www.vogue.com/fashion-shows/fall-2015-couture/chanel/slideshow/collection#1',
 'https://www.vogue.com/fashion-shows/fall-2015-couture/atelier-versace/slideshow/collection#1',
 'https://www.vogue.com/fashion-shows/fall-2015-couture/armani-prive/slideshow/collection#1',
 'https://www.vogue.com/fashion-shows/fall-2015-couture/christian-dior/slideshow/collection#1',
 'https://www.vogue.com/fashion-shows/fall-2015-couture/giambattista-valli/slideshow/collection#1',
 'https://www.vogue.com/fashion-shows/fall-2015-couture/maison-martin-margiela/slideshow/collection#1',
 'https://www.vogue.com/fashion-shows/fall-2015-couture/valentino/slideshow/collection#1']

In [50]:
npics = []
with open('../secrets/npics_temp.txt', 'r') as file:
    for line in file:
        # Strip any extra whitespace and newline characters
        line = line.strip()
        
        # Convert the string representation of the tuple to an actual tuple
        try:
            tuple_data = ast.literal_eval(line)
            if (len(tuple_data) > 1):
                npics.append(tuple_data)
        except:
            print(f"Skipping invalid line: {line}")

Skipping invalid line: Start: 0 End: 500
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: Start: 500 End: 1000
Skipping invalid line: None
Skipping invalid line: Start: 1000 End: 1500
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: Start: 1500 End: 2000
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping inva

In [65]:
df_npics = pd.DataFrame(npics, columns=['show_url', 'n_pics'])

df_long = pd.merge(df_long, df_npics, on='show_url', how='outer')
df_long['n_pics'] = df_long['n_pics'].where(df_long['n_pics'].notna(), other=pd.NA).astype('Int64')
df_long

Unnamed: 0,showID,year,season_url,show_url,n_pics
0,0,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,40
1,1,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,48
2,2,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,69
3,3,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,26
4,4,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,38
...,...,...,...,...,...
4217,4217,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,22
4218,4218,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,25
4219,4219,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,12
4220,4220,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,23


In [67]:
df_long.to_csv("../secrets/shows_season_long.csv", index=False)

In [68]:
df_long = pd.read_csv('../secrets/shows_season_long.csv')

In [4]:
# Second npics

df_long = pd.read_csv('../secrets/shows_season_long.csv')
df_long

Unnamed: 0,showID,year,season_url,show_url,n_pics
0,0,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,40.0
1,1,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,48.0
2,2,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,69.0
3,3,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,26.0
4,4,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,38.0
...,...,...,...,...,...
4217,4217,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,22.0
4218,4218,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,25.0
4219,4219,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,12.0
4220,4220,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,23.0


In [4]:
df_long['show_url'] = df_long['show_url'].str.replace(r'https://www.vogue.com/fashion-shows/sao-paolo-spring-2017', 'https://www.vogue.com/fashion-shows/sao-paulo-spring-2017', regex=True)
df_long

Unnamed: 0,showID,year,season_url,show_url,n_pics
0,0,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,40.0
1,1,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,48.0
2,2,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,69.0
3,3,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,26.0
4,4,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,38.0
...,...,...,...,...,...
4217,4217,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,22.0
4218,4218,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,25.0
4219,4219,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,12.0
4220,4220,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,23.0


In [6]:
temp_result = await run_tasks(df_long[df_long['n_pics'].isna()]['show_url'])

Retry https://www.vogue.com/fashion-shows/fall-2017-menswear/margaret-howell/slideshow/collection retries: 1
Retry https://www.vogue.com/fashion-shows/spring-2013-menswear/comme-des-garcons-homme-plus/slideshow/collection retries: 1
Retry https://www.vogue.com/fashion-shows/fall-2017-menswear/gucci/slideshow/collection retries: 1
Retry https://www.vogue.com/fashion-shows/fall-2017-menswear/margaret-howell/slideshow/collection retries: 2
Retry https://www.vogue.com/fashion-shows/spring-2013-menswear/comme-des-garcons-homme-plus/slideshow/collection retries: 2
Retry https://www.vogue.com/fashion-shows/fall-2017-menswear/gucci/slideshow/collection retries: 2
Retry https://www.vogue.com/fashion-shows/spring-2013-menswear/comme-des-garcons-homme-plus/slideshow/collection retries: 3
Retry https://www.vogue.com/fashion-shows/fall-2017-menswear/gucci/slideshow/collection retries: 3
Retry https://www.vogue.com/fashion-shows/fall-2017-menswear/margaret-howell/slideshow/collection retries: 3
Retr

Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error (Target.sendMessageToTarget): No session with given id')>
pyppeteer.errors.NetworkError: Protocol error (Target.sendMessageToTarget): No session with given id
Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error (Target.sendMessageToTarget): No session with given id')>
pyppeteer.errors.NetworkError: Protocol error (Target.sendMessageToTarget): No session with given id
Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error Target.sendMessageToTarget: Target closed.')>
pyppeteer.errors.NetworkError: Protocol error Target.sendMessageToTarget: Target closed.
Future exception was never retrieved
future: <Future finished exception=NetworkError('Protocol error (Target.sendMessageToTarget): No session with given id')>
pyppeteer.errors.NetworkError: Protocol error (Target.sendMessageToTarget): No sessio

Retry https://www.vogue.com/fashion-shows/spring-2010-menswear/comme-des-garcons-homme-plus/slideshow/collection retries: 1
Retry https://www.vogue.com/fashion-shows/spring-2010-menswear/comme-des-garcons-homme-plus/slideshow/collection retries: 2
Retry https://www.vogue.com/fashion-shows/spring-2010-menswear/comme-des-garcons-homme-plus/slideshow/collection retries: 3
Retry https://www.vogue.com/fashion-shows/spring-2010-menswear/comme-des-garcons-homme-plus/slideshow/collection retries: 4
Retry https://www.vogue.com/fashion-shows/spring-2010-menswear/comme-des-garcons-homme-plus/slideshow/collection retries: 5
Retry https://www.vogue.com/fashion-shows/spring-2017-menswear/burberry-prorsum/slideshow/collection retries: 1
Retry https://www.vogue.com/fashion-shows/spring-2017-menswear/burberry-prorsum/slideshow/collection retries: 2
Retry https://www.vogue.com/fashion-shows/spring-2017-menswear/burberry-prorsum/slideshow/collection retries: 3
Retry https://www.vogue.com/fashion-shows/sp

In [8]:
# Writing list to a file
with open('../secrets/npics_temp.txt', 'a') as file:
    for item in temp_result:
        file.write(str(item) + '\n')

In [14]:
df_long = pd.read_csv('../secrets/shows_season_long.csv').drop('n_pics', axis=1)
df_long['show_url'] = df_long['show_url'].str.replace(r'https://www.vogue.com/fashion-shows/sao-paolo-spring-2017', 'https://www.vogue.com/fashion-shows/sao-paulo-spring-2017', regex=True)
df_long

Unnamed: 0,showID,year,season_url,show_url
0,0,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...
1,1,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...
2,2,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...
3,3,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...
4,4,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...
...,...,...,...,...
4217,4217,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...
4218,4218,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...
4219,4219,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...
4220,4220,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...


In [15]:
npics = []
with open('../secrets/npics_temp.txt', 'r') as file:
    for line in file:
        # Strip any extra whitespace and newline characters
        line = line.strip()
        
        # Convert the string representation of the tuple to an actual tuple
        try:
            tuple_data = ast.literal_eval(line)
            if (len(tuple_data) > 1):
                npics.append(tuple_data)
        except:
            print(f"Skipping invalid line: {line}")

Skipping invalid line: Start: 0 End: 500
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: Start: 500 End: 1000
Skipping invalid line: None
Skipping invalid line: Start: 1000 End: 1500
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: Start: 1500 End: 2000
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping invalid line: None
Skipping inva

In [16]:
df_npics = pd.DataFrame(npics, columns=['show_url', 'n_pics'])

df_long = pd.merge(df_long, df_npics, on='show_url', how='outer')
df_long['n_pics'] = df_long['n_pics'].where(df_long['n_pics'].notna(), other=pd.NA).astype('Int64')
df_long

Unnamed: 0,showID,year,season_url,show_url,n_pics
0,0,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,40
1,1,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,48
2,2,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,69
3,3,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,26
4,4,2006,https://www.vogue.com/fashion-shows/fall-2006-...,https://www.vogue.com/fashion-shows/fall-2006-...,38
...,...,...,...,...,...
4217,4217,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,22
4218,4218,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,25
4219,4219,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,12
4220,4220,2012,https://www.vogue.com/fashion-shows/fall-2012-...,https://www.vogue.com/fashion-shows/fall-2012-...,23


In [18]:
df_long.to_csv("../secrets/shows_season_long.csv", index=False)