# ArtsyCollector

## Objective 
To collect artwork data from Artsy.net

### 1. Scrape links for all the artworks on sale currently

In [None]:
# import libraries
import requests
import pprint
import lxml
from selenium import webdriver
from time import sleep

# specify the base URL
BASE_URL = "https://www.artsy.net/collect?page=1&acquireable=true&offerable=false&at_auction=false"

# launch the Selenium Chrome driver
driver = webdriver.Chrome('selenium/chrome_driver/chromedriver')
driver.get(BASE_URL)

# find the total number of pages in this search
page_nav = driver.find_elements_by_xpath('//div[@class="Box-sc-15se88d-0 iMqpar"]')
page_total = int(page_nav[-1].find_element_by_tag_name("a").text)

# specify the links data object
links = []

# loop over each page
for i in range(page_total):

    # open the page with Selenium
    page_counter = i + 1
    URL = "https://www.artsy.net/collect?page=" + str(page_counter) + "&acquireable=true&offerable=false&at_auction=false"
    driver.get(URL)
    sleep(2)

    # find all the artworks on the page
    artworksOnPage = driver.find_elements_by_xpath('//div[@data-test="artworkGridItem"]')

    for entry in artworksOnPage:
        # find all the links to those artworks on the page
        links.append(entry.find_element_by_tag_name("a").get_attribute("href"))

# print the total number of links we have
print(len(links))

# write the links file to a separate .txt file for backup
with open('data/links_list.txt', 'w') as filehandle:
    for listitem in links:
        filehandle.write('%s\n' % listitem)

### 2. Scrape information from each of those artwork links with Chrome Selenium

In [8]:
# import libraries
import requests
import pprint
import lxml
import pandas as pd
from selenium import webdriver
from time import sleep


# define the data structure

data = {"page_url": [], "artist": [], "artist_nationality": [], "artist_birthdate": [], "title": [], "image_url": [], "year": [], "gallery": [], "gallery_location": [], "medium": [], "medium_details": [], "size_inches": [], "size_cm": [], "condition": [], "classification": [], "signed": [], "authenticated": [], "framed": [], "currency": [], "price": []}
currency_symbols = ["$", "€", "£"]
digits_str = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]


# set up the Selenium driver in Chrome

driver = webdriver.Chrome('selenium/chrome_driver/chromedriver')
verbose = False

# read the list of links

## actual
links_list = []
start_index = 1395
with open('data/links_list.txt', 'r') as filehandle:
    for line in filehandle:
        currentLink = line[:-1]
        links_list.append(currentLink)
final_links_list = links_list[start_index:]

## for diagnostics ONLY
links_dummy = ["https://www.artsy.net/artwork/karl-hartman-dried-grass", "https://www.artsy.net/artwork/karl-hartman-uncommon-crow", "https://www.artsy.net/artwork/steven-alexander-nomad-3", "https://www.artsy.net/artwork/hollis-dunlap-inner-dialogues", "https://www.artsy.net/artwork/jaena-kwon-red-petals"]



# loop over all entries in the links list

for index, entry in enumerate(final_links_list):
    print(index+1, entry)
    url = entry
    driver.get(url)
    sleep(1)

    # check if it's an error page

    ## modern error page - variation 1
    error_message_1 = driver.find_elements_by_class_name("error-handler-body")

    ## modern error page - variation 2
    error_message_2 = driver.find_elements_by_class_name("error-message")
    
    ## old-style error page
    if "Error" in driver.title or "error" in driver.title:
        error_title = True
    else:
        error_title = False
    
    # if any of these errors show up, append "error" to data object and skip the rest of the code

    if len(error_message_1)>0 or len(error_message_2)>0 or error_title:

        # append "error" for everything except the page URL
        for key in data:

            if key == "page_url":
                data[key].append(url)

            else:
                data[key].append("error")

                artist = "error"
                artist_birthdate = "error"
                artist_nationality = "error"
                authenticated = "error"
                classification = "error"
                condition = "error"
                currency = "error"
                framed = "error"
                gallery = "error"
                gallery_location = "error"
                image_url = "error"
                medium = "error"
                medium_details = "error"
                price = "error"
                signed = "error"
                size_cm = "error"
                size_inches = "error"
                title = "error"
                year = "error"

    
    # if it's not an error page, proceed with data extraction
    else:

        # IMAGE AREA

        ## check if the image area exists
        try:
            driver.find_elements_by_xpath('//img[@data-type="artwork-image"]')
        except:
            image_url = "NA"
        else:
            ## IMAGE URL
            image = driver.find_elements_by_xpath('//img[@data-type="artwork-image"]')
            if len(image) > 0:
                image = image[0]
                image_url = image.get_attribute("src")
            else:
                image_url = "NA"
        if verbose:
            print(image_url)


        # SIDEBAR AREA

        ## check if the sidebar area exists
        try:
            driver.find_elements_by_xpath('//div[@data-test="artworkSidebar"]')
        except:
            artist = "NA"
            title = "NA"
            year = "NA"
            medium_details = "NA"
            size_inches = "NA"
            size_cm = "NA"
            classification = "NA"
            currency = "NA"
            price = "NA"
            gallery = "NA"
            gallery_location = "NA"
        else:
            sidebar = driver.find_elements_by_xpath('//div[@data-test="artworkSidebar"]')
            if len(sidebar) > 0:
                sidebar = sidebar[0]

                ## ARTIST
                try:
                    artist = sidebar.text.split("\n")[0]
                except:
                    artist = "NA"
                else:
                    artist = sidebar.text.split("\n")[0]

                ## TITLE & YEAR
                try:
                    sidebar.text.split("\n")[2]
                except:
                    title = "NA"
                    year = "NA"
                else:
                    line = sidebar.text.split("\n")[2]
                    if "," in line:
                        title = line.split(", ")[0]
                        year = line.split(", ")[1]
                    else:
                        if "1" in line:
                            title = "NA"
                            year = line
                        else:
                            title = line
                            year = "NA"

                ## MEDIUM DETAILS
                try:
                    medium_details = sidebar.text.split("\n")[3]
                except:
                    medium_details = "NA"
                else:
                    medium_details = sidebar.text.split("\n")[3]
                
                ## SIZE (INCHES)
                try:
                    size_inches = sidebar.text.split("\n")[4]
                except:
                    size_inches = "NA"
                else:
                    size_inches = sidebar.text.split("\n")[4]

                ## SIZE (CM)
                try:
                    size_cm = sidebar.text.split("\n")[5]
                except:
                    size_cm = "NA"
                else:
                    size_cm = sidebar.text.split("\n")[5]

                ## CLASSIFICATION
                try:
                    classification = sidebar.text.split("\n")[6]
                except:
                    classification = "NA"
                else:
                    classification = sidebar.text.split("\n")[6]

                ## PRICE & CURRENCY
                try:
                    sidebar.text.split("\n")[7]
                except:
                    price = "NA"
                    currency = "NA"
                else:
                    line = sidebar.text.split("\n")[7]
                    if any(element in line for element in digits_str):
                        currency = line[0]
                        price = line[1:]
                    else:
                        currency = "NA"
                        price = "NA"
                    
                    ## GALLERY & GALLERY LOCATION (altered position)
                    if line == "Sold":
                        try:
                            gallery = sidebar.text.split("\n")[8]
                        except:
                            gallery = "NA"
                        else:
                            gallery = sidebar.text.split("\n")[8]
                        
                        try:
                            gallery_location = sidebar.text.split("\n")[9]
                        except:
                            gallery_location = "NA"
                        else:
                            gallery_location = sidebar.text.split("\n")[9]
                    else:
                        try:
                            gallery = sidebar.text.split("\n")[12]
                        except:
                            gallery = "NA"
                        else:
                            gallery = sidebar.text.split("\n")[12]
                        
                        try:
                            gallery_location = sidebar.text.split("\n")[13]
                        except:
                            gallery_location = "NA"
                        else:
                            gallery_location = sidebar.text.split("\n")[13]
            
            else:
                artist = "NA"
                title = "NA"
                year = "NA"
                medium_details = "NA"
                size_inches = "NA"
                size_cm = "NA"
                classification = "NA"
                currency = "NA"
                price = "NA"
                gallery = "NA"
                gallery_location = "NA"

        if verbose:
            print(artist)
            print(title)
            print(year)
            print(medium_details)
            print(size_inches)
            print(size_cm)
            print(classification)
            print(currency)
            print(price)
            print(gallery)
            print(gallery_location)
        

        # CAPTION AREA

        try:
            driver.find_elements_by_xpath('//div[@data-test="artworkDetails"]')
        except:
            medium = "NA"
            condition = "NA"
            signed = "NA"
            authenticated = "NA"
            framed = "NA"
        else:
            caption = driver.find_elements_by_xpath('//div[@data-test="artworkDetails"]')
            if len(caption) > 0:
                try:
                    caption[0].text.split("\n")
                except:
                    medium = "NA"
                    condition = "NA"
                    signed = "NA"
                    authenticated = "NA"
                    framed = "NA"
                else:
                    caption_text = caption[0].text.split("\n")
                    ## MEDIUM
                    if "Medium" in caption_text:
                        index = caption_text.index("Medium")
                        medium = caption_text[index+1]
                    else:
                        medium = "NA"

                    ## CONDITION
                    if "Condition" in caption_text:
                        index = caption_text.index("Condition")
                        condition = caption_text[index+1]
                    else:
                        condition = "NA"

                    ## SIGNED
                    if "Signature" in caption_text:
                        index = caption_text.index("Signature")
                        signed = caption_text[index+1]
                    else:
                        signed = "NA"

                    ## AUTHENTICATED
                    if "Certificate of authenticity" in caption_text:
                        index = caption_text.index("Certificate of authenticity")
                        authenticated = caption_text[index+1]
                    else:
                        authenticated = "NA"

                    ## FRAMED
                    if "Frame" in caption_text:
                        index = caption_text.index("Frame")
                        framed = caption_text[index+1]
                    else:
                        framed = "NA"

            else:
                medium = "NA"
                condition = "NA"
                signed = "NA"
                authenticated = "NA"
                framed = "NA"
        
        if verbose:
            print(medium)
            print(condition)
            print(signed)
            print(authenticated)
            print(framed)
        

        # ARTIST BIO AREA

        try:
            driver.find_elements_by_xpath('//div[@data-test="artistInfo"]')
        except:
            artist_nationality = "NA"
            artist_birthdate = "NA"
        else:
            bio = driver.find_elements_by_xpath('//div[@data-test="artistInfo"]')
            if len(bio) > 0:
                try:
                    bio[0].text.split("\n")
                except:
                    artist_nationality = "NA"
                    artist_birthdate = "NA"
                else:
                    bio_text = bio[0].text.split("\n")
                   # if there is a birthdate
                    if "b." in bio_text[1]:
                        # if there is only a birthdate
                        if bio_text[1][0] == "b":
                            artist_nationality = "NA"
                            artist_birthdate = bio_text[1].split("b. ")[1][0:4].translate({ord('•'): None})
                        # if there is both a birthdate and a nationality
                        else:
                            artist_nationality = bio_text[1].split(", b. ")[0].translate({ord('•'): None})
                            artist_birthdate = bio_text[1].split(", b. ")[1][0:4].translate({ord('•'): None})
                    
                    # if there is no birthdate
                    else:
                        # if there is only a nationality
                        if len(bio_text[1]) > 0:
                            artist_nationality = bio_text[1].translate({ord('•'): None})
                            artist_birthdate = "NA"
                        # if there is no birthdate or nationality
                        else:
                            artist_nationality = "NA"
                            artist_birthdate = "NA" 
            else:
                artist_nationality = "NA"
                artist_birthdate = "NA"
            
            if verbose:
                print(artist_nationality)
                print(artist_birthdate)


        # append all collected information to the data object

        data["artist"].append(artist)
        data["artist_birthdate"].append(artist_birthdate)
        data["artist_nationality"].append(artist_nationality)
        data["authenticated"].append(authenticated)
        data["classification"].append(classification)
        data["condition"].append(condition)
        data["currency"].append(currency)
        data["framed"].append(framed)
        data["gallery"].append(gallery)
        data["gallery_location"].append(gallery_location)
        data["image_url"].append(image_url)
        data["medium"].append(medium)
        data["medium_details"].append(medium_details)
        data["page_url"].append(url)
        data["price"].append(price)
        data["signed"].append(signed)
        data["size_inches"].append(size_inches)
        data["size_cm"].append(size_cm)
        data["title"].append(title)
        data["year"].append(year)
        
        
    # print updated data
    if verbose == False:
        print(title, artist, price, currency, artist_nationality, artist_birthdate, framed, authenticated, signed)


# quit driver and print data
driver.quit()
print(len(data["title"]))

1 https://www.artsy.net/artwork/polly-apfelbaum-target-practice-21
Target Practice 21 Polly Apfelbaum 3,000 $ American 1955 Not included NA Hand-signed by artist
2 https://www.artsy.net/artwork/pipilotti-rist-small-homo-toes-the-line
Small Homo Toes the Line Pipilotti Rist ditions 4, 5, 7, 8, 9 of 30 E Swiss 1962 Not included NA Hand-signed by artist, Signed in pencil by the artist, top left
3 https://www.artsy.net/artwork/josette-urso-monaco
Monaco Josette Urso 3,600 $ American NA Included NA Hand-signed by artist
4 https://www.artsy.net/artwork/karl-hartman-dried-grass
Dried Grass Karl Hartman NA NA American NA Not included Included Hand-signed by artist, sticker label, Signed lower right corner, "Karl Hartman"
5 https://www.artsy.net/artwork/jesus-rafael-soto-ovalo-rojo
Ovalo Rojo Jesús Rafael Soto NA NA Venezuelan, 1923–2005 NA Not included NA Hand-signed by artist, Signed and numbered in pencil
6 https://www.artsy.net/artwork/erte-romain-de-tirtoff-erte-original-gouache-painting-s

In [None]:
from pprint import pprint
pprint(data)

import json
with open("artworks_list_2.json", 'w') as f:
    json.dump(data, f)

In [12]:
import json
import pandas as pd


# open the data backup file
file_name = "artworks_list_2"
with open("data/backup/" + file_name + ".json") as f:
  data_copy = json.load(f)


# calculate the lengths for all columns in the data object
def calculateLengths(df, verbose=True):
    column_dim = {"column_name": [], "length": []}
    for key in df:
        column_dim["column_name"].append(key)
        column_dim["length"].append(len(df[key]))
        if verbose:
            print(key, len(df[key]))
    return column_dim

column_dim = calculateLengths(data_copy, verbose=False)


# calculate the minimum column length
min_length = min(column_dim["length"])


# bevel the columns to the minimum length
for key in data_copy:
    if len(data_copy[key]) > min_length:
        distance = (len(data_copy[key]) - min_length) * -1
        data_copy[key] = data_copy[key][:distance]

calculateLengths(data_copy)

# save as a JSON
json.dump(data_copy, open("data/working/" + file_name + "_evened.json", 'w'))

page_url 1575
artist 1575
artist_nationality 1575
artist_birthdate 1575
title 1575
image_url 1575
year 1575
gallery 1575
gallery_location 1575
medium 1575
medium_details 1575
size_inches 1575
size_cm 1575
condition 1575
classification 1575
signed 1575
authenticated 1575
framed 1575
currency 1575
price 1575


### 3A. Attempt a quicker solution with Beautiful Soup

In [None]:
import requests
import urllib3
from bs4 import BeautifulSoup

URL = "https://www.artsy.net/artwork/karl-hartman-uncommon-crow"
http = urllib3.PoolManager()

response = http.request('GET', URL)
soup = BeautifulSoup(response.data, "lxml")

image_URL = soup.img['src']
print(image_URL)

sidebar = driver.find_element_by_xpath('//div[@data-test="artworkSidebar"]').text.split("\n")
    data["image_URL"].append(image_URL)
    data["artist"].append(sidebar[0])
    data["title"].append(sidebar[2].split(", ")[0])
    data["year"].append(sidebar[2].split(", ")[1])
    data["medium_details"].append(sidebar[3])
    data["size_inches"].append(sidebar[4])
    data["size_cm"].append(sidebar[5])
    data["classification"].append(sidebar[6])
    data["currency"].append(sidebar[7][0:1])
    data["price"].append(sidebar[7][1:])
    data["gallery"].append(sidebar[12])
    data["gallery_location"].append(sidebar[13])


### 3B. Attempt a quicker solution with Scrapy

In [None]:
import scrapy

class ImageSpider(scrapy.Spider):

    name: 'images'

    def start_requests(self):
        urls = [
            'https://www.artsy.net/artwork/karl-hartman-uncommon-crow'
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)
    
    def parse(self, response):
        page = response.

### 3C. Attempt a quicker solution with the Artsy API

In [None]:
# get authorization token

import requests

client_id = "e7f6428cf01e0bd5a170"
client_secret = "33fc3469753cc4bf54833b9c2dc21c85"
api_url = "https://api.artsy.net/api/tokens/xapp_token"

data = {
    "client_id": client_id,
    "client_secret": client_secret,
}

auth_response = requests.post(api_url, data=data)
auth_response.json()
auth_token = auth_response.json()["token"]
auth_token_type = auth_response.json()["type"]

In [None]:
import json

TEST_URL = "https://api.artsy.net/api/artists/hollis-dunlap"
DERIVED_URL = "https://api.artsy.net/api/artworks?artist_id=519812e68b3b81124c000193"
URL = "https://api.artsy.net/api/sales?live=true&total_count=10"

r = requests.get(DERIVED_URL, headers={'X-Xapp-Token': auth_token})
sales_json = r.json()

print(json.dumps(sales_json, indent=4, sort_keys=True))
len(sales_json)

In [None]:
sales_json["_embedded"]

### 4. Import the data into Pandas

In [40]:
import json
import numpy as np
import pandas as pd

# set options to enable widescreen view
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)


# specify template data structure for pandas
data_structure = {"page_url": [], "artist": [], "artist_nationality": [], "artist_birthdate": [], "title": [], "image_url": [], "year": [], "gallery": [], "gallery_location": [], "medium": [], "medium_details": [], "size_inches": [], "size_cm": [], "condition": [], "classification": [], "signed": [], "authenticated": [], "framed": [], "currency": [], "price": []}
df = pd.DataFrame(data_structure)


# MERGE SEPARATE DATA FILES

files = ["data/working/artworks_list_1_evened.json", "data/working/artworks_list_2_evened.json"]

for json_file in files:

    # open the raw JSON file
    with open(json_file) as f:
        data_copy = json.load(f)
    
    # convert into pandas DF and concatenate into one DF
    df_temp = pd.DataFrame(data_copy)
    if "artworks_list_1" in json_file:
        df_temp = df_temp.rename(columns={"page_URL": "page_url", "image_URL": "image_url", "artist_birthdate": "artist_born"})
    df = pd.concat([df, df_temp], axis=0)

# reset the index
df = df.reset_index(drop=True)

# describe the data
df.describe()

# if data looks to be the right format, save as CSV backup
df.to_csv("data/artworks_list_FINAL.csv")

Unnamed: 0,page_url,artist,artist_nationality,artist_birthdate,title,image_url,year,gallery,gallery_location,medium,medium_details,size_inches,size_cm,condition,classification,signed,authenticated,framed,currency,price
count,2970,2970,2970,2970.0,2970,2970,2970,2970,2970,2970,2970,2970,2970,2970.0,2970,2970,2970,2970,2970,2970.0
unique,2962,1151,341,79.0,2728,2954,367,413,439,17,1331,1800,1863,325.0,236,784,3,3,8,511.0
top,https://www.artsy.net/artwork/jonas-fisch-bein...,Peter Max,American,,Untitled,error,2020,Make offer,Certificate of authenticity,Painting,Oil on canvas,This work is part of a limited edition set.,50.8 × 40.6 cm,,This is a unique work.,Hand-signed by artist,Included,Not included,$,
freq,2,68,1046,959.0,117,9,744,749,748,1675,211,105,32,1620.0,2477,1155,2090,1882,1794,225.0


### 5. Explore the data and see what needs cleaning

In [77]:
print(df.columns)
print(df.dtypes)

Index(['page_url', 'artist', 'artist_nationality', 'artist_born', 'title',
       'image_url', 'year', 'gallery', 'gallery_location', 'medium',
       'medium_details', 'size_inches', 'size_cm', 'condition',
       'classification', 'signed', 'authenticated', 'framed', 'currency',
       'price'],
      dtype='object')
page_url              object
artist                object
artist_nationality    object
artist_born           object
title                 object
image_url             object
year                  object
gallery               object
gallery_location      object
medium                object
medium_details        object
size_inches           object
size_cm               object
condition             object
classification        object
signed                object
authenticated         object
framed                object
currency              object
price                 object
dtype: object


#### General cleaning:

1. identify all 'error' rows 
    * log the rows (or at least the page URLs) in a separate error file to come back to later
    * remove all rows from `df`
2. make sure the 'NA's are in the appropriate format for summary stats in Pandas

#### Maintain columns:
* `gallery`
* `gallery_location`
* `image_url`
* `medium_details`
* `page_url`
* `title`

#### Convert columns:
* `artist` - category
* `artist_born` - timedate or int
* `artist_nationality` - category
* `authenticated` - bool or str or category
* `classification` - category (and simplify)
* `condition` - bool or str or category
* `currency` - category
* `framed` - bool
* `medium` - category
* `price` - float
* `signed` - bool or str or category
* `year` - timedate or int

#### Transform columns:
* merge `size_inches` and `size_cm`
* `size_cm` - into `length` and `width` and `height` (ints)
* split some errors in `artist_nationality` into `artist_nationality`, `artist_born`, and `artist_died`

#### Add columns:
* `artist_died` - timedate or int

In [78]:
df

Unnamed: 0,page_url,artist,artist_nationality,artist_born,title,image_url,year,gallery,gallery_location,medium,medium_details,size_inches,size_cm,condition,classification,signed,authenticated,framed,currency,price
0,https://www.artsy.net/artwork/yigal-ozeri-untitled-pricilla-3,Yigal Ozeri,Israeli,1958,"Untitled ""Pricilla""",https://d32dm0rphc51dk.cloudfront.net/FZUq9m6kyOkb_CKpIY6qSg/large.jpg,ca. 2012,Corridor Contemporary,"Tel-Aviv, Philadelphia",Painting,Oil on canvas,36 × 54 in,91.4 × 137.2 cm,Excellent,This is a unique work.,Hand-signed by artist,Included,Included,$,55000
1,https://www.artsy.net/artwork/gert-and-uwe-tobias-untitled-2436,Gert & Uwe Tobias,Romanian-German,1973,Untitled,https://d32dm0rphc51dk.cloudfront.net/QGdZ5YPikOCJLlbH9yOuVA/large.jpg,2017,Make offer,Cassina Projects,"Drawing, Collage or other Work on Paper",Mixed media on paper,21 3/10 × 17 7/10 in,54 × 45 cm,,This is a unique work.,,Included,Included,€,3500
2,https://www.artsy.net/artwork/ramon-enrich-brow-1,Ramon Enrich,Spanish,1968,Brow,https://d32dm0rphc51dk.cloudfront.net/gvo22Yz_H23yTsixzS8hLA/large.jpg,2015,Artistics,Paris,Painting,Acrylic on canvas.,21 7/10 × 27 3/5 in,55 × 70 cm,,This is a unique work.,"Hand-signed by artist, sticker label",Included,Not included,€,2000
3,https://www.artsy.net/artwork/hollis-dunlap-inner-and-outer-space,Hollis Dunlap,American,1977,Inner and Outer Space,https://d32dm0rphc51dk.cloudfront.net/qadABn9NLR86r8CxgiYm9A/large.jpg,2016,Gallery 1261,Denver,Painting,Oil on Linen,24 × 30 in,61 × 76.2 cm,,This is a unique work.,Hand-signed by artist,,Not included,$,6500
4,https://www.artsy.net/artwork/joanne-tarlin-flowers-of-evil,error,error,error,error,error,error,error,error,error,error,error,error,error,error,error,error,error,error,error
5,https://www.artsy.net/artwork/seen-tags-pink-vertical,SEEN,American,1961,'Tags' (pink-vertical),https://d32dm0rphc51dk.cloudfront.net/ZemlrenDunl_ADB7iOjQ2A/large.jpg,2019,Signari Gallery,Certificate of authenticity,Painting,"Hand-painted spray/acrylic on hand-pulled, 100% cotton canvas.",24 × 9 in,61 × 22.9 cm,Good,This is a unique work.,"Hand-signed by artist, Hand-signed and dated in black marker on reverse.",Included,Not included,$,1250
6,https://www.artsy.net/artwork/galia-linn-stone-guardian,Galia Linn,Israeli,1963,Stone Guardian,https://d32dm0rphc51dk.cloudfront.net/j2V0pM2frJXmfLGwe7BeOg/large.jpg,2020,Los Angeles,Certificate of authenticity,Sculpture,Hopkins white. Glazed stoneware.,53 × 21 × 21 in,134.6 × 53.3 × 53.3 cm,,This is a unique work.,"Hand-signed by artist, Initialed on bottom",Included,Not included,$,15000
7,https://www.artsy.net/artwork/hollis-dunlap-october-girl-portrait-in-fall,Hollis Dunlap,American,1977,October Girl (Portrait in Fall),https://d32dm0rphc51dk.cloudfront.net/_aItfQf5JK7wBhRk_VmJdA/large.jpg,2020,Gallery 1261,Denver,Painting,Oil on Panel,14 × 11 in,35.6 × 27.9 cm,,This is a unique work.,Hand-signed by artist,,Included,$,1800
8,https://www.artsy.net/artwork/nancy-ellen-craig-male-nude,Nancy Ellen Craig,"American, 1927–2015",,Male Nude,https://d32dm0rphc51dk.cloudfront.net/8FZXC1DHbDHLZ5sC5OvzbQ/large.jpg,Late 20th c.,Bakker Gallery,Provincetown,"Drawing, Collage or other Work on Paper",Graphite on paper,12 3/4 × 10 3/4 in,32.4 × 27.3 cm,"Very good condition, irregular edges, pinholes",This is a unique work.,"Hand-signed by artist, Signed lower right",,Included,$,700
9,https://www.artsy.net/artwork/egmont-hartwig-conserves,Egmont Hartwig,Netherlandish,1973,Conserves,https://d32dm0rphc51dk.cloudfront.net/qV6EnlcJaDQ-cm7BrcPSsw/large.jpg,2017,Make offer,Desiderio Gallery,Painting,Oil on wood,27 3/5 × 39 2/5 in,70 × 100 cm,The work is in perfect condition.,This is a unique work.,Hand-signed by artist,Included,Not included,€,8000


In [47]:
def getIndexes(dfObj, value):
    ''' Get index positions of value in dataframe i.e. dfObj.'''
    listOfPos = list()
    # Get bool dataframe with True at positions where the given value exists
    result = dfObj.isin([value])
    # Get list of columns that contains the value
    seriesObj = result.any()
    columnNames = list(seriesObj[seriesObj == True].index)
    # Iterate over list of columns and fetch the rows indexes where value exists
    for col in columnNames:
        rows = list(result[col][result[col] == True].index)
        for row in rows:
            listOfPos.append((row, col))
    # Return a list of tuples indicating the positions of value in the dataframe
    return listOfPos

# Get list of index positions i.e. row & column of all occurrences of 81 in the dataframe
listOfPositions = getIndexes(df, "error")
print('Index positions of error in Dataframe : ')
for i in range(len(listOfPositions)):
    print('Position ', i, ' (Row index , Column Name) : ', listOfPositions[i])

Index positions of error in Dataframe : 
Position  0  (Row index , Column Name) :  (4, 'artist')
Position  1  (Row index , Column Name) :  (16, 'artist')
Position  2  (Row index , Column Name) :  (47, 'artist')
Position  3  (Row index , Column Name) :  (447, 'artist')
Position  4  (Row index , Column Name) :  (834, 'artist')
Position  5  (Row index , Column Name) :  (870, 'artist')
Position  6  (Row index , Column Name) :  (893, 'artist')
Position  7  (Row index , Column Name) :  (1034, 'artist')
Position  8  (Row index , Column Name) :  (1303, 'artist')
Position  9  (Row index , Column Name) :  (4, 'artist_nationality')
Position  10  (Row index , Column Name) :  (16, 'artist_nationality')
Position  11  (Row index , Column Name) :  (47, 'artist_nationality')
Position  12  (Row index , Column Name) :  (447, 'artist_nationality')
Position  13  (Row index , Column Name) :  (834, 'artist_nationality')
Position  14  (Row index , Column Name) :  (870, 'artist_nationality')
Position  15  (Row