In [47]:
import pandas as pd
import numpy as np

from dotenv import load_dotenv
import os
import requests
import json

import matplotlib.pyplot as plt
import seaborn as sns

from PIL import Image
from io import BytesIO

import time
import json
from ratelimit import limits, sleep_and_retry

### Accessing the wiki Art api

In [28]:
# Load our API access key
access_key = os.getenv('WIKIART_ACCESS_KEY')
secret_code = os.getenv('WIKIART_SECRET_KEY')

In [29]:
login_url = "https://www.wikiart.org/en/Api/2/login"

In [30]:
# Examine the most viewed artists list

base_url = "https://www.wikiart.org/en/api/2/MostViewedPaintings"

try:
    response = requests.get(base_url)
    response.raise_for_status()
    
    result = response.json()
    
    most_visited_df = pd.DataFrame(result.get('data', []))
    
    print("Download complete")

except requests.exceptions.RequestException as e:
    print(f"An error occurred: {e}")

An error occurred: 500 Server Error: Internal Server Error for url: https://www.wikiart.org/en/api/2/MostViewedPaintings


In [17]:
most_visited_df.head(10)

Unnamed: 0,id,title,url,artistUrl,artistName,artistId,completitionYear,width,image,height
0,57727444edc2cb3880cb7bf6,Mona Lisa,mona-lisa,leonardo-da-vinci,Leonardo da Vinci,57726d85edc2cb3880b48ccd,1519.0,408,https://uploads5.wikiart.org/00475/images/leon...,600
1,5772716cedc2cb3880c1907f,The Starry Night,the-starry-night-1889,vincent-van-gogh,Vincent van Gogh,57726d82edc2cb3880b486a0,1889.0,750,https://uploads3.wikiart.org/00475/images/vinc...,598
2,57727593edc2cb3880ceb255,"In Bed, The Kiss",in-bed-the-kiss-1892,henri-de-toulouse-lautrec,Henri de Toulouse-Lautrec,57726d86edc2cb3880b48f5d,1892.0,750,https://uploads8.wikiart.org/images/henri-de-t...,559
3,57726e7bedc2cb3880b7466a,The Birth of Venus,the-birth-of-venus-1485,sandro-botticelli,Sandro Botticelli,57726d7dedc2cb3880b47c88,1485.0,750,https://uploads6.wikiart.org/images/sandro-bot...,500
4,577271f9edc2cb3880c37dcd,The School of Athens,school-of-athens-detail-from-right-hand-side-s...,raphael,Raphael,57726d83edc2cb3880b487fe,1511.0,750,https://uploads6.wikiart.org/00475/images/raph...,534
5,5772722cedc2cb3880c42b1a,Camille Monet and a Child in the Artist’s Gard...,madame-monet-and-child,claude-monet,Claude Monet,57726d83edc2cb3880b488f1,1875.0,710,https://uploads0.wikiart.org/images/claude-mon...,600
6,57726e2fedc2cb3880b61c00,Composition A,composition-a-1923,piet-mondrian,Piet Mondrian,57726d7bedc2cb3880b47ad4,1923.0,616,https://uploads3.wikiart.org/images/piet-mondr...,600
7,57726ee7edc2cb3880b8b06b,The Kiss,the-kiss-1908,gustav-klimt,Gustav Klimt,57726d7eedc2cb3880b47e13,1908.0,598,https://uploads7.wikiart.org/00142/images/5772...,600
8,5772722aedc2cb3880c427ca,"Impression, sunrise",impression-sunrise,claude-monet,Claude Monet,57726d83edc2cb3880b488f1,1872.0,750,https://uploads0.wikiart.org/00129/images/clau...,582
9,577272dcedc2cb3880c6a79b,Basket of Apples,still-life-with-bottle-and-apple-basket-1894,paul-cezanne,Paul Cezanne,57726d84edc2cb3880b48a5b,1895.0,750,https://uploads1.wikiart.org/images/paul-cezan...,592


In [18]:
most_visited_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60 entries, 0 to 59
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   id                60 non-null     object 
 1   title             60 non-null     object 
 2   url               60 non-null     object 
 3   artistUrl         60 non-null     object 
 4   artistName        60 non-null     object 
 5   artistId          60 non-null     object 
 6   completitionYear  59 non-null     float64
 7   width             60 non-null     int64  
 8   image             60 non-null     object 
 9   height            60 non-null     int64  
dtypes: float64(1), int64(2), object(7)
memory usage: 4.8+ KB


In [35]:
login_url = "https://www.wikiart.org/en/Api/2/login"
painting_details_url = "https://www.wikiart.org/en/api/2/Painting"

# Painting details for "The Starry Night"
painting_id = "5772716cedc2cb3880c1907f"  # Alternatively, use the URL slug "the-starry-night-1889"

try:
    # Step 1: Create session
    login_params = {
        "accessCode": access_key,
        "secretCode": secret_code
    }
    login_response = requests.get(login_url, params=login_params)
    login_response.raise_for_status()
    
    login_data = login_response.json()
    session_key = login_data.get('SessionKey')
    
    if not session_key:
        raise Exception(f"Failed to obtain session key. Response: {login_data}")

    print(f"Successfully obtained session key: {session_key}")

    # Step 2: Retrieve painting details
    painting_params = {
        "id": painting_id,
        "authSessionKey": session_key  # Pass the session key
    }

    response = requests.get(painting_details_url, params=painting_params)

    if response.status_code == 200:
        painting_data = response.json()
        starry_night_df = pd.DataFrame([painting_data])  # Convert painting data to DataFrame
        print("Download complete")
        print(starry_night_df.head()) 
    elif response.status_code == 404:
        print(f"Painting not found for ID: {painting_id}")
    else:
        print(f"Failed to retrieve painting details for ID: {painting_id}")

except requests.exceptions.RequestException as e:
    print(f"An error occurred during the request: {e}")
    if hasattr(e, 'response'):
        print(f"Response status code: {e.response.status_code}")
        print(f"Response content: {e.response.text}")
except json.JSONDecodeError as e:
    print(f"Error decoding JSON: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

Successfully obtained session key: 070ed297cac0
Full request URL: https://www.wikiart.org/en/api/2/Painting?id=5772716cedc2cb3880c1907f&imageFormat=Large&authSessionKey=070ed297cac0
Request headers: {
  "User-Agent": "python-requests/2.31.0",
  "Accept-Encoding": "gzip, deflate, br",
  "Accept": "*/*",
  "Connection": "keep-alive"
}
Response status code: 200
Response headers: {
  "Date": "Sat, 19 Oct 2024 04:17:53 GMT",
  "Content-Type": "application/json; charset=utf-8",
  "Transfer-Encoding": "chunked",
  "Connection": "keep-alive",
  "vary": "Accept-Encoding",
  "Cache-Control": "private, s-maxage=0",
  "Set-Cookie": "ASP.NET_SessionId=qdyirh23ccxq4aqtzxxpp4lm; path=/; HttpOnly; SameSite=Lax",
  "x-aspnetmvc-version": "4.0",
  "x-aspnet-version": "4.0.30319",
  "x-powered-by": "ASP.NET, PleskLin",
  "content-security-policy": "upgrade-insecure-requests",
  "cf-cache-status": "DYNAMIC",
  "Report-To": "{\"endpoints\":[{\"url\":\"https:\\/\\/a.nel.cloudflare.com\\/report\\/v4?s=SHMbmm

In [36]:
starry_night_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 23 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   id                1 non-null      object 
 1   title             1 non-null      object 
 2   url               1 non-null      object 
 3   artistUrl         1 non-null      object 
 4   artistName        1 non-null      object 
 5   artistId          1 non-null      object 
 6   completitionYear  1 non-null      int64  
 7   dictionaries      1 non-null      object 
 8   location          1 non-null      object 
 9   period            0 non-null      object 
 10  serie             0 non-null      object 
 11  genres            1 non-null      object 
 12  styles            1 non-null      object 
 13  media             1 non-null      object 
 14  sizeX             1 non-null      float64
 15  sizeY             1 non-null      float64
 16  diameter          0 non-null      object 
 17  g

In [39]:
starry_night_df['styles']

0    [Post-Impressionism]
Name: styles, dtype: object

In [40]:


def download_image(image_url, save_path):
    try:
        # Send a GET request to the image URL
        response = requests.get(image_url)
        response.raise_for_status()  # Raise an exception for bad status codes

        # Open the image using PIL
        img = Image.open(BytesIO(response.content))

        # Save the image
        img.save(save_path)
        print(f"Image successfully downloaded and saved to {save_path}")

    except requests.exceptions.RequestException as e:
        print(f"Error downloading the image: {e}")
    except IOError as e:
        print(f"Error saving the image: {e}")

# Assuming starry_night_df is the DataFrame containing the painting data
if 'starry_night_df' in locals() and not starry_night_df.empty:
    # Get the image URL from the DataFrame
    image_url = starry_night_df['image'].iloc[0]
    
    # Create a directory to save the image
    save_dir = "downloaded_images"
    os.makedirs(save_dir, exist_ok=True)
    
    # Generate a filename for the image
    filename = f"starry_night_{painting_id}.jpg"
    save_path = os.path.join(save_dir, filename)
    
    # Download and save the image
    download_image(image_url, save_path)
else:
    print("Painting data not available. Please ensure you've successfully retrieved the painting details first.")

# Print the image URL for verification
print(f"Image URL: {image_url if 'image_url' in locals() else 'Not available'}")

Image successfully downloaded and saved to downloaded_images\starry_night_5772716cedc2cb3880c1907f.jpg
Image URL: https://uploads3.wikiart.org/00475/images/vincent-van-gogh/the-starry-night-1889.jpg!Large.jpg


In [42]:
base_url = "https://www.wikiart.org/en/api/2"

def get_session_key():
    login_url = f"{base_url}/login"
    login_params = {
        "accessCode": access_key,
        "secretCode": secret_code
    }
    response = requests.get(login_url, params=login_params)
    response.raise_for_status()
    return response.json().get('SessionKey')

def make_api_request(endpoint, params):
    session_key = get_session_key()
    params['authSessionKey'] = session_key
    url = f"{base_url}/{endpoint}"
    response = requests.get(url, params=params)
    response.raise_for_status()
    return response.json()

def explore_dictionaries_by_group(group):
    print(f"\nExploring Dictionaries by Group {group}")
    try:
        data = make_api_request("DictionariesByGroup", {"group": group})
        print(json.dumps(data, indent=2))
    except Exception as e:
        print(f"Error: {e}")

def explore_artists_by_dictionary(group, dict_url):
    print(f"\nExploring Artists by Dictionary - Group: {group}, DictUrl: {dict_url}")
    try:
        data = make_api_request("ArtistsByDictionary", {"group": group, "dictUrl": dict_url})
        print(json.dumps(data, indent=2))
    except Exception as e:
        print(f"Error: {e}")
def explore_painting_search(term):
    print(f"\nExploring Painting Search - Term: {term}")
    try:
        data = make_api_request("PaintingSearch", {"term": term})
        print(json.dumps(data, indent=2))
    except Exception as e:
        print(f"Error: {e}")

# Example usage
explore_dictionaries_by_group(1)  # Explore group 1
explore_dictionaries_by_group(2)  # Explore group 2




Exploring Dictionaries by Group 1
{
  "data": [
    {
      "id": "5d230437edc2c9fb74756177",
      "title": "1st Intermediate Period (2181\u20132055 BC)",
      "url": "1st-intermediate-period-2181-2055-bc",
      "group": 1
    },
    {
      "id": "5d23049cedc2c9fb74782eb2",
      "title": "2nd Intermediate Period (1650\u20131550 BC)",
      "url": "2nd-intermediate-period-1650-1550-bc",
      "group": 1
    },
    {
      "id": "5d23052fedc2c9fb747bf1d9",
      "title": "3rd Intermediate Period (1069\u2013664 BC)",
      "url": "3rd-intermediate-period-1069-664-bc",
      "group": 1
    },
    {
      "id": "57726a68edc2ca38801d5111",
      "title": "Abbasid Period (750\u20131258)",
      "url": "abbasid-period-750-1258",
      "group": 1
    },
    {
      "id": "57726a67edc2ca38801d4d81",
      "title": "Abstract Art",
      "url": "abstract-art",
      "group": 1
    },
    {
      "id": "57726a67edc2ca38801d4e69",
      "title": "Abstract Expressionism",
      "url": "abstract

In [43]:
# After exploring dictionaries, we'll use a sample dict_url for Artists by Dictionary
# You'll need to replace 'sample_dict_url' with an actual URL from the DictionariesByGroup response
explore_artists_by_dictionary(1, "baroque")

# Example painting search
explore_painting_search("impressionism")


Exploring Artists by Dictionary - Group: 1, DictUrl: baroque
{
  "data": [
    {
      "id": "62502c309e4363244cf85596",
      "artistName": "Michelangelo Cerquozzi",
      "url": "michelangelo-cerquozzi",
      "lastNameFirst": null,
      "birthDay": "/Date(-58948387200000)/",
      "deathDay": "/Date(-9782640000000)/",
      "birthDayAsString": "102",
      "deathDayAsString": "1660",
      "image": "https://uploads3.wikiart.org/00387/images//h0027-l06020228.jpg!Portrait.jpg",
      "wikipediaUrl": null,
      "dictionaries": [
        "57726a66edc2ca38801d4cd1",
        "57726b4fedc2cb3880ad71d0"
      ],
      "periods": [],
      "series": [],
      "activeYearsStart": null,
      "activeYearsCompletion": null,
      "biography": "",
      "gender": "male",
      "originalArtistName": "",
      "relatedArtists": []
    },
    {
      "id": "625874c99e436338ccb9f54f",
      "artistName": "Simone Pignoni",
      "url": "simone-pignoni",
      "lastNameFirst": null,
      "birthDay

In [48]:
# Rate limiting decorators
@sleep_and_retry
@limits(calls=10, period=2.5)
def call_api():
    pass

@sleep_and_retry
@limits(calls=400, period=3600)
def hourly_limit():
    pass

def get_session_key(max_retries=3, delay=5):
    login_url = f"{base_url}/login"
    login_params = {
        "accessCode": access_key,
        "secretCode": secret_code
    }
    
    for attempt in range(max_retries):
        try:
            call_api()
            hourly_limit()
            response = requests.get(login_url, params=login_params)
            response.raise_for_status()
            return response.json().get('SessionKey')
        except requests.exceptions.RequestException as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                print("Max retries reached. Unable to get session key.")
                raise

def make_api_request(endpoint, params, max_retries=3, delay=5):
    for attempt in range(max_retries):
        try:
            call_api()
            hourly_limit()
            session_key = get_session_key()
            params['authSessionKey'] = session_key
            url = f"{base_url}/{endpoint}"
            response = requests.get(url, params=params)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            print(f"Attempt {attempt + 1} failed: {e}")
            if attempt < max_retries - 1:
                print(f"Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                print(f"Max retries reached. Unable to complete request to {endpoint}.")
                raise

def fetch_paintings_by_style(style):
    all_paintings = []
    pagination_token = None

    while True:
        params = {
            "term": style,
            "paginationToken": pagination_token
        }

        try:
            data = make_api_request("PaintingSearch", params)
            paintings = data.get('data', [])
            
            if not paintings:
                break

            all_paintings.extend(paintings)
            pagination_token = data.get('paginationToken')

            if not pagination_token:
                break

        except Exception as e:
            print(f"Error fetching paintings for style '{style}': {e}")
            print("Moving to next style...")
            break

    return all_paintings

# List of top styles (you'll need to populate this based on your research)
top_styles = [
    "Impressionism",
    "Realism",
    "Romanticism",
    "Expressionism",
    "Post-Impressionism",
    "Baroque",
    "Art Nouveau (Modern)",
    "Surrealism",
    "Symbolism",
    "Abstract Expressionism",
    "Neoclassicism",
    "Naïve Art (Primitivism)",
    "Rococo",
    "Cubism",
    "Northern Renaissance",
    "Academicism",
    "Pop Art",
    "Mannerism (Late Renaissance)",
    "Minimalism",
    "Conceptual Art",
    "Abstract Art",
    "Art Informel",
    "Early Renaissance",
    "Ukiyo-e",
    "Magic Realism",
    "Neo-Expressionism",
    "High Renaissance",
    "Contemporary Realism",
    "Color Field Painting",
    "Orientalism",
    "Lyrical Abstraction",
    "Fauvism",
    "Contemporary",
    "Op Art",
    "Neo-Impressionism",
    "Art-Deco",
]

# Fetch paintings for each style and store in a DataFrame
all_paintings_data = []

for style in top_styles:
    print(f"Fetching paintings for style: {style}")
    style_paintings = fetch_paintings_by_style(style)
    for painting in style_paintings:
        painting['style'] = style  # Add style information to each painting
    all_paintings_data.extend(style_paintings)
    print(f"Fetched {len(style_paintings)} paintings for {style}")

# Create DataFrame
all_paintings_df = pd.DataFrame(all_paintings_data)

display(all_paintings_df.head(10))

Fetching paintings for style: Impressionism
Attempt 1 failed: 500 Server Error: Internal Server Error for url: https://www.wikiart.org/en/api/2/login?accessCode=9b4b72dc72b24bcf&secretCode=90407df7dca0ee1b
Retrying in 5 seconds...
Attempt 2 failed: 500 Server Error: Internal Server Error for url: https://www.wikiart.org/en/api/2/login?accessCode=9b4b72dc72b24bcf&secretCode=90407df7dca0ee1b
Retrying in 5 seconds...
Attempt 3 failed: 500 Server Error: Internal Server Error for url: https://www.wikiart.org/en/api/2/login?accessCode=9b4b72dc72b24bcf&secretCode=90407df7dca0ee1b
Max retries reached. Unable to get session key.
Attempt 1 failed: 500 Server Error: Internal Server Error for url: https://www.wikiart.org/en/api/2/login?accessCode=9b4b72dc72b24bcf&secretCode=90407df7dca0ee1b
Retrying in 5 seconds...
Attempt 1 failed: 500 Server Error: Internal Server Error for url: https://www.wikiart.org/en/api/2/login?accessCode=9b4b72dc72b24bcf&secretCode=90407df7dca0ee1b
Retrying in 5 seconds..

KeyboardInterrupt: 

In [45]:
all_paintings_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 83 entries, 0 to 82
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   id                83 non-null     object 
 1   title             83 non-null     object 
 2   url               0 non-null      object 
 3   artistUrl         83 non-null     object 
 4   artistName        83 non-null     object 
 5   artistId          83 non-null     object 
 6   completitionYear  68 non-null     float64
 7   width             83 non-null     int64  
 8   image             83 non-null     object 
 9   height            83 non-null     int64  
 10  style             83 non-null     object 
dtypes: float64(1), int64(2), object(8)
memory usage: 7.3+ KB
