# Get Data

## Imports and Setup


In [1]:
# Imports
import requests
from bs4 import BeautifulSoup
from lxml import etree
from time import sleep

In [2]:
# Shared constants for the cells below.
# A weave page is addressed by appending its integer key to this URL.
BASE_URL = "https://www.mailleartisans.org/weaves/weavedisplay.php?key="
# Article key bounds — presumably the inclusive range of a full crawl; TODO confirm.
FIRST_ARTICLE, LAST_ARTICLE = 1, 1487

In [3]:

# Create function to get multiple articles
def get_articles(articles, delay=10, timeout=30) -> dict:
    """Download several weave pages, pausing after every attempt.

    Args:
        articles: Iterable of article keys; each key is appended to BASE_URL
            to form the request URL.
        delay: Seconds to sleep after each attempt, successful or not.
            Defaults to 10.
        timeout: Timeout in seconds passed to ``requests.get`` so a stalled
            connection cannot block the whole run.

    Returns:
        dict mapping each key to the response body (``response.content``)
        when the server answered with status 200, or to the string
        ``'failure'`` when the status was different or the request raised
        a ``requests.RequestException``.
    """
    out = dict()
    for i in articles:
        print(f"Getting article {i}")
        try:
            response = requests.get(f"{BASE_URL}{i}", timeout=timeout)
        except requests.RequestException as err:
            # Treat network errors like any other failed fetch so the pages
            # already downloaded are not lost to an unhandled exception.
            response = None
            print(f"Request for article {i} raised {err!r}")

        if response is not None and response.status_code == 200:
            out[i] = response.content
            print(f"Got article {i} sleeping for {delay} seconds.")
        else:
            out[i] = 'failure'
            print(f"Could not get article {i} sleeping for {delay} seconds.")
        sleep(delay)

    return out


## Test Getting Data From Articles


### Get Test Articles


In [7]:
# Download a small hand-picked sample once and keep it in memory, so the
# parsing cells below can be re-run without requesting the pages again.

# Article key -> the case that article covers
sample_cases = {
    1: "single AR no Min no Max",
    2: "multi AR no Min no Max",
    6: "single AR no Max",
    11: "multi AR no Max",
    189: "No ARs given",
    1086: "single AR all ARs given",
    1173: "multi AR all values given",
}
to_get = list(sample_cases)

arts = get_articles(to_get)



Getting article 1
Got article 1 sleeping for 10 seconds.
Getting article 2
Got article 2 sleeping for 10 seconds.
Getting article 6
Got article 6 sleeping for 10 seconds.
Getting article 11
Got article 11 sleeping for 10 seconds.
Getting article 189
Got article 189 sleeping for 10 seconds.
Getting article 1086
Got article 1086 sleeping for 10 seconds.
Getting article 1173
Got article 1173 sleeping for 10 seconds.


### Test Single Article

In [10]:
# Pick the stored page for article key 1
article = arts[1]

# Parse with BeautifulSoup, then hand the serialized <html> element to lxml
# so the values can be located with XPath
soup = BeautifulSoup(article, 'html.parser')
html_markup = str(soup.html)
dom = etree.HTML(html_markup)

# Pull out the values of interest (first XPath match in each case)

## Weave Title
path = '/html/body/div[5]/div/table/tr/td[2]/div[2]/font'
title_node = dom.xpath(path)[0]
title = title_node.text
print(f'Weave Title: {title}')

## Ideal AR
path = '/html/body/div[5]/div/table/tr/td[1]/table/tr[2]/td/div[3]/span'
ideal_ar_node = dom.xpath(path)[0]
ideal_ar = ideal_ar_node.text
print(f'Ideal AR: {ideal_ar}')

Weave Title: Trizantine
Ideal AR: 5.2


### Test All Articles

In [None]:

# Iterate through all articles

# Absolute XPaths of the values of interest; they are the same for every
# article, so they are defined once instead of on each loop pass.
TITLE_XPATH = '/html/body/div[5]/div/table/tr/td[2]/div[2]/font'
IDEAL_AR_XPATH = '/html/body/div[5]/div/table/tr/td[1]/table/tr[2]/td/div[3]/span'


def first_text(tree, xpath):
    """Return the ``.text`` of the first node matching ``xpath``.

    Returns None when nothing matches, so a page that lacks the value does
    not raise IndexError and stop the loop.
    """
    matches = tree.xpath(xpath)
    return matches[0].text if matches else None


for key, value in arts.items():
    print(key)

    # get_articles stores the string 'failure' for pages it could not
    # download; there is nothing to parse for those.
    if value == 'failure':
        print(f'Skipping article {key}: download failed')
        continue

    # Convert article to etree
    soup = BeautifulSoup(value, 'html.parser')
    dom = etree.HTML(str(soup.html))

    # Parse etree to find values of interest

    ## Weave Title
    title = first_text(dom, TITLE_XPATH)
    print(f'Weave Title: {title}')

    ## Ideal AR (None when the page has no node at this path)
    ideal_ar = first_text(dom, IDEAL_AR_XPATH)
    print(f'Ideal AR: {ideal_ar}')
