In [1]:
# Connect to Flask API
#
# Requests the API's root page and renders the returned HTML inline, as a
# quick check that the service is reachable from this notebook.

import os

import requests
from IPython.display import display, HTML

# Development authentication. The defaults are the dev credentials; set
# API_USER / API_PASSWORD in the environment to override them rather than
# writing other credentials into the notebook.
AUTH = (os.environ.get("API_USER", "admin"), os.environ.get("API_PASSWORD", "admin"))
ENDPOINT = "https://api:5000" # Accessible inside Docker network

# verify=False skips TLS certificate validation (presumably because the dev API
# uses a self-signed certificate -- confirm); never do this against production.
# timeout: requests waits indefinitely by default, which would hang the kernel
# if the API is not reachable.
response = requests.get(ENDPOINT + "/", verify = False, auth = AUTH, timeout = 10)

# Check if the request was successful
if response.status_code == 200:
    # Get the HTML content from the response
    html_content = response.text
    # Display the HTML content
    display(HTML(html_content))
else:
    print("Failed to retrieve HTML content:", response.status_code)



### What do API calls return?
#### API calls normally return a JSON object with one of the following structures:

**On success:**

`{ 'success': boolean, 'message': string, 'data': any }`

**On error:**

`{ 'success': boolean, 'error': string }`

If unsure, keep the raw response object (do not call `.json()` on it) and run `print(response.text)` to see the whole response body.

In [3]:
# List every table in the database to verify the architecture.
# The parsed JSON body is kept in `response`.

response = requests.get(
    f"{ENDPOINT}/get_tables",
    auth=AUTH,
    verify=False,
).json()

# Report the error first; otherwise show the message and the table list
if response['success'] != True:
    print(response['error'])
else:
    print(response['message'])
    print(response['data'])

Successfully retrieved all tables from 'crawldb'.
['page', 'image', 'link', 'site', 'page_type', 'page_data', 'data_type']




In [13]:
# GET FRONTIER
#
# Fetches the pages currently in the frontier.

# Takes:
# nothing (no request body is sent; an earlier version passed `json=data`,
# which raised NameError on a fresh kernel because `data` is only defined
# in a later cell)

# Returns
# [[id, url], [id2, url2], ...]

response = requests.get(ENDPOINT + "/get_frontier_pages", verify = False, auth = AUTH).json()

if response['success'] == True:
    print(response['message'])
    print(response['data'])
else:
    print(response['error'])



Frontier pages fetched successfully!
[[1, 'https://example.com/page1'], [3, 'https://example.com/page2'], [5, 'https://example.com/page3'], [6, 'https://example.com/page4']]


In [11]:
# Insert PAGE into FRONTIER
#
# POSTs one page (its domain and URL) to /insert_page_into_frontier.

# Request body:
data = dict(
    domain="example.com",
    url="https://example.com/page4",
)

# On success, the 'data' field of the response is the page_id.

response = requests.post(
    f"{ENDPOINT}/insert_page_into_frontier",
    json=data,
    auth=AUTH,
    verify=False,
).json()

# Report the error first; otherwise show the message and the returned value
if response['success'] != True:
    print(response['error'])
else:
    print(response['message'])
    print(response['data'])



duplicate key value violates unique constraint "unq_url_idx"
DETAIL:  Key (url)=(https://example.com/page4) already exists.



In [None]:
# Update PAGE data
#
# POSTs the page fields below to /update_page_data.

# Takes:
data = {
    "url": "https://example.com/page4",
    "page_type": "NOT_FRONTIER",  # comma was missing here, making the cell a SyntaxError
    "html_content": "<html><body>Sample HTML content</body></html>",
    "http_status_code": 200,
    "accessed_time": "2024-04-06T12:34:56"
}

# Returns:
# nothing (so only 'message' is printed on success; 'data' is not read)

response = requests.post(ENDPOINT + "/update_page_data", json=data, verify = False, auth = AUTH).json()

if response['success'] == True:
    print(response['message'])
else:
    print(response['error'])

In [None]:
# GET hash values
#
# Fetches the hash values known to the API.

# Takes:
# nothing (no request body is sent; an earlier version passed `json=data`,
# i.e. whatever payload another cell happened to leave in `data`)

# Returns:
# [hash, hash1, hash2]

response = requests.get(ENDPOINT + "/get_hash_values", verify = False, auth = AUTH).json()

if response['success'] == True:
    print(response['message'])
    print(response['data'])
else:
    print(response['error'])

In [25]:
# Check connectivity through the `api_calls` helper module instead of calling
# `requests` directly. `api_calls` is not shown in this notebook -- presumably
# a project-local wrapper around the same API; confirm.
import api_calls as ac

# In the recorded run this returned a dict with 'message' and 'success' keys.
res = ac.test_connection()

print(res)

{'message': 'Connection to database successful!', 'success': True}


