In [3]:
# Connect to Flask API

import requests
from IPython.display import display, HTML

AUTH = ("admin", "admin") # Development authentication
ENDPOINT = "https://api:5000" # Accessible inside Docker network

response = requests.get(ENDPOINT + "/", verify = False, auth = AUTH)

# Check if the request was successful
if response.status_code == 200:
    # Get the HTML content from the response
    html_content = response.text
    # Display the HTML content
    display(HTML(html_content))
else:
    print("Failed to retrieve HTML content:", response.status_code)



In [9]:
# Drop whole database, USE CAREFULLY!

# response = requests.post(ENDPOINT + "/drop_db", verify = False, auth = AUTH).json()

# Check if the request was successful
if response['success'] == True:
    print(response['message'])
else:
    print(response['error'])

connection to server at "cloudsql-proxy" (172.20.0.2), port 5432 failed: FATAL:  database "crawldb" does not exist





In [16]:
# Execute SQL script

script = 'crawldb.sql'

# response = requests.post(ENDPOINT + "/execute_script/" + script, verify = False, auth = AUTH).json()

# Check if the request was successful
if response['success'] == True:
    print(response['message'])
else:
    print(response['error'])



Script 'crawldb.sql' executed successfully!


### What do API calls return?
#### API calls will normally return a JSON of the following structure:

**On success:**

`{ 'success': boolean, 'message': string, 'data': any }`

**On error:**

`{ 'success': boolean, 'error': string }`

If unsure, print(response.text) to get the whole response (without calling .json()).

In [3]:
# Get all tables in the database to verify the architecture

response = requests.get(ENDPOINT + "/get_tables", verify = False, auth = AUTH).json()

# Check if the request was successful
if response['success'] == True:
    print(response['message'])
    print(response['data'])
else:
    print(response['error'])



Successfully retrieved all tables from 'crawldb'.
['page', 'image', 'link', 'site', 'page_type', 'page_data', 'data_type']


In [2]:
# GET FRONTIER

# Returns
# [[id, url], [id2, url2], ...]

response = requests.get(ENDPOINT + "/get_frontier_pages", verify = False, auth = AUTH).json()

if response['success'] == True:
    print(response['message'])
    print(response['data'])
else:
    print(response['error'])



Frontier pages fetched successfully!
[]


In [3]:
# Insert PAGE into FRONTIER

# Takes:
data = {
  "domain": "e-uprava.gov.si",
  "url": "https://e-uprava.gov.si/.download/vloge/dokumenti/17290/file-content"
}

# Returns:
# page_id

response = requests.post(ENDPOINT + "/insert_page_into_frontier", json=data, verify = False, auth = AUTH).json()

if response['success'] == True:
    print(response['message'])
    print(response['data'])
else:
    print(response['error'])



Inserted page into frontier.
6895


In [9]:
# Get PAGE data

# Takes:
data = {
    "url": 'https://example.com/page'
}

# Returns
# page

response = requests.get(ENDPOINT + "/get_page", json=data, verify = False, auth = AUTH).json()

if response['success'] == True:
    print(response['message'])
    print(response['data'])
else:
    print(response['error'])



JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [4]:
# Update PAGE data

# Takes:
data = {
    "url": "http://www.gov.si",
    "page_type_code": "HTML",
    "html_content": None,
    "http_status_code": 200,
    "accessed_time": "2024-04-06T12:34:56",
    "data_type_code": None,
    "sitemap_content": None,
    "robots_content": None
}

# Returns:
# nothing

response = requests.post(ENDPOINT + "/update_page_data", json=data, verify = False, auth = AUTH).json()

if response['success'] == True:
    print(response['message'])
#     print(response['data'])
else:
    print(response['error'])

Page data updated successfully!




In [7]:
# GET hash values

# Returns:
# [hash, hash1, hash2]

response = requests.get(ENDPOINT + "/get_hashed_content", verify = False, auth = AUTH).json()

if response['success'] == True:
    print(response['message'])
    print(response['data'])
else:
    print(response['error'])



JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [51]:
import api_calls as ac

frontier = ac.get_frontier()
hashed_content = ac.get_hashed_content()

print(hashed_content, frontier)



[[None]] []
