In [None]:
# Install pyDatalog
!pip install pyDatalog

from pyDatalog import pyDatalog
import re
import pandas as pd


Collecting pyDatalog
  Downloading pyDatalog-0.17.4.tar.gz (325 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/325.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m317.4/325.5 kB[0m [31m9.4 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m325.5/325.5 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: pyDatalog
  Building wheel for pyDatalog (pyproject.toml) ... [?25l[?25hdone
  Created wheel for pyDatalog: filename=pydatalog-0.17.4-py2.py3-none-any.whl size=74280 sha256=0fa18b35d0880fd1ecfd8c8112b221312dcb538333a6fa6834e2bdbe81d0e45f
  Stored in directory: /root/.cache/pip/wheels/3d/86/49/61fd7f650165a5795f4b61720e1aeb2875f7e3f6d09de485be
Su

In [None]:
# Source spreadsheet: the sheet id and tab (gid) are combined into a CSV export URL.
SHEET_ID = "1nZUKlY0F9DvsKG5au21Lsp2FMojRoVOW9luWJQkrdHM"
GID = "756887908"

csv_url = f"https://docs.google.com/spreadsheets/d/{SHEET_ID}/export?format=csv&gid={GID}"
df = pd.read_csv(csv_url)

# Strip whitespace in string columns.
# Only genuine strings are stripped: casting the whole column with astype(str)
# would turn missing values into the literal text "nan", which the NA filter
# below cannot recognise, so rows with missing text would silently survive.
for col in df.columns:
    if df[col].dtype == "object":
        df[col] = df[col].map(lambda v: v.strip() if isinstance(v, str) else v)

# Convert empty / whitespace-only strings to NA, then drop any row with any NA
df = df.replace(r"^\s*$", pd.NA, regex=True).dropna(how="any").reset_index(drop=True)

df.shape, df.head()

((186, 37),
               breed                                        description  \
 0     Affenpinscher  The Affen’s apish look has been described many...   
 1      Afghan Hound  The Afghan Hound is an ancient breed, his whol...   
 2  Airedale Terrier  The Airedale Terrier is the largest of all ter...   
 3             Akita  Akitas are burly, heavy-boned spitz-type dogs ...   
 4  Alaskan Malamute  The Alaskan Malamute stands 23 to 25 inches at...   
 
                                  temperament popularity  min_height  \
 0        Confident, Famously Funny, Fearless        148       22.86   
 1  Dignified, Profoundly Loyal, Aristocratic        113       63.50   
 2               Friendly, Clever, Courageous         60       58.42   
 3    Courageous, Dignified, Profoundly Loyal         47       60.96   
 4               Affectionate, Loyal, Playful         58       58.42   
 
    max_height  min_weight  max_weight  min_expectancy  max_expectancy  ...  \
 0       29.21    3.175

In [None]:
def calculate_threshold_values(min_val, max_val):
    """Return the two cut points that divide [min_val, max_val] into three equal-width bands.

    Args:
        min_val: lower end of the range.
        max_val: upper end of the range.

    Returns:
        A ``(t1, t2)`` tuple: ``t1`` sits one third and ``t2`` two thirds of the
        way from ``min_val`` to ``max_val``.
    """
    width = max_val - min_val
    lower_cut = min_val + width / 3.0
    upper_cut = min_val + 2.0 * width / 3.0
    return lower_cut, upper_cut


In [None]:
def categorize_numeric_value(value: float, t1: float, t2: float, cat1, cat2, cat3) -> str:
    """Pick one of three labels for ``value`` using two inclusive upper bounds.

    Args:
        value: the number to classify.
        t1: upper bound (inclusive) of the first band.
        t2: upper bound (inclusive) of the second band.
        cat1, cat2, cat3: labels returned for the first, second and third band.

    Returns:
        ``cat1`` when ``value <= t1``, otherwise ``cat2`` when ``value <= t2``,
        otherwise ``cat3``.
    """
    if value <= t1:
        return cat1
    if value <= t2:
        return cat2
    return cat3

#-------- Size categories: thirds of the observed average-weight range --------#

# Keep only rows whose min AND max weight are strictly positive
has_positive_weights = (df["min_weight"] > 0) & (df["max_weight"] > 0)
df = df[has_positive_weights].reset_index(drop=True)

df["avg_weight"] = (df["min_weight"] + df["max_weight"]) / 2.0

min_avg = df["avg_weight"].min()
max_avg = df["avg_weight"].max()

t1, t2 = calculate_threshold_values(min_avg, max_avg)
df["size_class"] = df["avg_weight"].apply(
    lambda w: categorize_numeric_value(w, t1, t2, "Small", "Medium", "Large")
)


#-------- Rating columns: each is split into thirds of its own min..max range --------#

# (source column, derived column, labels for the low / middle / high band)
rating_specs = [
    ("Good With Young Children", "good_with_children_class",
     ("Bad_With_Children", "Ok_With_Children", "Good_With_Children")),
    ("Good With Other Dogs", "good_with_other_dogs_class",
     ("Bad_With_Other_Dogs", "Ok_With_Other_Dogs", "Good_With_Other_Dogs")),
    ("Watchdog/Protective Nature", "protectiveness_class",
     ("Not_Protective", "Somewhat_Protective", "Very_Protective")),
    ("Barking Level", "barking_level_class",
     ("Barks_Rarely", "Barks_Occasionally", "Barks_Often")),
]

for source_col, class_col, labels in rating_specs:
    # t1/t2 are rebound per column; after the loop they hold the Barking Level cut points
    t1, t2 = calculate_threshold_values(df[source_col].min(), df[source_col].max())
    df[class_col] = df[source_col].apply(
        lambda v: categorize_numeric_value(v, t1, t2, *labels)
    )

In [None]:
# Reset pyDatalog before (re)building the knowledge base in this cell.
# NOTE(review): presumably this wipes facts/clauses asserted by an earlier run of
# the cell so they are not duplicated — confirm against the pyDatalog docs.
pyDatalog.clear()

def to_term_name(breed: str) -> str:
    """Turn a free-text label into an underscore-separated name.

    Every run of characters other than ASCII letters, digits and underscores is
    replaced by a single underscore; underscores at either end are then removed.
    """
    underscored = re.sub(r"[^0-9a-zA-Z_]+", "_", breed)
    return underscored.strip("_")

# Map every raw dataset value to the name of the pyDatalog term that represents it.
# Prefixes/suffixes ("Sheds_", "Requires_", "_Coat") are added to the category names.
breed_to_term = {breed: to_term_name(breed) for breed in df["breed"].unique()}
shedding_to_term = {shedding: ('Sheds_' + to_term_name(shedding)) for shedding in df["shedding_category"].unique()}
grooming_to_term = {grooming: ('Requires_' + to_term_name(grooming)) for grooming in df["grooming_frequency_category"].unique()}
energy_to_term = {energy: (to_term_name(energy)) for energy in df["energy_level_category"].unique()}
trainability_to_term = {trainability: (to_term_name(trainability)) for trainability in df["trainability_category"].unique()}
demeanor_to_term = {demeanor: (to_term_name(demeanor)) for demeanor in df["demeanor_category"].unique()}
coat_type_to_term = {coat_type: (to_term_name(coat_type) + "_Coat") for coat_type in df["Coat Type"].unique()}
coat_length_to_term = {coat_length: (to_term_name(coat_length) + "_Coat") for coat_length in df["Coat Length"].unique()}


# Create terms
all_breed_terms = sorted(set(breed_to_term.values()))
all_shedding_terms = sorted(set(shedding_to_term.values()))
all_grooming_terms = sorted(set(grooming_to_term.values()))
all_energy_terms = sorted(set(energy_to_term.values()))
all_trainability_terms = sorted(set(trainability_to_term.values()))
all_demeanor_terms = sorted(set(demeanor_to_term.values()))
all_coat_type_terms = sorted(set(coat_type_to_term.values()))
all_coat_length_terms = sorted(set(coat_length_to_term.values()))

# Every predicate looked up via globals() below must be listed here.
# The demeanor terms were previously left out, so on a fresh kernel the
# demeanor fact lookup failed with a KeyError.
terms_str = ", ".join(all_breed_terms
                      + all_shedding_terms
                      + all_grooming_terms
                      + all_energy_terms
                      + all_trainability_terms
                      + all_demeanor_terms
                      + all_coat_type_terms
                      + all_coat_length_terms
                      + ["Small", "Medium", "Large"]
                      + ["B"]  # B is the variable used in the queries
                      + ["Bad_With_Children", "Ok_With_Children", "Good_With_Children"]
                      + ["Bad_With_Other_Dogs", "Ok_With_Other_Dogs", "Good_With_Other_Dogs"]
                      + ["Not_Protective", "Somewhat_Protective", "Very_Protective"]
                      + ["Barks_Rarely", "Barks_Occasionally", "Barks_Often"])

pyDatalog.create_terms(terms_str)

# Assert facts
# One entry per fact asserted for each breed: (dataframe column, value -> term-name mapping).
# A mapping of None means the column already holds the term name itself
# (the *_class columns derived earlier in the notebook).
fact_sources = [
    ("size_class", None),
    ("shedding_category", shedding_to_term),
    ("grooming_frequency_category", grooming_to_term),
    ("energy_level_category", energy_to_term),
    ("trainability_category", trainability_to_term),
    ("demeanor_category", demeanor_to_term),
    ("good_with_children_class", None),
    ("good_with_other_dogs_class", None),
    ("Coat Type", coat_type_to_term),
    ("Coat Length", coat_length_to_term),
    ("protectiveness_class", None),
    ("barking_level_class", None),
]

for _, row in df.iterrows():
    # Facts store the breed exactly as written in the dataset (not its term name).
    breed = row["breed"]

    for column, value_to_term in fact_sources:
        raw_value = row[column]
        pred_name = raw_value if value_to_term is None else value_to_term[raw_value]
        # create_terms put the predicate objects into this module's globals
        predicate = globals()[pred_name]
        # Unary plus on a pyDatalog literal asserts it as a fact
        +predicate(breed)



In [None]:
# Query for breeds based on various criteria; return the breed name as it appears in the dataset

# Reverse lookup: term name -> breed name as spelled in the dataset.
# If several breeds share a term name, the one encountered last is kept.
term_to_breed = {term: breed for breed, term in breed_to_term.items()}

def query_to_originals(query_result) -> list[str]:
    """Convert single-column query rows into breed names as spelled in the dataset.

    Each row must be a 1-tuple, e.g. ``[(Affenpinscher,), (Afghan_Hound,), ...]``.
    The element's string form is looked up in ``term_to_breed``; when no entry
    exists, the string form itself is returned.
    """
    printed_names = (str(term_obj) for (term_obj,) in query_result)
    return [term_to_breed.get(name, name) for name in printed_names]

# (heading, predicate, max results to show) — None shows every match.
sample_queries = [
    ("Small breeds:", Small, 30),
    ("\nRegular shedders:", Sheds_Regularly, None),
    ("\nRequires Weekly Brushing:", Requires_Weekly_Brushing, None),
    ("\nEnergetic Breeds:", Energetic, None),
    ("\nIndependent Breeds:", Independent, None),
    ("\nAlert/Responsive Breeds:", Alert_Responsive, None),
    ("\nBreeds Ok with Children:", Ok_With_Children, None),
    ("\nBreeds Good with Other Dogs:", Good_With_Other_Dogs, None),
    ("\nBreeds with Smooth Coats:", Smooth_Coat, None),
    ("\nBreeds with Long Coats:", Long_Coat, None),
    ("\nVery Protective Breeds", Very_Protective, None),
    ("\nQuiet Breeds", Barks_Rarely, None),
]

for heading, predicate, limit in sample_queries:
    print(heading)
    # Ask for every B satisfying the predicate, then trim to the display limit
    print(query_to_originals(predicate(B).data)[:limit])

Small breeds:
['Yorkshire Terrier', 'Xoloitzcuintli', 'Wirehaired Vizsla', 'Wirehaired Pointing Griffon', 'Wire Fox Terrier', 'Whippet', 'West Highland White Terrier', 'Welsh Terrier', 'Vizsla', 'Treeing Walker Coonhound', 'Toy Fox Terrier', 'Tibetan Terrier', 'Tibetan Spaniel', 'Swedish Vallhund', 'Sussex Spaniel', 'Standard Schnauzer', 'Staffordshire Bull Terrier', 'Spanish Water Dog', 'Soft Coated Wheaten Terrier', 'Smooth Fox Terrier', 'Sloughi', 'Skye Terrier', 'Silky Terrier', 'Siberian Husky', 'Shih Tzu', 'Shiba Inu', 'Shetland Sheepdog', 'Sealyham Terrier', 'Scottish Terrier', 'Schipperke']

Regular shedders:
['Pug', 'Pembroke Welsh Corgi', 'Leonberger', 'Labrador Retriever', 'Kuvasz', 'German Wirehaired Pointer', 'German Shorthaired Pointer', 'German Shepherd Dog', 'French Bulldog', 'Flat-Coated Retriever', 'Field Spaniel', 'English Setter', 'English Foxhound', 'Dogue de Bordeaux', 'Doberman Pinscher', 'Clumber Spaniel', 'Chinese Shar-Pei', 'Chesapeake Bay Retriever', 'Bulldog