### Importing Dependencies

In [1]:
import pandas as pd
import numpy as np
import random
from scipy.optimize import minimize

### Preprocessing and Cleaning Data

In [28]:
df = pd.read_csv("dogdata.csv")

# Preprocessing:
# Midpoint of each breed's height range and weight range
df['avg_height'] = (df['min_height'] + df['max_height']) / 2
df['avg_weight'] = (df['min_weight'] + df['max_weight']) / 2
# Largest upper bounds in the dataset. These stay as notebook-level names
# because the user-input cells divide the entered height/weight by them.
max_height = df['max_height'].max()
max_weight = df['max_weight'].max()
# Apply max scaling: divide each average by the largest maximum in its column
df['scaled_avg_height'] = df['avg_height'] / max_height
df['scaled_avg_weight'] = df['avg_weight'] / max_weight

# Put scaled and relevant data into a new dataframe.
# The breed label is the first column of the CSV; select it together with the
# features and rename it with DataFrame.rename. (Assigning into
# `columns.values[0]` writes into the Index's backing array in place, which
# pandas does not support and which can leave label lookups out of sync.)
breed_col = df.columns[0]
feature_cols = [
    'scaled_avg_height',
    'scaled_avg_weight',
    'grooming_frequency_value',
    'shedding_value',
    'energy_level_value',
    'trainability_value',
    'demeanor_value',
]
breed_data = df[[breed_col] + feature_cols].rename(columns={breed_col: 'Breeds'})

# Cleaning:
# Remove rows with NaN values
breed_data_cleaned = breed_data.dropna()
# Save the cleaned DataFrame to a new CSV file
breed_data_cleaned.to_csv('breed_data_cleaned.csv', index=False)
print("Cleaned data saved to 'breed_data_cleaned.csv'")

Cleaned data saved to 'breed_data_cleaned.csv'


### Defining function for determining optimal breed combination

In [26]:
# Build the matrix consumed by the loss function: strip the label column, then
# transpose so that every column of B is one breed's feature vector.
data = breed_data_cleaned.drop(columns='Breeds')
B = np.transpose(np.asarray(data))

def estimate_dog_breed(B, y, breed_names=None, min_weight=0.1, seed=None):
    """Estimate a dog's breed mixture from its features and print the result.

    Searches for weights ``w`` (one per column of ``B``), each in [0, 1] and
    summing to 1, that minimise the sum of squared errors between ``B @ w``
    and ``y``. Weights below ``min_weight`` are discarded, the rest are
    renormalised to sum to 1 and printed as percentages next to the breed name.

    Parameters
    ----------
    B : array-like, shape (n_features, n_breeds)
        One column of features per candidate breed.
    y : array-like, shape (n_features,)
        Features of the dog to explain, in the same row order as ``B``.
    breed_names : sequence of str, optional
        Name for each column of ``B``. When omitted, the first column of the
        notebook-level ``breed_data_cleaned`` table is used.
    min_weight : float, default 0.1
        Weights strictly below this value are dropped before renormalising.
    seed : int, optional
        Seed for the random initial guess; pass a value for repeatable runs.

    Returns
    -------
    None
        The estimate is printed, not returned.

    Raises
    ------
    ValueError
        If ``B`` is not 2-D, has no columns, or its shape does not match
        ``y`` or ``breed_names``.
    """
    import warnings  # local import so this cell does not depend on the import cell changing

    B = np.asarray(B, dtype=float)
    y = np.asarray(y, dtype=float).ravel()
    if B.ndim != 2 or B.shape[1] == 0:
        raise ValueError("B must be a 2-D array with at least one breed column")
    n_features, n_breeds = B.shape
    if y.shape[0] != n_features:
        raise ValueError(
            f"y has {y.shape[0]} values but B has {n_features} feature rows"
        )

    if breed_names is None:
        # Fall back to the notebook-level table whose first column holds the names
        breed_names = breed_data_cleaned.iloc[:, 0]
    breed_names = list(breed_names)
    if len(breed_names) != n_breeds:
        raise ValueError(
            f"got {len(breed_names)} breed names for {n_breeds} columns of B"
        )

    # Loss function (SSE) to minimise
    def loss(w):
        residual = B @ w - y
        return residual @ residual

    # Analytic gradient of the SSE; without it SLSQP falls back to finite
    # differences, which costs one extra loss evaluation per breed per step
    def loss_grad(w):
        return 2.0 * (B.T @ (B @ w - y))

    # Constraint: elements of w must sum to 1
    constraint = {
        'type': 'eq',
        'fun': lambda w: np.sum(w) - 1,
        'jac': lambda w: np.ones_like(w),
    }

    # Initial guess: random values in [0, 1), normalised to meet the constraint
    rng = np.random.default_rng(seed)
    w0 = rng.uniform(0, 1, size=n_breeds)
    w0 /= w0.sum()

    # Bounds [0, 1] for all weights
    bounds = [(0, 1)] * n_breeds

    # Minimise using SLSQP (supports bounds and equality constraints)
    result = minimize(loss, w0, jac=loss_grad, method='SLSQP',
                      bounds=bounds, constraints=[constraint])
    if not result.success:
        # Still report the best point found, but do not hide the failure
        warnings.warn(f"SLSQP did not converge: {result.message}", RuntimeWarning)

    # Discard weights below the threshold
    w_adjusted = np.where(result.x < min_weight, 0.0, result.x)
    total = w_adjusted.sum()
    if total <= 0:
        # Every weight was below the threshold: say so instead of printing an
        # empty estimate
        print(f'No single breed accounts for at least {min_weight:.0%} of the estimated mixture.')
        return

    # Renormalise the surviving weights to sum to 1
    w_adjusted /= total

    # Print results
    print('We estimate your dog to be:')
    for idx in np.flatnonzero(w_adjusted):
        print(round(w_adjusted[idx] * 100, 0), '% ', breed_names[idx])

### Run the Following Test Cells

In [33]:
# Feed the first three breeds' own feature rows back into the estimator
# (rows 0-2: Affenpinscher, Afghan Hound, Airedale Terrier)
for row_position in range(3):
    estimate_dog_breed(B, data.iloc[row_position])

We estimate your dog to be:
100.0 %  Affenpinscher
We estimate your dog to be:
100.0 %  Afghan Hound
We estimate your dog to be:
100.0 %  Airedale Terrier


### Taking User inputs

In [11]:
# Parse as float so ages under one year (e.g. 0.5) are accepted; int() would
# reject exactly the young dogs this warning is meant for
age = float(input('How old is your dog in earth years?'))
if age < 0:
    raise ValueError('Age cannot be negative.')
if age <= 3:
    print("Please understand that your dog is still young and may not have yet developed all of the attributes of it's breed or breed mixture. This makes it far more difficult to guess it's breed / breed mixture.")

KeyboardInterrupt: Interrupted by user

In [180]:
# Parse as float so fractional measurements (e.g. 30.5) are accepted
height = float(input('What height is your dog (cm)?'))
if height <= 0:
    raise ValueError('Height must be a positive number of centimetres.')
# Same max scaling as the breed data: divide by the largest max_height
scaled_height = height / max_height

What height is your dog (cm)? 30


In [181]:
# Parse as float so fractional measurements (e.g. 7.5) are accepted
weight = float(input('What weight is your dog (kg)?'))
if weight <= 0:
    raise ValueError('Weight must be a positive number of kilograms.')
# Same max scaling as the breed data: divide by the largest max_weight
scaled_weight = weight / max_weight

What weight is your dog (kg)? 7


In [182]:
# Take each category and its numeric value from the SAME rows, so that menu
# position N always maps to the value belonging to label N. Building the two
# lists from independent dropna().unique() calls only lines up by coincidence.
grooming_options = (
    df[['grooming_frequency_category', 'grooming_frequency_value']]
    .dropna()
    .drop_duplicates(subset='grooming_frequency_category')
)
groomingfreqs = grooming_options['grooming_frequency_category'].tolist()
groomingvals = grooming_options['grooming_frequency_value'].tolist()
for position, label in enumerate(groomingfreqs):
    print(position, label)
grooming_choice = int(input('From the above unordered list, enter the number that corresponds to the option that best describes your dogs grooming demands.'))
# Reject out-of-range answers; a negative index would otherwise wrap around silently
if not 0 <= grooming_choice < len(groomingvals):
    raise ValueError(f'Please enter a number between 0 and {len(groomingvals) - 1}.')
groomingval = groomingvals[grooming_choice]

0 2-3 Times a Week Brushing
1 Daily Brushing
2 Occasional Bath/Brush
3 Weekly Brushing
4 Specialty/Professional


From the above unordered list, enter the number that corresponds to the option that best describes your dogs grooming demands. 2


In [183]:
# Take each category and its numeric value from the SAME rows, so that menu
# position N always maps to the value belonging to label N. Building the two
# lists from independent dropna().unique() calls only lines up by coincidence.
shedding_options = (
    df[['shedding_category', 'shedding_value']]
    .dropna()
    .drop_duplicates(subset='shedding_category')
)
sheddingfreqs = shedding_options['shedding_category'].tolist()
sheddingvals = shedding_options['shedding_value'].tolist()
for position, label in enumerate(sheddingfreqs):
    print(position, label)
shedding_choice = int(input("From the above unordered list, enter the number that corresponds to the option that best describes how frequently your dog sheds it's fur."))
# Reject out-of-range answers; a negative index would otherwise wrap around silently
if not 0 <= shedding_choice < len(sheddingvals):
    raise ValueError(f'Please enter a number between 0 and {len(sheddingvals) - 1}.')
sheddingval = sheddingvals[shedding_choice]

0 Seasonal
1 Infrequent
2 Occasional
3 Regularly
4 Frequent


From the above unordered list, enter the number that corresponds to the option that best describes how frequently your dog sheds it's fur. 1


In [185]:
# Take each category and its numeric value from the SAME rows, so that menu
# position N always maps to the value belonging to label N. Building the two
# lists from independent dropna().unique() calls only lines up by coincidence.
energy_options = (
    df[['energy_level_category', 'energy_level_value']]
    .dropna()
    .drop_duplicates(subset='energy_level_category')
)
energycategs = energy_options['energy_level_category'].tolist()
energyvals = energy_options['energy_level_value'].tolist()
for position, label in enumerate(energycategs):
    print(position, label)
energy_choice = int(input("From the above unordered list, enter the number that corresponds to the option that best describes your dog's energy level."))
# Reject out-of-range answers; a negative index would otherwise wrap around silently
if not 0 <= energy_choice < len(energyvals):
    raise ValueError(f'Please enter a number between 0 and {len(energyvals) - 1}.')
energyval = energyvals[energy_choice]

0 Regular Exercise
1 Energetic
2 Needs Lots of Activity
3 Couch Potato
4 Calm


From the above unordered list, enter the number that corresponds to the option that best describes your dog's energy level. 1


In [186]:
# Take each category and its numeric value from the SAME rows, so that menu
# position N always maps to the value belonging to label N. Building the two
# lists from independent dropna().unique() calls only lines up by coincidence.
trainability_options = (
    df[['trainability_category', 'trainability_value']]
    .dropna()
    .drop_duplicates(subset='trainability_category')
)
trainabilitycategs = trainability_options['trainability_category'].tolist()
trainabilityvals = trainability_options['trainability_value'].tolist()
for position, label in enumerate(trainabilitycategs):
    print(position, label)
trainability_choice = int(input("From the above unordered list, enter the number that corresponds to the option that best describes how easy your dog is to train."))
# Reject out-of-range answers; a negative index would otherwise wrap around silently
if not 0 <= trainability_choice < len(trainabilityvals):
    raise ValueError(f'Please enter a number between 0 and {len(trainabilityvals) - 1}.')
trainabilityval = trainabilityvals[trainability_choice]

0 Easy Training
1 May be Stubborn
2 Eager to Please
3 Independent
4 Agreeable


From the above unordered list, enter the number that corresponds to the option that best describes how easy your dog is to train. 2


In [187]:
# Take each category and its numeric value from the SAME rows, so that menu
# position N always maps to the value belonging to label N. Building the two
# lists from independent dropna().unique() calls only lines up by coincidence.
demeanor_options = (
    df[['demeanor_category', 'demeanor_value']]
    .dropna()
    .drop_duplicates(subset='demeanor_category')
)
demeanorcategs = demeanor_options['demeanor_category'].tolist()
demeanorvals = demeanor_options['demeanor_value'].tolist()
for position, label in enumerate(demeanorcategs):
    print(position, label)
demeanor_choice = int(input("From the above unordered list, enter the number that corresponds to the option that best describes the demeanor of your dog."))
# Reject out-of-range answers; a negative index would otherwise wrap around silently
if not 0 <= demeanor_choice < len(demeanorvals):
    raise ValueError(f'Please enter a number between 0 and {len(demeanorvals) - 1}.')
demeanorval = demeanorvals[demeanor_choice]

0 Outgoing
1 Aloof/Wary
2 Friendly
3 Alert/Responsive
4 Reserved with Strangers


From the above unordered list, enter the number that corresponds to the option that best describes the demeanor of your dog. 2


In [188]:
# Assemble the dog's feature vector in the same order as the feature columns
# used to build B (height, weight, grooming, shedding, energy, trainability,
# demeanor), then run the estimator on it.
y = [
    scaled_height,
    scaled_weight,
    groomingval,
    sheddingval,
    energyval,
    trainabilityval,
    demeanorval,
]
estimate_dog_breed(B, y)

[0.22703412073490814, 0.06430149313725596, 0.2, 0.2, 0.8, 1.0, 0.8]
