# Dog Breed Classification with Naive Bayes Algorithm

##### Fatemeh Razaqnejad
Utilize the Naive Bayes classifier to categorize dog breeds by analyzing continuous and discrete features such as weight, height, and bark frequency, leveraging Gaussian, uniform, and binomial distributions.

#### Import necessary libraries

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import norm, binom, uniform
import os

#### Load the data

In [None]:
# Load the training set into a DataFrame.
# Replace 'dogs.csv' with the actual path to your dataset if necessary.
# The cells below read the 'class', 'height', 'weight', 'bark_days' and
# 'ear_head_ratio' columns from this frame.
dogs_data = pd.read_csv('dogs.csv')

#### Functions to calculate distribution parameters

In [None]:
def calculate_gaussian_params(data):
    """Estimate the parameters of a Gaussian fitted to one feature.

    Args:
        data: The observed values of the feature for a single class.

    Returns:
        A ``(mean, variance)`` tuple computed with ``np.mean`` and ``np.var``
        (``np.var`` is called with its default ``ddof=0``).
    """
    return np.mean(data), np.var(data)

def calculate_binomial_params(data, n):
    """Estimate the success probability of a binomial with ``n`` trials.

    Args:
        data: Observed success counts for a single class.
        n: Number of trials behind each observation.

    Returns:
        A ``(p, n)`` tuple, where ``p`` is the mean observed count divided
        by ``n`` and ``n`` is passed through unchanged.
    """
    return np.mean(data) / n, n

def calculate_uniform_params(data):
    """Estimate the bounds of a uniform distribution fitted to one feature.

    Args:
        data: The observed values of the feature for a single class.

    Returns:
        An ``(a, b)`` tuple holding the smallest and largest observed value.
    """
    return np.min(data), np.max(data)

#### Calculate parameters for each category

In [None]:
# Fit one set of distribution parameters per class label.
n_trials = 30  # Total number of trials for bark_days
categories = dogs_data['class'].unique()
params = {}

for category in categories:
    # Restrict the training data to the rows of the current class.
    category_data = dogs_data.loc[dogs_data['class'] == category]

    class_params = {}
    # height and weight are both modelled as Gaussians.
    for feature in ('height', 'weight'):
        class_params[feature] = calculate_gaussian_params(category_data[feature])
    # bark_days is modelled as a binomial count out of n_trials.
    class_params['bark_days'] = calculate_binomial_params(
        category_data['bark_days'], n_trials
    )
    # ear_head_ratio is modelled as uniform between its observed extremes.
    class_params['ear_head_ratio'] = calculate_uniform_params(
        category_data['ear_head_ratio']
    )

    params[category] = class_params

#### Function to classify a sample based on calculated parameters

In [None]:
def _safe_likelihood(value, epsilon):
    """Return ``value`` floored at ``epsilon``; NaN is mapped to ``epsilon``."""
    # max(nan, epsilon) would keep the NaN, so it has to be handled explicitly.
    if np.isnan(value):
        return epsilon
    return max(value, epsilon)


def classify(sample, params):
    """Pick the class whose fitted distributions best explain ``sample``.

    For every class the likelihood of each feature is evaluated
    (Gaussian for height and weight, binomial for bark_days, uniform for
    ear_head_ratio) and the four values are multiplied together. No class
    prior is applied, so all classes are treated as equally likely a priori.

    Args:
        sample: Mapping with the keys 'height', 'weight', 'bark_days' and
            'ear_head_ratio' holding scalar feature values.
        params: Mapping from class label to a dict with the keys
            'height' -> (mean, variance), 'weight' -> (mean, variance),
            'bark_days' -> (p, n) and 'ear_head_ratio' -> (a, b).

    Returns:
        A ``(best_category, probabilities)`` tuple. ``probabilities`` maps
        every class label to a dict with the keys 'height_prob',
        'weight_prob', 'bark_days_prob', 'ear_head_ratio_prob' and
        'total_prob'. Every likelihood is at least ``1e-10``.
    """
    epsilon = 1e-10  # Small value to prevent zero probability
    probabilities = {}

    for category, feature_params in params.items():
        height_mu, height_var = feature_params['height']
        weight_mu, weight_var = feature_params['weight']
        bark_days_p, bark_days_n = feature_params['bark_days']
        ear_head_ratio_a, ear_head_ratio_b = feature_params['ear_head_ratio']

        # scipy returns NaN for a scale of 0, which happens when a class has a
        # single training row or identical values. Floor the scales so such a
        # degenerate class still produces a usable (very peaked) density.
        height_std = max(np.sqrt(height_var), epsilon)
        weight_std = max(np.sqrt(weight_var), epsilon)
        ear_head_ratio_width = max(ear_head_ratio_b - ear_head_ratio_a, epsilon)

        # Ensure probabilities are neither zero nor NaN, so one impossible
        # feature cannot wipe out or poison the whole product.
        height_prob = _safe_likelihood(
            norm.pdf(sample['height'], height_mu, height_std), epsilon
        )
        weight_prob = _safe_likelihood(
            norm.pdf(sample['weight'], weight_mu, weight_std), epsilon
        )
        bark_days_prob = _safe_likelihood(
            binom.pmf(sample['bark_days'], bark_days_n, bark_days_p), epsilon
        )
        ear_head_ratio_prob = _safe_likelihood(
            uniform.pdf(sample['ear_head_ratio'], ear_head_ratio_a, ear_head_ratio_width),
            epsilon,
        )

        total_prob = height_prob * weight_prob * bark_days_prob * ear_head_ratio_prob
        probabilities[category] = {
            'height_prob': height_prob,
            'weight_prob': weight_prob,
            'bark_days_prob': bark_days_prob,
            'ear_head_ratio_prob': ear_head_ratio_prob,
            'total_prob': total_prob
        }

    best_category = max(probabilities, key=lambda k: probabilities[k]['total_prob'])
    return best_category, probabilities

#### Function to get a sample from user input

In [None]:
def get_sample_from_input():
    """Prompt the user for the four features and return them as a sample dict.

    The prompts are issued in the order height, weight, bark_days,
    ear_head_ratio. bark_days is parsed with ``int``, the other three with
    ``float``; text that cannot be parsed raises ``ValueError``.

    Returns:
        A dict with the keys 'height', 'weight', 'bark_days' and
        'ear_head_ratio'.
    """
    # Dict entries are evaluated top to bottom, which fixes the prompt order.
    return {
        'height': float(input("Enter the height: ")),
        'weight': float(input("Enter the weight: ")),
        'bark_days': int(input("Enter the bark_days: ")),
        'ear_head_ratio': float(input("Enter the ear_head_ratio: ")),
    }

# Get sample from user input (blocks until all four values have been entered);
# the resulting dict is what gets passed to classify() below.
sample = get_sample_from_input()

#### Classify the sample and print the results

In [None]:
# Classify the interactively entered sample against the fitted parameters.
predicted_class, detailed_probs = classify(sample, params)

print(f'The predicted class for the sample is: {predicted_class}\n')

# Show the per-feature likelihoods and their product for every class.
print('Detailed probabilities for each category:')
for category in detailed_probs:
    probs = detailed_probs[category]
    print(f'Category {category}:')
    print(f"  Height Probability: {probs['height_prob']}")
    print(f"  Weight Probability: {probs['weight_prob']}")
    print(f"  Bark Days Probability: {probs['bark_days_prob']}")
    print(f"  Ear Head Ratio Probability: {probs['ear_head_ratio_prob']}")
    print(f"  Total Probability: {probs['total_prob']}\n")

#### (Optional) Check and classify samples from 'sampleC/' directory

In [None]:
def _print_classification_report(detailed_probs):
    """Print the per-category likelihood breakdown produced by classify()."""
    print('Detailed probabilities for each category:')
    for category, probs in detailed_probs.items():
        print(f'Category {category}:')
        print(f"  Height Probability: {probs['height_prob']}")
        print(f"  Weight Probability: {probs['weight_prob']}")
        print(f"  Bark Days Probability: {probs['bark_days_prob']}")
        print(f"  Ear Head Ratio Probability: {probs['ear_head_ratio_prob']}")
        print(f"  Total Probability: {probs['total_prob']}\n")


def check_sampleC_directory(params, sampleC_path='sampleC/'):
    """Classify every row of every CSV file found in a sample directory.

    Each CSV file is expected to provide the columns 'height', 'weight',
    'bark_days' and 'ear_head_ratio'. For every row the predicted class and
    the detailed per-category probabilities are printed.

    If the directory does not exist, is empty, or contains no '.csv' files,
    "There is no sample to analyze" is printed and nothing else happens.

    Args:
        params: Per-class distribution parameters, as expected by classify().
        sampleC_path: Directory to scan. Defaults to 'sampleC/'.
    """
    # Sort the names so the output order does not depend on the order in
    # which the operating system happens to list the directory.
    if os.path.isdir(sampleC_path):
        csv_files = sorted(
            name for name in os.listdir(sampleC_path) if name.endswith('.csv')
        )
    else:
        csv_files = []

    if not csv_files:
        print("There is no sample to analyze")
        return

    for file_name in csv_files:
        sample_data = pd.read_csv(os.path.join(sampleC_path, file_name))
        for index, row in sample_data.iterrows():
            sample = {
                'height': row['height'],
                'weight': row['weight'],
                'bark_days': row['bark_days'],
                'ear_head_ratio': row['ear_head_ratio']
            }
            predicted_class, detailed_probs = classify(sample, params)
            print(f'The predicted class for the sample in {file_name} (index {index}) is: {predicted_class}\n')
            _print_classification_report(detailed_probs)

# Uncomment the following line to run this function
# check_sampleC_directory(params)