In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

%matplotlib inline

In [3]:
# Read the dogs dataset from a CSV path relative to the working directory,
# then preview the first five rows.
dogs_df = pd.read_csv(filepath_or_buffer='data/dogs_dataset.csv')
dogs_df.head(n=5)

Unnamed: 0,Breed,Age (Years),Weight (kg),Color,Gender
0,Airedale Terrier,13,35,White,Male
1,Jack Russell Terrier,10,43,Tan,Female
2,Dogo Argentino,2,16,Spotted,Female
3,Labrador Retriever,9,57,Bicolor,Male
4,French Bulldog,12,39,Spotted,Male


In [4]:
# Print column names, dtypes and non-null counts for the loaded frame.
dogs_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000 entries, 0 to 2999
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Breed        3000 non-null   object
 1   Age (Years)  3000 non-null   int64 
 2   Weight (kg)  3000 non-null   int64 
 3   Color        3000 non-null   object
 4   Gender       3000 non-null   object
dtypes: int64(2), object(3)
memory usage: 117.3+ KB


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns, transposed so that each column becomes one row.
dogs_df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Age (Years),3000.0,7.499,4.011238,1.0,4.0,8.0,11.0,14.0
Weight (kg),3000.0,32.059667,15.628463,5.0,19.0,33.0,45.0,59.0


In [14]:
# Number of distinct values in the Breed column.
print('There are', dogs_df['Breed'].nunique(), 'dog breeds in the dataset.')

There are 53 dog breeds in the dataset.


In [24]:
# The ten most frequent breeds, as a two-column frame (Breed, count).
top_breeds = (
    dogs_df['Breed']
    .value_counts()
    .nlargest(10)
    .reset_index()
)
top_breeds

Unnamed: 0,Breed,count
0,Rottweiler,118
1,French Bulldog,70
2,Pekingese,68
3,Pembroke Welsh Corgi,67
4,Doberman Pinscher,66
5,Weimaraner,66
6,Pug,65
7,Bichon Frise,64
8,Vizsla,64
9,Cavalier King Charles Spaniel,64


In [30]:
# The ten most frequent coat colors, as a two-column frame (Color, count).
top_colors = (
    dogs_df['Color']
    .value_counts()
    .nlargest(10)
    .reset_index()
)
top_colors

Unnamed: 0,Color,count
0,Black and White,205
1,Bicolor,200
2,Brindle,200
3,Merle,199
4,Sable,199
5,Black and Tan,193
6,Spotted,191
7,Blue,191
8,Brown,189
9,Red,185


In [35]:
# Row count per gender, as a two-column frame (Gender, count).
dogs_by_gender = (
    dogs_df['Gender']
    .value_counts()
    .reset_index()
)
dogs_by_gender

Unnamed: 0,Gender,count
0,Female,1520
1,Male,1480


In [31]:
# Bar chart of the ten most frequent breeds.
# text_auto=True labels each bar with its exact count. A '.2s' format keeps
# only two significant digits, which would label a count of 118 as "120".
fig = px.bar(
    top_breeds,
    x='Breed',
    y='count',
    text_auto=True,
    labels={'count': 'Frequency'},
    title='Distribution of dogs by breed - Top 10',
)

fig.show()

In [34]:
# Bar chart of the ten most frequent coat colors.
# text_auto=True labels each bar with its exact count. A '.2s' format keeps
# only two significant digits, which would label 199 as "200" and 193 as
# "190", hiding the small differences this chart is meant to show.
fig = px.bar(
    top_colors,
    x='Color',
    y='count',
    text_auto=True,
    labels={'count': 'Frequency'},
    title='Distribution of dogs by color - Top 10',
)
# Tilt the category labels so the longer color names do not overlap.
fig.update_xaxes(tickangle=45)

fig.show()

In [38]:
# Bar chart of the number of dogs per gender.
# text_auto=True labels each bar with its exact integer count. A '.3s' format
# renders SI-prefixed text such as "1.52k" and rounds to three significant
# digits, so it is only exact by coincidence for the current data.
fig = px.bar(
    dogs_by_gender,
    x='Gender',
    y='count',
    text_auto=True,
    labels={'count': 'Frequency'},
    title='Distribution of dogs by gender',
)
# White outline around each bar.
fig.update_traces(marker_line_color='white')

fig.show()

In [45]:
# Mean of 'Age (Years)' for every breed, one row per breed
# (columns: Breed, Age (Years)).
mean_age_by_breed = (
    dogs_df
    .groupby('Breed', as_index=False)['Age (Years)']
    .mean()
)
mean_age_by_breed

Unnamed: 0,Breed,Age (Years)
0,Airedale Terrier,8.218182
1,Akita,7.784314
2,Alaskan Malamute,6.981818
3,Australian Shepherd,7.823529
4,Basenji,7.55102
5,Beagle,7.636364
6,Belgian Malinois,8.363636
7,Bernese Mountain Dog,6.785714
8,Bichon Frise,6.5
9,Bloodhound,7.237288


In [50]:
# Mean of 'Weight (kg)' for every breed, one row per breed
# (columns: Breed, Weight (kg)).
mean_weight_by_breed = (
    dogs_df
    .groupby('Breed', as_index=False)['Weight (kg)']
    .mean()
)
mean_weight_by_breed

Unnamed: 0,Breed,Weight (kg)
0,Airedale Terrier,31.818182
1,Akita,31.686275
2,Alaskan Malamute,34.636364
3,Australian Shepherd,33.352941
4,Basenji,31.938776
5,Beagle,32.509091
6,Belgian Malinois,29.113636
7,Bernese Mountain Dog,33.267857
8,Bichon Frise,31.96875
9,Bloodhound,32.271186


In [54]:
# Line chart with point markers: mean age per breed, breeds on the x-axis.
fig = px.line(
    data_frame=mean_age_by_breed,
    x='Breed',
    y='Age (Years)',
    markers=True,
    title='Mean age of dog by breed',
    labels={'Age (Years)': 'Mean Age'},
)

fig.show()

In [55]:
# Line chart with point markers: mean weight per breed, breeds on the x-axis.
fig = px.line(
    data_frame=mean_weight_by_breed,
    x='Breed',
    y='Weight (kg)',
    markers=True,
    title='Mean weight of dog by breed',
)

fig.show()

In [68]:
# The ten most frequent (breed, color) combinations across the whole dataset.
# NOTE(review): this ranks pairs globally, so one breed can appear several
# times and most breeds not at all; it is not "the top color of each breed".
# Confirm which of the two was intended.
top_color_by_breed = (
    dogs_df
    .groupby('Breed')['Color']
    .value_counts()
    .nlargest(10)
    .reset_index()
)
top_color_by_breed

Unnamed: 0,Breed,Color,count
0,Rottweiler,Merle,15
1,Pug,Brown,11
2,Rottweiler,Brindle,11
3,Rottweiler,White,11
4,Rottweiler,Spotted,10
5,Bernese Mountain Dog,Brindle,9
6,Doberman Pinscher,Sable,9
7,Havanese,Black,9
8,Poodle,Black and White,9
9,Rottweiler,Black,9


In [75]:
# Top colors by breed: bars of the (breed, color) counts, one bar color
# per coat color.
fig = px.bar(
    data_frame=top_color_by_breed,
    x='Breed',
    y='count',
    color='Color',
    title='Top colors by breed',
    labels={'count': 'Color Frequency'},
)

fig.show()