In [6]:
import pandas as pd
# Load the three shelter datasets: the combined intake/outcome table,
# the intake-only table, and the outcome-only table (in that order).
in_out_df, in_df, out_df = (
    pd.read_csv(csv_name)
    for csv_name in (
        'aac_intakes_outcomes.csv',
        'aac_intakes.csv',
        'aac_outcomes.csv',
    )
)

In [8]:
# Clean data: parse the intake timestamps so the `.dt` accessor
# (year / month) can be used on this column in the cells below.
intake_timestamps = pd.to_datetime(in_df['datetime'])
in_df['datetime'] = intake_timestamps

### Is there an area where more pets are found?
#### Find the top 5 places where animals are found so the shelter can coordinate with local volunteers and animal control to monitor these areas.

In [5]:
# Tally every distinct found_location (value_counts sorts by frequency,
# most common first) and keep the five most frequent.
top_five_locations = in_out_df['found_location'].value_counts().head(5)

print("Top 5 Locations where pets are found:")
top_five_locations

Top 5 Locations where pets are found:


found_location
Austin (TX)                          14311
Outside Jurisdiction                   945
Travis (TX)                            907
7201 Levander Loop in Austin (TX)      514
Del Valle (TX)                         407
Name: count, dtype: int64

### What is the average number of pets found in a month in the year 2015? Are there months where there is a higher number of animals found?
#### Knowing the number of pets the shelter might see in a month can help them gather enough resources and donations to care for the animals they receive.


In [17]:
# Restrict the intakes to 2015 and tag each row with its calendar month.
# Relies on in_df['datetime'] already being a datetime column.
intake_in_2015 = in_df['datetime'].dt.year.eq(2015)
pets_2015 = in_df.loc[intake_in_2015].assign(
    month=lambda frame: frame['datetime'].dt.month
)

# Number of intakes per month, ordered by month number.
monthly_counts = pets_2015['month'].value_counts().sort_index()

# Mean of the monthly totals = average intakes per month in 2015.
average_pets_per_month = monthly_counts.mean()

average_pets_per_month

1559.3333333333333

In [18]:
# Display the 2015 intake count for each month (index is the month number).
monthly_counts

month
1     1198
2     1119
3     1346
4     1543
5     2094
6     2189
7     1635
8     1718
9     1591
10    1740
11    1411
12    1128
Name: count, dtype: int64

In [19]:
# Keep only the months whose intake count is above the monthly average.
# The threshold is computed from the data rather than hard-coded as 1559,
# so this cell stays correct if the underlying counts change.
months_above_average = monthly_counts[monthly_counts > monthly_counts.mean()]

# Original variable name kept so any later reference keeps working.
months_over_1559 = months_above_average
months_over_1559

month
5     2094
6     2189
7     1635
8     1718
9     1591
10    1740
Name: count, dtype: int64

### What is the ratio of incoming pets vs. adopted pets?
#### This key metric shows the shelter how well adoptions are keeping pace with the number of animals coming in.

In [21]:
# Every row of the intake table is one incoming animal.
total_incoming_pets = in_df.shape[0]

# Outcome rows whose outcome_type is exactly 'Adoption'.
adopted_pets = out_df.loc[out_df['outcome_type'].eq('Adoption')]
total_adopted_pets = adopted_pets.shape[0]

# Intakes per adoption: a value of 2 means two animals arrive for each one adopted.
adoption_ratio = total_incoming_pets / total_adopted_pets

adoption_ratio

2.3424573498480954

### What is the distribution of the types of animals in the shelter?
#### Find the count of each type of animal in the shelter.

In [22]:
# Count the intake records for each animal type, most common first.
animal_type_column = in_df['animal_type']
animal_type_counts = animal_type_column.value_counts()
animal_type_counts

animal_type
Dog          45743
Cat          29659
Other         4434
Bird           342
Livestock        9
Name: count, dtype: int64

In [23]:
# Turn the raw counts into each animal type's percentage share of the counted intakes.
total_counted_animals = animal_type_counts.sum()
animal_type_distribution = animal_type_counts.div(total_counted_animals).mul(100)

animal_type_distribution

animal_type
Dog          57.045406
Cat          36.987292
Other         5.529575
Bird          0.426503
Livestock     0.011224
Name: count, dtype: float64

### What are the adoption rates for specific breeds?
#### Find the top 5 dog breeds in the shelter (based on count) and then find the adoption percentage of each breed.

In [26]:
# Dog-only views of the intake and outcome tables.
dogs_intake = in_df.loc[in_df['animal_type'].eq('Dog')]
dogs_outcome = out_df.loc[out_df['animal_type'].eq('Dog')]

# value_counts sorts by frequency, so the first five rows are the
# five breeds with the most dog intakes.
breed_intake_counts = dogs_intake['breed'].value_counts()
top_5_dog_breeds = breed_intake_counts.iloc[:5]

top_5_dog_breeds

breed
Pit Bull Mix                 6382
Chihuahua Shorthair Mix      4860
Labrador Retriever Mix       4841
German Shepherd Mix          1963
Australian Cattle Dog Mix    1105
Name: count, dtype: int64

In [27]:
top_5_breeds_list = top_5_dog_breeds.index.tolist()

# Number of adopted dogs per breed, computed once for all breeds.
dog_was_adopted = dogs_outcome['outcome_type'] == 'Adoption'
adopted_breed_counts = dogs_outcome.loc[dog_was_adopted, 'breed'].value_counts()

# Adoption percentage = adoptions / intakes * 100, rounded to 2 decimals.
# top_5_dog_breeds already holds the intake count of each breed; counts are
# converted to plain ints so the results are ordinary Python floats.
adoption_percentages = {
    breed: round(
        (int(adopted_breed_counts.get(breed, 0)) / int(top_5_dog_breeds[breed])) * 100,
        2,
    )
    for breed in top_5_breeds_list
}

adoption_percentages

{'Pit Bull Mix': 37.32,
 'Chihuahua Shorthair Mix': 47.18,
 'Labrador Retriever Mix': 49.66,
 'German Shepherd Mix': 47.73,
 'Australian Cattle Dog Mix': 56.02}

### What are the adoption rates for different colorings?
#### Find the top 5 colorings in the shelter (based on count) and then find the adoption percentage of each color.

In [29]:
# Count intakes per colour (most common first) and keep the first five.
color_intake_counts = in_df['color'].value_counts()
top_5_colors = color_intake_counts.iloc[:5]
top_5_colors

color
Black/White    8340
Black          6710
Brown Tabby    4487
Brown          3618
White          2849
Name: count, dtype: int64

In [31]:
top_5_colors_list = top_5_colors.index.tolist()

# Number of adopted animals per colour, computed once for all colours.
was_adopted = out_df['outcome_type'] == 'Adoption'
adopted_color_counts = out_df.loc[was_adopted, 'color'].value_counts()

# Adoption percentage = adoptions / intakes * 100, rounded to 2 decimals.
# top_5_colors already holds the intake count of each colour; counts are
# converted to plain ints so the results are ordinary Python floats.
color_adoption_percentages = {
    color: round(
        (int(adopted_color_counts.get(color, 0)) / int(top_5_colors[color])) * 100,
        2,
    )
    for color in top_5_colors_list
}

color_adoption_percentages

{'Black/White': 45.73,
 'Black': 41.09,
 'Brown Tabby': 42.66,
 'Brown': 22.19,
 'White': 37.98}

### About how many animals are spayed/neutered each month?
#### This will help the shelter allocate resources and staff. Assume that all intact males and females will be spayed/neutered.

In [34]:
# Intakes recorded as intact; per the stated assumption, each one is
# treated as a future spay/neuter procedure.
is_intact = in_df['sex_upon_intake'].isin(['Intact Male', 'Intact Female'])
intact_animals = in_df.loc[is_intact].assign(
    year=lambda frame: frame['datetime'].dt.year,
    month=lambda frame: frame['datetime'].dt.month,
)

# One entry per (year, month) pair that has at least one intact intake.
spay_neuter_counts = intact_animals.groupby(['year', 'month']).size()

# Average = total procedures divided by the number of months observed.
total_neuters = spay_neuter_counts.sum()
num_months = len(spay_neuter_counts)
average_monthly_neuters = total_neuters / num_months

average_monthly_neuters

913.8518518518518

### Extra Credit


### How many animals in the shelter are repeats? Which animal was returned to the shelter the most?

#### A repeat is an animal that has been brought in to the shelter more than once.

In [40]:
# Number of intake records per animal_id.
animal_id_counts = in_df['animal_id'].value_counts()

# An animal is a repeat when its id appears on more than one intake record.
repeat_animals = animal_id_counts.loc[animal_id_counts.gt(1)]
num_repeat_animals = repeat_animals.shape[0]

# Highest intake count, and every id that reaches it (ties are all kept).
max_returns = repeat_animals.max()
animal_ids_most_returns = repeat_animals.loc[repeat_animals.eq(max_returns)].index

# Look up id, name and breed for those animals, one row per distinct combination.
is_most_returned = in_df['animal_id'].isin(animal_ids_most_returns)
animals_most_returns_details = in_df.loc[
    is_most_returned, ['animal_id', 'name', 'breed']
].drop_duplicates()

num_repeat_animals, animals_most_returns_details, max_returns

(6154,
      animal_id     name            breed
 6393   A721033  Lil Bit  Rat Terrier Mix,
 13)


### What are the adoption rates for the following age groups?

    baby: 4 months and less
    young: 5 months - 2 years
    adult: 3 years - 10 years
    senior: 11+



### If a spay/neuter procedure costs 100 for a dog and 50 for a cat, how much did the shelter spend on these procedures in 2015?