## Cleaning Dataset 

In [2]:
import pandas as pd
import requests
import time
import googlemaps
import folium
df = pd.read_csv("C:/Users/khapr/OneDrive/Documents/MS BIOSTAT/HSHV/FullHSHVData2.csv")

#### Rewrite the addresses so they are more interpretable by GeocodeAPI

In [4]:
df['Jurisdiction In'] = df['Jurisdiction In'].replace(r'^WC-', '', regex=True)  # Remove WC-
df['Jurisdiction In'] = df['Jurisdiction In'].replace(r'\s+(Twp|City)$', '', regex=True)
df['Location Found'] = df['Location Found'].replace('/', ' and ', regex=True)

#### Get latitude and longitutde for each address 

In [6]:
df[['lon', 'lat']] = df['pnt'].str.split(', ', expand=True)
df['lat'] = pd.to_numeric(df['lat'], errors='coerce')  
df['lon'] = pd.to_numeric(df['lon'], errors='coerce')

In [7]:
API_KEY = 'AIzaSyCQhSlO7I12W_LENQsmU_JMkT7hFa4zmRw'
gmaps = googlemaps.Client(key=API_KEY)
condition_1 = ~df['address_google'].str.contains('MI|Michigan', na=False)
condition_2 = df['address_google'] == 'Michigan, USA'
incorrect_coords_df = df[condition_1 | condition_2].copy()

latitudes = []
longitudes = []
addresses = []

for index, row in incorrect_coords_df.iterrows():
    address = f"{row['Location Found']}, {row['Jurisdiction In']}, MI, USA"
    try:
        geocode_result = gmaps.geocode(address)
        if geocode_result:
            location = geocode_result[0]['geometry']['location']
            latitudes.append(location['lat'])
            longitudes.append(location['lng'])
            addresses.append(geocode_result[0]['formatted_address'])
        else:
            latitudes.append(None)
            longitudes.append(None)
            addresses.append(None)
    except Exception as e:
        print(f"Error geocoding {address}: {e}")
        latitudes.append(None)
        longitudes.append(None)
        addresses.append(None)

In [8]:
incorrect_coords_df.loc[:, 'Latitude'] = latitudes
incorrect_coords_df.loc[:, 'Longitude'] = longitudes
incorrect_coords_df.loc[:, 'google_address_corrected'] = addresses
print(incorrect_coords_df)

        ...1     Animal # Species        Primary Breed Gender Altered  \
68        69  A0043487853     Cat   Domestic Shorthair      M     Yes   
81        82  A0018665727     Dog  Retriever, Labrador      F     Yes   
120      121  A0041719634     Dog       Siberian Husky      F     Yes   
209      210  A0042677934     Cat   Domestic Shorthair      F     Yes   
228      229  A0045913101     Dog    Terrier, Pit Bull      M     Yes   
...      ...          ...     ...                  ...    ...     ...   
11285  11286  A0054093167  Rodent           Guinea Pig      F      No   
11306  11307  A0054109808     Cat   Domestic Shorthair      F     Yes   
11314  11315  A0054115024     Cat   Domestic Shorthair      F     Yes   
11315  11316  A0054115026     Cat    Domestic Longhair      F     Yes   
11316  11317  A0054115027     Cat   Domestic Shorthair      F     Yes   

            Intake Date    Intake Subtype                      Location Found  \
68     12/24/2019 10:53  Stray without ID 

#### Remove observations found at HSHV 

In [10]:
incorrect_coords_df.loc[incorrect_coords_df['LocationPlus'].str.contains('HSHV', na=False), 'address_google'] = '3100 Cherry Hill Rd, Ann Arbor, MI 48105'
incorrect_coords_df.loc[incorrect_coords_df['LocationPlus'].str.contains('HSHV', na=False), 'google_address_corrected'] = '3100 Cherry Hill Rd, Ann Arbor, MI 48105'
incorrect_coords_df.loc[incorrect_coords_df['LocationPlus'].str.contains('HSHV', na=False), 'Latitude'] = 42.306139
incorrect_coords_df.loc[incorrect_coords_df['LocationPlus'].str.contains('HSHV', na=False), 'lat'] = 42.306139
incorrect_coords_df.loc[incorrect_coords_df['LocationPlus'].str.contains('HSHV', na=False), 'Longitude'] = -83.654887
incorrect_coords_df.loc[incorrect_coords_df['LocationPlus'].str.contains('HSHV', na=False), 'lon'] = -83.654887

In [11]:
df.loc[incorrect_coords_df.index, 'lat'] = incorrect_coords_df['Latitude']
df.loc[incorrect_coords_df.index, 'lon'] = incorrect_coords_df['Longitude']
df.loc[incorrect_coords_df.index, 'address_google'] = incorrect_coords_df['google_address_corrected']

#### Find the observations that are just Michigan, USA and manually change them 

In [13]:
incorrect_coords_df[incorrect_coords_df['google_address_corrected'] == 'Michigan, USA']

Unnamed: 0,...1,Animal #,Species,Primary Breed,Gender,Altered,Intake Date,Intake Subtype,Location Found,Jurisdiction In,...,...22,...23,LocationPlus,address_google,pnt,lon,lat,Latitude,Longitude,google_address_corrected
2867,2868,A0041587203,Cat,Domestic Shorthair,F,Yes,5/3/2019 14:26,Stray without ID,Holmes and Ford Blvd,York,...,,,"Holmes / Ford Blvd WC-York Twp , Michigan","Michigan, USA","-85.6023643, 44.3148443",-85.602364,44.314844,44.314844,-85.602364,"Michigan, USA"
5215,5216,A0049120683,Cat,Domestic Shorthair,M,Yes,11/21/2021 15:57,Orphan Animal,Norman and Owens,Other Out of County,...,,,"Norman and Owens Other Out of County , Michigan","Michigan, USA","-85.6023643, 44.3148443",-85.602364,44.314844,44.314844,-85.602364,"Michigan, USA"
5219,5220,A0051573388,Cat,Domestic Shorthair,F,Yes,11/21/2022 16:01,Stray without ID,Central and Main St,Dexter,...,,,"Central and Main St WC-Dexter Twp , Michigan","Michigan, USA","-85.6023643, 44.3148443",-85.602364,44.314844,44.314844,-85.602364,"Michigan, USA"
5227,5228,A0049121987,Cat,Domestic Shorthair,F,Yes,11/22/2021 9:23,Orphan Animal,West college and Park street,Other Out of County,...,,,West college and Park street Other Out of Coun...,"Michigan, USA","-85.6023643, 44.3148443",-85.602364,44.314844,44.314844,-85.602364,"Michigan, USA"
7014,7015,A0050897660,Cat,Domestic Shorthair,F,Yes,8/15/2022 14:27,Orphan Animal,Mowery and Thayer,Out of State,...,,,"Mowery and Thayer Out of State , Michigan","Michigan, USA","-85.6023643, 44.3148443",-85.602364,44.314844,44.314844,-85.602364,"Michigan, USA"
7015,7016,A0050897662,Cat,Domestic Shorthair,M,No,8/15/2022 14:27,Orphan Animal,Mowery and Thayer,Out of State,...,,,"Mowery and Thayer Out of State , Michigan","Michigan, USA","-85.6023643, 44.3148443",-85.602364,44.314844,44.314844,-85.602364,"Michigan, USA"
7291,7292,A0048387156,Cat,Domestic Shorthair,F,Yes,8/6/2021 9:10,Stray without ID,I-475 near Toledo,Out of State,...,,,"I-475 near Toledo Out of State , Michigan","I-475, Toledo, OH, USA","-83.6933438, 41.6297286",-83.693344,41.629729,44.314844,-85.602364,"Michigan, USA"
11202,11203,A0056964944,Cat,Domestic Shorthair,M,Yes,9/26/2024 18:36,Kitten/Puppy,Hull and Barnes,Other Out of County,...,,,"Hull and Barnes Other Out of County , Michigan","Michigan, USA","-85.6023643, 44.3148443",-85.602364,44.314844,44.314844,-85.602364,"Michigan, USA"
11203,11204,A0056964946,Cat,Domestic Shorthair,F,Yes,9/26/2024 18:36,Kitten/Puppy,Hull and Barnes,Other Out of County,...,,,"Hull and Barnes Other Out of County , Michigan","Michigan, USA","-85.6023643, 44.3148443",-85.602364,44.314844,44.314844,-85.602364,"Michigan, USA"
11204,11205,A0056964949,Cat,Domestic Shorthair,F,Yes,9/26/2024 18:36,Kitten/Puppy,Hull and Barnes,Other Out of County,...,,,"Hull and Barnes Other Out of County , Michigan","Michigan, USA","-85.6023643, 44.3148443",-85.602364,44.314844,44.314844,-85.602364,"Michigan, USA"


In [14]:
df['pnt'] = df['lon'].astype(str) + ', ' + df['lat'].astype(str)

In [15]:
df[df['address_google'] == 'Michigan, USA']

Unnamed: 0,...1,Animal #,Species,Primary Breed,Gender,Altered,Intake Date,Intake Subtype,Location Found,Jurisdiction In,...,Returned to Address,...20,...21,...22,...23,LocationPlus,address_google,pnt,lon,lat
2867,2868,A0041587203,Cat,Domestic Shorthair,F,Yes,5/3/2019 14:26,Stray without ID,Holmes and Ford Blvd,York,...,,,,,,"Holmes / Ford Blvd WC-York Twp , Michigan","Michigan, USA","-85.60236429999999, 44.3148443",-85.602364,44.314844
5215,5216,A0049120683,Cat,Domestic Shorthair,M,Yes,11/21/2021 15:57,Orphan Animal,Norman and Owens,Other Out of County,...,,,,,,"Norman and Owens Other Out of County , Michigan","Michigan, USA","-85.60236429999999, 44.3148443",-85.602364,44.314844
5219,5220,A0051573388,Cat,Domestic Shorthair,F,Yes,11/21/2022 16:01,Stray without ID,Central and Main St,Dexter,...,,,,,,"Central and Main St WC-Dexter Twp , Michigan","Michigan, USA","-85.60236429999999, 44.3148443",-85.602364,44.314844
5227,5228,A0049121987,Cat,Domestic Shorthair,F,Yes,11/22/2021 9:23,Orphan Animal,West college and Park street,Other Out of County,...,,,,,,West college and Park street Other Out of Coun...,"Michigan, USA","-85.60236429999999, 44.3148443",-85.602364,44.314844
7014,7015,A0050897660,Cat,Domestic Shorthair,F,Yes,8/15/2022 14:27,Orphan Animal,Mowery and Thayer,Out of State,...,,,,,,"Mowery and Thayer Out of State , Michigan","Michigan, USA","-85.60236429999999, 44.3148443",-85.602364,44.314844
7015,7016,A0050897662,Cat,Domestic Shorthair,M,No,8/15/2022 14:27,Orphan Animal,Mowery and Thayer,Out of State,...,,,,,,"Mowery and Thayer Out of State , Michigan","Michigan, USA","-85.60236429999999, 44.3148443",-85.602364,44.314844
7291,7292,A0048387156,Cat,Domestic Shorthair,F,Yes,8/6/2021 9:10,Stray without ID,I-475 near Toledo,Out of State,...,,,,,,"I-475 near Toledo Out of State , Michigan","Michigan, USA","-85.60236429999999, 44.3148443",-85.602364,44.314844
11202,11203,A0056964944,Cat,Domestic Shorthair,M,Yes,9/26/2024 18:36,Kitten/Puppy,Hull and Barnes,Other Out of County,...,,,,,,"Hull and Barnes Other Out of County , Michigan","Michigan, USA","-85.60236429999999, 44.3148443",-85.602364,44.314844
11203,11204,A0056964946,Cat,Domestic Shorthair,F,Yes,9/26/2024 18:36,Kitten/Puppy,Hull and Barnes,Other Out of County,...,,,,,,"Hull and Barnes Other Out of County , Michigan","Michigan, USA","-85.60236429999999, 44.3148443",-85.602364,44.314844
11204,11205,A0056964949,Cat,Domestic Shorthair,F,Yes,9/26/2024 18:36,Kitten/Puppy,Hull and Barnes,Other Out of County,...,,,,,,"Hull and Barnes Other Out of County , Michigan","Michigan, USA","-85.60236429999999, 44.3148443",-85.602364,44.314844


In [16]:
df.to_csv('C:/Users/khapr/OneDrive/Documents/MS BIOSTAT/HSHV/finalcorrected_hshvNov18.csv', index=False)

#### 1. Based on Wendy's Feedback Remove animals found at HSHV
#### 2. Remove Out of State Observations

In [18]:
final_df = pd.read_csv("C:/Users/khapr/OneDrive/Documents/MS BIOSTAT/HSHV/finalhandcorrected_hshvNov18.csv")

In [19]:
final_df = final_df[~final_df['Location Found'].str.contains('HSHV', na=False)]
final_df = final_df[final_df['address_google'].str.contains('MI', na=False)]

In [20]:
final_df.to_csv('C:/Users/khapr/OneDrive/Documents/MS BIOSTAT/HSHV/final_excludinghshvNov19.csv', index=False)

## Folium

In [22]:
folium_df = pd.read_csv("C:/Users/khapr/OneDrive/Documents/MS BIOSTAT/HSHV/final_excludinghshvNov19.csv")

In [23]:
print(folium_df[['lat', 'lon']].isnull().sum())

lat    0
lon    0
dtype: int64


In [24]:
folium_df['lat'] = pd.to_numeric(folium_df['lat'], errors='coerce')
folium_df['lon'] = pd.to_numeric(folium_df['lon'], errors='coerce')
folium_df['pnt_split'] = folium_df['pnt'].str.split(',').str[1].str.strip() 
folium_df.loc[folium_df['lat'].isnull(), 'lat'] = pd.to_numeric(folium_df['pnt_split'], errors='coerce')
folium_df.drop(columns=['pnt_split'], inplace=True)

#### Heatmap of where animals have been found

In [26]:
from folium.plugins import HeatMap
map_center = [folium_df['lat'].mean(), folium_df['lon'].mean()]
m = folium.Map(location=map_center, zoom_start=10)
heat_data = folium_df[['lat', 'lon']].values.tolist()
HeatMap(heat_data, radius=15).add_to(m)
m

## Data Exploration

#### 7803 rows with full addresses, 2825 without full addresses

In [29]:
sum(folium_df['address_google'] == 'Ypsilanti Charter Twp, MI, USA')/sum(final_df['address_google'].str.contains('Ypsilanti Charter Twp', na=False))

0.31175556706369756

In [30]:
sum(folium_df['address_google'] == 'Superior Charter Twp, MI, USA')/sum(final_df['address_google'].str.contains('Superior Charter Twp', na=False))

0.15861214374225527

In [31]:
folium_df['address_no_zipcode'] = ~folium_df['address_google'].str.contains(r'\b\d{5}\b')
hshv_no_zipcode = folium_df[folium_df['address_no_zipcode'] == True]
hshv_no_zipcode

Unnamed: 0,...1,Animal #,Species,Primary Breed,Gender,Altered,Intake Date,Intake Subtype,Location Found,Jurisdiction In,...,...20,...21,...22,...23,LocationPlus,address_google,pnt,lon,lat,address_no_zipcode
1,2,A0040554409,Dog,"Retriever, Labrador",F,Yes,1/12/2019 15:06,Stray with ID,Carpenter and Ellsworth,Pittsfield,...,,Ypsilanti,MI,48197.0,"Carpenter / Ellsworth WC-Pittsfield Twp , Mich...","E Ellsworth Rd, Pittsfield Charter Twp, MI, USA","-83.6995447, 42.230374",-83.699545,42.230374,True
5,6,A0040588619,Dog,"Chihuahua, Short Coat",M,No,1/17/2019 14:16,Stray without ID,Around Michigan and Ecorse,Ypsilanti,...,,Ypsilanti,MI,48198.0,"Around Michigan / Ecorse WC-Ypsilanti Twp , Mi...","Ecorse Dr, Ypsilanti Charter Twp, MI, USA","-83.5463831, 42.2457404",-83.546383,42.245740,True
6,7,A0040598374,Cat,Domestic Shorthair,M,Yes,1/18/2019 15:44,Stray without ID,Martz and Bunton,Ypsilanti,...,193.0,Ypsilanti,MI,48197.0,"Martz and Bunton WC-Ypsilanti Twp , Michigan","Ypsilanti Charter Twp, MI, USA","-83.6194916, 42.2101369",-83.619492,42.210137,True
10,11,A0040613747,Cat,Domestic Shorthair,M,Yes,1/22/2019 8:49,Stray without ID,Prospect and Holmes,Ypsilanti,...,,Ypsilanti,MI,48198.0,"Prospect and Holmes WC-Ypsilanti Twp , Michigan","Ypsilanti Charter Twp, MI, USA","-83.6194916, 42.2101369",-83.619492,42.210137,True
12,13,A0005951543,Cat,Domestic Shorthair,F,Yes,1/24/2019 12:09,Stray with ID,Harris and Forest and Michigan and Holmes,Ypsilanti,...,,Ypsilanti,MI,48198.0,Harris/Forest/Michigan/Holmes WC-Ypsilanti Twp...,"Ypsilanti Charter Twp, MI, USA","-83.6194916, 42.2101369",-83.619492,42.210137,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10613,11325,A0054114230,Cat,Domestic Shorthair,F,Yes,9/8/2023 9:55,Kitten/Puppy,Austin st and Noggles Rd,Manchester,...,,,,,"Austin st and Noggles Rd WC-Manchester Twp , M...","Manchester Township, MI, USA","-84.0801613, 42.1035271",-84.080161,42.103527,True
10614,11326,A0056843133,Dog,"Terrier, Pit Bull",F,Yes,9/8/2024 10:07,Stray without ID,Clark and Leforge,Superior,...,,,,,"Clark and Leforge WC-Superior Twp , Michigan","Leforge Rd, Superior Charter Twp, MI, USA","-83.6230319, 42.2738459",-83.623032,42.273846,True
10618,11330,A0054122829,Dog,"Poodle, Miniature",F,Yes,9/9/2023 13:47,Stray without ID,Clark and Prospect,Superior,...,,,,,"Clark and Prospect WC-Superior Twp , Michigan","Superior Charter Twp, MI, USA","-83.6044771, 42.3046849",-83.604477,42.304685,True
10619,11331,A0054121764,Cat,Domestic Shorthair,F,No,9/9/2023 11:31,Kitten/Puppy,Bemis and Whittaker,Ypsilanti,...,,,,,"Bemis and Whittaker WC-Ypsilanti Twp , Michigan","Ypsilanti Charter Twp, MI, USA","-83.6194916, 42.2101369",-83.619492,42.210137,True


#### Around 1663 rows that are not mentioning a more granular location 

In [33]:
hshv_no_zipcode = hshv_no_zipcode.copy()
hshv_no_zipcode.loc[:,'no_full_address'] = ~hshv_no_zipcode.loc[:, 'address_google'].str.contains(r'Ave|Rd|Dr|Trail|St|Pkwy|&|Park|Lake', regex=True, na=False)
hshv_no_fulladd = hshv_no_zipcode[hshv_no_zipcode['no_full_address'] == True]
hshv_no_fulladd['address_google'].unique()
hshv_no_fulladd

Unnamed: 0,...1,Animal #,Species,Primary Breed,Gender,Altered,Intake Date,Intake Subtype,Location Found,Jurisdiction In,...,...21,...22,...23,LocationPlus,address_google,pnt,lon,lat,address_no_zipcode,no_full_address
6,7,A0040598374,Cat,Domestic Shorthair,M,Yes,1/18/2019 15:44,Stray without ID,Martz and Bunton,Ypsilanti,...,Ypsilanti,MI,48197.0,"Martz and Bunton WC-Ypsilanti Twp , Michigan","Ypsilanti Charter Twp, MI, USA","-83.6194916, 42.2101369",-83.619492,42.210137,True,True
10,11,A0040613747,Cat,Domestic Shorthair,M,Yes,1/22/2019 8:49,Stray without ID,Prospect and Holmes,Ypsilanti,...,Ypsilanti,MI,48198.0,"Prospect and Holmes WC-Ypsilanti Twp , Michigan","Ypsilanti Charter Twp, MI, USA","-83.6194916, 42.2101369",-83.619492,42.210137,True,True
12,13,A0005951543,Cat,Domestic Shorthair,F,Yes,1/24/2019 12:09,Stray with ID,Harris and Forest and Michigan and Holmes,Ypsilanti,...,Ypsilanti,MI,48198.0,Harris/Forest/Michigan/Holmes WC-Ypsilanti Twp...,"Ypsilanti Charter Twp, MI, USA","-83.6194916, 42.2101369",-83.619492,42.210137,True,True
14,15,A0047526328,Dog,Siberian Husky,F,Yes,1/26/2019 16:19,Stray with ID,Geddes and Harris,Superior,...,Ypsilanti,MI,48198.0,"Geddes / Harris WC-Superior Twp , Michigan","Superior Charter Twp, MI, USA","-83.6044771, 42.3046849",-83.604477,42.304685,True,True
21,22,A0041596040,Dog,"Terrier, Pit Bull",M,Yes,10/11/2019 11:05,Stray with ID,Michigan Ave and Mansfield,Ypsilanti,...,Ypsilanti,MI,48197.0,"Michigan Ave and Mansfield WC-Ypsilanti City ,...","Ypsilanti, MI, USA","-83.6129939, 42.2411499",-83.612994,42.241150,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10604,11316,A0054115026,Cat,Domestic Longhair,F,Yes,9/8/2023 11:19,Kitten/Puppy,M-52 btwn Pleasant Lake and Pekins,Sharon,...,,,,M-52 btwn Pleasant Lake and Pekins WC-Sharon T...,"Sharon Township, MI, USA","-84.0801613, 42.2216618",-84.080161,42.221662,True,True
10605,11317,A0054115027,Cat,Domestic Shorthair,F,Yes,9/8/2023 11:19,Kitten/Puppy,M-52 btwn Pleasant Lake and Pekins,Sharon,...,,,,M-52 btwn Pleasant Lake and Pekins WC-Sharon T...,"Sharon Township, MI, USA","-84.0801613, 42.2216618",-84.080161,42.221662,True,True
10613,11325,A0054114230,Cat,Domestic Shorthair,F,Yes,9/8/2023 9:55,Kitten/Puppy,Austin st and Noggles Rd,Manchester,...,,,,"Austin st and Noggles Rd WC-Manchester Twp , M...","Manchester Township, MI, USA","-84.0801613, 42.1035271",-84.080161,42.103527,True,True
10618,11330,A0054122829,Dog,"Poodle, Miniature",F,Yes,9/9/2023 13:47,Stray without ID,Clark and Prospect,Superior,...,,,,"Clark and Prospect WC-Superior Twp , Michigan","Superior Charter Twp, MI, USA","-83.6044771, 42.3046849",-83.604477,42.304685,True,True


#### Drop the above columns from our analyses since location isn't specific (8965 rows left)

In [35]:
folium_df = folium_df[~folium_df.index.isin(hshv_no_fulladd.index)]
folium_df

Unnamed: 0,...1,Animal #,Species,Primary Breed,Gender,Altered,Intake Date,Intake Subtype,Location Found,Jurisdiction In,...,...20,...21,...22,...23,LocationPlus,address_google,pnt,lon,lat,address_no_zipcode
0,1,A0040552205,Cat,Domestic Longhair,M,Yes,1/12/2019 11:23,Stray with ID,Huron River Dr and Tuttle Hill,Ypsilanti,...,,Ypsilanti,MI,48197.0,Huron River Dr and Tuttle Hill WC-Ypsilanti Tw...,"S Huron River Dr & Tuttle Hill Rd, Ypsilanti C...","-83.5819195, 42.210799",-83.581919,42.210799,False
1,2,A0040554409,Dog,"Retriever, Labrador",F,Yes,1/12/2019 15:06,Stray with ID,Carpenter and Ellsworth,Pittsfield,...,,Ypsilanti,MI,48197.0,"Carpenter / Ellsworth WC-Pittsfield Twp , Mich...","E Ellsworth Rd, Pittsfield Charter Twp, MI, USA","-83.6995447, 42.230374",-83.699545,42.230374,True
2,3,A0033047934,Dog,Terrier,M,Yes,1/13/2019 12:11,Stray with ID,Waters rd and Wagner Rd,Scio,...,,Ann Arbor,MI,48103.0,"Waters rd and Wagner Rd WC-Scio Twp , Michigan","W Waters Rd & S Wagner Rd, Lodi Township, MI 4...","-83.7984686, 42.242459",-83.798469,42.242459,False
3,4,A0035187693,Dog,"Terrier, Jack Russell",M,Yes,1/14/2019 9:03,Stray with ID,Adams and Harriet,Ypsilanti,...,,Ypsilanti,MI,48197.0,"Adams and Harriet WC-Ypsilanti City , Michigan","Harriet St, Ypsilanti, MI 48197, USA","-83.6201156, 42.2335459",-83.620116,42.233546,False
4,5,A0040580759,Cat,Domestic Shorthair,M,Yes,1/16/2019 14:51,Stray without ID,Oakwood and Sherman,Ypsilanti,...,,Ypsilanti,MI,48198.0,"Oakwood/Sherman WC-Ypsilanti City , Michigan","Sherman St & Oakwood St, Ypsilanti, MI 48197, USA","-83.6289739, 42.243699",-83.628974,42.243699,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10623,11335,A0056846624,Cat,Domestic Shorthair,F,Yes,9/9/2024 10:02,Stray with ID,Carpenter rd and Washtenaw Rd,Pittsfield,...,,,,,Carpenter rd and Washtenaw Rd WC-Pittsfield Tw...,"Washtenaw Ave & Carpenter Rd, Pittsfield Chart...","-83.6807369, 42.2540335",-83.680737,42.254033,False
10624,11337,A0056849779,Cat,Domestic Medium Hair,M,Yes,9/9/2024 14:16,Stray without ID,Wilcox and Edwards N Hines Dr,Plymouth,...,,,,,"Wilcox and Edwards N Hines Dr Plymouth City , ...","Edward N Hines Dr, Plymouth Charter Twp, MI 48...","-83.4683398, 42.394534",-83.468340,42.394534,False
10625,11338,A0056851074,Cat,Domestic Shorthair,F,Yes,9/9/2024 16:00,Kitten/Puppy,Joy Rd and Main St,Plymouth,...,,,,,"Joy Rd and Main St Plymouth Twp , Michigan","S Main St & Joy Rd, Plymouth Charter Twp, MI 4...","-83.4691092, 42.3515033",-83.469109,42.351503,False
10626,11339,A0056851629,Cat,Domestic Shorthair,M,Yes,9/9/2024 16:52,Stray with ID,Carpenter and Washtenaw Ave,Pittsfield,...,,,,,Carpenter and Washtenaw Ave WC-Pittsfield Twp ...,"Washtenaw Ave & Carpenter Rd, Pittsfield Chart...","-83.6807369, 42.2540335",-83.680737,42.254033,False


#### Remove duplicates based on 'Intake Date', 'Species', 'Primary Breed', 'Location Found', 'Returned to Address' (3910 duplicates)

In [37]:
dup_factors = ['Intake Date', 'Species', 'Primary Breed', 'Location Found', 'Returned to Address']
folium_df['Duplicates'] = folium_df.duplicated(subset=dup_factors, keep=False).astype(int)
duplicates = folium_df[folium_df['Duplicates'] == 1].copy()
duplicates

Unnamed: 0,...1,Animal #,Species,Primary Breed,Gender,Altered,Intake Date,Intake Subtype,Location Found,Jurisdiction In,...,...21,...22,...23,LocationPlus,address_google,pnt,lon,lat,address_no_zipcode,Duplicates
190,195,A0042782397,Dog,"Terrier, Pit Bull",M,Yes,9/20/2019 9:59,Stray without ID,Packard and Turnberry,Ann Arbor,...,Ann Arbor,MI,48108.0,"Packard and Turnberry WC-Ann Arbor City , Mich...","Packard St & Turnberry Ln, Ann Arbor, MI 48108...","-83.6875647, 42.245323",-83.687565,42.245323,False,1
191,196,A0042782398,Dog,"Terrier, Pit Bull",F,Yes,9/20/2019 9:59,Stray without ID,Packard and Turnberry,Ann Arbor,...,Ann Arbor,MI,48108.0,"Packard and Turnberry WC-Ann Arbor City , Mich...","Packard St & Turnberry Ln, Ann Arbor, MI 48108...","-83.6875647, 42.245323",-83.687565,42.245323,False,1
251,257,A0046185537,Cat,Domestic Shorthair,M,No,11/30/2020 16:30,Stray without ID,Leforge and Huron River Dr,Ypsilanti,...,Ypsilanti,MI,48197.0,"Leforge and Huron River Dr WC-Ypsilanti City ,...","Leforge Rd & N Huron River Dr, Ypsilanti, MI 4...","-83.6223608, 42.2544429",-83.622361,42.254443,False,1
252,258,A0046185542,Cat,Domestic Shorthair,M,No,11/30/2020 16:30,Stray without ID,Leforge and Huron River Dr,Ypsilanti,...,Ypsilanti,MI,48197.0,"Leforge and Huron River Dr WC-Ypsilanti City ,...","Leforge Rd & N Huron River Dr, Ypsilanti, MI 4...","-83.6223608, 42.2544429",-83.622361,42.254443,False,1
297,307,A0023963675,Dog,"Retriever, Labrador",M,Yes,3/19/2020 17:34,Stray with ID,Pontiac Trail and N. Territorial,Salem,...,Ann Arbor,MI,48105.0,Pontiac Trail and N. Territorial WC-Salem Twp ...,"E North Territorial Rd, Salem Township, MI, USA","-83.6619289, 42.3652126",-83.661929,42.365213,True,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10608,11320,A0054119203,Dog,"Terrier, Pit Bull",M,Yes,9/8/2023 16:29,Stray without ID,Fuller rd and Huron Parkway,Ann Arbor,...,,,,Fuller rd and Huron Parkway WC-Ann Arbor City ...,"Fuller Rd & Huron Pkwy, Ann Arbor, MI 48105, USA","-83.6990446, 42.2776226",-83.699045,42.277623,False,1
10609,11321,A0054119206,Dog,"Terrier, Pit Bull",F,Yes,9/8/2023 16:29,Stray without ID,Fuller rd and Huron Parkway,Ann Arbor,...,,,,Fuller rd and Huron Parkway WC-Ann Arbor City ...,"Fuller Rd & Huron Pkwy, Ann Arbor, MI 48105, USA","-83.6990446, 42.2776226",-83.699045,42.277623,False,1
10610,11322,A0054120050,Cat,Domestic Shorthair,M,Yes,9/8/2023 17:47,Kitten/Puppy,EMU Campus: Oakwood St and Washtenaw Ave,Ypsilanti,...,,,,EMU Campus: Oakwood St and Washtenaw Ave WC-Yp...,"900 Oakwood St, Ypsilanti, MI 48197, USA","-83.624089, 42.2506803",-83.624089,42.250680,False,1
10611,11323,A0054120055,Cat,Domestic Shorthair,M,Yes,9/8/2023 17:47,Kitten/Puppy,EMU Campus: Oakwood St and Washtenaw Ave,Ypsilanti,...,,,,EMU Campus: Oakwood St and Washtenaw Ave WC-Yp...,"900 Oakwood St, Ypsilanti, MI 48197, USA","-83.624089, 42.2506803",-83.624089,42.250680,False,1


In [38]:
folium_df = folium_df.drop('address_no_zipcode', axis=1)
folium_df.to_csv('C:/Users/khapr/OneDrive/Documents/MS BIOSTAT/HSHV/final_duplicateasindicator.csv', index=False)

#### We are left with 6424 datapoints in total that are not duplicates

In [40]:
df_noduplicates = folium_df.drop_duplicates(subset=dup_factors, keep='first').reset_index(drop=True).copy()
df_noduplicates

Unnamed: 0,...1,Animal #,Species,Primary Breed,Gender,Altered,Intake Date,Intake Subtype,Location Found,Jurisdiction In,...,...20,...21,...22,...23,LocationPlus,address_google,pnt,lon,lat,Duplicates
0,1,A0040552205,Cat,Domestic Longhair,M,Yes,1/12/2019 11:23,Stray with ID,Huron River Dr and Tuttle Hill,Ypsilanti,...,,Ypsilanti,MI,48197.0,Huron River Dr and Tuttle Hill WC-Ypsilanti Tw...,"S Huron River Dr & Tuttle Hill Rd, Ypsilanti C...","-83.5819195, 42.210799",-83.581919,42.210799,0
1,2,A0040554409,Dog,"Retriever, Labrador",F,Yes,1/12/2019 15:06,Stray with ID,Carpenter and Ellsworth,Pittsfield,...,,Ypsilanti,MI,48197.0,"Carpenter / Ellsworth WC-Pittsfield Twp , Mich...","E Ellsworth Rd, Pittsfield Charter Twp, MI, USA","-83.6995447, 42.230374",-83.699545,42.230374,0
2,3,A0033047934,Dog,Terrier,M,Yes,1/13/2019 12:11,Stray with ID,Waters rd and Wagner Rd,Scio,...,,Ann Arbor,MI,48103.0,"Waters rd and Wagner Rd WC-Scio Twp , Michigan","W Waters Rd & S Wagner Rd, Lodi Township, MI 4...","-83.7984686, 42.242459",-83.798469,42.242459,0
3,4,A0035187693,Dog,"Terrier, Jack Russell",M,Yes,1/14/2019 9:03,Stray with ID,Adams and Harriet,Ypsilanti,...,,Ypsilanti,MI,48197.0,"Adams and Harriet WC-Ypsilanti City , Michigan","Harriet St, Ypsilanti, MI 48197, USA","-83.6201156, 42.2335459",-83.620116,42.233546,0
4,5,A0040580759,Cat,Domestic Shorthair,M,Yes,1/16/2019 14:51,Stray without ID,Oakwood and Sherman,Ypsilanti,...,,Ypsilanti,MI,48198.0,"Oakwood/Sherman WC-Ypsilanti City , Michigan","Sherman St & Oakwood St, Ypsilanti, MI 48197, USA","-83.6289739, 42.243699",-83.628974,42.243699,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6419,11335,A0056846624,Cat,Domestic Shorthair,F,Yes,9/9/2024 10:02,Stray with ID,Carpenter rd and Washtenaw Rd,Pittsfield,...,,,,,Carpenter rd and Washtenaw Rd WC-Pittsfield Tw...,"Washtenaw Ave & Carpenter Rd, Pittsfield Chart...","-83.6807369, 42.2540335",-83.680737,42.254033,0
6420,11337,A0056849779,Cat,Domestic Medium Hair,M,Yes,9/9/2024 14:16,Stray without ID,Wilcox and Edwards N Hines Dr,Plymouth,...,,,,,"Wilcox and Edwards N Hines Dr Plymouth City , ...","Edward N Hines Dr, Plymouth Charter Twp, MI 48...","-83.4683398, 42.394534",-83.468340,42.394534,0
6421,11338,A0056851074,Cat,Domestic Shorthair,F,Yes,9/9/2024 16:00,Kitten/Puppy,Joy Rd and Main St,Plymouth,...,,,,,"Joy Rd and Main St Plymouth Twp , Michigan","S Main St & Joy Rd, Plymouth Charter Twp, MI 4...","-83.4691092, 42.3515033",-83.469109,42.351503,0
6422,11339,A0056851629,Cat,Domestic Shorthair,M,Yes,9/9/2024 16:52,Stray with ID,Carpenter and Washtenaw Ave,Pittsfield,...,,,,,Carpenter and Washtenaw Ave WC-Pittsfield Twp ...,"Washtenaw Ave & Carpenter Rd, Pittsfield Chart...","-83.6807369, 42.2540335",-83.680737,42.254033,0


In [41]:
df_noduplicates.to_csv('C:/Users/khapr/OneDrive/Documents/MS BIOSTAT/HSHV/final_noduplicates.csv', index=False)

## Folium with Modified Dataset 

#### Heatmap of where Lost Pets were found 

In [44]:
from folium.plugins import HeatMap
map_center = [df_noduplicates['lat'].mean(), df_noduplicates['lon'].mean()]
m = folium.Map(location=map_center, zoom_start=10)
heat_data = df_noduplicates[['lat', 'lon']].values.tolist()
HeatMap(heat_data, radius=15).add_to(m)
m

#### Heatmap of where Lost Pets who were not Returned were found 

In [46]:
df_nodup_notreturned = df_noduplicates[df_noduplicates['Returned to Address'].isna()].copy()
df_nodup_notreturned

Unnamed: 0,...1,Animal #,Species,Primary Breed,Gender,Altered,Intake Date,Intake Subtype,Location Found,Jurisdiction In,...,...20,...21,...22,...23,LocationPlus,address_google,pnt,lon,lat,Duplicates
20,27,A0018665727,Dog,"Retriever, Labrador",F,Yes,10/13/2019 16:33,Stray with ID,Dixboro Road,Ann Arbor,...,,,,,"Dixboro Road WC-Ann Arbor City , Michigan","Dixboro Rd, Ann Arbor, MI, USA","-83.6629493, 42.3721466",-83.662949,42.372147,0
67,82,A0018665727,Dog,"Retriever, Labrador",F,Yes,12/9/2019 17:39,Stray with ID,Adoptions lobby,HSHV,...,,,,,"Adoptions lobby WC-HSHV , Michigan","3100 Cherry Hill Rd, Ann Arbor, MI 48105","-83.654887, 42.306139",-83.654887,42.306139,0
80,96,A0041109176,Dog,Miniature Pinscher,F,Yes,3/19/2019 16:39,Stray with ID,Cross and Washtenaw,Ypsilanti,...,,,,,"Cross and Washtenaw WC-Ypsilanti Twp , Michigan","Washtenaw Ave & W Cross St, Ypsilanti, MI 4819...","-83.6263711, 42.2458513",-83.626371,42.245851,0
119,149,A0042171159,Cat,Persian,M,Yes,7/12/2019 11:27,Stray without ID,Clark and Hogback,Pittsfield,...,,,,,"Clark/Hogback WC-Pittsfield Twp , Michigan","Clark + Hogback, Pittsfield Charter Twp, MI 48...","-83.677791, 42.2589",-83.677791,42.258900,0
125,157,A0042297098,Dog,"Dachshund, Standard Smooth Haired",M,Yes,7/24/2019 12:32,Stray without ID,West Liberty and Scio Church,Ann Arbor,...,,,,,"West Liberty/Scio Church WC-Ann Arbor City , M...","Scio Church Rd, Ann Arbor, MI, USA","-83.7764965, 42.2562965",-83.776496,42.256296,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6419,11335,A0056846624,Cat,Domestic Shorthair,F,Yes,9/9/2024 10:02,Stray with ID,Carpenter rd and Washtenaw Rd,Pittsfield,...,,,,,Carpenter rd and Washtenaw Rd WC-Pittsfield Tw...,"Washtenaw Ave & Carpenter Rd, Pittsfield Chart...","-83.6807369, 42.2540335",-83.680737,42.254033,0
6420,11337,A0056849779,Cat,Domestic Medium Hair,M,Yes,9/9/2024 14:16,Stray without ID,Wilcox and Edwards N Hines Dr,Plymouth,...,,,,,"Wilcox and Edwards N Hines Dr Plymouth City , ...","Edward N Hines Dr, Plymouth Charter Twp, MI 48...","-83.4683398, 42.394534",-83.468340,42.394534,0
6421,11338,A0056851074,Cat,Domestic Shorthair,F,Yes,9/9/2024 16:00,Kitten/Puppy,Joy Rd and Main St,Plymouth,...,,,,,"Joy Rd and Main St Plymouth Twp , Michigan","S Main St & Joy Rd, Plymouth Charter Twp, MI 4...","-83.4691092, 42.3515033",-83.469109,42.351503,0
6422,11339,A0056851629,Cat,Domestic Shorthair,M,Yes,9/9/2024 16:52,Stray with ID,Carpenter and Washtenaw Ave,Pittsfield,...,,,,,Carpenter and Washtenaw Ave WC-Pittsfield Twp ...,"Washtenaw Ave & Carpenter Rd, Pittsfield Chart...","-83.6807369, 42.2540335",-83.680737,42.254033,0


In [47]:
map_center = [df_nodup_notreturned['lat'].mean(), df_nodup_notreturned['lon'].mean()]
m = folium.Map(location=map_center, zoom_start=10)
heat_data = df_nodup_notreturned[['lat', 'lon']].values.tolist()
HeatMap(heat_data, radius=15).add_to(m)
m