In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import chi2_contingency
import folium
import re
from shapely.geometry import Point, Polygon

In [None]:
# Load the squirrel data CSV into `nyc_squirrels` (path is relative to the notebook's working directory).
nyc_squirrels=pd.read_csv('2018_Central_Park_Squirrel_Census_-_Squirrel_Data_20250304.csv')

In [None]:
# Load the hectare grid CSV; its `the_geom` column is parsed into coordinates in the wrangling cell below.
hectare = pd.read_csv('2018_Central_Park_Squirrel_Census_-_Hectare_Grid_20250304.csv')

In [None]:
# Load the hectare data CSV into `hectare_data`.
hectare_data = pd.read_csv('2018_Central_Park_Squirrel_Census_-_Hectare_Data_20250325.csv')

Data Wrangling and Cleaning

In [None]:
# Extract coordinate pairs using regex
def extract_coords(wkt):
    """Split a single-ring WKT ``MULTIPOLYGON`` string into coordinate pairs.

    Parameters
    ----------
    wkt : str
        Text such as ``"MULTIPOLYGON (((lon lat, lon lat, ...)))"``.

    Returns
    -------
    list[list[str]]
        One ``[lon, lat]`` list of *strings* per vertex, in source order.
        An input with no coordinates yields an empty list.

    Notes
    -----
    Only the outer ``MULTIPOLYGON (((`` / ``)))`` wrapper is removed, so
    geometries with several rings or parts are not supported.
    """
    # Remove the leading "MULTIPOLYGON (((" and trailing ")))", tolerating
    # missing or extra whitespace around the wrapper.
    cleaned = re.sub(r'^\s*MULTIPOLYGON\s*\(\(\(\s*|\s*\)\)\)\s*$', '', wkt)
    # Split on commas no matter how much whitespace surrounds them
    # (the separator is not guaranteed to be exactly ", ").
    coord_pairs = re.split(r'\s*,\s*', cleaned)
    # Split each non-empty pair into lon and lat
    coords = [pair.split() for pair in coord_pairs if pair]
    return coords

# Parse every hectare geometry into a list of [lon, lat] string pairs
hectare['coords'] = hectare['the_geom'].apply(extract_coords)


def _flatten_pairs(pairs):
    """Return one float Series: lon, lat, lon, lat, ... for a list of pairs."""
    flat = []
    for pair in pairs:
        for num in pair:
            flat.append(float(num))
    return pd.Series(flat)


# One row per hectare, one numbered column (0, 1, 2, ...) per coordinate value
coord_df = hectare['coords'].apply(_flatten_pairs)

# Even positions are longitudes, odd positions are latitudes; the suffix is
# the 1-based vertex number: lon_1, lat_1, lon_2, lat_2, ...
_axis_names = ('lon', 'lat')
coord_df.columns = [f"{_axis_names[pos % 2]}_{pos // 2 + 1}" for pos in coord_df.columns]

# Original hectare columns (without the helper 'coords' column) followed by the coordinates
hectare_without_coords = hectare.drop(columns='coords')
df_final = pd.concat([hectare_without_coords, coord_df], axis=1)

In [None]:
#saving the location of one squirrel identified in each hectare
# Built as a single chain: the original edited a slice of nyc_squirrels with
# inplace=True, which triggers pandas' SettingWithCopyWarning.
squirrel_hec_id_mw = (
    nyc_squirrels[['X', 'Y', 'Hectare']]
    .sort_values(by='Hectare')
    .drop_duplicates(subset=['Hectare'], keep='first')
    .reset_index(drop=True)
)

#making and naming the polygons with shapely
# Corners are read by name (lat_k / lon_k, the columns created in the
# wrangling cell and used by the map cells) instead of by column position.
# Vertices are given as (lat, lon), matching the Point(Y, X) order used below.
# NOTE(review): the label still comes from column position 1, as before —
# presumably the hectare grid's id column; confirm.
corner_columns = [name for k in range(1, 5) for name in (f'lat_{k}', f'lon_{k}')]
corner_values = df_final[corner_columns].to_numpy()
hectares_shapely = []
for i, corners in enumerate(corner_values):
    ring = list(zip(corners[0::2], corners[1::2]))
    hectares_shapely.append((Polygon(ring), f'{df_final.iloc[i, 1]}'))

#finding which hectare each squirrel point belongs in
hectare_assign = []
for x, y, hectare_label in squirrel_hec_id_mw[['X', 'Y', 'Hectare']].itertuples(index=False, name=None):
    p = Point(y, x)
    for polygon, polygon_id in hectares_shapely:
        if polygon.contains(p):
            hectare_assign.append([hectare_label, polygon_id])
            # Stop at the first match: a second match would give the same
            # Hectare two ids and duplicate squirrel rows in the merge below.
            break

hectare_assign = pd.DataFrame(hectare_assign, columns = ['Hectare', 'id'])
hectare_assign['id'] = hectare_assign['id'].astype('int64')

#joining the squirrels data with the hectare id number
# Drop any 'id' left by a previous run of this cell so re-running does not
# produce id_x / id_y columns.
nyc_squirrels = (
    nyc_squirrels
    .drop(columns='id', errors='ignore')
    .merge(hectare_assign, how='left', on='Hectare')
)

In [None]:
#Displays of fear
fear_columns_ap = ['Quaas', 'Moans', 'Tail flags', 'Runs from']
# Count of each distinct value per fear column (rows = values, columns = behaviours)
fear_ap = nyc_squirrels[fear_columns_ap].apply(pd.Series.value_counts)
print(fear_ap)

#add a new column where it is marked true if it showed at least one fear trait
# Column-wise Series.map replaces DataFrame.applymap, which newer pandas
# releases deprecate; the per-value test is unchanged.
nyc_squirrels["showed_fear_ap"] = nyc_squirrels[fear_columns_ap].apply(
    lambda col: col.map(lambda x: str(x).strip().lower() in ["yes", "true", "1"])
).any(axis=1)

In [None]:
#missingness
# Each row of the heatmap is a record and each column a field; the two colours
# distinguish missing from present values.
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(nyc_squirrels.isnull(), cbar=False, cmap='viridis', ax=ax)
ax.set_title('Missing Values Heatmap')
fig.savefig('Missing Squirrel Data', dpi=300)
plt.show()

In [None]:
#check amount of missing data in each column
missing_data = nyc_squirrels.isna().sum()
print(missing_data)

print("Total squirrels in dataset: ", len(nyc_squirrels))

#drop unneeded columns
drop_columns_ap = [
    'Color notes',
    'Specific Location',
    'Other Activities',
    'Other Interactions',
    'Highlight Fur Color',
]
# Remove the listed columns, then keep only rows with no missing value in any
# remaining column.
clean_squirrels_ap = nyc_squirrels.drop(columns=drop_columns_ap).dropna()

# Sanity check: every count printed here should be zero after dropna()
missing_data_ap = clean_squirrels_ap.isna().sum()
print(missing_data_ap)
clean_squirrels_ap

In [None]:
#counting the number of squirrels in each hectare with certain traits
# Each line builds a boolean flag per squirrel and sums it within each hectare id.
no_fear_count = (clean_squirrels_ap['showed_fear_ap'] == False).groupby(clean_squirrels_ap['id']).sum()
fear_count = (clean_squirrels_ap['showed_fear_ap'] == True).groupby(clean_squirrels_ap['id']).sum()
gray_count = (clean_squirrels_ap['Primary Fur Color'] == 'Gray').groupby(clean_squirrels_ap['id']).sum()
# The fur-colour map cell keys this column on "Cinnamon", so comparing to
# 'Red' alone would presumably always count zero. 'Red' is still accepted so
# the count is unchanged if that label does occur.
red_count = clean_squirrels_ap['Primary Fur Color'].isin(['Cinnamon', 'Red']).groupby(clean_squirrels_ap['id']).sum()
black_count = (clean_squirrels_ap['Primary Fur Color'] == 'Black').groupby(clean_squirrels_ap['id']).sum()
AM_count = (clean_squirrels_ap['Shift'] == 'AM').groupby(clean_squirrels_ap['id']).sum()
PM_count = (clean_squirrels_ap['Shift'] == 'PM').groupby(clean_squirrels_ap['id']).sum()

In [None]:
#adding those counts to the hectare_assign dataframe
# New column name -> per-hectare count Series (indexed by hectare id).
# Columns are added in this order.
counts_by_column = {
    'no_fear_count': no_fear_count,
    'fear_count': fear_count,
    'gray_count': gray_count,
    'red_count': red_count,
    'black_count': black_count,
    'AM_count': AM_count,
    'PM_count': PM_count,
}
for column_name, counts in counts_by_column.items():
    # Look up each hectare's id in the count Series; ids without a count become NaN
    hectare_assign[column_name] = hectare_assign['id'].map(counts)
hectare_assign

Descriptive Statistics

In [None]:
#Primary fur color
fur_color_ap = clean_squirrels_ap['Primary Fur Color'].value_counts()

# Bar colours are chosen by label, so they stay correct if the frequency
# order of the categories changes. Unlisted labels fall back to "tan".
fur_palette_ap = {"Gray": "Gray", "Cinnamon": "Red", "Red": "Red", "Black": "Black"}

plt.figure(figsize=(8, 5))
fur_color_ap.plot(kind="bar", color=[fur_palette_ap.get(label, "tan") for label in fur_color_ap.index])

plt.title("Frequency of Squirrel Fur Colors")
plt.xlabel("Primary Fur Color")
plt.ylabel("Count")
plt.xticks(rotation=45)
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()

#Combination of primary and highlight
prim_high_color_ap = clean_squirrels_ap['Combination of Primary and Highlight Color'].value_counts()

plt.figure(figsize=(10, 5))
# NOTE(review): these colours are assigned by bar position (most frequent
# combination first), not by label — re-check if the data changes.
prim_high_color_ap.plot(kind="bar", color=["Gray", "indianred", "lightgray", "rosybrown", "firebrick", "mistyrose", "Black", "Maroon", "rosybrown", "darkred", "dimgray",
                                          "brown", "brown", "saddlebrown", "dimgray", "gray", "sienna", "sienna", "saddlebrown", "gainsboro", "gray"])

plt.title("Frequency of Primary and Highlight Squirrel Fur Colors")
# The x-axis shows primary + highlight combinations, not the primary colour alone
plt.xlabel("Primary + Highlight Fur Color")
plt.ylabel("Count")
plt.xticks(rotation=45, ha="right")
plt.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()

In [None]:
#Displays of fear

fear_columns_ap = ['Quaas', 'Moans', 'Tail flags', 'Runs from']
# Count of each distinct value per fear column (rows = values, columns = behaviours)
fear_ap = clean_squirrels_ap[fear_columns_ap].apply(pd.Series.value_counts)
print(fear_ap)

# DataFrame.plot opens its own figure unless it is handed an Axes, so the
# original plt.figure() call left an empty extra figure. Create the Axes
# explicitly and draw on it.
fig, ax = plt.subplots(figsize=(8, 5))
# One colour per behaviour column (four), so no two series share a colour
fear_ap.plot(kind="bar", color=["Blue", "lightblue", "Darkblue", "royalblue"], ax=ax)

ax.set_title("Frequency of Display of Fear Behaviors")
ax.set_xlabel("Expressing Fear Based Behavior")
ax.set_ylabel("Count")
ax.tick_params(axis="x", labelrotation=45)
ax.grid(axis="y", linestyle="--", alpha=0.7)
plt.show()

In [None]:
#percentage of squirrels who showed fear

print(clean_squirrels_ap["showed_fear_ap"].value_counts())

# Share of squirrels with showed_fear_ap == True, computed from the data
# rather than typed in from a previous run's output (the mean of a boolean
# column is the fraction of True values).
total_fear_ap = clean_squirrels_ap["showed_fear_ap"].mean()
total_fear_ap

In [None]:
#See if physical characteristics and fear have a relation

fear_columns_ap = ['Quaas', 'Moans', 'Tail flags', 'Runs from']

#Group by fur color and count how many did/didn't show fear
# Rows: fur colour; columns: showed_fear_ap values (missing combinations stay NaN here)
fear_by_fur_counts = clean_squirrels_ap.groupby("Primary Fur Color")["showed_fear_ap"].value_counts()
fur_fear_ap = fear_by_fur_counts.unstack()

print(clean_squirrels_ap["showed_fear_ap"].value_counts())

#Plot
ax = fur_fear_ap.plot(
    kind="bar",
    stacked=True,
    figsize=(8, 5),
    color=["steelblue", "firebrick"],
)
print(fur_fear_ap)

ax.set_title("Comparison of Fur Color and Fear Behaviors in Squirrels")
ax.set_xlabel("Fur Color")
ax.set_ylabel("Count of Squirrels")
ax.legend(["Did Not Show Fear", "Showed Fear"])
ax.tick_params(axis="x", labelrotation=45)
ax.grid(axis="y", linestyle="--", alpha=0.7)

plt.show()


# Same table again, but with missing combinations filled with 0 so it can be
# passed to the chi-square test
fur_fear_ap = (
    clean_squirrels_ap
    .groupby("Primary Fur Color")["showed_fear_ap"]
    .value_counts()
    .unstack(fill_value=0)
)

#contingency table
print(fur_fear_ap)

#Chi-Square test
chi2, p, dof, expected = chi2_contingency(fur_fear_ap)

print(f"\nChi-Square statistic: {chi2}")
print(f"Degrees of freedom: {dof}")
print(f"Expected frequencies:\n{expected}")
print(f"P-value: {p}")

In [None]:
#See if fear and location have a relationship

loc_fear_ap = clean_squirrels_ap.groupby("Lat/Long")["showed_fear_ap"].value_counts().unstack(fill_value=0)
print(clean_squirrels_ap["showed_fear_ap"].value_counts())

#Plot
fear_palette_ap = {True: "firebrick", False: "steelblue"}
colors = clean_squirrels_ap['showed_fear_ap'].map(fear_palette_ap)

# One scatter call per group: a single scatter creates one legend handle, so
# passing two labels to plt.legend() labelled every point "Did Not Show Fear".
# "Showed Fear" points are now drawn second, i.e. on top.
fig, ax = plt.subplots(figsize=(8, 5))
for showed, label in [(False, "Did Not Show Fear"), (True, "Showed Fear")]:
    in_group = clean_squirrels_ap['showed_fear_ap'] == showed
    ax.scatter(
        clean_squirrels_ap.loc[in_group, "X"],
        clean_squirrels_ap.loc[in_group, "Y"],
        color=fear_palette_ap[showed],
        alpha=0.6,
        label=label,
    )

ax.set_title("Squirrel Locations and Fear Response")
ax.set_xlabel("Longitude (X)")
ax.set_ylabel("Latitude (Y)")
ax.grid(True, linestyle="--", alpha=0.5)
ax.legend()

plt.show()

#Chi-Square test
# NOTE(review): "Lat/Long" looks like a per-sighting coordinate string, so
# most groups probably contain a single squirrel and the expected counts
# would be far too small for a chi-square test to be meaningful — confirm,
# and prefer the hectare-level test for a location effect.
chi2, p, dof, expected = chi2_contingency(loc_fear_ap)

print(f"\nChi-Square statistic: {chi2}")
print(f"Degrees of freedom: {dof}")
print(f"Expected frequencies:\n{expected}")
print(f"P-value: {p}")

In [None]:
#Hectare and fear response relationship

# Rows: hectare id; columns: showed_fear_ap values; cells: number of squirrels
hect_fear_ap = clean_squirrels_ap.groupby("id")["showed_fear_ap"].value_counts().unstack(fill_value=0)
print(clean_squirrels_ap["showed_fear_ap"].value_counts())

#Plot
# kind="barh" puts hectares on the y-axis and counts on the x-axis, so the
# axis labels and the gridlines follow that orientation (the original labels
# were swapped).
ax = hect_fear_ap.plot(kind="barh", stacked=True, figsize=(15, 20), color=["steelblue", "firebrick"])

ax.set_title("Fear Response by Hectare")
ax.set_xlabel("Number of Squirrels")
ax.set_ylabel("Hectare")
ax.legend(["Did Not Show Fear", "Showed Fear"])
ax.grid(axis="x", alpha=0.5)

plt.show()

#Chi-Square test
chi2, p, dof, expected = chi2_contingency(hect_fear_ap)

print(f"\nChi-Square statistic: {chi2}")
print(f"Degrees of freedom: {dof}")
print(f"Expected frequencies:\n{expected}")
print(f"P-value: {p}")

In [None]:
#day/night and fear response relationship

# Rows: Shift values; columns: showed_fear_ap values; cells: number of squirrels
time_fear_ap = (
    clean_squirrels_ap
    .groupby("Shift")["showed_fear_ap"]
    .value_counts()
    .unstack(fill_value=0)
)
print(clean_squirrels_ap["showed_fear_ap"].value_counts())

#Plot
ax = time_fear_ap.plot(
    kind="bar",
    stacked=True,
    figsize=(6, 8),
    color=["steelblue", "firebrick"],
)

ax.set_title("Fear Response vs Time of Day")
ax.set_xlabel("Time", rotation=0)
ax.set_ylabel("Number of Squirrels")
ax.legend(["Did Not Show Fear", "Showed Fear"])
ax.tick_params(axis="x", labelrotation=0)
ax.grid(axis="y", alpha=0.5)

plt.show()

#Chi-Square test
chi2, p, dof, expected = chi2_contingency(time_fear_ap)

print(f"\nChi-Square statistic: {chi2}")
print(f"Degrees of freedom: {dof}")
print(f"Expected frequencies:\n{expected}")
print(f"P-value: {p}")

In [None]:
#age and fear response relationship

# Rows: Age values; columns: showed_fear_ap values; cells: number of squirrels
age_fear_ap = (
    clean_squirrels_ap
    .groupby("Age")["showed_fear_ap"]
    .value_counts()
    .unstack(fill_value=0)
)
print(clean_squirrels_ap["showed_fear_ap"].value_counts())

#Plot
ax = age_fear_ap.plot(
    kind="bar",
    stacked=True,
    figsize=(6, 5),
    color=["steelblue", "firebrick"],
)

ax.set_title("Fear Response vs Age")
ax.set_xlabel("Age", rotation=0)
ax.set_ylabel("Number of Squirrels")
ax.legend(["Did Not Show Fear", "Showed Fear"])
ax.tick_params(axis="x", labelrotation=0)
ax.grid(axis="y", alpha=0.5)

plt.show()

#Chi-Square test
chi2, p, dof, expected = chi2_contingency(age_fear_ap)

print(f"\nChi-Square statistic: {chi2}")
print(f"Degrees of freedom: {dof}")
print(f"Expected frequencies:\n{expected}")
print(f"P-value: {p}")

Squirrel Mapping – Mikayla

In [None]:
#mapping the squirrels

#Determine the center of your map
# folium takes locations as [latitude, longitude]. In this dataset Y is the
# latitude and X the longitude (see the scatter-plot axis labels and the later
# map cells), so the original X/Y order placed everything at swapped
# coordinates.
center_lat_mc = nyc_squirrels['Y'].mean()
center_lon_mc = nyc_squirrels['X'].mean()

# Create the map object
my_map_mc = folium.Map(location=[center_lat_mc, center_lon_mc], zoom_start=10)

# One default marker per row; clicking it shows the squirrel's unique ID
for index, row in nyc_squirrels.iterrows():
    folium.Marker([row['Y'], row['X']], popup=row['Unique Squirrel ID']).add_to(my_map_mc)
my_map_mc.save("my_map.html")

In [None]:
#making cute squirrel icons
squirrel_icon_url = "squirrel.png"  # image file used for every marker

# Toggleable layer holding one icon marker per squirrel row
squirrel_layer = folium.FeatureGroup(name="Squirrels")

for _, squirrel in nyc_squirrels.iterrows():
    # A fresh CustomIcon is created for every marker, as in the original loop
    marker = folium.Marker(
        location=[squirrel['Y'], squirrel['X']],
        icon=folium.CustomIcon(icon_image=squirrel_icon_url, icon_size=(20, 20)),
        popup=squirrel['Unique Squirrel ID'],
    )
    marker.add_to(squirrel_layer)

In [None]:
#making hectare polygons
polygon_layer = folium.FeatureGroup(name="Hectare Polygons")

for index, row in df_final.iterrows():
    try:
        # Alternate red / blue by row index parity
        shade = "red" if index % 2 == 0 else "blue"
        # Corners 1-4 as (lat, lon), then corner 1 again to close the ring
        locations = [(row[f"lat_{k}"], row[f"lon_{k}"]) for k in (1, 2, 3, 4, 1)]
        hectare_polygon = folium.Polygon(
            locations=locations,
            color=shade,
            fill=True,
            fill_color=shade,
            fill_opacity=0.3,
            popup=str(row.get("id", f"Hectare {index}")),
        )
        hectare_polygon.add_to(polygon_layer)
    except Exception as e:
        # Report the failing row and carry on with the remaining polygons
        print(f"Error in polygon {index}: {e}")

In [None]:
# Initialize the map, centred on the mean squirrel position ([lat, lon] = [Y, X])
map_center_mc = [nyc_squirrels['Y'].mean(), nyc_squirrels['X'].mean()]
my_map_mc = folium.Map(location=map_center_mc, zoom_start=15)

# Add layers (squirrel markers first, then the hectare polygons)
for layer in (squirrel_layer, polygon_layer):
    layer.add_to(my_map_mc)

# Add layer control toggle
layer_toggle = folium.LayerControl()
layer_toggle.add_to(my_map_mc)

# Save to HTML and display inline as the cell's output
my_map_mc.save("interactive_squirrel_map.html")
my_map_mc

In [None]:
# One icon image and one toggleable layer per primary fur colour
fur_colors = {
    colour: {
        "icon_url": icon_file,
        "layer": folium.FeatureGroup(name=f"{colour} Squirrels"),
    }
    for colour, icon_file in [
        ("Gray", "graysquirrel.png"),
        ("Cinnamon", "squirrel.png"),
        ("Black", "blacksquirrel.png"),
    ]
}

# Place each squirrel on the layer for its fur colour; rows whose colour is
# not one of the keys above (including missing values) are skipped
for _, squirrel in nyc_squirrels.iterrows():
    colour_entry = fur_colors.get(squirrel.get('Primary Fur Color', 'Unknown'))
    if colour_entry is None:
        continue
    marker = folium.Marker(
        location=[squirrel['Y'], squirrel['X']],
        icon=folium.CustomIcon(icon_image=colour_entry["icon_url"], icon_size=(20, 20)),
        popup=squirrel['Unique Squirrel ID'],
    )
    marker.add_to(colour_entry["layer"])

# Rebuild the hectare polygon layer for this map
polygon_layer = folium.FeatureGroup(name="Hectare Polygons")

for index, row in df_final.iterrows():
    try:
        # Alternate red / blue by row index parity
        shade = "red" if index % 2 == 0 else "blue"
        # Corners 1-4 as (lat, lon), then corner 1 again to close the ring
        locations = [(row[f"lat_{k}"], row[f"lon_{k}"]) for k in (1, 2, 3, 4, 1)]
        hectare_polygon = folium.Polygon(
            locations=locations,
            color=shade,
            fill=True,
            fill_color=shade,
            fill_opacity=0.3,
            popup=str(row.get("id", f"Hectare {index}")),
        )
        hectare_polygon.add_to(polygon_layer)
    except Exception as e:
        # Report the failing row and carry on with the remaining polygons
        print(f"Error in polygon {index}: {e}")

# New map centred on the mean squirrel position ([lat, lon] = [Y, X])
map_center_mc = [nyc_squirrels['Y'].mean(), nyc_squirrels['X'].mean()]
my_map_mc = folium.Map(location=map_center_mc, zoom_start=15)

# Polygons first, then one layer per fur colour
polygon_layer.add_to(my_map_mc)
for colour_entry in fur_colors.values():
    colour_entry["layer"].add_to(my_map_mc)

# Add toggle control (expanded by default)
folium.LayerControl(collapsed=False).add_to(my_map_mc)

# Save to HTML and display inline as the cell's output
my_map_mc.save("grouped_squirrels_map.html")
my_map_mc

In [None]:
# Rebuild clean_squirrels_ap for the fear map. Unlike the earlier cleaning
# cell there is no dropna() here, so every squirrel row is kept.
drop_columns_ap = ['Color notes', 'Specific Location', 'Other Activities', 'Other Interactions', 'Highlight Fur Color']
clean_squirrels_ap = nyc_squirrels.drop(columns = drop_columns_ap)
fear_columns_ap = ['Quaas', 'Moans', 'Tail flags', 'Runs from']
# Count of each distinct value per fear column (rows = values, columns = behaviours)
fear_ap = clean_squirrels_ap[fear_columns_ap].apply(pd.Series.value_counts)

# True when at least one fear column holds a truthy marker. Column-wise
# Series.map replaces DataFrame.applymap, which newer pandas releases
# deprecate; the per-value test is unchanged.
clean_squirrels_ap["showed_fear_ap"] = clean_squirrels_ap[fear_columns_ap].apply(
    lambda col: col.map(lambda x: str(x).strip().lower() in ["yes", "true", "1"])
).any(axis=1)


print(clean_squirrels_ap["showed_fear_ap"].value_counts())

In [None]:
# Rebuild the map from scratch to avoid duplicates or conflicts
map_center_mc = [nyc_squirrels['Y'].mean(), nyc_squirrels['X'].mean()]
my_map_mc = folium.Map(location=map_center_mc, zoom_start=15)

# Hectare polygons
polygon_layer = folium.FeatureGroup(name="Hectare Polygons")

for index, row in df_final.iterrows():
    try:
        # Alternate red / blue by row index parity
        shade = "red" if index % 2 == 0 else "blue"
        # Corners 1-4 as (lat, lon), then corner 1 again to close the ring
        locations = [(row[f"lat_{k}"], row[f"lon_{k}"]) for k in (1, 2, 3, 4, 1)]
        hectare_polygon = folium.Polygon(
            locations=locations,
            color=shade,
            fill=True,
            fill_color=shade,
            fill_opacity=0.3,
            popup=str(row.get("id", f"Hectare {index}")),
        )
        hectare_polygon.add_to(polygon_layer)
    except Exception as e:
        # Report the failing row and carry on with the remaining polygons
        print(f"Polygon error at {index}: {e}")

polygon_layer.add_to(my_map_mc)

# fear-based icons: one icon image and one toggleable layer per outcome
showed_fear_mc = {
    label: {"icon_url": icon_file, "layer": folium.FeatureGroup(name=label)}
    for label, icon_file in [("Fear", "fear.png"), ("No Fear", "happiness.png")]
}

for _, squirrel in clean_squirrels_ap.iterrows():
    # Choose the layer/icon from the squirrel's showed_fear_ap flag
    if squirrel.get('showed_fear_ap', False):
        fear_label = "Fear"
    else:
        fear_label = "No Fear"
    fear_entry = showed_fear_mc[fear_label]

    marker = folium.Marker(
        location=[squirrel['Y'], squirrel['X']],
        icon=folium.CustomIcon(icon_image=fear_entry["icon_url"], icon_size=(20, 20)),
        popup=squirrel['Unique Squirrel ID'],
    )
    marker.add_to(fear_entry["layer"])

# Add fear layers to map
for fear_entry in showed_fear_mc.values():
    fear_entry["layer"].add_to(my_map_mc)

# Add the layer control toggle (expanded by default)
folium.LayerControl(collapsed=False).add_to(my_map_mc)

# Save to HTML and display inline as the cell's output
my_map_mc.save("fear_map.html")
my_map_mc


Joining the hectare labels