In [None]:
# Shelterluv Adoption & Length of Stay Analysis
# ---------------------------------------------
# Basic setup: load libraries and configure visuals

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Plot styling: white grid background, muted colours, slightly larger fonts
sns.set(
    style="whitegrid",
    palette="muted",
    font_scale=1.1,
)

# Never truncate columns when pandas renders a DataFrame
pd.options.display.max_columns = None

In [None]:
# Load Data

## Demographic export
df_demo = pd.read_csv("../data/SL_data_20251112.csv")

## Length-of-stay / outcome export
df_los = pd.read_csv("../data/SL_length_of_stay_20251112.csv")

## Combine the two exports on 'Animal ID'.
## An inner join keeps only animals that appear in both files.
df = df_demo.merge(df_los, on='Animal ID', how='inner')

## Quick peek at the merged data
df.head()

In [None]:
# Clean and Prepare Data

## Check the raw column types; the dates are possibly still strings at this point
print(df[['Intake Date_x', 'Outcome Date']].dtypes)

## Convert date columns to datetime.
## errors='coerce' turns values that cannot be parsed into NaT instead of raising,
## so those rows can be dropped further down.
## NOTE(review): 'Intake Date_x' is presumably the demographic file's intake column,
## suffixed by the merge - confirm this is the intended source.
df['Intake Date'] = pd.to_datetime(df['Intake Date_x'], errors='coerce')
df['Outcome Date'] = pd.to_datetime(df['Outcome Date'], errors='coerce')

## Check column types again; both should now be datetime
print(df[['Intake Date', 'Outcome Date']].dtypes)

## Calculate Length of Stay as the day component of (outcome - intake).
## The result is missing wherever either date is NaT.
df['Length of Stay (days)'] = (df['Outcome Date'] - df['Intake Date']).dt.days

## Remove rows with missing critical data
df = df.dropna(subset=['Intake Date', 'Outcome Date', 'Length of Stay (days)'])

## Remove negative LOS (outcome recorded before intake).
## .copy() makes the filtered result an independent DataFrame, so adding or
## modifying columns on df later does not raise SettingWithCopyWarning.
df = df[df['Length of Stay (days)'] >= 0].copy()

## Take a peek
df.head()
