# In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np

# Function to load the dataset
def load_dataset(file_path):
    """Read a CSV file into a pandas DataFrame.

    Args:
        file_path: Location of the CSV data; handed unchanged to
            ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` with its default options.
    """
    return pd.read_csv(file_path)

# Function to check for missing values
def check_missing_values(df):
    """Print, for each column of ``df``, how many of its entries are null.

    Args:
        df: DataFrame to inspect. It is only read, never modified.
    """
    print("\nMissing values in the dataset:")
    null_counts_per_column = df.isna().sum()
    print(null_counts_per_column)

# Function to fix data types
def fix_data_types(df):
    """Convert the license date columns to datetimes and ZipCode to numbers.

    Values that cannot be converted are coerced to missing values
    (``errors='coerce'``) rather than raising. The columns are overwritten
    on ``df`` itself.

    Args:
        df: DataFrame with ``LicenseIssuedDate``, ``LicenseExpiredDate`` and
            ``ZipCode`` columns.

    Returns:
        The same DataFrame object, with the three columns converted.
    """
    for date_column in ('LicenseIssuedDate', 'LicenseExpiredDate'):
        df[date_column] = pd.to_datetime(df[date_column], errors='coerce')

    numeric_zip = pd.to_numeric(df['ZipCode'], errors='coerce')
    df['ZipCode'] = numeric_zip
    return df

# Function to standardize breed names
def standardize_breed_names(df):
    """Trim surrounding whitespace from ``BreedName`` and lowercase it.

    The column is overwritten on ``df`` itself.

    Args:
        df: DataFrame with a string-valued ``BreedName`` column.

    Returns:
        The same DataFrame object, with ``BreedName`` normalized.
    """
    trimmed_names = df['BreedName'].str.strip()
    df['BreedName'] = trimmed_names.str.lower()
    return df

# Function to calculate dog age
def calculate_dog_age(df, current_year=2024):
    """Add a ``DogAge`` column equal to ``current_year - AnimalBirthYear``.

    The column is written onto ``df`` itself.

    Args:
        df: DataFrame with an ``AnimalBirthYear`` column.
        current_year: Year the ages are measured against.

    Returns:
        The same DataFrame object, now carrying ``DogAge``.
    """
    birth_years = df['AnimalBirthYear']
    df['DogAge'] = current_year - birth_years
    return df

# Function to remove duplicates
def remove_duplicates(df):
    """Return ``df`` without fully duplicated rows.

    Args:
        df: DataFrame to de-duplicate.

    Returns:
        The result of ``df.drop_duplicates()`` with default options.
    """
    return df.drop_duplicates()

# Function to fill missing values
def fill_missing_values(df):
    """Replace missing ``DogAge`` entries with the column's mean.

    The column is overwritten on ``df`` itself.

    Args:
        df: DataFrame with a numeric ``DogAge`` column.

    Returns:
        The same DataFrame object, with ``DogAge`` gaps filled.
    """
    mean_age = df['DogAge'].mean()
    df['DogAge'] = df['DogAge'].fillna(mean_age)
    return df

# Function to perform breed popularity analysis
def analyze_breed_popularity(df, top_n=10):
    """Print the ``top_n`` most frequent values of the ``BreedName`` column.

    Args:
        df: DataFrame with a ``BreedName`` column.
        top_n: How many of the most common breeds to show.
    """
    breed_counts = df['BreedName'].value_counts()
    top_breeds = breed_counts.head(top_n)
    print("\nMost popular dog breeds in NYC:", top_breeds, sep="\n")

# Function to analyze gender distribution
def analyze_gender_distribution(df):
    """Print how often each value of the ``AnimalGender`` column occurs.

    Args:
        df: DataFrame with an ``AnimalGender`` column.
    """
    counts_by_gender = df['AnimalGender'].value_counts()
    print("\nGender distribution of dogs:", counts_by_gender, sep="\n")

# Function to analyze license issuance trends over time
def analyze_license_trend(df):
    """Print the number of licenses issued per calendar year.

    Rows whose ``LicenseIssuedDate`` is missing (NaT) are dropped before the
    year is extracted. They never contributed to the counts, but leaving
    them in makes ``.dt.year`` return floats, so years would be printed as
    ``2016.0`` instead of ``2016``.

    Args:
        df: DataFrame whose ``LicenseIssuedDate`` column is already of
            datetime dtype.
    """
    # Drop NaT first so the extracted years keep an integer dtype.
    issued_dates = df['LicenseIssuedDate'].dropna()
    license_trend = issued_dates.dt.year.value_counts().sort_index()
    print("\nLicense issuance trend by year:")
    print(license_trend)

# Function to analyze dog age distribution
def analyze_age_distribution(df):
    """Print how many rows share each ``DogAge`` value, ordered by age.

    Args:
        df: DataFrame with a ``DogAge`` column.
    """
    ages = df['DogAge']
    counts_by_age = ages.value_counts().sort_index()
    print("\nAge distribution of dogs:", counts_by_age, sep="\n")

# Function to save the cleaned dataset
def save_cleaned_dataset(df, output_path='cleaned_nyc_dog_licensing.csv'):
    """Write ``df`` to a CSV file without its index and print a confirmation.

    Args:
        df: DataFrame to persist.
        output_path: Destination of the CSV file.
    """
    df.to_csv(output_path, index=False)
    confirmation = f"\nCleaned dataset saved to {output_path}"
    print(confirmation)

# Main function to orchestrate the data cleaning and analysis
def main(file_path='/mnt/data/NYC_Dog_Licensing_Dataset_20240923.csv'):
    """Run the full cleaning and analysis pipeline on one CSV file.

    The steps run in a fixed order: load, report missing values, fix column
    types, normalize breed names, derive dog ages, drop duplicate rows, fill
    missing ages, print the analyses, and save the cleaned data.

    Args:
        file_path: CSV file to process. Defaults to the path that was
            previously hard-coded, so calling ``main()`` with no arguments
            behaves exactly as before.
    """
    # Load dataset
    df = load_dataset(file_path)

    # Step 1: Report missing values on the raw data, before any coercion
    # introduces additional NaN/NaT entries.
    check_missing_values(df)

    # Step 2: Fix data types
    df = fix_data_types(df)

    # Step 3: Standardize breed names
    df = standardize_breed_names(df)

    # Step 4: Calculate dog age
    df = calculate_dog_age(df)

    # Step 5: Remove duplicates
    df = remove_duplicates(df)

    # Step 6: Fill missing values (needs the DogAge column from step 4)
    df = fill_missing_values(df)

    # Step 7: Perform analysis
    analyze_breed_popularity(df)
    analyze_gender_distribution(df)
    analyze_license_trend(df)
    analyze_age_distribution(df)

    # Step 8: Save the cleaned dataset to save_cleaned_dataset's default path
    save_cleaned_dataset(df)

# Run the main function
if __name__ == "__main__":
    # Start the pipeline only when this file is the entry point.
    main()
