In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
from ipywidgets import interact
import seaborn as sns

# Load the raw vehicle listings.
data = pd.read_csv("Utils/craigslist_vehicles.csv")

# Coerce 'year' to a number; values that cannot be parsed become NaN and those
# rows are dropped, because the plot below bins listings by year.
data['year'] = pd.to_numeric(data['year'], errors='coerce')
data.dropna(subset=['year'], inplace=True)

# Normalise the categorical columns used as filters: trim surrounding
# whitespace and label blank *and* missing entries 'Unknown'. Empty CSV fields
# are read as NaN rather than '', so replacing '' alone would leave NaN values
# that can never be matched by an equality filter.
for _column in ('manufacturer', 'condition', 'cylinders', 'fuel', 'drive', 'transmission'):
    data[_column] = (
        data[_column].str.strip().replace('', 'Unknown').fillna('Unknown')
    )

# Strip the 'mi' suffix and thousands separators before converting the
# odometer reading to a number; unparseable readings become NaN.
data['odometer'] = (
    data['odometer']
    .astype(str)
    .str.replace('mi', '', regex=False)
    .str.replace(',', '', regex=False)
)
data['odometer'] = pd.to_numeric(data['odometer'], errors='coerce')

data.sort_values(by=['year', 'region'], inplace=True)

# Dropdown choices: 'All' followed by each distinct value in order of first
# appearance. 'region' and 'model' are not cleaned above, so missing values are
# excluded here to keep NaN out of the option lists.
regions = ['All'] + list(data['region'].dropna().unique())
models = ['All'] + list(data['model'].dropna().unique())
manufacturers = ['All'] + list(data['manufacturer'].unique())

conditions = ['All'] + list(data['condition'].unique())
cylinders = ['All'] + list(data['cylinders'].unique())
fuels = ['All'] + list(data['fuel'].unique())
drives = ['All'] + list(data['drive'].unique())
transmissions = ['All'] + list(data['transmission'].unique())

@interact
def plot_data(selected_region=widgets.Dropdown(options=regions, value='All', description='Select Region:'),
              selected_model=widgets.Dropdown(options=models, value='All', description='Select Model:'),
              selected_manufacturer=widgets.Dropdown(options=manufacturers, value='All', description='Select Manufacturer:'),
              selected_condition=widgets.Dropdown(options=conditions, value='All', description='Select Condition:'),
              selected_cylinders=widgets.Dropdown(options=cylinders, value='All', description='Select Cylinders:'),
              selected_fuel=widgets.Dropdown(options=fuels, value='All', description='Select Fuel:'),
              selected_drive=widgets.Dropdown(options=drives, value='All', description='Select Drive:'),
              selected_transmission=widgets.Dropdown(options=transmissions, value='All', description='Select Transmission:'),
              price_range=widgets.FloatRangeSlider(min=0, max=100000, step=1000, value=[0, 100000], description='Price Range ($)')):
    """Plot the number of matching listings per 10-year interval of 'year'.

    Each ``selected_*`` argument restricts the module-level ``data`` frame to
    rows whose corresponding column equals the chosen value; ``'All'`` leaves
    that column unfiltered. ``price_range`` is an inclusive ``(low, high)``
    bound on the ``price`` column. The result is drawn as a bar chart with a
    linear trend line; nothing is returned.

    If no rows match, a message is printed and no figure is drawn.
    """
    filter_conditions = {
        'region': selected_region,
        'model': selected_model,
        'manufacturer': selected_manufacturer,
        'condition': selected_condition,
        'cylinders': selected_cylinders,
        'fuel': selected_fuel,
        'drive': selected_drive,
        'transmission': selected_transmission,
    }

    # Combine every filter into one boolean mask instead of copying the whole
    # frame and re-slicing it once per filter on every widget change.
    mask = data['price'].between(price_range[0], price_range[1])
    for column, value in filter_conditions.items():
        if value == 'All':
            continue
        if pd.isna(value):
            # NaN never compares equal to itself, so a missing-value option
            # has to be matched with isna() rather than ==.
            mask &= data[column].isna()
        else:
            mask &= data[column] == value

    years = data.loc[mask, 'year'].dropna()

    # Without this guard min()/max() return NaN and int(NaN) raises ValueError.
    if years.empty:
        print('No vehicles match the selected filters.')
        return

    # Group by 10-year intervals. The last edge is always greater than the
    # latest year, and right=False makes each bin [start, start + 10) so the
    # earliest year is counted; pd.cut's default right-closed bins would
    # exclude values equal to the first edge.
    first_year = int(years.min())
    last_year = int(years.max())
    bin_edges = list(range(first_year, last_year + 11, 10))
    intervals = pd.cut(years, bins=bin_edges, right=False)
    # observed=False keeps empty intervals so the x-axis has no gaps.
    counts = years.groupby(intervals, observed=False).size()
    counts.index = counts.index.astype(str)

    plt.figure(figsize=(12, 6))

    sns.barplot(x=counts.index, y=counts.values, alpha=0.7, color='red')

    # Bars sit at integer positions 0..n-1, so the trend is fitted against
    # those positions. A fit through a single bar carries no information.
    if len(counts) > 1:
        sns.regplot(x=list(range(len(counts))), y=counts.values, color='orange', scatter=False)

    plt.title('Number of Available Vehicles Over Time -10 year intervals ')
    plt.xlabel('Year Interval')
    plt.ylabel('Number of Vehicles')
    plt.xticks(rotation=45, ha='right')
    plt.grid(axis='y', linestyle='--', alpha=0.7)

    plt.ylim(0, None)

    plt.tight_layout()

    plt.show()
