In [1]:
import pandas as pd

# Load the dataset
vehicles_df = pd.read_csv('vehicles_us.csv')
# Display the first few rows of the dataframe
display(vehicles_df.head())

Unnamed: 0,price,model_year,model,condition,cylinders,fuel,odometer,transmission,type,paint_color,is_4wd,date_posted,days_listed
0,9400,2011.0,bmw x5,good,6.0,gas,145000.0,automatic,SUV,,1.0,2018-06-23,19
1,25500,,ford f-150,good,6.0,gas,88705.0,automatic,pickup,white,1.0,2018-10-19,50
2,5500,2013.0,hyundai sonata,like new,4.0,gas,110000.0,automatic,sedan,red,,2019-02-07,79
3,1500,2003.0,ford f-150,fair,8.0,gas,,automatic,pickup,,,2019-03-22,9
4,14900,2017.0,chrysler 200,excellent,4.0,gas,80903.0,automatic,sedan,black,,2019-04-02,28


In [None]:
from ipywidgets import interactive, HBox, VBox, widgets
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

# Function to update the histogram based on the trend line visibility
def update_histogram(show_trendline):
    fig = px.histogram(vehicles_df, x='price', title='Vehicle Price Distribution')
    if show_trendline:
        fig.add_traces(go.Scatter(x=np.sort(vehicles_df['price']), 
                                  y=np.poly1d(np.polyfit(vehicles_df['price'], 
                                                         np.histogram(vehicles_df['price'], bins=40)[0], 
                                                         1))(np.sort(vehicles_df['price'])),
                                  mode='lines', name='Trend Line'))
    fig.show()

# Ensure there are no duplicate options by using unique widget identifiers if needed

# Create interactive widget
interactive_plot = interactive(update_histogram, show_trendline=widgets.Checkbox(value=False, description='Show Trend Line'))
display(interactive_plot)

interactive(children=(Checkbox(value=False, description='Show Trend Line'), Output()), _dom_classes=('widget-i…

In [3]:
from ipywidgets import interactive, HBox, VBox, widgets, Layout
import plotly.express as px
import plotly.graph_objects as go
import numpy as np

# Function to update the histogram based on the trend line, outlier removal options, and ensuring no negative y values
def update_histogram(show_trendline, remove_outliers):
    # Filter out outliers if the checkbox is checked
    if remove_outliers:
        Q1 = vehicles_df['price'].quantile(0.25)
        Q3 = vehicles_df['price'].quantile(0.75)
        IQR = Q3 - Q1
        filtered_df = vehicles_df.query('(@Q1 - 1.5 * @IQR) <= price <= (@Q3 + 1.5 * @IQR)')
    else:
        filtered_df = vehicles_df
    
    fig = px.histogram(filtered_df, x='price', title='Vehicle Price Distribution')
    
    if show_trendline:
        # Calculate and display the trend line
        # Ensure no negative y values by clipping at 0
        y_values = np.poly1d(np.polyfit(filtered_df['price'], 
                                        np.histogram(filtered_df['price'], bins=40)[0], 
                                        1))(np.sort(filtered_df['price']))
        y_values_clipped = np.clip(y_values, a_min=0, a_max=None)  # Clip negative values to 0
        fig.add_traces(go.Scatter(x=np.sort(filtered_df['price']), 
                                  y=y_values_clipped,
                                  mode='lines', name='Trend Line'))
    fig.update_layout(showlegend=True)
    fig.show()

# Create interactive widgets with specified layout for better appearance
checkbox_trendline = widgets.Checkbox(value=False, description='Show Trend Line', layout=Layout(width='auto', margin='0 0 0 20px'))
checkbox_outliers = widgets.Checkbox(value=False, description='Remove Outliers', layout=Layout(width='auto', margin='0 0 0 20px'))

# Layout adjustments for better UI
ui = VBox([checkbox_trendline, checkbox_outliers])

interactive_plot = interactive(update_histogram, 
                               show_trendline=checkbox_trendline, 
                               remove_outliers=checkbox_outliers)

# Display the UI and interactive plot together
display(ui, interactive_plot)

VBox(children=(Checkbox(value=False, description='Show Trend Line', layout=Layout(margin='0 0 0 20px', width='…

interactive(children=(Checkbox(value=False, description='Show Trend Line', layout=Layout(margin='0 0 0 20px', …

In [4]:
import pandas as pd
import plotly.express as px

# Assuming avg_price_by_year_type is the DataFrame you want to visualize
# Assign individual variables to enhance readability
data_frame = avg_price_by_year_type
x_axis = 'model_year'
y_axis = 'price'
color_dimension = 'type'
label_dict = {'price': 'Average Price'}
title = 'Average Price by Model Year and Type'

# Use the assigned variables in the function call
fig_bar = px.bar(data_frame, x=x_axis, y=y_axis, color=color_dimension, labels=label_dict, title=title)

# Display the figure
fig_bar.show()

NameError: name 'avg_price_by_year_type' is not defined

In [None]:
import pandas as pd
import plotly.express as px

# Assuming vehicles_df_filtered is already defined in the environment
# Generate a bar plot for Average Vehicle Price vs. Model Year by Type with an interactive legend
# Limiting the plot to vehicles below 200k miles, limiting y axis to below 100,000, and only showing years from 1960 onward

# First, filter the dataframe for vehicles below 200k miles
vehicles_below_200k = vehicles_df_filtered[vehicles_df_filtered['odometer'] < 200000]

# Then, calculate the average price per model year and type
avg_price_by_year_type = vehicles_below_200k.groupby(['model_year', 'type'])['price'].mean().reset_index()

fig = px.bar(avg_price_by_year_type, x='model_year', y='price', color='type', 
             labels={'price': 'Average Vehicle Price', 'model_year': 'Model Year'},
             title='Average Vehicle Price vs. Model Year by Type (Below 300k Miles)')

# Update layout to improve visibility and interactivity, including limiting y axis to below 100,000 and only showing years from 1960 onward
fig.update_layout(legend_title_text='Vehicle Type',
                  xaxis=dict(
                      title='Model Year',
                      range=[1960, max(avg_price_by_year_type['model_year'])+1]  # Only show years from 1960 onward
                  ),
                  yaxis=dict(
                      title='Average Vehicle Price',
                      range=[0, 300000]  # Limiting y axis to below 100,000
                  ),
                  legend=dict(
                      title_font_size=12,
                      itemclick="toggleothers",
                      itemdoubleclick="toggle",
                      # Enable selecting multiple legend items by setting 'groupclick' to 'togglegroup'
                      groupclick="togglegroup"
                  ),
                  font=dict(
                      family="Arial, sans-serif",
                      size=12,
                      color="RebeccaPurple"
                  ))

fig.show()