Question 1:

In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from ipywidgets import interact

In [12]:
# Read the diamonds table from disk
df_1 = pd.read_csv('diamonds.csv')

# Clarity grades, listed in the order they should be sorted and displayed
clarity_order = ['SI2', 'SI1', 'VS2', 'VS1', 'VVS2', 'VVS1', 'IF']

# Convert 'clarity' to an ordered categorical that follows clarity_order.
# Any value not listed in clarity_order becomes NaN.
# NOTE(review): confirm the file contains no other grades (e.g. 'I1').
df_1['clarity'] = df_1['clarity'].astype(
    pd.CategoricalDtype(categories=clarity_order, ordered=True)
)

@interact(origin=(['All'] + df_1['type'].unique().tolist()))
def update_heatmap(origin):
    """Draw a heatmap of mean carat by colour (rows) and clarity (columns).

    Parameters
    ----------
    origin : str
        'All' to use every row of ``df_1``; otherwise a value of the
        'type' column, in which case only matching rows are aggregated.
    """
    if origin == 'All':
        filtered_df = df_1
    else:
        filtered_df = df_1[df_1['type'] == origin]

    pivot = filtered_df.pivot_table(
        values='carat',
        index='colour',
        columns='clarity',
        aggfunc='mean',
        observed=False,
    )
    # Fill missing values with 0 (returns a new frame instead of mutating in place)
    pivot = pivot.fillna(0)

    fig, ax = plt.subplots(figsize=(8, 6))
    # annot=True is required for fmt to have any effect: fmt only formats the
    # per-cell annotations. ax=ax draws on the figure created above rather than
    # relying on the implicit "current axes".
    sns.heatmap(pivot, cmap='mako', annot=True, fmt=".1f", ax=ax)
    ax.set_title(f'Mean carat by colour and clarity ({origin})')

    # Render explicitly, as the other interactive cells in this notebook do.
    plt.show()


interactive(children=(Dropdown(description='origin', options=('All', 'natural', 'lab'), value='All'), Output()…

Question 2:

In [16]:
# Build the athlete table in a single pipeline:
#   1. read the raw records,
#   2. drop rows missing Height or Weight,
#   3. keep Summer-season gold medals from 2000 through 2016 (inclusive),
#   4. keep only the first remaining row for each athlete name.
df_2 = (
    pd.read_csv('olympic_athletes.csv')
    .dropna(subset=['Height', 'Weight'])
    .loc[
        lambda rows: rows['Year'].between(2000, 2016)
        & rows['Season'].eq('Summer')
        & rows['Medal'].eq('Gold')
    ]
    .drop_duplicates(subset=['Name'])
)

# Sports offered in the selector below; the 'Combined' option pools all of them
sports_categories = [
    'Swimming',
    'Wrestling',
    'Fencing',
    'Athletics',
    'Shooting',
    'Cycling',
    'Rowing',
    'Gymnastics',
]

# Function to update visualization based on selected options
@interact(sport=['Combined'] + sports_categories, kde=False, display_data=False)
def update_visualization(sport, kde, display_data):
    """Plot the joint Weight/Height distribution of the athletes in ``df_2``.

    sport        -- 'Combined' pools every sport in ``sports_categories``;
                    any other value selects that single sport.
    kde          -- if true draw a filled KDE, otherwise a 2-D histogram.
    display_data -- if true overlay the individual rows as points coloured
                    by the 'Sex' column.
    """
    # Row mask for the requested sport(s)
    if sport == 'Combined':
        in_scope = df_2['Sport'].isin(sports_categories)
    else:
        in_scope = df_2['Sport'] == sport
    filtered_data = df_2[in_scope]

    # All layers are drawn on the current axes
    ax = plt.gca()

    # Density layer: smooth contours or binned counts
    if kde:
        sns.kdeplot(data=filtered_data, x='Weight', y='Height', fill=True, cmap='Blues', ax=ax)
    else:
        sns.histplot(data=filtered_data, x='Weight', y='Height', cbar=False, cmap='Blues', binwidth=5, ax=ax)

    # Optional layer: the raw observations on top of the density
    if display_data:
        sns.scatterplot(data=filtered_data, x='Weight', y='Height', hue='Sex', legend=True, edgecolor='w', ax=ax)

    # Fixed axis ranges so the view does not jump between selections
    ax.set_xlim(20, 140)
    ax.set_ylim(130, 220)
    ax.set_xlabel('Weight')
    ax.set_ylabel('Height')

    plt.show()


interactive(children=(Dropdown(description='sport', options=('Combined', 'Swimming', 'Wrestling', 'Fencing', '…

Question 3:

In [14]:
# Read the used-car listings
df_3 = pd.read_csv('used_cars.csv')

# Selector widgets. Each starts out offering every distinct value of its
# column, in the order the values first appear in the file.
year = widgets.Dropdown(
    description='Year',
    options=df_3['year'].unique(),
    disabled=False,
)

brand = widgets.Dropdown(
    description='Brand',
    options=df_3['brand'].unique(),
    disabled=False,
)

# Multi-select: several models can be chosen at once
model = widgets.SelectMultiple(
    description='Model',
    options=df_3['model'].unique(),
    disabled=False,
)

def update_brand(*args):
    """Limit the brand dropdown to brands listed for the selected year.

    Accepts and ignores any positional arguments so it can be used directly
    as a widget observer callback. Refreshes the model list afterwards.
    """
    listed_in_year = df_3['year'] == year.value
    brand.options = df_3.loc[listed_in_year, 'brand'].unique()
    # Refresh the models explicitly after the brand options have changed
    update_model()
    
def update_model(*args):
    """Limit the model selector to models listed for the selected year and brand.

    Accepts and ignores any positional arguments so it can be used directly
    as a widget observer callback.
    """
    matches_selection = (df_3['year'] == year.value) & (df_3['brand'] == brand.value)
    model.options = df_3.loc[matches_selection, 'model'].unique()

# Keep the dependent selectors in sync: a new year refreshes the brand list,
# and a new brand refreshes the model list.
year.observe(update_brand, names='value')
brand.observe(update_model, names='value')

# Initialise from the top of the cascade. Calling only update_model() here
# would leave the brand dropdown offering every brand in the dataset, including
# brands with no listings for the initially selected year. update_brand()
# narrows the brands first and then refreshes the model list itself.
update_brand()
@interact(year=year, brand=brand, model=model)
def plot_used_cars(year, brand, model):
    """Scatter price against mileage for the selected year, brand and models.

    Inside this function the parameters hold the *selected values* of the
    widgets with the same names, not the widgets themselves.

    Parameters
    ----------
    year : value of the 'year' column to keep.
    brand : value of the 'brand' column to keep.
    model : tuple of selected 'model' values; an empty selection keeps every
        model of the chosen year and brand.
    """
    # Filter the dataset based on the selected options
    filtered_df = df_3[(df_3['year'] == year) & (df_3['brand'] == brand)]
    if model:
        filtered_df = filtered_df[filtered_df['model'].isin(model)]

    # With nothing to plot, report it and stop. Otherwise the result is a blank
    # chart plus a legend warning about missing labelled artists.
    if filtered_df.empty:
        print(f'No listings found for {year} {brand}.')
        return

    fig, ax = plt.subplots(figsize=(10, 6))
    # data= is passed by keyword, matching the other seaborn calls in this
    # notebook and avoiding reliance on positional-data support.
    sns.scatterplot(
        data=filtered_df,
        x='price',
        y='mileage',
        hue='model',
        palette='tab10',
        s=60,
        ax=ax,
    )
    ax.set_xlabel('Price')
    ax.set_ylabel('Mileage')
    ax.legend(loc='upper right', title='model')
    ax.set_title(f'{year} {brand}')
    plt.show()


interactive(children=(Dropdown(description='Year', options=(2017, 2016, 2019, 2015, 2014, 2018, 2013, 2020, 20…