In [6]:
import pandas as pd
import plotly.express as px

# Read the mammals records and keep only the rows where both
# measurements (weight_g and length_mm) are present.
df = pd.read_json('mammals_rand_2_norm.json').dropna(
    subset=['weight_g', 'length_mm']
)

# 3D scatter: year on x, length_mm on y, weight_g on z. Points are coloured
# by year, and 'title' is added to the hover data.
fig = px.scatter_3d(
    df,
    x='year',
    y='length_mm',
    z='weight_g',
    color='year',
    hover_data=['title'],
)

# Render the figure in the cell output
fig.show()

In [27]:
# Keep the 50 rows of df with the largest length_mm values
top_50_df = df.nlargest(50, 'length_mm')

# 3D bubble chart of those 50 rows: year / length_mm / weight_g on the axes.
# Both marker size and marker colour are driven by length_mm.
fig = px.scatter_3d(
    top_50_df,
    hover_data=['title'],
    x='year',
    y='length_mm',
    z='weight_g',
    size='length_mm',
    color='length_mm',
)

# Put the z axis (weight_g) on a logarithmic scale
fig.update_layout(scene_zaxis_type="log")

# Show the plot
fig.show()


In [25]:
from ipywidgets import interact
import plotly.graph_objects as go

# Factor applied to every length total before it is placed on the gauge.
# NOTE(review): the notebook does not say what 180 represents, and the gauge
# title still reads "(mm)" - confirm the intended unit.
GAUGE_SCALE = 180

# df_sorted is not created by any other visible cell, so build it here from
# the cleaned mammals frame; without this the cell fails on a fresh kernel.
# NOTE(review): assumes df_sorted was simply df ordered by year - confirm.
df_sorted = df.sort_values('year')

# Running total of length_mm within each year (it restarts for every year).
# The gauge below does not read this column.
df_sorted['cumulative_length'] = df_sorted.groupby('year')['length_mm'].cumsum()


def update_gauge(selected_year):
    """Show a gauge of the summed length_mm for all years up to selected_year.

    Args:
        selected_year: Upper bound (inclusive) on the 'year' column; rows of
            df_sorted with year <= selected_year contribute to the value.

    The gauge value is that sum multiplied by GAUGE_SCALE. The axis ends at
    the sum over all rows multiplied by GAUGE_SCALE, the two background steps
    split the axis at half of that, and a red threshold line marks the end.
    The figure is displayed with fig.show(); nothing is returned.
    """
    lengths = df_sorted['length_mm']

    # Compute the axis end once per redraw instead of re-summing the column
    # for every range entry.
    full_scale = lengths.sum() * GAUGE_SCALE
    midpoint = full_scale / 2
    cumulative_length = lengths[df_sorted['year'] <= selected_year].sum() * GAUGE_SCALE

    fig = go.Figure(go.Indicator(
        domain={'x': [0, 1], 'y': [0, 1]},
        value=cumulative_length,
        mode="gauge+number",
        title={'text': "Cumulative Length (mm)"},
        gauge={
            'axis': {'range': [None, full_scale]},
            'steps': [
                {'range': [0, midpoint], 'color': "lightgray"},
                {'range': [midpoint, full_scale], 'color': "gray"},
            ],
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': full_scale,
            },
        },
    ))
    fig.show()


# Create an interactive slider spanning the first to the last year in the data
interact(update_gauge, selected_year=(int(df_sorted['year'].min()), int(df_sorted['year'].max()), 1))


interactive(children=(IntSlider(value=1957, description='selected_year', max=2014, min=1901), Output()), _dom_…

<function __main__.update_gauge(selected_year)>

In [7]:
import pandas as pd
import plotly.express as px

# Load the data from the JSON file
df = pd.read_json('mammals_rand_2_norm.json')

# Exclude rows with missing weight_g or length_mm
df = df.dropna(subset=['weight_g', 'length_mm'])

# Scatter chart: year on x, length_mm on y (no scaling applied), coloured by
# length_mm. Every field is referenced by column name so the axis and hover
# labels come straight from the frame.
fig = px.scatter(
    df,
    x='year',
    y='length_mm',
    color='length_mm',
    hover_data=['title'],
    color_continuous_scale='Oranges',  # Any Plotly continuous colour scale name can be used here
)

# Update layout to change background color
fig.update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',  # Transparent plotting area
    paper_bgcolor='#FAF9F7',  # Background colour around the plot
    title='Mammals Length vs Year',  # Title of the plot
)

# Show the plot
fig.show()

In [14]:
# Columns that must both be present for a row to be kept
measured = ['weight_g', 'length_mm']

# Load the additional datasets, drop rows with a missing measurement, and tag
# each frame with the label used for the histogram's colour groups.
df_inv = pd.read_json('inv_rand_2_norm.json').dropna(subset=measured).assign(dataset='Invertebrates')
df_fishes = pd.read_json('fishes_rand_2_norm.json').dropna(subset=measured).assign(dataset='Fishes')
# NOTE(review): label changed from 'Anthropods' to match the 'anthro_' file
# prefix - confirm this is the anthropology collection.
df_anthro = pd.read_json('anthro_rand_2_norm.json').dropna(subset=measured).assign(dataset='Anthropology')
df_ento = pd.read_json('ento_rand_2_norm.json').dropna(subset=measured).assign(dataset='Entomology')

# df_birds was referenced below but never created in any visible cell.
# NOTE(review): the file name is assumed from the naming pattern of the other
# datasets - confirm the path before relying on this cell.
df_birds = pd.read_json('birds_rand_2_norm.json').dropna(subset=measured).assign(dataset='Birds')

# Combine all the dataframes. The mammals frame (df) has no 'dataset' column
# of its own, so label a copy of it here rather than mutating df.
df_combined = pd.concat([
    df.assign(dataset='Mammals'),
    df_birds,
    df_inv,
    df_fishes,
    df_anthro,
    df_ento,
])

# Create a histogram comparing the datasets
fig = px.histogram(df_combined, x='year', y='length_mm', color='dataset', barmode='overlay')

# Update the y-axis to use a logarithmic scale
fig.update_layout(yaxis_type="log")

# Show the plot
fig.show()

In [22]:
# Heatmap of df binned into a 30 x 30 grid over year (x) and length_mm (y),
# with weight_g supplied as the z value. The background colours are applied
# in the same chained expression.
fig = px.density_heatmap(
    df,
    title='Density Heatmap of Length vs Year',
    x='year',
    y='length_mm',
    z='weight_g',
    nbinsx=30,
    nbinsy=30,
    color_continuous_scale='Viridis',  # Any other continuous colour scale can be substituted
).update_layout(
    plot_bgcolor='rgba(0, 0, 0, 0)',  # Transparent plotting area
    paper_bgcolor='#FAF9F7',  # Background colour around the plot
)

# Render the figure in the cell output
fig.show()



In [17]:
# Violin plot of the length_mm column of df, drawn in a single colour on an
# all-white background; 'title' is added to the hover data.
fig = px.violin(
    df,
    y="length_mm",
    title="Violin Plot of Mammal Lengths",
    hover_data=["title"],
    color_discrete_sequence=["#FFCD70"],
).update_layout(
    plot_bgcolor='white',  # Plotting area
    paper_bgcolor='white',  # Area around the plot
)
fig.show()
