In [25]:
import pandas as pd
import altair as alt

# --- Data loading -----------------------------------------------------------
# The CSV is read from a path relative to the notebook's working directory.
licenses_url = "licenses_fall.csv"
licenses = pd.read_csv(licenses_url)

# Preview of the leading rows. Because this is not the last expression of the
# cell, Jupyter does not render it; only `plot1` at the bottom is displayed.
licenses.head()

# --- Plot 1: distribution of licenses per user (static) ---------------------
# A "user" is identified by the (first name, last name) pair; the group size
# is the number of license rows recorded for that pair.
degree_count = (
    licenses
    .groupby(['First Name', 'Last Name'])
    .size()
    .reset_index(name='license_count')
)

# Keep only users holding more than one license.
degree_count_filtered = degree_count.loc[degree_count['license_count'].gt(1)]

# Histogram: x is the binned per-user license count, y is how many users fall
# into each bin.
license_count_axis = alt.X(
    'license_count',
    bin=alt.Bin(maxbins=50),
    title='License Count (Degree)',
)
user_count_axis = alt.Y('count()', title='Number of Users')

plot1 = (
    alt.Chart(degree_count_filtered)
    .mark_bar()
    .encode(
        x=license_count_axis,
        y=user_count_axis,
        color=alt.value("steelblue"),
    )
    .properties(title='License Count Distribution of Users')
)

# Last expression of the cell: renders the chart.
plot1



In [50]:
# NOTE(review): `licenses_sample` is not defined in the visible part of this
# notebook; presumably an earlier cell derives it from `licenses`. Confirm the
# cell still runs after "Restart Kernel -> Run All".

# Number of equal-width bins used to group license numbers along the x-axis.
LICENSE_NUMBER_BINS = 20

# Clean and enrich the frame in a single non-mutating chain. `.assign` works on
# a copy, so the object that was bound to `licenses_sample` before this cell is
# left untouched, and no column is written into the result of `dropna()` (the
# pattern that can trigger pandas' SettingWithCopyWarning).
licenses_sample = (
    licenses_sample
    # Values that cannot be parsed as numbers become NaN ...
    .assign(**{
        'License Number': lambda df: pd.to_numeric(df['License Number'], errors='coerce')
    })
    # ... and the rows holding them are discarded.
    .dropna(subset=['License Number'])
    # With the NaNs gone the column can be stored as integers.
    .astype({'License Number': int})
    # Integer bin index (0 .. LICENSE_NUMBER_BINS - 1) for systematic grouping.
    .assign(**{
        'License Number (Binned)': lambda df: pd.cut(
            df['License Number'],
            bins=LICENSE_NUMBER_BINS,
            labels=False,
        )
    })
)

# License types offered in the dropdown. Missing values are dropped so that NaN
# never becomes a dropdown option or the initial selection.
unique_licenses = licenses_sample['License Type'].dropna().unique()

# Dropdown selection for one specific license type; it starts on the first
# available type, or without an initial value when there is none.
license_dropdown = alt.binding_select(options=list(unique_licenses), name="Select License Type: ")
initial_license = unique_licenses[0] if len(unique_licenses) else alt.Undefined
license_select = alt.selection_point(fields=['License Type'], bind=license_dropdown, value=initial_license)

# Brush selection to highlight data points within the selected region.
brush = alt.selection_interval(encodings=['x', 'y'])

base = alt.Chart(licenses_sample).mark_circle(size=60).encode(
    # The column already holds pandas bin indices, so it is plotted as-is;
    # binning it again in the chart would merge the bins chosen above.
    x=alt.X('License Number (Binned):Q', title='License Number (Binned)'),
    y=alt.Y('License Number:Q', title='Original License Number'),
    # A point keeps its license-type colour only when it matches the dropdown
    # AND lies inside the brush. An empty brush selects everything, so before
    # any brushing only the dropdown decides.
    color=alt.condition(license_select & brush, 'License Type:N', alt.value('lightgray')),
    tooltip=['License Number', 'License Type', '_id']
).properties(
    width=600,
    height=400,
    title='Interactive Scatter Plot of License Types (Binned) with Brush and Dropdown Filter'
).add_params(
    brush,
    license_select
)

# NOTE(review): `.interactive()` adds a scale-bound interval selection (pan/zoom)
# that listens to the same drag gesture as `brush`. Consider binding one of the
# two to a modifier key if the combination proves awkward.
plot2 = base.interactive()

# Display the interactive plot
plot2



<div class="left">
  {% include elements/button.html link="https://github.com/username/repository/raw/main/licenses_fall.csv" text="The Data" %}
</div>

<div class="right">
  {% include elements/button.html link="https://github.com/sanyamii/repository/blob/main/notebooks/your_notebook.ipynb" text="The Analysis" %}
</div>



<div class="left">
  {% include elements/button.html link="https://github.com/username/repository/raw/main/licenses_fall.csv" text="The Data" %}
</div>

<div class="right">
  {% include elements/button.html link="https://github.com/username/repository/blob/main/notebooks/your_notebook.ipynb" text="The Analysis" %}
</div>
