In [2]:
!pip install altair


Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 23.1.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [13]:
from pathlib import Path

import pandas as pd

# Load the dataset
DATA_URL = "https://raw.githubusercontent.com/narenshetty98/narenshetty98.github.io/main/assets/building_inventory.csv"
raw_df = pd.read_csv(DATA_URL)

# Data cleaning
# Work on a copy so the frame as loaded stays available in `raw_df` and this
# cell can be re-run without re-downloading or compounding earlier filters.
df = raw_df.copy()

# Coerce the numeric columns first: errors='coerce' turns unparseable entries
# into NaN, so the conversion must happen before dropna for those rows to be
# removed along with the rows that were missing from the start.
numeric_columns = ['Year Acquired', 'Square Footage', 'Total Floors']
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# 'Total Floors' is not part of the required subset, so rows without a floor
# count are kept (they simply do not contribute to floor averages).
df = df.dropna(subset=['Year Acquired', 'Square Footage', 'Usage Description'])

# Filter valid years; .copy() makes the result an independent frame so later
# column assignments do not raise SettingWithCopyWarning.
df = df[df['Year Acquired'] >= 1800].copy()



# **Visualisation 1**

**Static Line Chart - Average number of floors of buildings acquired each year**

In [14]:
import altair as alt

# Aggregate data for the line chart: mean floor count of the buildings
# acquired in each year (missing floor counts are ignored by the mean).
line_data = df.groupby('Year Acquired').agg({'Total Floors': 'mean'}).reset_index()

# Create the line chart
line_chart = alt.Chart(line_data).mark_line(point=True).encode(
    # format='d' prints years as plain integers (1950) instead of the default
    # quantitative format with a thousands separator (1,950).
    x=alt.X('Year Acquired:Q', title='Year Acquired', axis=alt.Axis(format='d')),
    y=alt.Y('Total Floors:Q', title='Average Number of Floors'),
    tooltip=['Year Acquired', 'Total Floors']
).properties(
    title="Static Line Chart: Average Number of Floors in Buildings Acquired by Year",
    width=700,
    height=400
)

# Save the chart as a Vega-Lite JSON specification.
# The path is relative to the notebook's working directory rather than a
# user-specific absolute path, so the cell runs on any machine.
line_chart_dir = Path("charts")
line_chart_dir.mkdir(parents=True, exist_ok=True)
line_chart.save(str(line_chart_dir / "line_chart.json"))

line_chart


# **Visualisation 2**

**Interactive Bar Chart: Top 10 Counties by Building Count (Filtered by Usage Description)**

In [15]:
# Create dropdown for filtering by Usage Description.
# The options are passed as a sorted plain list so the dropdown is easy to scan.
usage_options = sorted(df['Usage Description'].unique())
usage_dropdown = alt.binding_select(options=usage_options, name='Filter by Usage:')
usage_selection = alt.selection_point(fields=['Usage Description'], bind=usage_dropdown)

# Aggregate data for the bar chart: one row per (County, Usage Description)
# pair with the number of buildings in that pair.
county_data = df.groupby(['County', 'Usage Description']).size().reset_index(name='Building Count')

# Keep the top 10 counties for each Usage Description.
# A stable descending sort followed by groupby().head(10) selects the same
# rows as a per-group nlargest(10) (ties keep their original order) without
# relying on groupby.apply touching the grouping column, which recent pandas
# versions deprecate.
top_counties = (
    county_data
    .sort_values('Building Count', ascending=False, kind='stable')
    .groupby('Usage Description')
    .head(10)
)

# Create interactive bar chart
top_county_bar_chart = alt.Chart(top_counties).mark_bar().encode(
    x=alt.X('Building Count:Q', title='Number of Buildings'),
    y=alt.Y('County:N', sort='-x', title='County'),
    color=alt.Color('Usage Description:N', title='Usage Description'),
    tooltip=['County', 'Building Count', 'Usage Description']
).add_params(
    # add_params replaces add_selection, which is deprecated in Altair 5.
    usage_selection
).transform_filter(
    usage_selection
).properties(
    title="Interactive Top 10 Counties by Building Count (Filtered by Usage Description)",
    width=700,
    height=400
)

# Save the chart as a Vega-Lite JSON specification.
# The path is relative to the notebook's working directory rather than a
# user-specific absolute path, so the cell runs on any machine.
bar_chart_dir = Path("charts")
bar_chart_dir.mkdir(parents=True, exist_ok=True)
top_county_bar_chart.save(str(bar_chart_dir / "interactive_bar chart.json"))

top_county_bar_chart

  ).add_selection(
