In [None]:
#importing required packages
import pandas as pd
import altair as alt
from vega import VegaLite
from vega.widget import VegaWidget

In [None]:
#Data Processing
# Load the CSV export and derive the cleaned frame `df` used by every later cell.
# Shapes are printed explicitly: a notebook cell only displays its final
# expression, so a bare `df.shape` in the middle of the cell shows nothing.
df = pd.read_csv('Chicago_Traffic_Tracker_-_Historical_Congestion_Estimates_by_Segment_-_2018-Current.csv')
print('rows, columns as loaded:', df.shape)

# Drop every row that has at least one missing value (rebinding instead of
# mutating in place keeps each step a plain expression).
df = df.dropna()
print('rows, columns after dropna:', df.shape)

# Keep only rows whose SPEED is zero or positive.
df = df[df['SPEED'] >= 0]
print('rows, columns after SPEED >= 0 filter:', df.shape)

# Preview of the cleaned frame (displayed as the cell output).
df.head(10)

In [None]:
# Second name for the cleaned frame, read by the Altair cells further down.
# This is an alias, not a copy: `df_clean` and `df` refer to the same
# DataFrame object, so an in-place change to one is visible through the other.
df_clean = df

In [None]:
#Task 2 - 1st. Interactive Visualization using VegaLite (Interactive Street Speed Visualization)
# Layered chart over the first 100,000 rows of `df`:
#   1. bars showing the mean SPEED per STREET, with a hover selection on x;
#   2. a line drawn from the raw SPEED rows of the street currently hovered.
# The aggregate transform is scoped to the bar layer only. An aggregate keeps
# just its group-by and output fields, so running it at the top level would
# remove SPEED from the rows that the line layer plots.
# NOTE(review): `selection` is the Vega-Lite v4 spelling, while `$schema` names
# v5 (which prefers `params`). Left unchanged because the Vega-Lite version
# bundled with the `vega` package is not visible from this notebook - confirm
# before migrating.
vegainteract = {
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
  "data": {"values": df[:100000].to_dict(orient="records")},
  # Chart size is declared once for the whole layered view.
  "width": 400,
  "height": 200,
  "layer": [
    {
      # Per-street mean speed feeding the bars.
      "transform": [
        {"aggregate": [{"op": "mean", "field": "SPEED", "as": "average_speed"}], "groupby": ["STREET"]}
      ],
      # Hovering a bar selects its x value (STREET); nothing is selected initially.
      "selection": {
        "barHover": {"type": "single", "encodings": ["x"], "on": "mouseover", "empty": "none"}
      },
      "mark": "bar",
      "encoding": {
        "x": {"field": "STREET", "type": "nominal", "sort": "-y", "title": "Street"},
        "y": {"field": "average_speed", "type": "quantitative", "title": "Average Speed"},
        # The hovered bar is fully opaque, the rest are dimmed.
        "opacity": {
          "condition": {"selection": "barHover", "value": 1},
          "value": 0.7
        },
        "tooltip": [
          {"field": "STREET", "title": "Street"},
          {"field": "average_speed", "title": "Average Speed"}
        ]
      }
    },
    {
      # Unaggregated rows restricted to the hovered street.
      "transform": [
        {"filter": {"selection": "barHover"}}
      ],
      "mark": "line",
      "encoding": {
        "x": {"field": "STREET", "type": "nominal", "title": "Street"},
        "y": {"field": "SPEED", "type": "quantitative", "title": "Speed"}
      }
    }
  ]
}
VegaLite(vegainteract)

In [None]:
#Task 2 - 2nd. Interactive Visualization using VegaLite (Linked Line Chart and Bubble Chart)
# The spec is assembled from named pieces: two helpers for the encodings that
# both views repeat, one dict per view, then the vertical concatenation.


def _street_color():
    """Return a fresh colour encoding keyed on STREET (category20 scheme)."""
    return {
        "field": "STREET",
        "type": "nominal",
        "scale": {"scheme": "category20"},
    }


def _street_opacity():
    """Return a fresh opacity encoding: 1 inside `streetName`, 0.2 outside."""
    return {
        "condition": {"selection": "streetName", "value": 1},
        "value": 0.2,
    }


# Upper view: average SPEED per HOUR, one line per street. It owns the
# `streetName` selection, which is driven by mouseover and projected on STREET.
_speed_by_hour_view = {
    "selection": {
        "streetName": {"type": "single", "on": "mouseover", "fields": ["STREET"]},
    },
    "mark": "line",
    "encoding": {
        "x": {"field": "HOUR", "type": "ordinal", "title": "Hour of the Day"},
        "y": {
            "aggregate": "average",
            "field": "SPEED",
            "type": "quantitative",
            "title": "Average Speed",
        },
        "color": _street_color(),
        "opacity": _street_opacity(),
    },
}

# Lower view: summed BUS_COUNT per STREET as fixed-size points, filtered by the
# selection defined in the upper view.
_buses_by_street_view = {
    "mark": "point",
    "encoding": {
        "x": {"field": "STREET", "type": "nominal", "title": "Street Name"},
        "y": {
            "aggregate": "sum",
            "field": "BUS_COUNT",
            "type": "quantitative",
            "title": "Number of Buses",
        },
        "size": {"value": 200},
        "color": _street_color(),
        "opacity": _street_opacity(),
    },
    "transform": [
        {"filter": {"selection": "streetName"}},
    ],
}

vega1 = {
    "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
    # Inline the first 100,000 rows of df as records.
    "data": {"values": df[:100000].to_dict(orient="records")},
    "vconcat": [_speed_by_hour_view, _buses_by_street_view],
}
VegaLite(vega1)

In [None]:
#Task 2: Interactive visualization 3 (Linked Scatter Plot and Histogram)
from vega_datasets import data
import pandas as pd
import altair as alt

# Both views are built from the first 4,999 rows of the cleaned frame.
traffic_data = df_clean[:4999]

# Single selection projected on SEGMENT_ID; an empty selection matches every row.
selection = alt.selection_single(fields=['SEGMENT_ID'], empty='all')

# View 1: SPEED against BUS_COUNT, sized by MESSAGE_COUNT. Points inside the
# selection are coloured by SPEED (viridis); the others are light gray.
scatter_plot = (
    alt.Chart(traffic_data)
    .mark_circle()
    .encode(
        x=alt.X('SPEED:Q', title='Speed'),
        y=alt.Y('BUS_COUNT:Q', title='Bus Count'),
        color=alt.condition(
            selection,
            'SPEED:Q',
            alt.value('lightgray'),
            scale=alt.Scale(scheme='viridis'),
            title='Speed',
        ),
        size=alt.Size('MESSAGE_COUNT:Q', title='Message Count'),
        tooltip=['SPEED:Q', 'BUS_COUNT:Q', 'MESSAGE_COUNT:Q'],
    )
    .add_selection(selection)
    .properties(height=300, width=400)
)

# Rows feeding the histogram: SPEED present and strictly positive, again
# limited to 4,999 rows.
_has_positive_speed = traffic_data['SPEED'].notna() & traffic_data['SPEED'].gt(0)
filtered_data = traffic_data.loc[_has_positive_speed].head(4999)

# View 2: binned SPEED counts, restricted to the scatter plot's selection.
histogram = (
    alt.Chart(filtered_data)
    .mark_bar()
    .encode(
        x=alt.X('SPEED:Q', bin=True, title='Speed Binned'),
        y=alt.Y('count():Q', title='Count'),
        color=alt.value('steelblue'),
    )
    .transform_filter(selection)
    .properties(height=150, width=400)
)

# Stack the scatter plot above the histogram and display the result.
linked_view = alt.vconcat(scatter_plot, histogram)

linked_view


In [None]:
#Task 2: Interactive visualization 4 (Linked Line Chart and Heatmap)
import altair as alt
import pandas as pd

# Both views read the first 4,999 rows of the cleaned frame.
traffic_data = df_clean[:4999]

# Multi-selection projected over the colour encoding of the chart it is added to.
click = alt.selection_multi(encodings=['color'])

# View 1: SPEED over TIME, coloured by STREET when selected and light gray otherwise.
line_chart = (
    alt.Chart(traffic_data)
    .mark_line()
    .encode(
        x=alt.X('TIME:T', title='Time'),
        y=alt.Y('SPEED:Q', title='Speed'),
        color=alt.condition(
            click,
            'STREET:N',
            alt.value('lightgray'),
            title='Street',
        ),
        tooltip=['STREET:N', 'TIME:T', 'SPEED:Q'],
    )
    .add_selection(click)
    .properties(height=200, width=600)
)

# View 2: average SPEED per HOUR and DAY_OF_WEEK cell, filtered by the
# line chart's selection.
heatmap = (
    alt.Chart(traffic_data)
    .mark_rect()
    .encode(
        x=alt.X('HOUR:O', title='Hour'),
        y=alt.Y('DAY_OF_WEEK:O', title='Day of Week'),
        color=alt.Color(
            'average(SPEED):Q',
            scale=alt.Scale(scheme='viridis'),
            title='Average Speed',
        ),
        tooltip=['HOUR:O', 'DAY_OF_WEEK:O', 'average(SPEED):Q'],
    )
    .transform_filter(click)
    .properties(height=200, width=600)
)

# Line chart on top, heatmap underneath; the bare name displays the result.
linked_views = line_chart & heatmap

linked_views


In [None]:
#Task 2: Interactive visualization 5 (Linked Bar Chart and Bubble Chart)
import altair as alt
import pandas as pd

# Work on the first 4,999 rows of the cleaned frame.
data = df_clean
data = data[:4999]

# Top 10 congested streets.
# Congestion is read here as a LOW average SPEED, so the ten smallest per-street
# means are taken (the ten largest would be the fastest-flowing streets).
# NOTE(review): rows with SPEED == 0 pass the upstream `SPEED >= 0` filter -
# confirm they are genuine readings, since they pull a street's mean downwards.
top_streets = data.groupby('STREET')['SPEED'].mean().nsmallest(10).reset_index()

# Multi-selection projected on STREET; an empty selection matches every street.
select_street = alt.selection_multi(fields=['STREET'], empty='all', name='StreetSelector')

# Bar chart for Top 10 Congested Streets: selected bars are steel blue, the
# others light gray.
bar_chart = alt.Chart(top_streets).mark_bar().encode(
    x=alt.X('STREET:N', title='Street', axis=alt.Axis(labelAngle=-45)),
    y=alt.Y('SPEED:Q', title='Average Congestion (Speed)'),
    color=alt.condition(select_street, alt.ColorValue("steelblue"), alt.ColorValue("lightgray")),
    tooltip=['STREET:N', 'SPEED:Q']
).add_selection(select_street).properties(
    width=600
)

# Base chart over the row-level data, filtered by the streets selected in the
# bar chart. Note that `filtered_data` is an alt.Chart here, not a DataFrame.
filtered_data = alt.Chart(data).transform_filter(select_street)

# Bubble chart showing Bus Count vs. Congestion, coloured by LENGTH.
base_bubble = filtered_data.mark_circle(size=60).encode(
    x=alt.X('BUS_COUNT:Q', title='Bus Count'),
    y=alt.Y('SPEED:Q', title='Congestion Level (Speed)'),
    color=alt.Color('LENGTH:Q', scale=alt.Scale(scheme='viridis'), title='Segment Length (miles)'),
    tooltip=['STREET:N', 'BUS_COUNT:Q', 'SPEED:Q', 'LENGTH:Q']
).properties(
    width=600,
    height=400
)

# Combine the bar chart and bubble chart vertically
linked_bubble_chart = bar_chart & base_bubble

# Display a titled copy; `linked_bubble_chart` itself stays untitled.
linked_bubble_chart.properties(
    title='Top 10 Congested Streets and Bubble Chart'
)
