In [1]:
pip install streamlit dash plotly

Collecting dash
  Downloading dash-2.18.2-py3-none-any.whl.metadata (10 kB)
Collecting dash-html-components==2.0.0 (from dash)
  Downloading dash_html_components-2.0.0-py3-none-any.whl.metadata (3.8 kB)
Collecting dash-core-components==2.0.0 (from dash)
  Downloading dash_core_components-2.0.0-py3-none-any.whl.metadata (2.9 kB)
Collecting dash-table==5.0.0 (from dash)
  Downloading dash_table-5.0.0-py3-none-any.whl.metadata (2.4 kB)
Collecting retrying (from dash)
  Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)
Downloading dash-2.18.2-py3-none-any.whl (7.8 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.8/7.8 MB 28.1 MB/s eta 0:00:00
Downloading dash_core_components-2.0.0-py3-none-any.whl (3.8 kB)
Downloading dash_html_components-2.0.0-py3-none-any.whl (4.1 kB)
Downloading dash_table-5.0.0-py3-none-any.whl (3.9 kB)
Downloading retrying-1.3.4-py3-none-any.whl (11 kB)
Installing collected packages: dash-tab

In [14]:
import streamlit as st
import pandas as pd
import plotly.express as px

# ============================
# 📂 Load Required Data
# ============================

# Load the two CSV inputs and validate their schemas up front, so the plotting
# sections never run against missing or malformed data. Every failure is
# reported with st.error and the script is then halted with st.stop().

# Columns the benchmarking charts read, and columns the prediction chart reads.
BENCHMARK_COLUMNS = (
    "Scale",
    "CSV Read Time (s)",
    "Parquet Read Time (s)",
    "CSV Size (MB)",
    "Parquet Size (MB)",
)
PREDICTION_COLUMNS = ("name", "date", "actual_close", "predicted_close")


def _read_csv_or_stop(path, producer):
    """Read `path` with pandas, or report the problem and stop the script.

    Args:
        path: CSV file to load.
        producer: Name of the step that writes the file; shown in the
            "not found" message so the user knows what to run first.

    Returns:
        The loaded DataFrame.

    Raises:
        FileNotFoundError, pandas.errors.EmptyDataError,
        pandas.errors.ParserError: re-raised only if st.stop() returns
        instead of halting, so execution never continues without the data.
    """
    try:
        return pd.read_csv(path)
    except FileNotFoundError:
        st.error(f"❌ Error: `{path}` not found! Run {producer} first.")
        st.stop()
        raise
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # An empty or malformed file is as unusable as a missing one.
        st.error(f"❌ Error: `{path}` could not be read as CSV: {exc}")
        st.stop()
        raise


def _require_columns_or_stop(frame, required, path):
    """Report and stop if `frame` lacks any of the `required` columns.

    Args:
        frame: DataFrame loaded from `path`.
        required: Iterable of column names that must be present.
        path: File name used in the error message.

    Raises:
        ValueError: only if st.stop() returns instead of halting.
    """
    missing = sorted(set(required) - set(frame.columns))
    if not missing:
        return
    message = f"❌ Error: `{path}` is missing required columns: {', '.join(missing)}"
    st.error(message)
    st.stop()
    raise ValueError(message)


# Benchmark results (written by Part 1).
benchmark_file = "benchmark_results.csv"
benchmark_results = _read_csv_or_stop(benchmark_file, "Part 1")
_require_columns_or_stop(benchmark_results, BENCHMARK_COLUMNS, benchmark_file)

# Stock predictions (written by Part 2).
predictions_file = "stock_predictions.csv"
predictions = _read_csv_or_stop(predictions_file, "Part 2")
_require_columns_or_stop(predictions, PREDICTION_COLUMNS, predictions_file)

# ============================
# 🌟 Section A: Benchmarking Results
# ============================

# Page title, then the raw benchmark table followed by two grouped bar charts.
st.title("📊 Stock Data Benchmarking & Predictions")
st.subheader("⚡ Benchmarking Results (CSV vs. Parquet)")
st.dataframe(benchmark_results)


def _grouped_bar_by_scale(metric_columns, chart_title):
    """Return a grouped bar chart of `metric_columns` against "Scale".

    Args:
        metric_columns: Column names of `benchmark_results` to plot as
            side-by-side bars.
        chart_title: Title shown above the chart.
    """
    return px.bar(
        benchmark_results,
        x="Scale",
        y=metric_columns,
        barmode="group",
        title=chart_title,
    )


# Read times: CSV vs. Parquet.
fig_time = _grouped_bar_by_scale(
    ["CSV Read Time (s)", "Parquet Read Time (s)"],
    "📊 Read Time Comparison",
)
st.plotly_chart(fig_time)

# On-disk size: CSV vs. Parquet.
fig_size = _grouped_bar_by_scale(
    ["CSV Size (MB)", "Parquet Size (MB)"],
    "📂 File Size Comparison",
)
st.plotly_chart(fig_size)

# ============================
# 📈 Section B: Stock Predictions
# ============================

# Section B: let the user pick a company and compare its actual and predicted
# closing prices over time.
st.subheader("📈 Stock Price Prediction")

# Validate the schema before any column is read, and name what is missing.
required_columns = {"name", "date", "actual_close", "predicted_close"}
missing_columns = sorted(required_columns - set(predictions.columns))
if missing_columns:
    st.error(
        f"❌ Error: `{predictions_file}` is missing required columns: "
        f"{', '.join(missing_columns)}"
    )
    st.stop()

# Rows without a company name can never match the selection filter below, so
# they are not offered as options.
companies = predictions["name"].dropna().unique()
if len(companies) == 0:
    st.warning(f"⚠️ `{predictions_file}` contains no companies to display.")
    st.stop()

company = st.selectbox("Select a company:", companies)

# Keep only the selected company's rows.
company_data = predictions[predictions["name"] == company]

# A line chart joins points in row order, so parse the dates and sort by them;
# otherwise an unsorted CSV draws a zig-zag line.
# NOTE(review): assumes `date` holds date strings — confirm against Part 2.
parsed_dates = pd.to_datetime(company_data["date"], errors="coerce")
unparseable_rows = int(parsed_dates.isna().sum())
if unparseable_rows:
    st.warning(
        f"⚠️ Skipped {unparseable_rows} row(s) for {company} with an unparseable date."
    )
company_data = (
    company_data.assign(date=parsed_dates)
    .dropna(subset=["date"])
    .sort_values("date")
)

# Plot actual vs. predicted prices.
fig_pred = px.line(
    company_data,
    x="date",
    y=["actual_close", "predicted_close"],
    labels={"value": "Stock Price", "date": "Date"},
    title=f"📊 Predicted vs. Actual Prices for {company}"
)
st.plotly_chart(fig_pred)

st.write("🔍 Select a different company to see predictions.")

st.success("✅ Dashboard Loaded Successfully!")


DeltaGenerator()