In [5]:
import altair as alt
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [23]:
import pandas as pd
import altair as alt



df = pd.read_excel("../data/new_data_file.xlsx")


df.columns = df.columns.astype(str).str.strip()


df = df.rename(columns={"Total ": "Total"})

print("Columns:", df.columns.tolist())
year_cols = [col for col in df.columns if col.isdigit()]
print("Detected year columns:", year_cols)


state_data_all = pd.melt(
    df,
    id_vars=["GeoFips", "States", "LineCode", "Industry", "Total"],
    value_vars=year_cols,
    var_name="Year",
    value_name="Total in USD Billions")



state_data_all["States"] = state_data_all["States"].astype(str).str.strip()
state_data_all["Industry"] = state_data_all["Industry"].astype(str).str.strip()
state_data_all["Year"] = state_data_all["Year"].astype(int)

# Remove national totals
state_data_all = state_data_all[state_data_all["States"] != "United States"]



industry_list = sorted(state_data_all["Industry"].unique().tolist())
year_list = sorted(state_data_all["Year"].unique().tolist())

industry_dropdown = alt.binding_select(
    options=industry_list,
    name="Select Industry: "
)

year_dropdown = alt.binding_select(
    options=year_list,
    name="Select Year: "
)

industry_select = alt.selection_point(
    fields=["Industry"],
    bind=industry_dropdown,
    value=industry_list[0]   # default first industry
)

year_select = alt.selection_point(
    fields=["Year"],
    bind=year_dropdown,
    value=year_list[0]
)


industry_colors = industry_colors = industry_colors = {
    "Agriculture, forestry, fishing and hunting": "#1f77b4",
    "Mining, quarrying, and oil and gas extraction": "#ff7f0e",
    "Utilities": "#2ca02c",
    "Construction": "#d62728",
    "Manufacturing": "#9467bd",
    "Wholesale trade": "#8c564b",
    "Retail trade": "#e377c2",
    "Transportation and warehousing": "#7f7f7f",
    "Information": "#bcbd22",
    "Finance and insurance": "#17becf",
    "Real estate and rental and leasing": "#aec7e8",
    "Professional, scientific, and technical services": "#ffbb78",
    "Management of companies and enterprises": "#98df8a",
    "Administrative and support and waste management and remediation services": "#ff9896",
    "Educational services": "#c5b0d5",
    "Health care and social assistance": "#c49c94",
    "Arts, entertainment, and recreation": "#f7b6d2",
    "Accommodation and food services": "#dbdb8d",
    "Other services (except government and government enterprises)": "#9edae5",
    "Government and government enterprises": "#ad494a",


    "Private industries": "#9ecae1",
    "All industry total": "#6baed6"
}





chart = (
    alt.Chart(state_data_all)
    .mark_bar()
    .encode(
        x=alt.X("States:N", sort="-y", title="States"),
        y=alt.Y("Total in USD Billions:Q", title="Earnings (USD Billions)"),

        color=alt.Color(
            "Industry:N",
            scale=alt.Scale(
                domain=list(industry_colors.keys()),
                range=list(industry_colors.values())
            ),
            legend=None
        ),

        tooltip=["States", "Industry", "Year", "Total in USD Billions"]
    )
    .add_selection(industry_select, year_select)
    .transform_filter(industry_select)
    .transform_filter(year_select)
    .properties(
        width=900,
        height=480,
        title="State-wise Earnings Comparison by Industry and Year"
    )
)

chart
chart.save("charts/chart1.html")


Columns: ['GeoFips', 'States', 'LineCode', 'Industry', 'Total', '2019', '2020', '2021', '2022', '2023', '2024']
Detected year columns: ['2019', '2020', '2021', '2022', '2023', '2024']


Deprecated since `altair=5.0.0`. Use add_params instead.
  .add_selection(industry_select, year_select)


In [None]:
import pandas as pd
import altair as alt


df = pd.read_excel("../data/industry_wise.xlsx")

df.columns = df.columns.astype(str).str.strip()

year_cols = [col for col in df.columns if col.isdigit()]

for col in year_cols:
    df[col] = df[col].astype(str).str.replace(",", "")
    df[col] = pd.to_numeric(df[col], errors="coerce")


state_data_all = pd.melt(
    df,
    id_vars=["GeoFips", "States", "LineCode", "Industry"],
    value_vars=year_cols,
    var_name="Year",
    value_name="Total in USD Billions"
)



state_data_all["States"] = state_data_all["States"].astype(str).str.strip()
state_data_all["Industry"] = state_data_all["Industry"].astype(str).str.strip()
state_data_all["Year"] = state_data_all["Year"].astype(int)

bad_labels = ["United States *", "United States", "United States*"]
state_data_all = state_data_all[~state_data_all["States"].isin(bad_labels)]


state_list = sorted(state_data_all["States"].unique().tolist())
year_list = sorted(state_data_all["Year"].unique().tolist())

state_dropdown = alt.binding_select(options=state_list, name="Select State: ")
year_dropdown = alt.binding_select(options=year_list, name="Select Year: ")

state_select = alt.selection_point(fields=["States"], bind=state_dropdown, value=state_list[0])
year_select = alt.selection_point(fields=["Year"], bind=year_dropdown, value=year_list[0])


industry_colors = {
    "Agriculture, forestry, fishing and hunting": "#1f77b4",
    "Mining, quarrying, and oil and gas extraction": "#ff7f0e",
    "Utilities": "#2ca02c",
    "Construction": "#d62728",
    "Manufacturing": "#9467bd",
    "Wholesale trade": "#8c564b",
    "Retail trade": "#e377c2",
    "Transportation and warehousing": "#7f7f7f",
    "Information": "#bcbd22",
    "Finance and insurance": "#17becf",
    "Real estate and rental and leasing": "#aec7e8",
    "Professional, scientific, and technical services": "#ffbb78",
    "Management of companies and enterprises": "#98df8a",
    "Administrative and support and waste management and remediation services": "#ff9896",
    "Educational services": "#c5b0d5",
    "Health care and social assistance": "#c49c94",
    "Arts, entertainment, and recreation": "#f7b6d2",
    "Accommodation and food services": "#dbdb8d",
    "Other services (except government and government enterprises)": "#9edae5",
    "Government and government enterprises": "#ad494a",
    "All industry total": "#6baed6",
    "Private industries": "#9ecae1"
}


chart = (
    alt.Chart(state_data_all)
    .mark_bar()
    .encode(
        y=alt.Y(
            "Industry:N",
            sort="-x",
            title="Industry",
            axis=alt.Axis(labelLimit=300)
        ),
        x=alt.X("Total in USD Billions:Q", title="Earnings (Billions USD)"),
        color=alt.Color(
            "Industry:N",
            scale=alt.Scale(
                domain=list(industry_colors.keys()),
                range=list(industry_colors.values())
            ),
            legend=None
        ),
        tooltip=["States", "Industry", "Year", "Total in USD Billions"]
    )
    .add_selection(state_select, year_select)
    .transform_filter(state_select)
    .transform_filter(year_select)
    .properties(
        width=900,
        height=480,
        title="Industry Earnings for Selected State and Year"
    )
)


chart
chart.save("charts/chart2.html")


Deprecated since `altair=5.0.0`. Use add_params instead.
  .add_selection(state_select, year_select)


In [25]:
import pandas as pd
import altair as alt



df = pd.read_excel("../data/new_data_file.xlsx")

# Clean column names
df.columns = df.columns.astype(str).str.strip()

# Identify actual year columns
year_cols = ["2019", "2020", "2021", "2022", "2023", "2024"]

for col in year_cols:
    df[col] = (
        df[col]
        .astype(str)
        .str.replace(",", "")
    )
    df[col] = pd.to_numeric(df[col], errors="coerce")



growth_map = {
    "2019-20": ("2019", "2020"),
    "2020-21": ("2020", "2021"),
    "2021-22": ("2021", "2022"),
    "2022-23": ("2022", "2023"),
    "2023-24": ("2023", "2024"),
}

for new_col, (prev_year, next_year) in growth_map.items():
    df[new_col] = (df[next_year] - df[prev_year]) / df[prev_year]

growth_cols = list(growth_map.keys())


df_long = pd.melt(
    df,
    id_vars=["GeoFips", "States", "LineCode", "Industry"],
    value_vars=growth_cols,
    var_name="Period",
    value_name="GrowthRate"
)



df_long["States"] = df_long["States"].astype(str).str.strip()
df_long["Industry"] = df_long["Industry"].astype(str).str.strip()

bad = ["United States", "United States*", "United States *"]
df_long = df_long[~df_long["States"].isin(bad)]


state_list = sorted(df_long["States"].unique().tolist())
industry_list = sorted(df_long["Industry"].unique().tolist())

state_dropdown = alt.binding_select(options=state_list, name="Select State: ")
industry_dropdown = alt.binding_select(options=industry_list, name="Select Industry: ")

state_select = alt.selection_point(fields=["States"], bind=state_dropdown, value=state_list[0])
industry_select = alt.selection_point(fields=["Industry"], bind=industry_dropdown, value=industry_list[0])


industry_colors = {
    "Agriculture, forestry, fishing and hunting": "#1f77b4",
    "Mining, quarrying, and oil and gas extraction": "#ff7f0e",
    "Utilities": "#2ca02c",
    "Construction": "#d62728",
    "Manufacturing": "#9467bd",
    "Wholesale trade": "#8c564b",
    "Retail trade": "#e377c2",
    "Transportation and warehousing": "#7f7f7f",
    "Information": "#bcbd22",
    "Finance and insurance": "#17becf",
    "Real estate and rental and leasing": "#aec7e8",
    "Professional, scientific, and technical services": "#ffbb78",
    "Management of companies and enterprises": "#98df8a",
    "Administrative and support and waste management and remediation services": "#ff9896",
    "Educational services": "#c5b0d5",
    "Health care and social assistance": "#c49c94",
    "Arts, entertainment, and recreation": "#f7b6d2",
    "Accommodation and food services": "#dbdb8d",
    "Other services (except government and government enterprises)": "#9edae5",
    "Government and government enterprises": "#ad494a",
    "All industry total": "#6baed6",
    "Private industries": "#9ecae1"
}



chart = (
    alt.Chart(df_long)
    .mark_line(point=True, strokeWidth=3)
    .encode(
        x=alt.X("Period:N", sort=growth_cols, title="Yearly Growth Period"),
        y=alt.Y("GrowthRate:Q", title="Growth Rate", axis=alt.Axis(format="%", labelOverlap=True)),
        color=alt.Color(
            "Industry:N",
            scale=alt.Scale(
                domain=list(industry_colors.keys()),
                range=list(industry_colors.values())
            ),
            legend=None
        ),
        tooltip=[
            "States",
            "Industry",
            "Period",
            alt.Tooltip("GrowthRate:Q", format=".1%")
        ]
    )
    .add_selection(state_select, industry_select)
    .transform_filter(state_select)
    .transform_filter(industry_select)
    .properties(
        width=700,
        height=400,
        title="Growth Trend for Selected State & Industry (2019â€“24)"
    )
)

chart
chart.save("charts/chart3.html")


Deprecated since `altair=5.0.0`. Use add_params instead.
  .add_selection(state_select, industry_select)
