# Consumer Sentiment
> This notebook fetches, processes and visualizes the University of Michigan's [Index of Consumer Sentiment](http://www.sca.isr.umich.edu/), one of the most-cited measures of Americans' feelings about the US economy.

---

#### Import Python tools and Jupyter config

In [1]:
import us
import json
import requests
import pandas as pd
import jupyter_black
import altair as alt
from io import StringIO
from bs4 import BeautifulSoup

In [2]:
jupyter_black.load()
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = None

In [3]:
today = pd.Timestamp("today").strftime("%Y-%m-%d")

---

## Fetch

In [4]:
response = requests.get("http://www.sca.isr.umich.edu/")
page = BeautifulSoup(response.text, "html.parser")

In [5]:
latest = pd.read_html(StringIO(response.text), skiprows=0, header=[0, 1])[0]

In [6]:
latest

Unnamed: 0_level_0,Unnamed: 0_level_0,May,Apr,May,M-M,Y-Y
Unnamed: 0_level_1,Unnamed: 0_level_1.1,2024,2024,2023,Change,Change
0,Index of Consumer Sentiment,69.1,77.2,59.0,-10.5%,+17.1%
1,Current Economic Conditions,69.6,79.0,65.1,-11.9%,+6.9%
2,Index of Consumer Expectations,68.8,76.0,55.1,-9.5%,+24.9%


In [7]:
# Melt the DataFrame to get a long format
melted = pd.melt(latest, id_vars=[('Unnamed: 0_level_0', 'Unnamed: 0_level_1')])

# The melting operation might not automatically handle multi-level names well, so let's fix that
# Create new column names for the variables indicating the category and the time period
melted['Category'] = melted[('Unnamed: 0_level_0', 'Unnamed: 0_level_1')].apply(lambda x: x[0])
melted['Time Period'] = melted['variable'].apply(lambda x: f"{x[1]} {x[0]}")  # Format: "Year Month"

# Drop the original multi-level columns to tidy up
melted.drop(columns=[('Unnamed: 0_level_0', 'Unnamed: 0_level_1'), 'variable'], inplace=True)

# Rename the 'value' column to something more descriptive if necessary
melted.rename(columns={'value': 'Measurement'}, inplace=True)

# Now you have a tidy DataFrame
print(melted)

KeyError: 'variable'

In [None]:
src_table = pd.read_html('http://www.sca.isr.umich.edu/')

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x92 in position 5074: invalid start byte

#### Read monthly index of consumer sentiment from University of Michigan's [survey of consumers](http://www.sca.isr.umich.edu/)

In [None]:
src = pd.read_csv(
    "http://www.sca.isr.umich.edu/files/tbmics.csv",
    names=["month", "year", "index"],
    header=0,
    dtype={"year": str},
)

#### Create a date for timeseries

In [17]:
src["date"] = pd.to_datetime(src["month"] + " " + src["year"], format="%B %Y")

In [18]:
df = src.copy()

In [19]:
df.tail()

Unnamed: 0,month,year,index,date
641,October,2023,63.8,2023-10-01
642,November,2023,61.3,2023-11-01
643,December,2023,69.7,2023-12-01
644,January,2024,79.0,2024-01-01
645,February,2024,76.9,2024-02-01


----

#### Recessions

In [7]:
wiki_src = pd.read_html(
    "https://en.wikipedia.org/wiki/List_of_recessions_in_the_United_States"
)[2]

In [8]:
wiki_src.columns = (
    wiki_src.columns.str.lower()
    .str.replace(" ", "_")
    .str.replace("_(peak_to_trough)", "", regex=False)
)

In [9]:
wiki_slim = wiki_src[["name", "period_range", "gdp_decline"]].copy()

In [10]:
wiki_slim[["start", "end"]] = (
    wiki_slim["period_range"].str.split("[").str[0].str.split("–", expand=True)
)

In [11]:
wiki_slim["gdp_decline"] = (
    wiki_slim["gdp_decline"]
    .str.split("[")
    .str[0]
    .str.replace("%", "", regex=False)
    .str.replace("−", "", regex=False)
    .astype(float)
) * -1

In [12]:
wiki_slim.drop(["period_range"], axis=1, inplace=True)

In [13]:
wiki_slim["start"] = pd.to_datetime(wiki_slim["start"])
wiki_slim["end"] = pd.to_datetime(wiki_slim["end"])

In [14]:
wiki_slim["start_year"] = wiki_slim["start"].dt.year

In [15]:
wiki_slim["type"] = "Recessions"

In [16]:
wiki_slim["gdp_decline_pos"] = wiki_slim["gdp_decline"] * -1

---

## Charts

#### Consumers and recessions

In [26]:
lines = (
    alt.Chart(df)
    .mark_line(size=2)
    .encode(
        x=alt.X("date", title="", axis=alt.Axis(tickCount=7, format="%Y")),
        y=alt.Y("index", axis=alt.Axis(tickCount=6), title=" "),
    )
    .properties(
        title="Index of consumer sentiment and recessions",
    )
)

points = (
    alt.Chart(df.query("date == date.max()"))
    .mark_point(color="green")
    .encode(
        x=alt.X("date", title=" ", axis=alt.Axis(tickCount=7, format="%Y")),
        y=alt.Y("index", axis=alt.Axis(tickCount=6), title=""),
    )
)

text = (
    alt.Chart(df.query("date == date.max()"))
    .mark_text(dy=-15, color="black")
    .encode(
        x=alt.X("date", title="", axis=alt.Axis(tickCount=7, format="%Y")),
        y=alt.Y("index", axis=alt.Axis(tickCount=6), title=" "),
        text=alt.Text("index:Q"),
    )
)

rect = (
    alt.Chart(wiki_slim[4:])
    .mark_rect(color="#e6e6e6")
    .encode(
        x="start:T",
        x2="end:T",
    )
)

rect + lines + points + text

#### Recessions

In [18]:
bubbles = (
    alt.Chart(wiki_slim)
    .mark_circle(color="#00d4d8")
    .encode(
        alt.X("start_year:O", axis=alt.Axis(), title=" "),
        alt.Y("type:N", title=" "),
        alt.Size(
            "gdp_decline_pos:Q",
            scale=alt.Scale(range=[0, 3000]),
            legend=None,
        ),
    )
    .properties(width=650, height=120)
)

text = (
    alt.Chart(wiki_slim)
    .mark_text(dy=-40, color="black")
    .encode(
        x=alt.X("start_year:O"),
        y=alt.Y("type:N"),
        text=alt.Text("gdp_decline:Q", format=""),
    )
)

text2 = (
    alt.Chart(wiki_slim.query("name=='Great Depression' | name=='COVID-19 recession'"))
    .mark_text(dy=40, color="black")
    .encode(
        x=alt.X("start_year:O"),
        y=alt.Y("type:N"),
        text=alt.Text("name"),
    )
)

(bubbles + text + text2).properties(
    title="U.S. recessions by start year and GDP decline"
)

---

In [19]:
df.to_csv("data/processed/consumer_sentiment_1952_current.csv", index=False)

In [20]:
wiki_slim.to_csv("data/processed/recessions_1929_current.csv", index=False)