# 1. Data Exploration

In [61]:
# Step 1: Importing packages
import altair as alt
import polars as pl
import pandas as pd
import pathlib as Path
import numpy as np
from datetime import datetime
from vega_datasets import data
import re
import math
#alt.data_transformers.enable("vegafusion")

In [62]:
Path = "/Users/mariajosereyesramirez/Documents/Autumn quarter 2025/Data Viz/Project/raw data/All_Historical_Data_Separately/Geopolitical Risk Index Daily.csv"
geo_risk = pd.read_csv(Path)
geo_risk.columns = geo_risk.columns.str.strip()
type(geo_risk['GPRD'])

pandas.core.series.Series

In [63]:
geo_risk['DATE']=pd.to_datetime(geo_risk['DATE'], dayfirst=True, errors='coerce')
geo_risk = geo_risk.dropna(subset=['DATE']).set_index('DATE').sort_index()

# Cleaning and GPRD to float
num_gdrp = geo_risk['GPRD'].astype(str).str.strip()

# Cleaning data for symbols and special characters
num_gdrp = num_gdrp.str.replace(r'[^\d,.\-]', '', regex=True)

mask_decimal_coma = num_gdrp.str.contains(',') & ~num_gdrp.str.contains(r'\.')
num_gdrp = num_gdrp.where(~mask_decimal_coma, num_gdrp.str.replace(',', '.', regex=False))
num_gdrp = num_gdrp.str.replace(',', '', regex=False).str.replace(' ', '', regex=False)

geo_risk['GPRD'] = pd.to_numeric(num_gdrp, errors='coerce')

# Converting the other columns
for c in ['GPRD_ACT', 'GPRD_THREAT']:
    if c in geo_risk.columns and geo_risk[c].dtype == 'object':
        t = geo_risk[c].astype(str).str.strip()
        t = t.str.replace(r'[^\d,.\-]', '', regex=True)
        mask = t.str.contains(',') & ~t.str.contains(r'\.')
        t = t.where(~mask, t.str.replace(',', '.', regex=False))
        t = t.str.replace(',', '', regex=False).str.replace(' ', '', regex=False)
        geo_risk[c] = pd.to_numeric(t, errors='coerce')

# Checking data types
print(geo_risk[['GPRD','GPRD_ACT','GPRD_THREAT']].dtypes)
print(geo_risk[['GPRD','GPRD_ACT','GPRD_THREAT']].head(3))


GPRD           float64
GPRD_ACT       float64
GPRD_THREAT    float64
dtype: object
              GPRD  GPRD_ACT  GPRD_THREAT
DATE                                     
1985-01-01  230.04    275.20       153.03
1985-01-02  115.68    146.77        87.44
1985-01-03   97.43    158.94        29.46


In [64]:
measure_cols = [c for c in ['GPRD', 'GPRD_ACT', 'GPRD_THREAT'] if c in geo_risk.columns]
# Time periods
monthly = geo_risk[measure_cols].resample('MS').mean().reset_index().rename(columns={'DATE': 'date'})
annual = geo_risk[measure_cols].resample('Y').mean().reset_index().rename(columns={'DATE': 'year_end'})

# Decades
tmp = geo_risk[measure_cols].copy()
tmp['year'] = geo_risk.index.year
tmp['decade'] = pd.cut(tmp['year'], bins=[1989, 1999, 2009, 2019, 2025], labels=['1989–1999','2000–2009','2010–2019', '2020-2025'])
decadal = (tmp.dropna(subset=['decade'])
           .groupby('decade')[measure_cols]
           .mean()
           .reset_index())

print(decadal)

      decade        GPRD    GPRD_ACT  GPRD_THREAT
0  1989–1999   91.692607   77.853450   101.818469
1  2000–2009  114.240750  135.223567    99.796348
2  2010–2019   93.286295   87.270140    97.574765
3  2020-2025  117.090019  101.883585   135.866083


  annual = geo_risk[measure_cols].resample('Y').mean().reset_index().rename(columns={'DATE': 'year_end'})
  .groupby('decade')[measure_cols]


In [65]:
print(monthly)

          date        GPRD    GPRD_ACT  GPRD_THREAT
0   1985-01-01  102.877742   91.824194   106.769677
1   1985-02-01  116.861071   97.070714   125.718571
2   1985-03-01  124.430968  115.170968   128.587742
3   1985-04-01   88.570000   73.419667    95.778000
4   1985-05-01  101.943226   91.601935   108.894839
..         ...         ...         ...          ...
484 2025-05-01  164.700968  147.016129   202.527419
485 2025-06-01  218.458000  179.304333   281.886667
486 2025-07-01  133.862903  119.761613   155.873226
487 2025-08-01  146.931290  133.121935   166.171935
488 2025-09-01  133.366250  121.438750   151.542500

[489 rows x 4 columns]


## Monthly Geopolitical Risk Index (single)
The following chart shows the monthly behavior of the Geopolitical Risk Index (GPRD) from 1985 to 2025. This index measures "adverse geopolitical events based n tally of news paaper articles covering geopolitical tensions, and examine its evolution and economic effects since 1900". 

The index considers 10 news papers: 

- Chicago Tribune
- The Daily Telegraph
- Financial Times
- The Globe and Mail 
- The Guardian
- The Los Angeles Times
- The New York Times
- USA Today
- The Wall Street Journal 
- The Washington Post

From the chart it can be observed that there are 3 major pikes in the data. For 3 different time periods, each one of 5 years, some possible events that might have caused the pikes are:

**1.  1990-1995:**
    *Gulf War* Iraq's invasion of Kuwait and the subsequent U.S. led coalition war to liberate Kuwait. 

**2. 2000-2005:**
    *9/11* Terrorist attacks in the U.S.
    *Invasion of Afghanistan* The start of the "War on Terror" campaing added sustained geopolitical tension. 
    *Iraq War* The U.S.-led invasion of Iraq.

**3. 2020-2025**
    *2022* Russian invasion of Ukraine, one of the highest peaks after 9/11, reflecting direct military conflict between a nuclear power and Western-backed Ukraine.
    *Middle East Tensions* Rising hostilities involving Iran, Israel, and non-state actors increased geopolitical risk.
    *COVID-19 Pandemic* This is not precisely a geopolitial event, but it triggered global instability and economic shocks that indirectly increased perceived risk and international tensions.



# Chatgpt "I want to make the horizontal axis of this graph longer and the line thiner, how can I do that in altair ? "
gprd = (alt.Chart(monthly).mark_line().encode(
    x='date:T',
    y='GPRD:Q'
    )
    .properties(title='Monthly Geopolitical Risk Index (1986-2025)',
                width=800,
                height=300)
)
# Step 2: Adding labels to higest peaks in chart 
events = pd.DataFrame({
    "date": pd.to_datetime([
        '1990-08-01',
        '1991-02-01',
        '2001-10-01',
        '2002-09-01',
        '2022-03-01',
    ]),
    'label': [
        "Kuwait's Invasion",
        "Gulf War",
        "Afganisthan's Invasion",
        "9/11",
        "Ukraine's Invasion"
    ]
})

ann_base = (
    alt.Chart(events)
    .transform_lookup(
        lookup='date',
        from_=alt.LookupData(monthly, 'date', ['GPRD'])
    )
)
rules = ann_base.mark_rule(strokeDash=[4,4], opacity=0.6).encode(
    x='date:T'
)
points = ann_base.mark_point(size=60).encode(
    x='date:T',
    y='GPRD:Q'
)
labels = ann_base.mark_text(align='left', dx=6, dy=-8).encode(
    x='date:T',
    y='GPRD:Q',
    text='label:N'
)

labels_chart = gprd + rules + points + labels
labels_chart.save('figures/gprd_index.png')

labels_chart


alt.Chart(monthly).mark_line().encode(
        x='Date:T',
        y='GPRD_ACT:Q',
        color='risk_index:Q'
    )

# Monthly Geopolitical Risk Index, GPRD_ACT, GPRD_THREAT

This data set counts with 2 subsets of the Geopolitical risk index, which are: 

**1.  GPRD_ACT** 
This index captures realized geopolitical acts between 1985 and 2019. Although originally measured on a daily basis, for the purposes of this project the data has been aggregated into monthly averages. 

**2. GPDR_THREAT** This subindex captures perceived geopolitical threats between 1985-2019

The idea of this graph is to compare the behavior of the main index (GPRD), with its corresponding subindexes, and see which one has more incidence on the main index. If the therat of a geopoitical act or the actual occurrence of a geopolitical act that brings uncertainty to the market. 


In [66]:
assert {'date', 'GPRD', 'GPRD_ACT', 'GPRD_THREAT'}.issubset(set(monthly.columns)), monthly.columns
indexes = ['GPRD', 'GPRD_ACT', 'GPRD_THREAT']

# ChatGPT "How can I make a graph in Altair that puts the three indexes in a single chart? I'm trying this syntax but is not working"
risk_behavior = (  
    alt.Chart(monthly)
    .transform_fold(indexes, as_=['metric', 'value'])
    .mark_line()
    .encode(
        x=alt.X('date:T', title='Date'),
          y=alt.Y('value:Q', title='Index'),
          color=alt.Color('metric:N', title='Serie'),
          tooltip=[
              alt.Tooltip('yearmonth(date):O', title='Month'),
              alt.Tooltip('metric:N', title='Serie'),
              alt.Tooltip('value:Q', title='Value', format='.2f')
          ]
      )
      .properties(title='Risk Indexes Behavior (1989-2025)',
                  width=800,
                  height=300)
      .interactive()
)


risk_behavior.save('figures/risk_behavior.png')

risk_behavior

## Periods Peaks Closer look
Considering the analysis of the previous two charts, the purpose of this visualization is to focus on the three time periods with the highest peaks in the data in order to understand what occurred during those years and how different newspapers included in the index reported these events. As mentioned before, these peaks correspond to three major geopolitical crises: the Gulf War (1990–1991), the 9/11 terrorist attacks (2001), and the Russian invasion of Ukraine (2022).

From this closer look, we can observe that the factor influencing the global index the most is not necessarily an ongoing war, but rather the threat of one—or, more broadly, the anticipation of geopolitical conflict. This raises an important question: Which scenario makes financial markets more vulnerable—an active war or the threat of geopolitical instability that prompts investors to relocate assets and adjust investment strategies?

In [67]:
if 'date' not in monthly.columns:
    monthly = monthly.reset_index().rename(columns={'DATE':'date'})
monthly['date'] = pd.to_datetime(monthly['date'])

indexes = [c for c in ['GPRD','GPRD_ACT','GPRD_THREAT'] if c in monthly.columns]
ymax = float(pd.concat([monthly[i] for i in indexes], axis=1).max().max())
ymax_nice = math.ceil(ymax/50)*50

def zoom_chart(df, start, end, title, show_y=True):
    return (
        alt.Chart(df)
          .transform_filter(
              (alt.datum.date >= pd.to_datetime(start)) &
              (alt.datum.date <= pd.to_datetime(end))
          )
          .transform_fold(indexes, as_=['metric','value'])
          .mark_line()
          .encode(
              x=alt.X('date:T', title='', scale=alt.Scale(domain=[start, end])),
              y=alt.Y('value:Q',
                      title='Index' if show_y else '',
                      axis=None if not show_y else alt.Axis(),
                      scale=alt.Scale(domain=[0, ymax_nice])),
              color=alt.Color('metric:N', title='Geopolitcal risk index'),
              tooltip=[
                  alt.Tooltip('yearmonth(date):O', title='Month'),
                  alt.Tooltip('metric:N', title='Serie'),
                  alt.Tooltip('value:Q', title='Value', format='.2f')
              ]
          )
          .properties(title=title, width=350, height=220)
    )

c1 = zoom_chart(monthly, '1990-01-01', '1995-12-31', '1990–1995')
c2 = zoom_chart(monthly, '2000-01-01', '2005-12-31', '2000–2005')
c3 = zoom_chart(monthly, '2020-01-01', '2025-12-31', '2020–2025')

year_periodchart = (c1 | c2 | c3).resolve_scale(y='independent')

year_periodchart.save('figures/year_periodchart.png')

year_periodchart


## Stocks Market Analysis

In [68]:
# Reading Data
Path_stocks = "/Users/mariajosereyesramirez/Documents/Autumn quarter 2025/Data Viz/Project/raw data/Gold-Silver-GeopoliticalRisk_HistoricalData.csv"
stocks_data = pd.read_csv(Path_stocks)
stocks_data
stocks_data.columns
stocks_data.head()

Unnamed: 0,DATE,GOLD_PRICE,GOLD_OPEN,GOLD_HIGH,GOLD_LOW,GOLD_CHANGE_%,SILVER_PRICE,SILVER_OPEN,SILVER_HIGH,SILVER_LOW,SILVER_CHANGE_%,GPRD,GPRD_ACT,GPRD_THREAT,EVENT
0,2025-09-10,3630.9,3633.61,3634.42,3620.9,-0.07,40.92,40.89,40.94,40.72,0.09,,,,
1,2025-09-09,3633.61,3637.1,3674.75,3625.33,-0.06,40.89,41.34,41.5,40.77,-1.13,,,,
2,2025-09-08,3635.84,3586.82,3646.6,3579.67,1.24,41.36,41.01,41.68,40.51,1.2,117.26,97.42,146.26,
3,2025-09-07,3591.19,3592.07,3596.56,3586.95,0.12,40.86,41.0,41.01,40.76,-0.34,83.51,111.0,92.59,
4,2025-09-05,3586.81,3547.0,3600.33,3540.05,1.15,41.01,40.69,41.44,40.55,0.76,166.42,110.61,224.05,


In [69]:
stocks_data['GOLD_CHANGE_%']

0       -0.07
1       -0.06
2        1.24
3        0.12
4        1.15
         ... 
10566    1.27
10567   -1.62
10568    0.26
10569   -1.05
10570   -0.91
Name: GOLD_CHANGE_%, Length: 10571, dtype: float64

In [70]:
alt.data_transformers.disable_max_rows()

gold_first_demo = (
    alt.Chart(stocks_data)
      .transform_filter(alt.datum["GOLD_CHANGE_%"] != None)
      # creating months for Altair
      .transform_timeunit(month="yearmonth(DATE)")      
      .transform_aggregate(
          # computing mean values in Altair
          gold_change_pct="mean(GOLD_CHANGE_%)", 
          groupby=["month"]
      )
      .transform_calculate(row='"Gold"')              
      .mark_rect(stroke="white", strokeWidth=0.2)
      .encode(
          x=alt.X("month:T", axis=alt.Axis(title="", format="%b-%y", labelOverlap=True)),
          y=alt.Y("row:N", axis=alt.Axis(title="")),
          color=alt.Color("gold_change_pct:Q",
                          scale=alt.Scale(scheme="redblue", domain=[-2, 1]),
                          legend=alt.Legend(title="MoM change (%)", format=".1f")),
          tooltip=[alt.Tooltip("month:T", title="Month"),
                   alt.Tooltip("gold_change_pct:Q", title="Change (%)", format=".2f")]
      )
      .properties(title="Gold month-on-month change (%)", width=900, height=60)
      .configure_view(stroke=None)
)

gold_first_demo.save('figures/gold_first_demo.png')

gold_first_demo

In [71]:
alt.data_transformers.disable_max_rows()

gold_silver_var = (
    alt.Chart(stocks_data)
      .transform_fold(
          ["GOLD_CHANGE_%", "SILVER_CHANGE_%"],
          as_=["metal_raw", "value"]
      )
      .transform_calculate(
          metal="datum.metal_raw == 'GOLD_CHANGE_%' ? 'Gold' : 'Silver'"
      )
      .transform_filter("isValid(datum.value) && isValid(datum.DATE)")
      .transform_timeunit(month="yearmonth(DATE)")       
      .transform_aggregate(
          value="mean(value)",                            
          groupby=["month", "metal"]
      )
      .mark_rect(stroke="Black", strokeWidth=0.2)
      .encode(
          x=alt.X("month:T", axis=alt.Axis(title="", format="%b-%y", labelOverlap=True)),
          y=alt.Y("metal:N", sort=["Gold","Silver"], axis=alt.Axis(title="")),
          color=alt.Color("value:Q",
                          scale=alt.Scale(scheme="redblue", domain=[-1, 1]),
                          legend=alt.Legend(title="MoM change (%)", format=".1f")),
          tooltip=[
              alt.Tooltip("month:T", title="Month"),
              alt.Tooltip("metal:N", title="Metal"),
              alt.Tooltip("value:Q", title="Change (%)", format=".2f"),
          ]
      )
      .properties(title="Gold & Silver month-on-month change (%)", width=900, height=110)
      .configure_view(stroke=None)
)

gold_silver_var.save('figures/gold_silver_var.png')
gold_silver_var


In [72]:
# testing new graphs, not convinced
silver = (
    stocks_data[['DATE','SILVER_PRICE','SILVER_CHANGE_%']].dropna()
    .assign(DATE=lambda d: pd.to_datetime(d['DATE']))
)

bars = (
    alt.Chart(silver).mark_bar(opacity=0.45)
    .encode(
        x=alt.X('DATE:T', title=''),
        y=alt.Y('SILVER_CHANGE_%:Q', axis=alt.Axis(title='Change % (MoM)', orient='right')),
        color=alt.condition(
            alt.datum["SILVER_CHANGE_%"] >= 0,
            alt.value('#2ca02c'),
            alt.value('#d62728')
        )
    )
)

line = (
    alt.Chart(silver).mark_line(strokeWidth=2)
    .encode(
        x=alt.X('DATE:T', title=''),
        y=alt.Y('SILVER_PRICE:Q', axis=alt.Axis(title='Silver Price (USD/oz)'))
    )
)

chart_ts = (
    alt.layer(line, bars)
      .resolve_scale(y='independent')
      .properties(title='Silver: price vs variation %', width=900, height=320)
      .configure_view(stroke=None)
)
chart_ts


In [73]:
# testing new graphs, not convinced
gold = (
    stocks_data[['DATE','GOLD_PRICE','GOLD_CHANGE_%']].dropna()
    .assign(DATE=lambda d: pd.to_datetime(d['DATE']))
)

bars = (
    alt.Chart(gold).mark_bar(opacity=0.45)
    .encode(
        x=alt.X('DATE:T', title=''),
        y=alt.Y('GOLD_CHANGE_%:Q', axis=alt.Axis(title='variation % (MoM)', orient='right')),
        color=alt.condition(
            alt.datum["GOLD_CHANGE_%"] >= 0,
            alt.value('#2ca02c'),
            alt.value('#d62728')
        )
    )
)

line = (
    alt.Chart(gold).mark_line(strokeWidth=2)
    .encode(
        x=alt.X('DATE:T', title=''),
        y=alt.Y('GOLD_PRICE:Q', axis=alt.Axis(title='Gold Price (USD/oz)'))
    )
)

chart_ts = (
    alt.layer(line, bars)
      .resolve_scale(y='independent')
      .properties(title='Gold: price vs variation %', width=900, height=320)
      .configure_view(stroke=None)
)
chart_ts


In [74]:

stocks_monthly = stocks_data.copy()
stocks_monthly['DATE'] = pd.to_datetime(stocks_monthly['DATE'])

monthly_close = (
    stocks_monthly.set_index('DATE')[['GOLD_PRICE', 'SILVER_PRICE']]
      .resample('M').last()
)

mom = (
    monthly_close.pct_change().mul(100)
      .reset_index()
      .rename(columns={'GOLD_PRICE': 'Gold', 'SILVER_PRICE':'Silver'})
)

tidy = (
    mom.melt(id_vars='DATE', var_name='metal', value_name='mom_pct')
       .dropna(subset=['mom_pct'])
)

vmax = float(tidy['mom_pct'].abs().quantile(0.98))
vmax = max(1.0, round(vmax, 1))

alt.data_transformers.disable_max_rows()

stocks_heat = (
    alt.Chart(tidy)
      .mark_rect(stroke='black', strokeWidth=0.2)
      .encode(
          x=alt.X('yearmonth(DATE):T', title='', axis=alt.Axis(format='%b-%y', labelOverlap=True)),
          y=alt.Y('metal:N', sort=['Gold', 'Silver'], title=''),
          color=alt.Color('mom_pct:Q',
                          scale=alt.Scale(scheme='redblue', domain=[-vmax, vmax]),
                          legend=alt.Legend(title='MoM (close-to-close) %', format='.1f')),
          tooltip=[
              alt.Tooltip('yearmonth(DATE):T', title='Month'),
              alt.Tooltip('metal:N', title='Metal'),
              alt.Tooltip('mom_pct:Q', title='MoM (%)', format='.2f'),
          ]
      )
      .properties(title='Gold & Silver — Monthly close-to-close % change', width=900, height=110)
      .configure_view(stroke=None)
)
stocks_heat.save('figures/stocks_heat.png')

stocks_heat 


  .resample('M').last()


In [75]:
# GPRD monthly (taking the first date)
gprd_m = monthly.copy()
if 'date' not in gprd_m.columns and 'DATE' in gprd_m.columns:
    gprd_m = gprd_m.rename(columns={'DATE':'date'})
gprd_m['date'] = pd.to_datetime(gprd_m['date'])
# MS: Month Start
gprd_m['month'] = gprd_m['date'].dt.to_period('M').dt.to_timestamp()  
gprd_m = gprd_m[['month','GPRD']].dropna()

# Gold % MoM (close-to-close)
gold = stocks_data[['DATE','GOLD_PRICE']].copy()
gold['DATE'] = pd.to_datetime(gold['DATE'])
gold_m = (gold.set_index('DATE')
          # ME for Month End
              .resample('ME').last()                   
              .pct_change().mul(100)
              .rename(columns={'GOLD_PRICE':'Gold_MoM_%'})
              .reset_index()
              .rename(columns={'DATE':'month'}))
# Changing ME to MS
gold_m['month'] = gold_m['month'].dt.to_period('M').dt.to_timestamp() 
gold_m = gold_m.dropna(subset=['Gold_MoM_%'])

# Making an Inner join

df = (pd.merge(gprd_m, gold_m, on='month', how='inner')
        .sort_values('month'))

# Rolling correlation (12 month er row numer
win = 12
s = df.set_index('month')[['GPRD','Gold_MoM_%']].sort_index()
corr12 = (s['GPRD']
          .rolling(window=win, min_periods=max(3, win//2))
          .corr(s['Gold_MoM_%'])
          .reset_index(name='corr')
          .dropna())

# Chart
corr_line = (
    alt.Chart(corr12).mark_line()
      .encode(
          x=alt.X('month:T', title=''),
          y=alt.Y('corr:Q', title=f'Rolling Correlation ({win} months)', scale=alt.Scale(domain=[-1,1]))
      )
      .properties(width=900, height=160, title='GPRD vs. Gold MoM — Rolling Correlation')
)

corr_line.save('figures/corr_line.png')
corr_line


In [76]:
# Silver correlation
silver = stocks_data[['DATE','SILVER_PRICE']].copy()
silver['DATE'] = pd.to_datetime(silver['DATE'])
silver_m = (silver.set_index('DATE')
                 .resample('ME').last()
                 .pct_change().mul(100)
                 .rename(columns={'SILVER_PRICE':'Silver_MoM_%'})
                 .reset_index().rename(columns={'DATE':'month'}))
silver_m['month'] = silver_m['month'].dt.to_period('M').dt.to_timestamp()

# merging with GPRD
df2 = (df[['month','GPRD','Gold_MoM_%']]
       .merge(silver_m, on='month', how='inner')
       .sort_values('month'))

# computing correlations

pairs = [('Gold_MoM_%','Gold'), ('Silver_MoM_%','Silver')]
out = []
for col, label in pairs:
    s2 = df2.set_index('month')[['GPRD', col]].dropna()
    c = (s2['GPRD'].rolling(12, min_periods=6).corr(s2[col])
         .reset_index(name='corr').dropna())
    c['pair'] = label
    out.append(c)

corr_pairs = pd.concat(out, ignore_index=True)

chart_pairs = (
    alt.Chart(corr_pairs).mark_line()
      .encode(
          x=alt.X('month:T', title=''),
          y=alt.Y('corr:Q', title='Rolling Correlation (12 months)', scale=alt.Scale(domain=[-1,1])),
          color=alt.Color('pair:N', title='Against')
      )
      .properties(width=900, height=220, title='GPRD rolling correlation with Gold/Silver MoM')
)

zero_rule = alt.Chart(pd.DataFrame({'y':[0]})).mark_rule(strokeDash=[4,4], color='gray').encode(y='y:Q')


(chart_pairs + zero_rule).save('figures/(chart_pairs + zero_rule).png')
(chart_pairs + zero_rule)



In [None]:
# Testing density chart
layer_test =(
    alt.Chart(tidy).mark_area(opacity=0.3).transform_density(
        'mom_pct', groupby=['metal'], as_=['mom_pct','density'])
        .encode(x='mom_pct:Q', y='density:Q', color='metal:N'))

layer_test.save('figures/layer_test.png')
layer_test

In [87]:
density_silver_gold = (
    alt.Chart(tidy)
      .transform_density(
          'mom_pct', groupby=['metal'], as_=['mom_pct', 'density']
      )
      .mark_area(opacity=0.3)
      .encode(
          x=alt.X('mom_pct:Q', title='Month-over-Month change (%)'),
          y=alt.Y('density:Q', title='Density'),
          color=alt.Color('metal:N', title='Metal')
      )
      .properties(
          title=alt.TitleParams(
              text='Gold vs Silver — Distribution of Monthly Price Changes',
              subtitle='Kernel density estimate (MoM %), close-to-close',
               # 'start'|'middle'|'end'
              anchor='start'  
          ),
          width=800, height=420
      )
)

density_silver_gold.save('figures/density_silver_gold.png')
density_silver_gold


In [None]:
# Copying orginal Dataframe

tidy_3p = tidy.copy()
tidy_3p['DATE'] = pd.to_datetime(tidy_3p['DATE'])
# Creating conditions (bools) to look for specific dates
conditions = [
    (tidy_3p['DATE'] >= '1990-01-01') & (tidy_3p['DATE'] <= '1995-12-31'),
    (tidy_3p['DATE'] >= '2000-01-01') & (tidy_3p['DATE'] <= '2005-12-31'),
    (tidy_3p['DATE'] >= '2020-01-01') & (tidy_3p['DATE'] <= '2025-12-31'),
]
labels = ['1990–1995', '2000–2005', '2020–2025']
# tags for labels gicen the date condition
tidy_3p['period'] = np.select(conditions, labels, default='Other')
tidy_3p = tidy_3p[tidy_3p['period'] != 'Other']


In [95]:
# Base using data from tidy_3pBase  (mom_pct, metal, period)
base = alt.Chart(tidy_3p)

# 1st layer density by period and metal tyoe
period_metal_dens = (
    base.transform_density(
        'mom_pct',
        groupby=['metal', 'period'],
        as_=['mom_pct', 'density']
    )
    .mark_area(opacity=0.35)
    .encode(
        x=alt.X('mom_pct:Q', title='Monthly % change'),
        y=alt.Y('density:Q', title='Density'),
        color=alt.Color('metal:N', title='Metal', sort=['Gold','Silver'])
    )
)

# 2nd layer: vertical rule in 0 
rule0 = (
    base.transform_calculate(x0='0')
        .mark_rule(strokeDash=[3,3], opacity=0.6)
        .encode(x='x0:Q')
)

# layer and then facet
layered = alt.layer(period_metal_dens, rule0).properties(width=320, height=220)

fin_sil_gold = (
    layered
      .facet(column=alt.Column('period:N', title=None, sort=['1990–1995','2000–2005','2020–2025']))
      .resolve_scale(x='shared', y='shared')
      .properties(title='Gold & Silver Monthly Change — Density by Period')
)
fin_sil_gold.save('figures/fin_sil_gold.png')
fin_sil_gold


In [103]:
box_variation = (alt.Chart(tidy).mark_boxplot()
                 .encode(x='year(DATE):O', y='mom_pct:Q', color='metal:N')
                 .properties(title='Gold vs Silver - Monthly Cahnge by Year')
)

box_variation.save('figures/box_variation.png')
box_variation



In [None]:
alt.Chart(long_df).mark_point(opacity=0.4).encode(
  x='value:Q', y='value2:Q'
).facet(row='var:N', column='var2:N')

NameError: name 'long_df' is not defined