In [48]:
import pandas as pd
import numpy as np
import regex as re
import matplotlib.pyplot as plt
from plotnine import ggplot, aes, geom_line, labs, theme_minimal, theme, element_text
import plotly.express as px

In [49]:
# Read the news export (a CSV file) into a DataFrame
news_csv_path = 'Factiva_Indo_News.csv'
data = pd.read_csv(news_csv_path)

In [50]:
# Display basic information about the dataset: column names, non-null counts,
# dtypes and memory usage, as a quick check that the file loaded as expected
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6340 entries, 0 to 6339
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Headline         6340 non-null   object
 1   Number of Words  6340 non-null   object
 2   Date             6340 non-null   object
 3   Publisher        6340 non-null   object
 4   Publisher Code   6340 non-null   object
 5   Language         6340 non-null   object
 6   Copyright        6340 non-null   object
 7   Main Article     6340 non-null   object
dtypes: object(8)
memory usage: 396.4+ KB


In [51]:
# Preview the first rows to locate the columns used below:
# 'Main Article' (article text), 'Date' and 'Publisher Code'
data.head()

Unnamed: 0,Headline,Number of Words,Date,Publisher,Publisher Code,Language,Copyright,Main Article
0,TRADE MINISTERS OF APEC TO DISCUSS NEW GATT HERE.,585 words,4 October 1994,Jakarta Post,JKPOST,English,(c) 1994 The Jakarta Post,JAKARTA (JP): Ministers responsible for trade ...
1,GENERAL OUTLOOK FOR THE OIL MARKET. \n By Subroto,838 words,7 October 1994,Jakarta Post,JKPOST,English,(c) 1994 The Jakarta Post,The following is the first of a two-part artic...
2,GENERAL OUTLOOK FOR THE OIL MARKET (2).,"1,620 words",8 October 1994,Jakarta Post,JKPOST,English,(c) 1994 The Jakarta Post,The following is the second of a two-part arti...
3,TRADE LIBERALIZATION SOUGHT.,717 words,17 October 1994,Jakarta Post,JKPOST,English,(c) 1994 The Jakarta Post,JAKARTA (JP): A business advisory group of APE...
4,INFLATIONARY PRESSURES.,597 words,4 November 1994,Jakarta Post,JKPOST,English,(c) 1994 The Jakarta Post,"Over the last few months, President Soeharto h..."


In [52]:
# Define keyword categories.
# Every pattern is a whole-word (\b...\b) alternation and is matched
# case-insensitively by contains_epu_terms below.
economic_terms = r'\b(economic|economy)\b'
uncertainty_terms = r'\b(uncertain|uncertainty|uncertainties)\b'
policy_terms = (r'\b(policy|public|tax|taxation|taxes|taxed|spending|expenditure|fiscal|stimulus|budget|deficit|national debt|tariff|subsidies|subsidy|Bank Indonesia|central bank|reserves|interest rates|legislation|legislative|house of representatives|regulation)\b')

# Compile once so the patterns are not re-resolved for every article.
_epu_patterns = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (economic_terms, policy_terms, uncertainty_terms)
)


# Function to check if an article contains terms from all three categories
def contains_epu_terms(article):
    """Flag an article that mentions economy, policy AND uncertainty terms.

    Parameters
    ----------
    article : str
        Article text to scan. Any non-string value (None, NaN, ...) is
        treated as an article without text.

    Returns
    -------
    int
        1 if at least one term from each of the three categories occurs in
        the text, otherwise 0.
    """
    if not isinstance(article, str):
        # Missing text cannot contain any keyword; avoid a TypeError from re.
        return 0
    return 1 if all(pattern.search(article) for pattern in _epu_patterns) else 0

In [53]:
# Apply the function to classify articles as EPU-related (1) or not (0)
data['contains_epu'] = data['Main Article'].apply(lambda x: contains_epu_terms(str(x)))

# Parse the publication date; values that cannot be parsed are coerced to NaT
data['Date'] = pd.to_datetime(data['Date'], errors='coerce')

# Report coerced dates instead of losing them silently: rows without a date
# get no 'month' and are left out of any later groupby on that key.
n_unparsed_dates = int(data['Date'].isna().sum())
if n_unparsed_dates:
    print(f"Warning: {n_unparsed_dates} article(s) have a missing or unparseable Date")

# Calendar month of publication, used later as the aggregation key
data['month'] = data['Date'].dt.to_period('M')
data

Unnamed: 0,Headline,Number of Words,Date,Publisher,Publisher Code,Language,Copyright,Main Article,contains_epu,month
0,TRADE MINISTERS OF APEC TO DISCUSS NEW GATT HERE.,585 words,1994-10-04,Jakarta Post,JKPOST,English,(c) 1994 The Jakarta Post,JAKARTA (JP): Ministers responsible for trade ...,1,1994-10
1,GENERAL OUTLOOK FOR THE OIL MARKET. \n By Subroto,838 words,1994-10-07,Jakarta Post,JKPOST,English,(c) 1994 The Jakarta Post,The following is the first of a two-part artic...,1,1994-10
2,GENERAL OUTLOOK FOR THE OIL MARKET (2).,"1,620 words",1994-10-08,Jakarta Post,JKPOST,English,(c) 1994 The Jakarta Post,The following is the second of a two-part arti...,1,1994-10
3,TRADE LIBERALIZATION SOUGHT.,717 words,1994-10-17,Jakarta Post,JKPOST,English,(c) 1994 The Jakarta Post,JAKARTA (JP): A business advisory group of APE...,1,1994-10
4,INFLATIONARY PRESSURES.,597 words,1994-11-04,Jakarta Post,JKPOST,English,(c) 1994 The Jakarta Post,"Over the last few months, President Soeharto h...",1,1994-11
...,...,...,...,...,...,...,...,...,...,...
6335,Business\nRupiah Gains Ground Amid Declining D...,504 words,2024-12-30,The Jakarta Globe,JGLOBE,English,Copyright 2024. PT Jakarta Globe Media.,Jakarta. The Indonesian rupiah closed stronger...,1,2024-12
6336,Business\n Uncertainty Hits Crypto Industry A...,695 words,2024-12-30,The Jakarta Globe,JGLOBE,English,Copyright 2024. PT Jakarta Globe Media.,Jakarta. Uncertainty currently hits Indonesia’...,1,2024-12
6337,IDX falters in 2024 as global markets soar to ...,800 words,2024-12-31,The Jakarta Post,JKPOST,English,(c) 2024 The Jakarta Post,A stellar year for stock markets around the wo...,1,2024-12
6338,Business\nIndonesian Stock Market Faces Headwi...,302 words,2024-12-31,The Jakarta Globe,JGLOBE,English,Copyright 2024. PT Jakarta Globe Media.,Jakarta. The Indonesian stock market is set to...,1,2024-12


In [54]:
# Display name for each publisher code
publisher_names = {
    'BISNIS': 'Bisnis Indonesia',
    'TEMPOE': 'Tempo.co',
    'JKPOST': 'The Jakarta Post',
    'ANTARA': 'LKBN ANTARA',
    'JGLOBE': 'The Jakarta Globe',
    'TEMGEN': 'Tempo Magazine',
}

# Rebuild 'Publisher' from 'Publisher Code'; codes that are not listed above
# are carried over unchanged by replace()
data['Publisher'] = data['Publisher Code'].replace(publisher_names)

data

Unnamed: 0,Headline,Number of Words,Date,Publisher,Publisher Code,Language,Copyright,Main Article,contains_epu,month
0,TRADE MINISTERS OF APEC TO DISCUSS NEW GATT HERE.,585 words,1994-10-04,The Jakarta Post,JKPOST,English,(c) 1994 The Jakarta Post,JAKARTA (JP): Ministers responsible for trade ...,1,1994-10
1,GENERAL OUTLOOK FOR THE OIL MARKET. \n By Subroto,838 words,1994-10-07,The Jakarta Post,JKPOST,English,(c) 1994 The Jakarta Post,The following is the first of a two-part artic...,1,1994-10
2,GENERAL OUTLOOK FOR THE OIL MARKET (2).,"1,620 words",1994-10-08,The Jakarta Post,JKPOST,English,(c) 1994 The Jakarta Post,The following is the second of a two-part arti...,1,1994-10
3,TRADE LIBERALIZATION SOUGHT.,717 words,1994-10-17,The Jakarta Post,JKPOST,English,(c) 1994 The Jakarta Post,JAKARTA (JP): A business advisory group of APE...,1,1994-10
4,INFLATIONARY PRESSURES.,597 words,1994-11-04,The Jakarta Post,JKPOST,English,(c) 1994 The Jakarta Post,"Over the last few months, President Soeharto h...",1,1994-11
...,...,...,...,...,...,...,...,...,...,...
6335,Business\nRupiah Gains Ground Amid Declining D...,504 words,2024-12-30,The Jakarta Globe,JGLOBE,English,Copyright 2024. PT Jakarta Globe Media.,Jakarta. The Indonesian rupiah closed stronger...,1,2024-12
6336,Business\n Uncertainty Hits Crypto Industry A...,695 words,2024-12-30,The Jakarta Globe,JGLOBE,English,Copyright 2024. PT Jakarta Globe Media.,Jakarta. Uncertainty currently hits Indonesia’...,1,2024-12
6337,IDX falters in 2024 as global markets soar to ...,800 words,2024-12-31,The Jakarta Post,JKPOST,English,(c) 2024 The Jakarta Post,A stellar year for stock markets around the wo...,1,2024-12
6338,Business\nIndonesian Stock Market Faces Headwi...,302 words,2024-12-31,The Jakarta Globe,JGLOBE,English,Copyright 2024. PT Jakarta Globe Media.,Jakarta. The Indonesian stock market is set to...,1,2024-12


In [55]:
# How many articles were flagged as EPU-related (1) versus not (0)
data['contains_epu'].value_counts()

contains_epu
1    6236
0     104
Name: count, dtype: int64

In [56]:
# Coverage overview: one marker per article, placed at its publication date
# on the row of its publisher code and coloured by publisher name
fig = px.scatter(
    data,
    x='Date',
    y='Publisher Code',
    color='Publisher',
    title='Article Coverages per Publisher Over Time',
    labels={'Date': 'Date', 'Publisher': 'Publisher'},
)

scatter_layout = {
    'xaxis_title': '',
    'yaxis_title': 'Publisher Code',
    'xaxis': {'tickangle': 0},
    'template': 'plotly_white',
}
fig.update_layout(**scatter_layout)
fig.show()

In [57]:
# Keep only the columns needed for the index; reset_index() also stores the
# original row labels in an extra 'index' column
epu_columns = ['month', 'Publisher Code', 'contains_epu']
epu_df = data.loc[:, epu_columns].reset_index()

In [58]:
# Clean and prepare the dataset
epu_df = epu_df.rename(columns={
    'month': 'Month',
    'Publisher Code': 'Newspaper',
    'contains_epu': 'Frequency'
})

# Ensure correct data types: turn the monthly Period into a timestamp.
# `.dt.to_timestamp()` only exists for Period data, so the guard keeps this
# cell re-runnable once 'Month' has already been converted.
if isinstance(epu_df['Month'].dtype, pd.PeriodDtype):
    epu_df['Month'] = epu_df['Month'].dt.to_timestamp()


In [59]:
# X_it: number of flagged articles for newspaper i in month t
# (sum of the 0/1 'Frequency' flag within each Month/Newspaper group)
epu_df = (
    epu_df
    .groupby(['Month', 'Newspaper'], as_index=False)['Frequency']
    .sum()
)
epu_df

Unnamed: 0,Month,Newspaper,Frequency
0,1994-10-01,JKPOST,4
1,1994-11-01,JKPOST,5
2,1994-12-01,JKPOST,3
3,1995-01-01,JKPOST,5
4,1995-03-01,JKPOST,2
...,...,...,...
948,2024-12-01,ANTARA,4
949,2024-12-01,JGLOBE,12
950,2024-12-01,JKPOST,13
951,2024-12-01,TEMGEN,4


In [60]:
# Windows used for standardization (T1) and normalization (T2);
# both currently span the same dates
T1_start, T1_end = '1994-10-01', '2024-12-31'  # interval T1
T2_start, T2_end = '1994-10-01', '2024-12-31'  # interval T2

# Step 1: per-newspaper variance Var(i) of the monthly counts inside T1
# (both interval bounds are inclusive)
in_T1 = epu_df['Month'].between(T1_start, T1_end)
T1_data = epu_df.loc[in_T1]
var_i = T1_data.groupby('Newspaper')['Frequency'].var()
var_i


Newspaper
ANTARA    11.831421
BISNIS     3.494943
JGLOBE     6.694969
JKPOST    41.645008
TEMGEN     2.170008
TEMPOE     2.903500
Name: Frequency, dtype: float64

In [61]:
# Step 2: Standardize Xit by dividing through by the standard deviation STDi
std_i = np.sqrt(var_i)

# Drop derived columns left over from an earlier run of this cell; otherwise
# the merge would produce STD_x / STD_y and the 'STD' lookup below would fail.
epu_df = (
    epu_df
    .drop(columns=['STD', 'Yit'], errors='ignore')
    .merge(std_i.rename('STD'), on='Newspaper')
)
epu_df['Yit'] = epu_df['Frequency'] / epu_df['STD']
epu_df

Unnamed: 0,Month,Newspaper,Frequency,STD,Yit
0,1994-10-01,JKPOST,4,6.453294,0.619838
1,1994-11-01,JKPOST,5,6.453294,0.774798
2,1994-12-01,JKPOST,3,6.453294,0.464879
3,1995-01-01,JKPOST,5,6.453294,0.774798
4,1995-03-01,JKPOST,2,6.453294,0.309919
...,...,...,...,...,...
948,2024-12-01,ANTARA,4,3.439683,1.162898
949,2024-12-01,JGLOBE,12,2.587464,4.637746
950,2024-12-01,JKPOST,13,6.453294,2.014475
951,2024-12-01,TEMGEN,4,1.473095,2.715372


In [62]:
# Step 3: Z_t is the mean of Y_it across the newspapers that have a row in month t
Zt = epu_df.groupby('Month', as_index=False)['Yit'].mean()
Zt

Unnamed: 0,Month,Yit
0,1994-10-01,0.619838
1,1994-11-01,0.774798
2,1994-12-01,0.464879
3,1995-01-01,0.774798
4,1995-03-01,0.309919
...,...,...
357,2024-08-01,1.728521
358,2024-09-01,1.607316
359,2024-10-01,2.206568
360,2024-11-01,2.738616


In [63]:
# Step 4: M is the average of Z_t over the months inside T2 (bounds inclusive)
in_T2 = Zt['Month'].between(T2_start, T2_end)
T2_data = Zt.loc[in_T2]
M = T2_data['Yit'].mean()
M

1.4678511630081768

In [64]:
# Step 5: rescale Z_t by 100 / M to obtain the normalized EPU index,
# so the index averages 100 over T2
normalization_factor = 100 / M
Zt['EPU_Index'] = Zt['Yit'].mul(normalization_factor)

Zt

Unnamed: 0,Month,Yit,EPU_Index
0,1994-10-01,0.619838,42.227609
1,1994-11-01,0.774798,52.784511
2,1994-12-01,0.464879,31.670707
3,1995-01-01,0.774798,52.784511
4,1995-03-01,0.309919,21.113805
...,...,...,...
357,2024-08-01,1.728521,117.758584
358,2024-09-01,1.607316,109.501294
359,2024-10-01,2.206568,150.326394
360,2024-11-01,2.738616,186.573165


In [65]:
# Plot the normalized EPU index using plotly
fig1 = px.line(Zt, x=Zt.Month.astype(str), y='EPU_Index', title='Economic Policy Uncertainty (EPU) Index')
fig1.update_layout(
    xaxis_title=None,
    yaxis_title='EPU Index',
    xaxis=dict(tickangle=0),
    template='plotly_white'
)

fig1.update_xaxes(
    nticks=20,
    tickformat="%b\n%Y",
    ticklabelmode="period",
    tickmode="auto",
    )

# Events to annotate on the line: (month, label, ax, ay).
# ax / ay are the arrow-tail offsets passed straight to add_annotation.
epu_event_annotations = [
    ('1998-06', "Asian Financial Crisis", -30, -20),                   # June 1998
    ('2001-05', "Scandal of <br> the President Wahid <br> on Buloggate <br> and Bruneigate", 0, -70),  # May 2001
    ('2002-08', "Constitutional <br> Amendments", 10, -25),            # August 2002
    ('2003-12', "Iraq War, <br> SARS Epidemic, <br> and Terrorist Attack ", 40, -100),  # December 2003
    ('2004-09', "Series of <br> Bomb Attack <br> and Election Year", 60, -60),  # September 2004
    ('2008-05', "Global <br> Financial Crisis", -10, -30),             # May 2008
    ('2010-05', "Eurozone <br> Debt Crisis", 0, -40),                  # May 2010
    ('2011-12', "US Debt <br> Ceiling Crisis", -10, -70),              # December 2011
    ('2014-12', "Reduction in <br> Fuel Subsidies", -40, -30),         # December 2014
    ('2015-10', "China Economic Turmoil", -10, -60),                   # October 2015
    ('2016-11', "Trump Effect <br> and US Election", 20, -30),         # November 2016
    ('2020-09', "COVID 19 Pandemic", -10, -30),                        # September 2020
    ('2022-08', "Polycrisis & <br> Russia-Ukraine War", 0, -80),       # August 2022
    ('2024-11', "US Election", -20, -30),                              # November 2024
]

for event_month, event_text, arrow_dx, arrow_dy in epu_event_annotations:
    # The arrow points at the index value of that month.
    index_at_event = Zt.loc[Zt['Month'] == event_month, 'EPU_Index']
    if index_at_event.empty:
        # Zt only holds months that have articles; skip the label rather
        # than abort the whole figure with an IndexError.
        print(f"Skipping annotation for {event_month}: month not present in Zt")
        continue
    fig1.add_annotation(
        x=event_month,
        y=index_at_event.iloc[0],
        text=event_text,
        showarrow=True,
        arrowhead=2,
        ax=arrow_dx,
        ay=arrow_dy,
    )
fig1.show()

In [None]:
import json
import os

# Make sure both output folders exist (no error if they already do)
for output_dir in ('data', 'js'):
    os.makedirs(output_dir, exist_ok=True)

# 1. Export EPU Index Data as JavaScript
# One record per month: ISO date string plus the index rounded to 2 decimals
epu_js_data = [
    {
        'Month': month.strftime('%Y-%m-%d'),
        'EPU_Index': round(index_value, 2),
    }
    for month, index_value in zip(Zt['Month'], Zt['EPU_Index'])
]

# Write the records as a JS constant declaration
epu_js_source = 'const epuData = ' + json.dumps(epu_js_data, indent=2) + ';'
with open('js/epu_data.js', 'w') as epu_file:
    epu_file.write(epu_js_source)

print(f"✓ Exported {len(epu_js_data)} EPU data points")

# 2. Export Article Coverage Data for scatter plot
# One record per article: date, publisher name and code, shortened headline
article_js_data = []
for article_date, publisher, publisher_code, headline in zip(
    data['Date'], data['Publisher'], data['Publisher Code'], data['Headline']
):
    # Convert once so the length check and the slice see the same text.
    headline = str(headline)
    article_js_data.append({
        # Dates that failed to parse are NaT, and NaT has no strftime;
        # export them as null instead of aborting the whole export.
        'Date': None if pd.isna(article_date) else article_date.strftime('%Y-%m-%d'),
        'Publisher': publisher,
        'Publisher_Code': publisher_code,
        # Headlines longer than 80 characters are cut and marked with '...'
        'Headline': headline[:80] + '...' if len(headline) > 80 else headline
    })

# Save as JavaScript file
with open('js/article_data.js', 'w') as f:
    f.write(f'const articleData = {json.dumps(article_js_data, indent=2)};')

print(f"✓ Exported {len(article_js_data)} article data points")

# 3. Export Major Events Data
# (month start, label, fallback value). The exported value is read from Zt so
# it always matches the index computed above; the literal is only used when
# the month has no usable value in Zt.
major_event_specs = [
    ('1998-06-01', 'Asian Financial Crisis', 319.96),
    ('2001-05-01', 'Buloggate & Bruneigate Scandal', 254.69),
    ('2002-08-01', 'Constitutional<br>Amendments', 247.48),
    ('2003-12-01', 'Iraq War & SARS<br>Epidemic', 200.58),
    ('2004-09-01', 'Bomb Attacks &<br>Election Year', 170.79),
    ('2008-05-01', 'Global Financial<br>Crisis', 128.56),
    ('2010-05-01', 'Eurozone Debt<br>Crisis', 171.36),
    ('2011-12-01', 'US Debt Ceiling<br>Crisis', 189.77),
    ('2014-12-01', 'Fuel Subsidies<br>Reduction', 190.02),
    ('2015-10-01', 'China Economic<br>Turmoil', 224.15),
    ('2016-11-01', 'Trump Election', 182.50),
    ('2020-09-01', 'COVID-19<br>Pandemic', 227.74),
    ('2022-08-01', 'Russia-Ukraine<br>War', 220.29),
    ('2024-11-01', 'US Election 2024', 186.57),
]

events_data = []
for event_date, event_text, fallback_value in major_event_specs:
    index_at_event = Zt.loc[Zt['Month'] == event_date, 'EPU_Index'].dropna()
    if index_at_event.empty:
        event_value = fallback_value
    else:
        # Same 2-decimal rounding as the EPU index export
        event_value = round(float(index_at_event.iloc[0]), 2)
    events_data.append({'date': event_date, 'text': event_text, 'value': event_value})

with open('js/events_data.js', 'w') as f:
    f.write(f'const majorEvents = {json.dumps(events_data, indent=2)};')

print("✓ Exported major events data")

# 4. Export Publisher Statistics
# Article count per publisher name, and publisher code -> publisher name
publisher_stats = data['Publisher'].value_counts().to_dict()
publisher_code_mapping = dict(zip(data['Publisher Code'], data['Publisher']))

n_articles = len(data)
n_epu_articles = data['contains_epu'].sum()
first_date = data['Date'].min()
last_date = data['Date'].max()
peak_row_label = Zt['EPU_Index'].idxmax()  # row of the highest index value

stats_data = {
    'total_articles': n_articles,
    'epu_articles': int(n_epu_articles),
    'epu_percentage': round((n_epu_articles / n_articles) * 100, 1),
    'date_range': {
        'start': first_date.strftime('%Y-%m-%d'),
        'end': last_date.strftime('%Y-%m-%d'),
    },
    'publisher_stats': publisher_stats,
    'publisher_mapping': publisher_code_mapping,
    'months_covered': len(Zt),
    'peak_epu': {
        'date': Zt.loc[peak_row_label, 'Month'].strftime('%Y-%m-%d'),
        'value': round(Zt['EPU_Index'].max(), 2),
    },
}

stats_js_source = 'const statsData = ' + json.dumps(stats_data, indent=2) + ';'
with open('js/stats_data.js', 'w') as stats_file:
    stats_file.write(stats_js_source)

print("✓ Exported statistics data")

# 5. Bundle every export into a single JSON file for easier loading
combined_data = dict(
    epu_index=epu_js_data,
    articles=article_js_data[:100],  # only the first 100 articles, for performance
    events=events_data,
    stats=stats_data,
)

with open('data/combined_data.json', 'w') as combined_file:
    combined_file.write(json.dumps(combined_data, indent=2))

# Summary of everything written so far
print("\n✓ All data exported successfully!")
print("\nFiles created:")
for exported_path in (
    'js/epu_data.js',
    'js/article_data.js',
    'js/events_data.js',
    'js/stats_data.js',
    'data/combined_data.json',
):
    print(f"- {exported_path}")
print("\nYou can now use these files in your Plotly.js website!")

# 6. CSV backups: the monthly index table and a per-article summary
Zt.to_csv('data/epu_index.csv', index=False)

summary_columns = ['Date', 'Publisher', 'Publisher Code', 'Headline']
data[summary_columns].to_csv('data/articles_summary.csv', index=False)

print("\n✓ CSV backup files also created:")
for backup_path in ('data/epu_index.csv', 'data/articles_summary.csv'):
    print(f"- {backup_path}")

✓ Exported 362 EPU data points
✓ Exported 6340 article data points
✓ Exported major events data
✓ Exported statistics data

✓ All data exported successfully!

Files created:
- js/epu_data.js
- js/article_data.js
- js/events_data.js
- js/stats_data.js
- data/combined_data.json

You can now use these files in your Plotly.js website!

✓ CSV backup files also created:
- data/epu_index.csv
- data/articles_summary.csv
