In [None]:
!pip install -q huggingface_hub

from huggingface_hub import login

login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
import pandas as pd
import torch
import numpy as np

#If using Google Collab
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
df = pd.read_csv('/content/drive/MyDrive/255 Data/fed_speeches_1996_2020.csv')

df.head()

Unnamed: 0,link,title,speaker,event,year,text,date,text_len,location
0,https://www.federalreserve.gov/boarddocs/speec...,Supervision of bank risk-taking,Vice Chair Alice M. Rivlin,At the The Brookings Institution National Issu...,1996.0,I discovered when I joined the Board of Govern...,19961219.0,3671,D.C.
1,https://www.federalreserve.gov/boarddocs/speec...,Social security,Chairman Alan Greenspan,At the Abraham Lincoln Award Ceremony of the U...,1996.0,I am privileged to accept the Union League of...,19961206.0,2596,Pennsylvania
2,https://www.federalreserve.gov/boarddocs/speec...,The challenge of central banking in a democrat...,Chairman Alan Greenspan,At the Annual Dinner and Francis Boyer Lecture...,1996.0,The Challenge of Central Banking in a Democrat...,19961205.0,4344,D.C.
3,https://www.federalreserve.gov/boarddocs/speec...,Clearinghouses and risk management,"Governor Edward W. Kelley, Jr.","At the 1996 Payments System Risk Conference, W...",1996.0,It is a pleasure to be with you this morning ...,19961203.0,2527,D.C.
4,https://www.federalreserve.gov/boarddocs/speec...,Supervisory and regulatory responses to financ...,Governor Susan M. Phillips,At the BAI Seminar on Regulatory Policy Change...,1996.0,Supervisory and Regulatory Responses to Financ...,19961125.0,2222,D.C.


In [None]:
# drop irrelevant columns
# 'link', 'title', 'year', 'event', 'text_len', 'location'

df = df[['speaker', 'text', 'date']]

# change date to use datetime instead

df['date'] = (
    df['date']
      .dropna()
      .astype(int)
      .astype(str)
      .reindex(df.index)
)

df['date'] = pd.to_datetime(df['date'], format='%Y%m%d', errors='coerce')
df

Unnamed: 0,speaker,text,date
0,Vice Chair Alice M. Rivlin,I discovered when I joined the Board of Govern...,1996-12-19
1,Chairman Alan Greenspan,I am privileged to accept the Union League of...,1996-12-06
2,Chairman Alan Greenspan,The Challenge of Central Banking in a Democrat...,1996-12-05
3,"Governor Edward W. Kelley, Jr.",It is a pleasure to be with you this morning ...,1996-12-03
4,Governor Susan M. Phillips,Supervisory and Regulatory Responses to Financ...,1996-11-25
...,...,...,...
1451,Governor Lael Brainard,I want to thank Darrell Duffie for inviting m...,2020-02-05
1452,Vice Chair for Supervision Randal K. Quarles,It's a great pleasure to be with you today at...,2020-01-17
1453,Governor Michelle W. Bowman,Few sectors are as central to the success of ...,2020-01-16
1454,Vice Chairman Richard H. Clarida,Thank you for the opportunity to join you bri...,2020-01-09


## FinBERT

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

model_name = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

finbert_pipeline = pipeline(
    "sentiment-analysis",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1  # Use GPU if available
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cuda:0


In [None]:
def chunk_text(text, max_tokens=400):
    tokens = tokenizer.tokenize(text)
    for i in range(0, len(tokens), max_tokens):
        chunk_tokens = tokens[i:i+max_tokens]
        yield tokenizer.convert_tokens_to_string(chunk_tokens)

def finbert_on_long_text(text, max_tokens=400):
    if not isinstance(text, str) or text.strip() == "":
        return None

    chunks = list(chunk_text(text, max_tokens=max_tokens))
    if not chunks:
        return None

    results = finbert_pipeline(
        chunks,
        truncation=True,
        max_length=512
    )

    # convert to simple numeric scores: +1 (positive), -1 (negative), 0 (neutral)
    label_to_score = {"positive": 1, "negative": -1, "neutral": 0}
    numeric_scores = [label_to_score[r["label"].lower()] for r in results]

    # can change the scoring methodology if necessary
    avg_score = np.mean(numeric_scores)
    if avg_score > 0.1:
        label = "positive"
    elif avg_score < -0.1:
        label = "negative"
    else:
        label = "neutral"

    return {"label": label, "score": float(avg_score)}


In [None]:
# 1) Get all texts as a list
texts = df["text"].fillna("").tolist()

# 2) Run pipeline once on the whole list (batched internally)
results = finbert_pipeline(
    texts,
    truncation=True,
    max_length=512,
    batch_size=32   # tweak for your GPU / CPU
)

# 3) Put results back into the DataFrame
df["finbert_label"] = [r["label"] for r in results]
df["finbert_score"] = [r["score"] for r in results]

In [None]:
df[df['finbert_label'] != 'neutral'].count()

Unnamed: 0,0
speaker,335
text,336
date,336
finbert_label,336
finbert_score,336


In [None]:
df.count()

Unnamed: 0,0
speaker,1454
text,1456
date,1455
finbert_label,1456
finbert_score,1456


## Loughran–McDonald

In [None]:
!pip install pysentiment2



In [None]:
import pysentiment2 as ps
import re

lm = ps.lm.LM()

In [None]:
def clean_text(t):
    if pd.isna(t):
        return ""
    t = t.replace("\n", " ")
    t = re.sub(r"\s+", " ", t)
    return t.strip()

df["text_clean"] = df["text"].apply(clean_text)

def lm_scores(text):
    tokens = lm.tokenize(text)
    return lm.get_score(tokens)   # returns a dict of sentiment metrics

scores = df["text_clean"].apply(lm_scores)

# Turn list of dicts into columns and join back to df
scores_df = pd.DataFrame(list(scores))
df = pd.concat([df, scores_df], axis=1)

def token_count(text):
    return len(lm.tokenize(text))

df["n_tokens"] = df["text_clean"].apply(token_count)

df["LM_pos_rate"] = df["Positive"] / df["n_tokens"]
df["LM_neg_rate"] = df["Negative"] / df["n_tokens"]
df["LM_net_polarity_per_token"] = (df["Positive"] - df["Negative"]) / df["n_tokens"]

## Removing Stop Words
This is only needed for BERTopic

In [None]:
import pandas as pd
import re
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')

stop_words = set(stopwords.words('english'))

def preprocess(text):
    text = text.lower()
    text = re.sub(r"\s+", " ", text)
    tokens = text.split()
    tokens = [t for t in tokens if t not in stop_words]
    return " ".join(tokens)

df["text_clean_nostop"] = df["text_clean"].apply(preprocess)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## BERTopic

In [None]:
!pip install bertopic sentence-transformers

Collecting bertopic
  Downloading bertopic-0.17.4-py3-none-any.whl.metadata (24 kB)
Downloading bertopic-0.17.4-py3-none-any.whl (154 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.7/154.7 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bertopic
Successfully installed bertopic-0.17.4


In [None]:
from bertopic import BERTopic
import pandas as pd


  $max \{ core_k(a), core_k(b), 1/\alpha d(a,b) \}$.


In [None]:
docs = df["text_clean_nostop"].tolist()  # or df["text"]

topic_model = BERTopic(
    language="english",
    embedding_model="all-MiniLM-L6-v2",
    calculate_probabilities=True
)

topics, probs = topic_model.fit_transform(docs)

import numpy as np

df["topic"] = topics
df["topic_confidence"] = probs.max(axis=1)          # max probability across topics


In [None]:
probs_df = pd.DataFrame(
    probs,
    columns=[f"topic_prob_{i}" for i in range(probs.shape[1])]
)

df = pd.concat([df.reset_index(drop=True), probs_df], axis=1)


In [None]:
topic_info = topic_model.get_topic_info()
print(topic_info.head())


   Topic  Count                                Name  \
0     -1    493  -1_financial_policy_market_federal   
1      0    135      0_labor_rate_inflation_percent   
2      1    119   1_community_banks_cra_development   
3      2     70      2_policy_rate_federal_monetary   
4      3     63   3_education_women_economic_school   

                                      Representation  \
0  [financial, policy, market, federal, bank, ban...   
1  [labor, rate, inflation, percent, growth, econ...   
2  [community, banks, cra, development, communiti...   
3  [policy, rate, federal, monetary, inflation, t...   
4  [education, women, economic, school, students,...   

                                 Representative_Docs  
0  [titled talk "policy challenges federal reserv...  
1  [thank economic club washington inviting speak...  
2  [pleasure speak federal deposit insurance corp...  
3  [century, economic club new york served one na...  
4  [pleased opportunity meet today address remark...  


## Hawkish-Dovish Index

In [58]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import pandas as pd

model_name = "gtfintechlab/FOMC-RoBERTa"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

fomc_cls = pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    return_all_scores=True,
    truncation=True,
    max_length=512,
    device=0 if torch.cuda.is_available() else -1  # Use GPU if available
)

Device set to use cuda:0


In [60]:
def fomc_scores(texts, batch_size=8):
    hawk_list = []
    dove_list = []
    neutral_list = []

    for i in range(0, len(texts), batch_size):
        batch = texts[i:i+batch_size]
        outputs = fomc_cls(batch)   # list of lists

        for out in outputs:
            # initialize
            hawk = dove = neutral = 0.0
            for d in out:
                label = d["label"]  # e.g. "LABEL_0"
                score = d["score"]
                if label == "LABEL_1":      # Hawkish
                    hawk = score
                elif label == "LABEL_0":    # Dovish
                    dove = score
                elif label == "LABEL_2":    # Neutral
                    neutral = score

            hawk_list.append(hawk)
            dove_list.append(dove)
            neutral_list.append(neutral)

    return pd.DataFrame({
        "fomc_hawk": hawk_list,
        "fomc_dove": dove_list,
        "fomc_neutral": neutral_list,
    })


df_fomc = fomc_scores(df["text_clean"].tolist(), batch_size=8)

# Overwrite/assign safely instead of concat (prevents duplicate columns)
df[["fomc_hawk", "fomc_dove", "fomc_neutral"]] = df_fomc.values

df["HDI_fomc"] = df["fomc_hawk"] - df["fomc_dove"]

In [None]:
df.head()

Unnamed: 0,speaker,text,date,finbert_label,finbert_score,text_clean,Positive,Negative,Polarity,Subjectivity,...,topic_prob_29,topic_prob_30,topic_prob_31,hawk_count,dove_count,HDI_dict,fomc_hawk,fomc_dove,fomc_other,HDI_fomc
0,Vice Chair Alice M. Rivlin,I discovered when I joined the Board of Govern...,1996-12-19,neutral,0.896602,I discovered when I joined the Board of Govern...,50,88,-0.275362,0.081802,...,0.003773,0.0131741,0.005232198,0,0,0.0,0.0,0.0,1.0,0.0
1,Chairman Alan Greenspan,I am privileged to accept the Union League of...,1996-12-06,neutral,0.646885,I am privileged to accept the Union League of ...,59,70,-0.085271,0.102707,...,1.0,6.330262e-308,7.818059e-308,7,1,0.666667,0.0,0.0,1.0,0.0
2,Chairman Alan Greenspan,The Challenge of Central Banking in a Democrat...,1996-12-05,neutral,0.887193,The Challenge of Central Banking in a Democrat...,62,137,-0.376884,0.107684,...,0.005472,0.008693169,0.006906108,18,3,0.681818,0.0,0.0,1.0,0.0
3,"Governor Edward W. Kelley, Jr.",It is a pleasure to be with you this morning ...,1996-12-03,neutral,0.765716,It is a pleasure to be with you this morning t...,36,59,-0.242105,0.076305,...,0.005974,0.009061292,0.007424955,0,0,0.0,0.0,0.0,1.0,0.0
4,Governor Susan M. Phillips,Supervisory and Regulatory Responses to Financ...,1996-11-25,neutral,0.808607,Supervisory and Regulatory Responses to Financ...,52,32,0.238095,0.076853,...,0.007788,0.01155057,0.01003424,0,0,0.0,0.0,0.0,1.0,0.0


In [62]:
cols_to_drop = [c for c in df.columns if c.startswith("fomc_") or c.startswith("HDI_fomc")]
df = df.drop(columns=cols_to_drop, errors="ignore")

# also remove any duplicated columns generally, just in case
df = df.loc[:, ~df.columns.duplicated()]

df[["fomc_hawk", "fomc_dove", "fomc_neutral"]] = df_fomc

In [65]:
df.describe()

Unnamed: 0,date,finbert_score,Positive,Negative,Polarity,Subjectivity,n_tokens,LM_pos_rate,LM_neg_rate,LM_net_polarity_per_token,...,topic_prob_28,topic_prob_29,topic_prob_30,topic_prob_31,hawk_count,dove_count,HDI_dict,fomc_hawk,fomc_dove,fomc_neutral
count,1455,1456.0,1456.0,1456.0,1456.0,1456.0,1456.0,1456.0,1456.0,1456.0,...,1456.0,1456.0,1456.0,1456.0,1456.0,1456.0,1456.0,1456.0,1456.0,1456.0
mean,2007-12-26 03:05:04.329896960,0.765768,60.800137,91.68544,-0.137018,0.098984,1534.934066,0.041532,0.057452,-0.015921,...,0.01913822,0.01745676,0.02077427,0.01887339,10.79739,4.849588,0.037467,0.054572,0.087195,0.858234
min,1996-06-13 00:00:00,0.34061,0.0,0.0,-1.0,0.0,3.0,0.0,0.0,-0.2,...,7.77124e-309,6.673994e-309,7.406991e-309,6.852995e-309,0.0,0.0,-0.991803,8e-05,4.3e-05,0.000282
25%,2002-10-22 12:00:00,0.678517,41.0,49.0,-0.333333,0.082768,1048.75,0.031584,0.040484,-0.034676,...,1.6957149999999999e-307,1.131053e-307,1.585389e-307,1.451137e-307,0.0,0.0,-0.289286,0.000202,0.000111,0.977739
50%,2007-03-06 00:00:00,0.808972,58.0,82.0,-0.172739,0.097234,1512.0,0.039609,0.055686,-0.0164,...,0.008897444,0.00633669,0.009172671,0.008443941,1.0,1.0,0.0,0.000376,0.000243,0.999282
75%,2013-05-25 12:00:00,0.874925,78.0,124.0,0.028854,0.114967,1931.0,0.048912,0.072111,0.00234,...,0.01504631,0.01388152,0.02205817,0.0169801,9.0,6.0,0.461538,0.001711,0.002796,0.999652
max,2020-06-19 00:00:00,0.969811,263.0,391.0,1.0,0.25,5183.0,0.25,0.2,0.25,...,1.0,1.0,1.0,1.0,301.0,121.0,0.990826,0.998993,0.9991,0.999841
std,,0.135948,30.55553,60.443583,0.282381,0.025972,739.995375,0.015731,0.023914,0.031051,...,0.08567981,0.0848015,0.08338028,0.08406124,26.031058,9.08388,0.47166,0.194271,0.25383,0.309795


## Financial Data and Formatting

In [None]:
df_fin = pd.read_csv('/content/drive/MyDrive/255 Data/index_prices_1996_2020.csv')

df_fin = df_fin.rename(columns={'Date': 'date'})

df_fin['date'] = pd.to_datetime(df['date'], format='%Y%m%d', errors='coerce')

df_fin.head()

Unnamed: 0,date,SP500,RSL,DJIA,NDQ
0,1996-12-19,5177.450195,620.72998,585.940002,316.809998
1,1996-12-06,5194.069824,621.320007,572.289978,315.209991
2,1996-12-05,5173.839844,617.700012,563.47998,310.769989
3,1996-12-03,5181.430176,616.710022,565.140015,312.190002
4,1996-11-25,5197.700195,618.460022,563.150024,312.390015


In [68]:
combined_df = df.set_index('date').join(df_fin.set_index('date'))

In [69]:
combined_df.head()

Unnamed: 0_level_0,speaker,text,finbert_label,finbert_score,text_clean,Positive,Negative,Polarity,Subjectivity,n_tokens,...,hawk_count,dove_count,HDI_dict,fomc_hawk,fomc_dove,fomc_neutral,SP500,RSL,DJIA,NDQ
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1996-12-19,Vice Chair Alice M. Rivlin,I discovered when I joined the Board of Govern...,neutral,0.896602,I discovered when I joined the Board of Govern...,50,88,-0.275362,0.081802,1687,...,0,0,0.0,0.000294,0.0001,0.999606,5177.450195,620.72998,585.940002,316.809998
1996-12-06,Chairman Alan Greenspan,I am privileged to accept the Union League of...,neutral,0.646885,I am privileged to accept the Union League of ...,59,70,-0.085271,0.102707,1256,...,7,1,0.666667,0.09928,0.041803,0.858917,5194.069824,621.320007,572.289978,315.209991
1996-12-05,Chairman Alan Greenspan,The Challenge of Central Banking in a Democrat...,neutral,0.887193,The Challenge of Central Banking in a Democrat...,62,137,-0.376884,0.107684,1848,...,18,3,0.681818,0.002163,0.00035,0.997486,5173.839844,617.700012,563.47998,310.769989
1996-12-03,"Governor Edward W. Kelley, Jr.",It is a pleasure to be with you this morning ...,neutral,0.765716,It is a pleasure to be with you this morning t...,36,59,-0.242105,0.076305,1245,...,0,0,0.0,0.000181,7.7e-05,0.999741,5181.430176,616.710022,565.140015,312.190002
1996-11-25,Governor Susan M. Phillips,Supervisory and Regulatory Responses to Financ...,neutral,0.808607,Supervisory and Regulatory Responses to Financ...,52,32,0.238095,0.076853,1093,...,0,0,0.0,0.000161,0.00013,0.999709,5197.700195,618.460022,563.150024,312.390015


In [70]:
combined_df['log_DJAI'] = np.log2(combined_df['DJIA'])
combined_df['log_SP500'] = np.log2(combined_df['SP500'])
combined_df['log_RSL'] = np.log2(combined_df['RSL'])
combined_df['log_NDQ'] = np.log2(combined_df['NDQ'])

combined_df['pct_DJAI'] = combined_df['log_DJAI'].pct_change()
combined_df['pct_SP500'] = combined_df['log_SP500'].pct_change()
combined_df['pct_RSL'] = combined_df['log_RSL'].pct_change()
combined_df['pct_NDQ'] = combined_df['log_NDQ'].pct_change()

In [None]:
combined_df.head()

Unnamed: 0_level_0,speaker,text,finbert_label,finbert_score,text_clean,Positive,Negative,Polarity,Subjectivity,n_tokens,...,DJIA,NDQ,log_DJAI,log_SP500,log_RSL,log_NDQ,pct_DJAI,pct_SP500,pct_RSL,pct_NDQ
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1996-12-19,Vice Chair Alice M. Rivlin,I discovered when I joined the Board of Govern...,neutral,0.896602,I discovered when I joined the Board of Govern...,50,88,-0.275362,0.081802,1687,...,585.940002,316.809998,9.194609,12.338026,9.277822,8.307474,,,,
1996-12-06,Chairman Alan Greenspan,I am privileged to accept the Union League of...,neutral,0.646885,I am privileged to accept the Union League of ...,59,70,-0.085271,0.102707,1256,...,572.289978,315.209991,9.160603,12.34265,9.279193,8.300169,-0.003699,0.000375,0.000148,-0.000879
1996-12-05,Chairman Alan Greenspan,The Challenge of Central Banking in a Democrat...,neutral,0.887193,The Challenge of Central Banking in a Democrat...,62,137,-0.376884,0.107684,1848,...,563.47998,310.769989,9.138221,12.33702,9.270763,8.279703,-0.002443,-0.000456,-0.000909,-0.002466
1996-12-03,"Governor Edward W. Kelley, Jr.",It is a pleasure to be with you this morning ...,neutral,0.765716,It is a pleasure to be with you this morning t...,36,59,-0.242105,0.076305,1245,...,565.140015,312.190002,9.142465,12.339135,9.268448,8.286281,0.000464,0.000171,-0.00025,0.000794
1996-11-25,Governor Susan M. Phillips,Supervisory and Regulatory Responses to Financ...,neutral,0.808607,Supervisory and Regulatory Responses to Financ...,52,32,0.238095,0.076853,1093,...,563.150024,312.390015,9.137376,12.343658,9.272537,8.287205,-0.000557,0.000367,0.000441,0.000112


In [71]:
combined_df.to_csv("preprocessed.csv")

In [75]:
combined_no_text = combined_df.drop(['text', 'text_clean', 'text_clean_nostop'], axis=1)

In [76]:
combined_no_text.head()

Unnamed: 0_level_0,speaker,finbert_label,finbert_score,Positive,Negative,Polarity,Subjectivity,n_tokens,LM_pos_rate,LM_neg_rate,...,DJIA,NDQ,log_DJAI,log_SP500,log_RSL,log_NDQ,pct_DJAI,pct_SP500,pct_RSL,pct_NDQ
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1996-12-19,Vice Chair Alice M. Rivlin,neutral,0.896602,50,88,-0.275362,0.081802,1687,0.029638,0.052164,...,585.940002,316.809998,9.194609,12.338026,9.277822,8.307474,,,,
1996-12-06,Chairman Alan Greenspan,neutral,0.646885,59,70,-0.085271,0.102707,1256,0.046975,0.055732,...,572.289978,315.209991,9.160603,12.34265,9.279193,8.300169,-0.003699,0.000375,0.000148,-0.000879
1996-12-05,Chairman Alan Greenspan,neutral,0.887193,62,137,-0.376884,0.107684,1848,0.03355,0.074134,...,563.47998,310.769989,9.138221,12.33702,9.270763,8.279703,-0.002443,-0.000456,-0.000909,-0.002466
1996-12-03,"Governor Edward W. Kelley, Jr.",neutral,0.765716,36,59,-0.242105,0.076305,1245,0.028916,0.04739,...,565.140015,312.190002,9.142465,12.339135,9.268448,8.286281,0.000464,0.000171,-0.00025,0.000794
1996-11-25,Governor Susan M. Phillips,neutral,0.808607,52,32,0.238095,0.076853,1093,0.047575,0.029277,...,563.150024,312.390015,9.137376,12.343658,9.272537,8.287205,-0.000557,0.000367,0.000441,0.000112


In [77]:
combined_no_text.to_csv("preprocessed_small.csv")