In [1]:
import torch

In [2]:
from transformers import pipeline

In [3]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

In [4]:
import json
import os
import numpy as np
import pandas as pd

In [16]:
# Tokenizer that process_sentences uses to truncate sentences.
# NOTE(review): this checkpoint is not the model loaded by the `classify`
# pipeline, so the two do not share a vocabulary — confirm this pairing is intended.
TOKENIZER_CHECKPOINT = "cffl/bert-base-styleclassification-subjective-neutral"
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_CHECKPOINT)



In [17]:
def process_sentences(json_file_path, max_length=512):
    """Load sentences from a JSON file and truncate each to `max_length` tokens.

    The file must contain a JSON list of objects, each with a 'sentence' key.
    Every sentence is tokenized with the module-level `tokenizer`, cut to at
    most `max_length` tokens, and decoded back to text.

    Args:
        json_file_path: Path of the JSON file to read.
        max_length: Maximum number of tokens kept per sentence (default 512).

    Returns:
        A list of decoded (possibly truncated) sentences, in file order.
        NOTE(review): the text comes from a tokenize/decode round trip, so it
        is presumably the tokenizer's normalized form, not the original string.

    Raises:
        KeyError: if an entry has no 'sentence' key.
    """
    # Decode explicitly as UTF-8 so non-ASCII characters load the same way on
    # every platform instead of depending on the locale's default encoding.
    with open(json_file_path, 'r', encoding='utf-8') as file:
        sentences_data = json.load(file)

    processed_sentences = []
    for entry in sentences_data:
        # Honour the caller's max_length (it used to be hard-coded to 512).
        tokens = tokenizer(
            entry['sentence'],
            truncation=True,
            max_length=max_length,
            return_tensors="pt",
            padding=True,
        )
        # One sentence is encoded per call, so only the first row is decoded.
        processed_sentences.append(
            tokenizer.decode(tokens['input_ids'][0], skip_special_tokens=True)
        )

    return processed_sentences

In [5]:
# Sentiment classifier used throughout the notebook.
# top_k=None asks the pipeline for the score of every label, not just the best one.
classify = pipeline(
    "sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment",
    top_k=None,
)



In [6]:
# First sample passage; passed to `classify` below to try the pipeline out.
text = "All around the rim of China the nations of non-Communist Asia are building a new prosperity and developing a new cohesiveness, which together suggest that they should be able to play far more assertive roles in their own defense.\n The Soviet drive for strategic supremacy — which the Soviets already have very nearly achieved, while the United States has passively watched — is deeply troubling and seriously threatening"

In [7]:
# Second sample passage; passed to `classify` below.
text2 = "Since China joined the WTO, Americans have witnessed the closure of more than 50,000 factories and the loss of tens of millions of jobs"

In [8]:
# Classify the first sample and print the raw pipeline result
# (a nested list of {'label', 'score'} dicts).
scores = classify(text)
print(scores)

[[{'label': '4 stars', 'score': 0.34373730421066284}, {'label': '5 stars', 'score': 0.2400761842727661}, {'label': '3 stars', 'score': 0.17142046988010406}, {'label': '2 stars', 'score': 0.1672779768705368}, {'label': '1 star', 'score': 0.07748813927173615}]]


In [14]:
# Classify the second sample and show only the label of the first-listed prediction.
scores2 = classify(text2)
first_prediction = scores2[0][0]
print(first_prediction['label'])


1 star


In [15]:
# Labels look like "4 stars"; keep only the part before the first space.
first_label = scores[0][0]['label']
print(first_label.split(' ')[0])


4


In [19]:
def _load_speech(path):
    """Read one speech JSON file and return its parsed content."""
    # Decode explicitly as UTF-8 so non-ASCII punctuation in the speeches loads
    # the same way on every platform instead of depending on the locale default.
    with open(path, 'r', encoding='utf-8') as file:
        return json.load(file)


# One variable per speaker; the numeric file prefixes are kept exactly as on disk.
nixon_speech = _load_speech('./speech_json/37_nixon_speech.json')
ford_speech = _load_speech('./speech_json/38_ford_speech.json')
carter_speech = _load_speech('./speech_json/39_carter_speech.json')
reagan_speech = _load_speech('./speech_json/40_reagan_speech.json')
herbertbush_speech = _load_speech('./speech_json/41_herbertbush_speech.json')
clinton_speech = _load_speech('./speech_json/42_clinton_speech.json')
walkerbush_speech = _load_speech('./speech_json/43_walkerbush_speech.json')
obama_speech = _load_speech('./speech_json/44_obama_speech.json')
trump_speech = _load_speech('./speech_json/45_trump_speech.json')
biden_speech = _load_speech('./speech_json/46_biden_speech.json')
vicepresident_biden_speech = _load_speech('./speech_json/47_vicepresident_biden_speech.json')



In [20]:
def calculate_scores(speech_file, output_file, max_length=512):
    """Score every sentence of a speech with `classify` and save the results.

    Args:
        speech_file: Iterable of dicts, each with a 'sentence' and a 'date' key
            (the already-parsed content of a speech JSON file).
        output_file: Path of the JSON file the results are written to. Its
            parent directory is created if it does not exist.
        max_length: Maximum number of tokens the classifier sees per sentence
            (default 512); longer sentences are truncated by the pipeline.

    Returns:
        A list of dicts with keys 'date', 'sentiment_score' and 'sentence'.
        'sentiment_score' is the leading token of the best-scoring label
        (e.g. '4' for '4 stars') and is kept as a string. 'sentence' is the
        original, untruncated sentence.

    Raises:
        KeyError: if an entry lacks 'sentence' or 'date'.
    """
    # Create the destination folder before the expensive inference loop, so a
    # missing directory cannot throw away the computed scores at the very end.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    results = []
    for entry in speech_file:
        sentence = entry['sentence']
        date = entry['date']

        # Let the pipeline truncate with its own tokenizer. Truncating with a
        # different model's tokenizer and decoding back to text does not
        # guarantee the classifier's length limit and alters the input text.
        predictions = classify(sentence, truncation=True, max_length=max_length)[0]

        # Pick the best label by score instead of relying on result ordering.
        top_prediction = max(predictions, key=lambda prediction: prediction['score'])
        sentiment_score = top_prediction['label'].split(' ')[0]

        results.append({
            "date": date,
            "sentiment_score": sentiment_score,
            "sentence": sentence
        })

    # Save results to a JSON file
    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump(results, outfile, indent=4)

    return results

In [22]:
# Write the scores into the folder the loading cell reads them back from
# (./sentiment_scores_json/), so a fresh run does not depend on moving files by hand.
os.makedirs('./sentiment_scores_json', exist_ok=True)

nixon_results = calculate_scores(nixon_speech, './sentiment_scores_json/nixon_sentiment_scores.json')
# The remaining speeches are left disabled; un-comment a line to (re)generate its score file.
# ford_results = calculate_scores(ford_speech, './sentiment_scores_json/ford_sentiment_scores.json')
# carter_results = calculate_scores(carter_speech, './sentiment_scores_json/carter_sentiment_scores.json')
# reagan_results = calculate_scores(reagan_speech, './sentiment_scores_json/reagan_sentiment_scores.json')
# herbertbush_results = calculate_scores(herbertbush_speech, './sentiment_scores_json/herbertbush_sentiment_scores.json')
# clinton_results = calculate_scores(clinton_speech, './sentiment_scores_json/clinton_sentiment_scores.json')
# walkerbush_results = calculate_scores(walkerbush_speech, './sentiment_scores_json/walkerbush_sentiment_scores.json')
# obama_results = calculate_scores(obama_speech, './sentiment_scores_json/obama_sentiment_scores.json')
# trump_results = calculate_scores(trump_speech, './sentiment_scores_json/trump_sentiment_scores.json')
# biden_results = calculate_scores(biden_speech, './sentiment_scores_json/biden_sentiment_scores.json')
# vicepresident_biden_results = calculate_scores(vicepresident_biden_speech, './sentiment_scores_json/vicepresident_biden_sentiment_scores.json')


In [23]:
def _load_scores(path):
    """Read one sentiment-score JSON file and return its parsed content."""
    # The encoding is stated explicitly so decoding never depends on the
    # platform's locale default.
    with open(path, 'r', encoding='utf-8') as file:
        return json.load(file)


# One variable per speaker, each read from ./sentiment_scores_json/.
nixon_sentiment_scores = _load_scores('./sentiment_scores_json/nixon_sentiment_scores.json')
ford_sentiment_scores = _load_scores('./sentiment_scores_json/ford_sentiment_scores.json')
carter_sentiment_scores = _load_scores('./sentiment_scores_json/carter_sentiment_scores.json')
reagan_sentiment_scores = _load_scores('./sentiment_scores_json/reagan_sentiment_scores.json')
herbertbush_sentiment_scores = _load_scores('./sentiment_scores_json/herbertbush_sentiment_scores.json')
clinton_sentiment_scores = _load_scores('./sentiment_scores_json/clinton_sentiment_scores.json')
walkerbush_sentiment_scores = _load_scores('./sentiment_scores_json/walkerbush_sentiment_scores.json')
obama_sentiment_scores = _load_scores('./sentiment_scores_json/obama_sentiment_scores.json')
trump_sentiment_scores = _load_scores('./sentiment_scores_json/trump_sentiment_scores.json')
biden_sentiment_scores = _load_scores('./sentiment_scores_json/biden_sentiment_scores.json')
vicepresident_biden_sentiment_scores = _load_scores('./sentiment_scores_json/vicepresident_biden_sentiment_scores.json')
