In [1]:
from datetime import datetime
import pandas as pd
import numpy as np
import json
import os
import csv
import sys
import matplotlib.pyplot as plt
from scipy.spatial.distance import cosine
import networkx as nx
from networkx.algorithms import community

from langchain import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains.llm import LLMChain
from langchain.embeddings import OpenAIEmbeddings
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
import torch
import random

import warnings

# SECURITY: the OpenAI key is taken from the environment instead of being
# stored in the notebook. Any key that was ever committed in this cell must be
# treated as leaked and revoked. Export OPENAI_API_KEY before starting Jupyter.
if not os.environ.get("OPENAI_API_KEY"):
    warnings.warn(
        "OPENAI_API_KEY is not set; the OpenAI-backed cells below will fail "
        "until it is exported in the environment that launches this kernel.",
        RuntimeWarning,
    )

# A single seed shared by every random number generator used in this notebook.
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Both values are interpolated into the input/output file names further down.
VERSION = "v1"  # original author's label for this version: "no rewritten"
METHOD_NAME = "sequential_host_guest_split"


In [2]:

# Report the CUDA setup. The per-device queries are only issued when CUDA is
# available: current_device() and get_device_name() need an initialised CUDA
# runtime and would otherwise abort a "Restart & Run All" on a CPU-only host.
cuda_available = torch.cuda.is_available()
print(cuda_available)
print(torch.cuda.device_count())
if cuda_available:
    print(torch.cuda.current_device())
    print(torch.cuda.device(0))
    print(torch.cuda.get_device_name(0))


True
2
0
<torch.cuda.device object at 0x7fa0cae59850>
NVIDIA GeForce RTX 2080 Ti


In [3]:
# Raise the csv module's per-field size limit to the largest value available.
csv.field_size_limit(sys.maxsize)

row_num = 0
podcast_data = []

# Read every line of the transcript file produced for METHOD_NAME.
with open(f'taking_the_temp_of_ai_{METHOD_NAME}.txt') as file:
    lines = file.readlines()

print(f"RRR # lines: {len(lines)}")

# Discard lines that are empty or contain only whitespace.
lines = list(filter(lambda text: text.strip(), lines))
print(f"RRR # lines after cleaned: {len(lines)}")

# One record per episode; the transcript is kept as the list of cleaned lines.
podcast = dict(episode_number="taking_the_temp_of_ai", transcript=lines)
podcast_data.append(podcast)
    

RRR # lines: 69
RRR # lines after cleaned: 35


In [29]:
def getKeyPointsFromSpeaker(speaker: str, speech: str, min_length: int = 200) -> str:
    """Ask the chat model for first-person key points and a condensed speech.

    Args:
        speaker: Name of the person speaking. It is accepted for call-site
            compatibility but is not part of the prompt.
        speech: The text of one speaker turn.
        min_length: Speeches whose length is at most this many characters are
            returned unchanged without calling the model. Defaults to 200.

    Returns:
        ``speech`` itself when it is short enough; otherwise the raw text
        produced by the model. The prompt requests a ``KEY_POINTS`` section
        followed by a ``COMBINED_SPEECH`` section, but the reply is returned
        as-is and is not validated here.
    """
    # Short turns are not worth a model call.
    if len(speech) <= min_length:
        return speech

    eval_prompt_template = """
    You will extract the key points in the given speech in the first person.
    
    Then, combine them to form a complete speech as if it is by the speaker him/herself.

    Here is the speech:
    SPEECH: {speech}
    
    Here is the output format:
    KEY_POINTS
    COMBINED_SPEECH
    """

    # The template only contains the {speech} placeholder, so that is the only
    # input variable declared.
    eval_prompt = PromptTemplate(template=eval_prompt_template, input_variables=["speech"])

    # temperature=0 is passed to the chat model for this summarisation call.
    map_llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo-1106')
    map_llm_chain = LLMChain(llm=map_llm, prompt=eval_prompt)

    # apply() takes a list of input dicts and returns one result dict per input;
    # a single speech is submitted, so only the first result is read.
    map_llm_chain_results = map_llm_chain.apply([{'speech': speech}])
    return map_llm_chain_results[0]['text']
    
    

In [38]:

# Section markers requested by the summarisation prompt, paired with the
# dictionary key each section is stored under. Checked in this order.
_SECTION_HEADERS = (
    ('KEY_POINTS', 'keypoints'),
    ('COMBINED_SPEECH', 'combined_speech'),
)


def _split_speaker_line(line):
    """Split one ``Speaker: speech`` transcript line at its first colon.

    Returns a ``(speaker, speech)`` tuple of stripped strings. A line that
    contains no colon yields an empty speaker and the whole stripped line as
    the speech.
    """
    speaker, colon, speech = line.partition(":")
    if not colon:
        return "", line.strip()
    return speaker.strip(), speech.strip()


def _parse_keypoints_summary(summary_text):
    """Split a model reply into its key-points and combined-speech sections.

    Returns a dict with the keys ``'keypoints'`` and ``'combined_speech'``.
    Text that appears before any section marker is stored under
    ``'combined_speech'``, so a reply without markers (for example a short
    speech passed through unchanged) ends up there in full.
    """
    sections = {'keypoints': [], 'combined_speech': []}
    current = 'combined_speech'

    for raw_line in summary_text.splitlines():
        if not raw_line.strip():
            continue

        for marker, key in _SECTION_HEADERS:
            if marker in raw_line:
                current = key
                # Keep any text that follows the marker on the same line
                # ("COMBINED_SPEECH: I think ...") instead of dropping it;
                # bare decoration such as ":" or "**" is stripped away.
                trailing = raw_line.partition(marker)[2].strip(" \t:*#")
                if trailing:
                    sections[current].append(trailing)
                break
        else:
            sections[current].append(raw_line.rstrip())

    return {key: "\n".join(parts).strip() for key, parts in sections.items()}


summarized_podcast_conversations = []

for podcast in podcast_data:
    transcript = podcast['transcript']

    for line in transcript:
        speaker, speech = _split_speaker_line(line)
        print(f"speaker: {speaker}")
        print(f"speech: {speech}")

        summarized_speech = getKeyPointsFromSpeaker(speaker, speech)

        print("summarized speech:")
        print(summarized_speech)
        print("===============================================================")

        keypoints_summaries = _parse_keypoints_summary(summarized_speech)

        # Fall back to the original speech when the summary is not shorter
        # than it, or when no combined speech could be extracted at all (so
        # the turn is never replaced by an empty string).
        combined_speech = keypoints_summaries['combined_speech']
        if not combined_speech or len(combined_speech) > len(speech):
            keypoints_summaries['combined_speech'] = speech

        print("--- keypoints_summaries:")
        print(keypoints_summaries)

        summarized_podcast_conversations.append({
            'speaker': speaker,
            'keypoints_summaries': keypoints_summaries
        })

    
    

speaker: Deb
speech: This is Deb Donig with technically Human, a podcast about ethics and technology, where I ask what it means to be human in the age of tech. Each week I interview industry leaders, thinkers, writers, and technologists, and I ask them about how they understand the relationship between humans and the technologies we create. We discuss how we can build a better vision for technology, one that represents the best of our human values. Today, I'm sitting down with Doctor Tamara Neese. Doctor Tamara Kneese is project director of Data and society's Algorithmic Impact Methods Lab, where she is also a senior researcher. For the 2023 and 2024 academic year, she's a visiting scholar at UC Berkeley's center for Science, Technology, Medicine and Society. Before joining data and society, she was lead researcher at Green Software foundation, director of developer engagement on the green software team at intel, and assistant professor of media studies and director of gender and sexua

summarized speech:
KEY_POINTS
- Excitement at data and society about the bill calling for robust socio technical research
- Difficulty in measuring environmental impacts of AI, especially across the entire supply chain
- Importance of considering the effects of AI production and use globally
- Advocacy for legislation pushing for standards around measurement and reporting

COMBINED_SPEECH
Yeah, so, first of all, we were really excited at data and society to hear about this bill because it is a bill that really calls for a very robust form of socio technical research. One of the problems with measuring the environmental impacts of AI is that it's actually quite hard to do, particularly if you're trying to measure impacts around every single part of the AI supply chain. So really thinking about all of the different effects that AI production and use can have around the world is something that a lot of technologists and advocates have been wanting to measure for quite some time. And so ha

summarized speech:
KEY_POINTS
- Large language models and generative AI require specialized chips and a lot of power, energy, and water for efficient operation.
- The energy and water consumption associated with training and deploying AI is a growing problem, especially in the ICT sector.
- Data centers require a tremendous amount of energy, can pollute local areas, and consume a lot of water, leading to environmental justice issues and potential carbon emissions.
- AI is just one part of a larger problem that has been ongoing for a long time, and there is a need for robust research to address all aspects of AI's environmental impact.
- The history of environmental issues related to technology, such as toxic chemicals in the water supply in the Silicon Valley region in the 1980s, highlights the importance of community action and regulation to prevent future contamination.
- The current AI boom presents an opportunity to focus on making regulatory changes and understanding the real envi

summarized speech:
KEY_POINTS
1. Environmental impacts of AI are connected to labor problems.
2. Focusing on the environmental impact of AI forces a more holistic view of potential harms.
3. The entire AI supply chain involves different parts of the world and various forms of labor exploitation.
4. Trade-offs need to be considered, such as the impact of data centers on communities.
5. Data centers can have negative effects on health and the environment.
6. Justice concerns and impacts on marginalized communities are linked to labor and ethics in AI.
7. Considering the effects on habitats, ecosystems, and other species is important in understanding the full spectrum of AI impacts.

COMBINED_SPEECH
I believe that the environmental impacts of AI are closely tied to labor problems. Focusing on the environmental impact of AI forces us to take a more holistic view of the potential harms it can cause. The entire AI supply chain involves different parts of the world and various forms of labor 

summarized speech:
KEY_POINTS
- Working across different sectors including academia, industry, and public interest technology
- Identifying fractures in the approach to AI benefits and harms
- Recognizing gaps in knowledge between different sectors
- Addressing silos in knowledge and controversial conversations across industries

COMBINED_SPEECH
As someone who has worked across academia, industry, and public interest technology, I have noticed significant fractures in the way different sectors approach the benefits and harms of AI. There are gaps in knowledge between these sectors, leading to silos in knowledge and controversial conversations. It is important to address these tensions and work towards a more cohesive approach to AI across industries.
--- keypoints_summaries:
{'keypoints': '- Working across different sectors including academia, industry, and public interest technology\n- Identifying fractures in the approach to AI benefits and harms\n- Recognizing gaps in knowledge betw

summarized speech:
KEY_POINTS
1. AI is seen as a solution to climate change and environmental harm by reducing inefficiencies and creating new technologies.
2. However, there are concerns about the energy requirements of data centers, e-waste, and environmental damage caused by mining for tech resources.
3. There is a debate about the job creation and retraining efforts in the AI industry.
4. The controversy is viewed from both environmental and labor perspectives.
5. The assessment of benefits against harms and impact assessment is questioned.

COMBINED_SPEECH
I am grappling with the tensions surrounding climate change, environmental harms, and AI. On one hand, leaders in the tech industry tout AI as a solution to mitigate climate change and environmental harm by reducing inefficiencies and creating new technologies. However, there are valid concerns about the energy requirements of data centers, e-waste, and environmental damage caused by mining for tech resources. Additionally, ther

summarized speech:
KEY_POINTS:
1. Algorithmic impact assessments are important.
2. Unanticipated consequences occur when people and ecologies meet technical systems.
3. The need to identify and document push back against ambient harms.

COMBINED_SPEECH:
I believe that algorithmic impact assessments are crucial in today's technological world. As I have mentioned before, when people and ecologies intersect with technical systems, there are always unanticipated consequences. It is essential for us to be able to identify and document a push back against these sometimes ambient harms.
--- keypoints_summaries:
{'keypoints': '1. Algorithmic impact assessments are important.\n2. Unanticipated consequences occur when people and ecologies meet technical systems.\n3. The need to identify and document push back against ambient harms.', 'combined_speech': "I believe that algorithmic impact assessments are crucial in today's technological world. As I have mentioned before, when people and ecologies 

summarized speech:
KEY_POINTS
- Identifying and assessing the harms of AI products
- The problem of AI products not working as intended
- Example of AI poop picking up robots as a solution to homelessness in San Francisco
- The incentive for developers to create marketable products rather than solving the problem

COMBINED_SPEECH
I want to delve deeper into the issue of identifying and assessing the harms of AI products. It's not just about recognizing the problems, but also about pushing back when the products don't work as intended. Take the example of AI poop picking up robots developed to address the issue of homelessness in San Francisco. While it may seem like a technical solution, the better alternative, as pointed out by Dan Lyons, is public restrooms. This brings to light the underlying economics of product development - the incentive for developers is often to create marketable products rather than truly solving the problem at hand. So, the question is, how do we push back an

summarized speech:
KEY_POINTS
1. The power differential between those who create AI and those who are subjected to it.
2. The options to push back on tech companies and AI companies.
3. The norms and standards set by these companies to maximize profit and limit liability for harms caused.
4. The mass reach and institutionalization of their products in our environment.
5. The ability of these companies to fight lawsuits and pay fines without affecting their profit.

COMBINED_SPEECH
I want to delve deeper into the power differential between those who create AI and those who are subjected to it. We need to consider our options to push back on tech companies and AI companies, especially those who have accrued tremendous power and wealth. These companies have set norms and standards that maximize their ability to profit and limit their liability for the harms they cause. Their products have become deeply institutionalized in our environment, making it difficult for us to alter their effect 

summarized speech:
KEY_POINTS
- Harm caused by tech products can be intentional or unintended.
- There may be distinctions between different ways in which tech products cause harm.
- Some argue that so-called unintended consequences are actually predictable but overlooked in favor of profit or progress.
- The value system of tech production may disregard the harm caused by the product.
- The idea of harm caused by tech products can vary depending on the perspective of the developers and users.

COMBINED_SPEECH
I want to delve into the concept of harm caused by tech products. It's important to consider that harm can be intentional or unintended. There may be distinctions between different ways in which tech products cause harm. Some argue that so-called unintended consequences are actually predictable but overlooked in favor of profit or progress. The value system of tech production may disregard the harm caused by the product. The idea of harm caused by tech products can vary depending

summarized speech:
KEY_POINTS
- Importance of present moment of technological production being informed by narratives and histories of the past
- Concerns about generative AI and its environmental impact
- Human costs of using generative AI to resurrect the dead

COMBINED_SPEECH
I just want to take a moment to encourage everyone listening to check out Mar's episode of this show. In our interview, she emphasized the significance of the present moment of technological production being shaped by the narratives and histories of the past. This is something that we should all consider as we move forward with AI technologies.

Speaking of AI, I recently read a piece in Wired about using generative AI to bring back the voices of the deceased. It raised some important concerns about the environmental impact of this technology and the human costs associated with it. It's a topic that we should all be thinking about as we continue to develop and utilize AI in our society.
--- keypoints_summaries:

summarized speech:
KEY_POINTS
- The speaker wants to delve deeper into the links between labor, environmental damage, and the preservation of data, especially data related to the deceased.
- The speaker has extensively written about how socio-technical systems, including social media platforms and AI, are changing cultural rituals and perceptions of death.
- The speaker's book, "Death Glitch: How Techno-Solutionism Fails Us in This Life and Beyond," explores these themes.
- The speaker is also involved in work with the AI impact lab at data and society.

COMBINED_SPEECH
I am deeply interested in exploring the connections between labor, environmental harm, and the preservation of data, particularly data related to the deceased. My extensive writing has focused on how socio-technical systems, such as social media platforms and AI, are reshaping cultural rituals and attitudes towards death. My book, "Death Glitch: How Techno-Solutionism Fails Us in This Life and Beyond," delves into these

summarized speech:
KEY_POINTS
- Techno solutionism can be useful for making small changes in technology to solve specific problems efficiently.
- The larger problem is using technology to fix social issues that may not require technological solutions.
- Building relationships and maintaining dialogue with communities impacted by technology is crucial.
- It is important to focus on relationships with techno solutionism rather than just the technology itself.
- Imagining alternative futures and reimagining technology is essential.
- Dismantling the current way technology is built and deployed is necessary for realizing alternative futures.
- Interested in the magical and mystical qualities of technology and its transcendent effects on people.

COMBINED_SPEECH
I believe that techno solutionism can be useful for making small changes in technology to solve specific problems efficiently. However, the larger problem lies in using technology to fix social issues that may not require technologi

summarized speech:
KEY_POINTS
- Younger generations are more aware of potential problems in the tech industry and are eager for training in critical technology studies.
- It is important to understand the history of technology and the tech industry, and to learn from activists and organizers from the past.
- Building coalitions with people outside of tech and learning from their experiences is valuable.
- It is important to engage with different communities and their concerns, and to volunteer or get involved with issues where you live.

COMBINED_SPEECH
Yeah, it's a great question. When I talk to younger generations interested in tech, I see a strong awareness of potential problems and a hunger for training in critical technology studies. It's important to understand the history of technology and the tech industry, and to learn from activists and organizers from the past. Building coalitions with people outside of tech and learning from their experiences is valuable. Engaging with diff

In [39]:
        

# Write the condensed conversation to disk, one "speaker:" header followed by
# that turn's combined speech and a blank line.
output_dir = './summarized_dataset'
output_path = f'{output_dir}/podcast_summaries_openai_gpt35turbo_taking_the_temp_of_ai_{METHOD_NAME}_{VERSION}.txt'

# Create the target directory on first run instead of failing in open().
os.makedirs(output_dir, exist_ok=True)

# An explicit encoding keeps the write independent of the platform default,
# which may not be able to represent every character in the model output.
with open(output_path, 'w', encoding='utf-8') as f:
    for speaker_combined_summaries in summarized_podcast_conversations:
        speaker = speaker_combined_summaries['speaker']
        keypoints_summaries = speaker_combined_summaries['keypoints_summaries']
        combined_speech = keypoints_summaries['combined_speech']

        f.write(f'{speaker}:\n{combined_speech}\n\n')
    