In [1]:
import os
import pandas as pd
# Read the OpenAI API key from a local file so the key itself never appears in
# the notebook source.
with open("APIKEY.txt", "r") as apikey:
    oak = apikey.read().strip()

# Fail here rather than at the first API call: an empty key would otherwise be
# exported as-is and only surface later as an authentication error.
if not oak:
    raise ValueError("APIKEY.txt is empty; expected it to contain an OpenAI API key")

# Export the key through the environment (ChatOpenAI is constructed further
# down without an explicit key argument).
os.environ["OPENAI_API_KEY"] = oak




In [2]:
from langchain.prompts import PromptTemplate

# Prompt text for a four-level "futures wheel" mind map in PlantUML syntax;
# ``{topic}`` is the only placeholder.
_WHEEL_MINDMAP_TEMPLATE = "Acting as a futurist, start by creating a simple MindMap on the consequences of {topic}.\nCan you give the mindmap in PlantUML format. Keep it structured from the core central topic branching out to first order consequences (don’t label as first order consequences). Treat the second order consequences as branches from the first orders ones, third order as branches from the second and so on. Let’s go to 4 levels to begin with. Add the start and end mindmap tags."

promptWheelMidMap = PromptTemplate(
    template=_WHEEL_MINDMAP_TEMPLATE,
    input_variables=["topic"],
)

#print(promptWheelMidMap.format(topic="lower energy cost"))

In [3]:
from langchain.chat_models import ChatOpenAI
from langchain.llms import OpenAI
from langchain.chains import LLMChain

# Chat model instance; it is reused by the chains built in later cells.
chatopenai = ChatOpenAI(model_name="gpt-3.5-turbo-16k")

# Bind the mind-map prompt to the model and run it once on a sample topic.
# The call is the cell's last expression, so its return value is displayed.
llmchain_chat = LLMChain(prompt=promptWheelMidMap, llm=chatopenai)
sample_inputs = {"topic": "lower energy costs"}
llmchain_chat.run(sample_inputs)

'@startmindmap\n* Lower Energy Costs\n** First Order Consequences\n*** Increased disposable income\n*** Decreased production costs\n*** Higher consumer spending\n*** Increased economic growth\n** Second Order Consequences\n*** Improved standard of living\n*** Increased innovation and entrepreneurship\n*** Reduced environmental impact\n*** Enhanced global competitiveness\n** Third Order Consequences\n*** Shift towards sustainable energy sources\n*** Greater investment in energy-efficient technologies\n*** Reduction in poverty rates\n*** Improved public infrastructure\n** Fourth Order Consequences\n*** Advancements in healthcare and education\n*** Creation of new job opportunities\n*** Expansion of global trade and connectivity\n*** Strengthened social welfare systems\n@endmindmap'

The Institute for the Future suggests processing signals by asking the following questions:

1. What kind of change do these signals represent? From what to what?
2. What’s one driving force, or motivation, behind this change? (Why would someone, or some group, WANT this change?)
3. What might be different 10 years from now if this signal becomes more common and widespread?
4. Is this a future YOU want to help make? Or a future you’d like to slow down, or prevent?

In [4]:
from langchain.prompts import PromptTemplate

# Candidate viewpoints for the ``{actor}`` placeholder of the summary-table prompt.
personas = ["celebrity", "politician", "nurse", "teacher", "futurist"]

# Template texts, kept separate from the PromptTemplate objects below.
# Asks for a four-column table: signal summary, kind of change, 10-year outlook, driving force.
_SUMMARY_TABLE_TEMPLATE = "Acting as a {actor}, process the following text as a signal in a TABLE, first column a short 5–7 word summary of the signal, second column what kind of change is this (from what to what) 5–10 word summary, third column what might be different in 10 years time 5–10 word summary, fourth column What’s one driving force, or motivation, behind this change? 5–10 word summary:\n\n{text}"
# Asks for a KEYWORDS list, a THEMES list and a 'Summary:' paragraph.
_KEYWORD_TEMPLATE = "Process the following text, and give back only the top 10 most relevant keywords for the text, as a Python list (looking like KEYWORDS = ['keyword1', 'keyword2']). Then, on a new line, provide a list of the top three themes or categories the text belongs to (looking like THEMES = ['theme1', 'theme2']). On another new line, add a paragraph starting with 'Summary:'  which  summarizes the text in 4 to 5 sentences. \n\n{text}"
# Asks for a short title in the form 'TITLE: ...'.
_TITLE_TEMPLATE = "Process the following text, and give this text a title, which should not be longer than 6 words, in the form 'TITLE: title of the article'.\n\n{text}"

promptSummaryTable = PromptTemplate(
    template=_SUMMARY_TABLE_TEMPLATE,
    input_variables=["actor", "text"],
)

promptKeyword = PromptTemplate(
    template=_KEYWORD_TEMPLATE,
    input_variables=["text"],
)

promptTitle = PromptTemplate(
    template=_TITLE_TEMPLATE,
    input_variables=["text"],
)

In [5]:
# Load the article table and keep only rows whose LEN value lies strictly
# between 1500 and 30000 (LEN is presumably the article length in characters;
# confirm against the code that produced the file).
df = pd.read_parquet('articles.parquet.gzip')
len_in_range = df["LEN"].gt(1500) & df["LEN"].lt(30000)
df = df.loc[len_in_range]
df

Unnamed: 0,file_name,content,LEN
0,7fcef9a240c0738d2390c83f9713dc98,Let’s start with the end. If you know anything...,9567
1,c24651d7a89dbc792142e26a99579bfe,There’s a calamity on the horizon if you belie...,2561
2,3aeffd629aa221d55fc9749940af8a4e,Lettre à ma fille de 15 ans\nDécryptage des te...,16587
3,b1f6991b9b718672751c9427c3f5b794,ABSTRACT\nThe nature has myriad plant organism...,3829
4,7e5c1ed5c33c7dbc323cd0264cc94099,"Microsoft’s GitHub Copilot, the automatic code...",2172
...,...,...,...
1435,edbb7336dfb26098bcc966ca17c074b9,It was time to consider calling it a night. Sh...,15656
1436,3fbb11d6e949d1e662aa6a146bb6cda0,China’s growing number of insomniacs are turni...,2859
1437,e8b7faf708568f9f39abe04b778c4631,I would say my efficiency is up ~20% since sta...,14337
1438,b79a2baa87b68283198416791b93bce4,The U.S. government has restricted sales of Nv...,2125


In [6]:
# Rows whose index label is at or above this value are printed but not processed.
MAX_ROW_INDEX = 100000


def _run_cached(chain, inputs, cache_path):
    """Run ``chain`` on ``inputs`` and store the text result at ``cache_path``.

    Does nothing when ``cache_path`` already exists, so re-running the cell
    only calls the model for articles that have not been processed yet.

    Parameters:
        chain: object exposing ``run(inputs)`` that returns a string.
        inputs: dict of prompt variables passed to ``chain.run``.
        cache_path: file path the result is written to (UTF-8).
    """
    if os.path.isfile(cache_path):
        return
    # Create the cache directory *before* calling the model: otherwise a
    # missing directory makes open() fail after the response has been produced,
    # and the response is lost.
    cache_dir = os.path.dirname(cache_path)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)
    result = chain.run(inputs)
    with open(cache_path, 'w', encoding='utf-8') as f:
        f.write(result)


# One chain per prompt, all sharing the same chat model.
llmchain_chat = LLMChain(llm=chatopenai, prompt=promptSummaryTable)
llmchain_chat_kw = LLMChain(llm=chatopenai, prompt=promptKeyword)
llmchain_chat_title = LLMChain(llm=chatopenai, prompt=promptTitle)

for ix, row in df.iterrows():
    ID, txt = row.file_name, row.content
    # Per-article cache files, keyed by the article's file_name.
    CACHE1 = ".cache/"+str(ID)
    CACHE2 = ".cache_keywords/"+str(ID)
    CACHE3 = ".cache_title/"+str(ID)
    print(row.file_name)
    if ix < MAX_ROW_INDEX:
        _run_cached(llmchain_chat, {"actor": "Futurist", "text": txt}, CACHE1)
        _run_cached(llmchain_chat_kw, {"text": txt}, CACHE2)
        _run_cached(llmchain_chat_title, {"text": txt}, CACHE3)

7fcef9a240c0738d2390c83f9713dc98
c24651d7a89dbc792142e26a99579bfe
3aeffd629aa221d55fc9749940af8a4e
b1f6991b9b718672751c9427c3f5b794
7e5c1ed5c33c7dbc323cd0264cc94099
2978134725e2d7543f50469f53a5ca3d
5793d28b92e90a64b918cfaefad509db
3c87907a359edc6a80187a597d0c3074
36b36c58561712964f7a9920eb97d56f
bc433d7cb21513ab55af7f1198e0fbb6
4a29de3bba5a755ccf2dd3db47e0c32f
6ca0eb61f6679e513cde2fa1b3637de7
7d8c56d6e0991c4df704374e4bd6f6ec
917c65b1876df03d3794348b22d9fb31
089706e00a9c0d142049a6a6c557e3e7
447aaddba1b3b4ac319e14628e6ed8f2
d3507b95bda7c6c7671bad243b14b6c3
adc1258885aa937c5350875662ee892f
576f8cf76f713e057b075e2424ea709c
97d9a79d85abb98e49308c266dcda203
d91570978a514e8aedb83c47a45c169d
cca4ca46edfcac58a750fffde3522e7e
ba6b48e83806da532c7578d24e914455
cb1de23b85f5c592ad2f8e720a7811a0
78cbf4c846d2549699dc09df3ae30ad4
52ec2cf0aebdc7af56249f1702652ebe
2ef1681fbde001909e631d18aad43f79
0ba4fa557cd2aae4760bd7a2abca844e
3f93494c0a224e19c9a86b8b566bb280
a238d311a34b43af7599af5c8ad1e41f
715753b68e

1f02642f54cf28611a00e4c83c1d428f
646824b48390d9ff516d3e6ef2e44c82
b39cd180dcfb9a92e51a09e5dcb0a0f4
8bff520dc945204951b32a6c4b10571e
9c2ba0d61960b055646e246bf4727cd1
f2c164b7ff8eb0a6fd223511dba15804
d4b9614dce0dbe9cd944ed6cb3022d12
028e8b0caa69712c4fda4048ad84de72
e0f6ae75e034df0a32044fe8a9075673
e88f2221b01e6b71b610a1e6c9e2c576
814283a354f997b39eef0684e747b8ef
2f9b0e015264ffb84681864aa348771a
54491b49e9fd2c6eab777dca0ba67e76
0544a93e245f3721d389e3269b6390b6
dad9dbd97cc50604963911f16fbb27aa
ec3b733c80a9fc0239d120b8450c979b
86a282a77d55807be1c735f8a3989880
c94f72ff677c7517a836417c1f1df620
8f692340509e6a4be9518522b4cca022
c62d2942a1454cd9810be2b3a93f7bc6
77758e23f4c4a5892d58e3f5cff8f0ca
a5c0ba498382a4edc0f2bf0d9653ad16
e37681e9430b42da8833c0133e69c45d
a6bfcabefea560d7dfefd5aab87f047c
16492dc2f92cae2688e0f452eea5596b
878524351f159f818bc2c8ebcd95c0d7
28af1e3d007d49741667ce0c19401620
1bebd9c22d66a412bc9dc0ff5b4b562e
858dac884c8fe7dfa6fc0c2cf093e97f
0e3aecb9945b5787eebfacb7ab40e7cb
65c4c98af7

In [7]:
import io
def txtRead(filename):
    """Return the entire contents of ``filename`` as a string, decoded as UTF-8."""
    with open(filename, mode='r', encoding='utf8') as handle:
        return handle.read()

In [8]:
import glob

In [9]:
# Every cached title file; the file name is the article ID.
SEEDS = list(glob.glob(".cache_title/*"))

# Collect [article ID, title] pairs from the cached title responses.
allSeeds = []
for seed in SEEDS:
    try:
        txt = txtRead(seed)
    except (OSError, UnicodeDecodeError) as err:
        # Only unreadable or undecodable files are skipped; anything else
        # (including KeyboardInterrupt) propagates instead of being swallowed.
        print("Error with:", seed, err)
        continue
    # basename handles both "/" and the platform's own separator, which
    # splitting on "/" does not.
    ID = os.path.basename(seed)
    # Drop the "TITLE:" marker and take the first non-empty line, so a reply
    # that starts with a blank line does not produce an empty title.
    candidates = [line.strip() for line in txt.replace("TITLE:", "").split("\n")]
    SUMMARY = next((line for line in candidates if line), "")
    allSeeds.append([ID, SUMMARY])

In [10]:
# Build the (article ID, title) table. The column names are passed to the
# constructor instead of being assigned to ``.columns`` afterwards, so an empty
# ``allSeeds`` gives an empty two-column frame rather than a length-mismatch error.
DDTitle = pd.DataFrame(allSeeds, columns=["src", "title"])
# Persist the titles as a gzip-compressed parquet file.
DDTitle.to_parquet("titles.parquet.gzip", compression='gzip')
DDTitle

Unnamed: 0,src,title
0,ea74b217d36ec570bb432fb5aa679090,The Impact of AI on Education
1,841546c0efc4c82f0aabc545a47a09e1,Researchers Uncover Backdoor in Encrypted Radi...
2,f6637d1fa0e3fe0e1e94bea8b3338ef9,Hack-a-Sat: Researchers Launch Sandbox Satelli...
3,7ff6239e13a43e856fa36887902c5ede,Building a Knowledge Graph with Chat-GPT and P...
4,58c165e1a46408307f2b60f63df7c549,E-seeds Drill Themselves into Soil
...,...,...
1062,edbb7336dfb26098bcc966ca17c074b9,The Radical Power of Hanging Out
1063,3fbb11d6e949d1e662aa6a146bb6cda0,China's Insomniacs Turn to Sleeping Assistants
1064,e8b7faf708568f9f39abe04b778c4631,The Impact of ChatGPT on Efficiency and Inform...
1065,b79a2baa87b68283198416791b93bce4,US Restricts Nvidia GPU Sales to Thwart China'...
