# Running Llama 2 13B locally on a MacBook Pro M1 (32GB)

Setup reference (LangChain llama.cpp integration, installation with Metal): https://python.langchain.com/docs/integrations/llms/llamacpp#installation-with-metal

In [6]:
import pandas as pd 
from langchain.llms import LlamaCpp
from langchain import PromptTemplate, LLMChain
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

from tqdm import tqdm

In [7]:
from langchain.prompts import PromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser, StructuredOutputParser, ResponseSchema
from pydantic import BaseModel, Field, validator
from typing import List

from langchain.chat_models import ChatOpenAI
from langchain.prompts.chat import(
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    ChatPromptTemplate,
)
from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

In [2]:
# Prompt text with a single `question` placeholder that nudges the model to
# reason step by step before answering.
template = """Question: {question}

Answer: Let's work this out in a step by step way to be sure we have the right answer."""

# NOTE: later cells rebind `prompt` to plain strings, and this PromptTemplate is
# not passed to any chain in the cells shown in this notebook.
prompt = PromptTemplate(template=template, input_variables=["question"])

# Callbacks support token-wise streaming
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
# Verbose is required to pass to the callback manager: the LlamaCpp
# constructors below that receive `callback_manager` also set verbose=True.

In [12]:
# Make sure the model path is correct for your system!
MODEL_LLAMA2_13B = "llama-2-13b-chat.ggmlv3.q4_0.bin"
n_gpu_layers = 1  # Metal set to 1 is enough.
n_ctx = 2048  # Context size passed to llama.cpp.
# Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.
# This is the value the constructor call used to hard-code; the previous
# `n_batch = 512` assignment was never passed to LlamaCpp, so the effective
# batch size is unchanged.
n_batch = 2048

# Baseline model instance (temperature 0.7) that streams tokens to stdout
# through `callback_manager`.
llm = LlamaCpp(
    model_path=f"/Users/lawrence.wu/Documents/github/llama.cpp/{MODEL_LLAMA2_13B}",
    temperature=0.7,
    top_p=1,
    max_tokens=2048,
    n_ctx=n_ctx,
    n=-1,
    repeat_penalty=1.1,
    n_threads=8,
    callback_manager=callback_manager,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    verbose=True,  # Required for the callback manager to receive tokens.
)

llama.cpp: loading model from /Users/lawrence.wu/Documents/github/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_head_kv  = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size =    0.11 MB
llama_model_load_internal: mem required  = 7349.72 MB (+ 1600.00 MB per state)
llama_new_context_with

In [13]:
# prompt = """
# Question: A rap battle between Stephen Colbert and John Oliver
# """
# llm(prompt)

In [14]:
llm("What's the meaning of life?")

 It's a question that has puzzled philosophers and theologians for centuries, and one that has been explored in various forms of media. Here are some of the most thought-provoking films, books, and TV shows that delve into this existential question:
1. "The Matrix" (1999) - This iconic sci-fi film posits a world where humans are unknowingly trapped in a simulated reality created by intelligent machines. The main character, Neo, is awakened to this truth and must choose between embracing his newfound freedom or returning to the comfort of ignorance.
2. "The Tree of Life" (2011) - This Oscar-nominated film follows a family over several decades, exploring their joys and struggles as they navigate life's ups and downs. The film is a meditation on the meaning of existence and the interconnectedness of all things.
3. "The Shawshank Redemption" (1994) - This highly acclaimed drama tells the story of two prisoners who form an unlikely friendship and find hope in a bleak and unforgiving environ


llama_print_timings:        load time =  3616.70 ms
llama_print_timings:      sample time =   837.72 ms /   893 runs   (    0.94 ms per token,  1065.99 tokens per second)
llama_print_timings: prompt eval time =  3616.65 ms /     9 tokens (  401.85 ms per token,     2.49 tokens per second)
llama_print_timings:        eval time = 59172.35 ms /   892 runs   (   66.34 ms per token,    15.07 tokens per second)
llama_print_timings:       total time = 66526.86 ms


' It\'s a question that has puzzled philosophers and theologians for centuries, and one that has been explored in various forms of media. Here are some of the most thought-provoking films, books, and TV shows that delve into this existential question:\n1. "The Matrix" (1999) - This iconic sci-fi film posits a world where humans are unknowingly trapped in a simulated reality created by intelligent machines. The main character, Neo, is awakened to this truth and must choose between embracing his newfound freedom or returning to the comfort of ignorance.\n2. "The Tree of Life" (2011) - This Oscar-nominated film follows a family over several decades, exploring their joys and struggles as they navigate life\'s ups and downs. The film is a meditation on the meaning of existence and the interconnectedness of all things.\n3. "The Shawshank Redemption" (1994) - This highly acclaimed drama tells the story of two prisoners who form an unlikely friendship and find hope in a bleak and unforgiving e

In [15]:
# Same prompt as the previous cell, run again to compare two sampled completions.
llm("What's the meaning of life?")

Llama.generate: prefix-match hit




For centuries, philosophers and theologians have been grappling with this question, and there is no one definitive answer. However, here are some possible perspectives on the meaning of life:

1. Religious perspective: Many religious traditions teach that the purpose of life is to serve a higher power or to fulfill a divine plan. For example, some believe that the purpose of life is to love and worship God, while others believe that it is to follow the teachings of a particular religion or spiritual path.
2. Personal fulfillment: Some people believe that the meaning of life is to find personal fulfillment and happiness. According to this view, individuals should pursue their passions and interests, cultivate meaningful relationships, and strive to live a life that is rich in experience and personal growth.
3. Social and cultural perspective: Others believe that the meaning of life is tied to one's social and cultural context. For example, some people find meaning in their work, their


llama_print_timings:        load time =  3616.70 ms
llama_print_timings:      sample time =   486.41 ms /   588 runs   (    0.83 ms per token,  1208.86 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time = 37882.36 ms /   588 runs   (   64.43 ms per token,    15.52 tokens per second)
llama_print_timings:       total time = 40652.60 ms


"\n\nFor centuries, philosophers and theologians have been grappling with this question, and there is no one definitive answer. However, here are some possible perspectives on the meaning of life:\n\n1. Religious perspective: Many religious traditions teach that the purpose of life is to serve a higher power or to fulfill a divine plan. For example, some believe that the purpose of life is to love and worship God, while others believe that it is to follow the teachings of a particular religion or spiritual path.\n2. Personal fulfillment: Some people believe that the meaning of life is to find personal fulfillment and happiness. According to this view, individuals should pursue their passions and interests, cultivate meaningful relationships, and strive to live a life that is rich in experience and personal growth.\n3. Social and cultural perspective: Others believe that the meaning of life is tied to one's social and cultural context. For example, some people find meaning in their work

# Temperature = 1

In [16]:
# Rebuild `llm` with the same settings as the first instance, except that the
# sampling temperature is raised from 0.7 to 1.
llm = LlamaCpp(
    # Model file and context/batch sizes
    model_path=f"/Users/lawrence.wu/Documents/github/llama.cpp/{MODEL_LLAMA2_13B}",
    n_ctx=2048,
    n_batch=2048,  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.
    # Generation settings
    temperature=1,
    top_p=1,
    repeat_penalty=1.1,
    max_tokens=2048,
    n=-1,  # NOTE(review): meaning is not evident from this notebook - confirm against the LlamaCpp docs.
    # Hardware settings
    n_threads=8,
    n_gpu_layers=n_gpu_layers,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    # Streaming / logging
    callback_manager=callback_manager,
    verbose=True,
)

llama.cpp: loading model from /Users/lawrence.wu/Documents/github/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_head_kv  = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size =    0.11 MB
llama_model_load_internal: mem required  = 7349.72 MB (+ 1600.00 MB per state)
llama_new_context_with

In [18]:
# Ask the same question again, now against the `llm` rebuilt with temperature=1.
llm("What's the meaning of life?")

Llama.generate: prefix-match hit




This is a question that has puzzled philosophers and theologians for centuries, and there is no one definitive answer. However, here are some possible perspectives on this question:

1. Biological perspective: From a biological perspective, the meaning of life is to survive and reproduce, as these are the fundamental drives that have shaped the evolution of life on Earth. However, this perspective does not provide much insight into the human experience or the nature of existence.
2. Psychological perspective: From a psychological perspective, the meaning of life is to find fulfillment and happiness. This can be achieved through personal growth, relationships, and experiences that bring joy and satisfaction.
3. Social perspective: From a social perspective, the meaning of life is to contribute to the greater good of society. This can be done through work, volunteering, or other forms of service that benefit others.
4. Philosophical perspective: From a philosophical perspective, the me


llama_print_timings:        load time =  2115.84 ms
llama_print_timings:      sample time =   928.42 ms /   714 runs   (    1.30 ms per token,   769.05 tokens per second)
llama_print_timings: prompt eval time =     0.00 ms /     1 tokens (    0.00 ms per token,      inf tokens per second)
llama_print_timings:        eval time = 48435.80 ms /   714 runs   (   67.84 ms per token,    14.74 tokens per second)
llama_print_timings:       total time = 52765.75 ms


'\n\nThis is a question that has puzzled philosophers and theologians for centuries, and there is no one definitive answer. However, here are some possible perspectives on this question:\n\n1. Biological perspective: From a biological perspective, the meaning of life is to survive and reproduce, as these are the fundamental drives that have shaped the evolution of life on Earth. However, this perspective does not provide much insight into the human experience or the nature of existence.\n2. Psychological perspective: From a psychological perspective, the meaning of life is to find fulfillment and happiness. This can be achieved through personal growth, relationships, and experiences that bring joy and satisfaction.\n3. Social perspective: From a social perspective, the meaning of life is to contribute to the greater good of society. This can be done through work, volunteering, or other forms of service that benefit others.\n4. Philosophical perspective: From a philosophical perspective

In [19]:
llm("As an entry-level Data Scientist, what are 10 areas of your work that you think will get disrupted by the widespread usage of LLMs or tools built on LLMs?")

Llama.generate: prefix-match hit



Here's a list of 10 areas of my work as an entry-level Data Scientist that I think will be disrupted by the widespread usage of Large Language Models (LLMs) or tools built on LLMs:
1. Data Preprocessing: Many tasks in data preprocessing, such as tokenization, stemming, and lemmatization, can be automated using LLMs. This could make my work more efficient and reduce the time spent on these tasks.
2. Feature Engineering: LLMs can help generate new features that can improve model performance. For example, LLMs can be used to generate sentiment scores or topic models for text data. This could lead to better insights and improved accuracy in machine learning models.
3. Data Visualization: LLMs can be used to generate interactive visualizations that provide more detailed insights into data. For example, LLMs can be used to create interactive heatmaps or word clouds that show the distribution of words in a dataset.
4. Text Summarization: LLMs can be used to summarize long documents or articl


llama_print_timings:        load time =  2115.84 ms
llama_print_timings:      sample time =   545.15 ms /   601 runs   (    0.91 ms per token,  1102.44 tokens per second)
llama_print_timings: prompt eval time =  7841.22 ms /    44 tokens (  178.21 ms per token,     5.61 tokens per second)
llama_print_timings:        eval time = 43740.43 ms /   600 runs   (   72.90 ms per token,    13.72 tokens per second)
llama_print_timings:       total time = 55364.09 ms


"\nHere's a list of 10 areas of my work as an entry-level Data Scientist that I think will be disrupted by the widespread usage of Large Language Models (LLMs) or tools built on LLMs:\n1. Data Preprocessing: Many tasks in data preprocessing, such as tokenization, stemming, and lemmatization, can be automated using LLMs. This could make my work more efficient and reduce the time spent on these tasks.\n2. Feature Engineering: LLMs can help generate new features that can improve model performance. For example, LLMs can be used to generate sentiment scores or topic models for text data. This could lead to better insights and improved accuracy in machine learning models.\n3. Data Visualization: LLMs can be used to generate interactive visualizations that provide more detailed insights into data. For example, LLMs can be used to create interactive heatmaps or word clouds that show the distribution of words in a dataset.\n4. Text Summarization: LLMs can be used to summarize long documents or 

In [27]:
# Rebinds `prompt` (previously a PromptTemplate) to a plain string. The
# triple-quoted literal includes a leading and a trailing newline.
prompt="""
As an entry-level Pediatric Pharmacist, what are 10 areas of your work that you think will get disrupted by the widespread usage of LLMs or tools built on LLMs?
"""
llm(prompt)

Llama.generate: prefix-match hit


Although I am an early career pharmacist, and I lack industry expertise as a seasoned professional in the field of pediatrics or AI technology I can imagine several ways that LLMs could disrupt various aspects of my work:
1. Medication dosing: One potential area of disruption is medication dosing. LLMs may be able to quickly and accurately determine appropriate drug dosages for children based on their weight, age, and other factors, potentially reducing the need for human intervention and improving medication safety. 
2.Drug interactions: Another possible area of disruption is the identification of potential drug interactions. LLMs may be able to analyze large amounts of data and identify potential drug interaction that a human pharmacist might miss, helping to prevent adverse events and improve patient outcomes. 
3. Allergic reactions: With their ability to analyze large amounts of data, LLMs may also be useful in identifying patterns related to allergic reactions, allowing healthcare


llama_print_timings:        load time =  2115.84 ms
llama_print_timings:      sample time =   706.37 ms /   769 runs   (    0.92 ms per token,  1088.66 tokens per second)
llama_print_timings: prompt eval time =  7440.91 ms /    44 tokens (  169.11 ms per token,     5.91 tokens per second)
llama_print_timings:        eval time = 52303.43 ms /   768 runs   (   68.10 ms per token,    14.68 tokens per second)
llama_print_timings:       total time = 62987.68 ms


'Although I am an early career pharmacist, and I lack industry expertise as a seasoned professional in the field of pediatrics or AI technology I can imagine several ways that LLMs could disrupt various aspects of my work:\n1. Medication dosing: One potential area of disruption is medication dosing. LLMs may be able to quickly and accurately determine appropriate drug dosages for children based on their weight, age, and other factors, potentially reducing the need for human intervention and improving medication safety. \n2.Drug interactions: Another possible area of disruption is the identification of potential drug interactions. LLMs may be able to analyze large amounts of data and identify potential drug interaction that a human pharmacist might miss, helping to prevent adverse events and improve patient outcomes. \n3. Allergic reactions: With their ability to analyze large amounts of data, LLMs may also be useful in identifying patterns related to allergic reactions, allowing health

# Get LLM risks for all titles

In [8]:
df_titles = pd.read_csv("../data/df_titles.csv")

In [9]:
df_titles.head(1)

Unnamed: 0,id,name,pluralName,isSupervisor,levelBand
0,ET4A446A1A5F6142AD,.NET Application Architect,.NET Application Architects,False,Professional


In [40]:
# Smoke test: generate LLM-disruption answers for a few randomly sampled titles.
list_titles_llm_risk = []

# Draw the sample once so the progress-bar total is derived from exactly what
# is iterated (previously the total came from a separate `head(2)` call).
# Capping at len(df_titles) keeps `sample` from raising on a very small frame.
N_SAMPLE_TITLES = 2
sampled_titles = df_titles.sample(min(N_SAMPLE_TITLES, len(df_titles)))

for index, row in tqdm(sampled_titles.iterrows(), total=len(sampled_titles)):
    TITLE = row['name']
    prompt = f"""As an entry-level {TITLE}, what are 10 areas of your work that you think will get disrupted by the widespread usage of LLMs or tools built on LLMs?"""
    llm_generation = llm(prompt)

    # store title and generation as a dict; strip surrounding whitespace so the
    # records match the format produced by the O*NET loops further down
    llm_risk_dict = {
        "title": TITLE,
        "generation": llm_generation.strip()
    }

    # append to list
    list_titles_llm_risk.append(llm_risk_dict)

  0%|          | 0/2 [00:00<?, ?it/s]Llama.generate: prefix-match hit



While I am not aware of any direct evidence of widespread usage of LLMs in the field of President/Artistic Director, here are 10 possible areas where LLMs could have a significant impact:
1. Casting Decisions: LLMs could analyze vast amounts of data on actor performance, ratings, and box office results to help Presidents/Artistic Directors make informed casting decisions.
2. Script Development: LLMs could assist in the development of screenplays by analyzing and generating ideas based on market trends, audience preferences, and successful precedents.
3. Set Design: LLMs could analyze design concepts and generate new ideas for set design based on previous successful productions and artistic preferences.
4. Marketing Strategies: LLMs could help identify and target specific demographics, analyize market trends, and predict the success of marketing campaigns.
5. Fundraising Efforts: LLMs could analyze giving patterns and predict potential donation amounts to assist in fundraising efforts.


llama_print_timings:        load time =  2115.84 ms
llama_print_timings:      sample time =   380.94 ms /   429 runs   (    0.89 ms per token,  1126.15 tokens per second)
llama_print_timings: prompt eval time =  8904.59 ms /    41 tokens (  217.19 ms per token,     4.60 tokens per second)
llama_print_timings:        eval time = 28873.21 ms /   428 runs   (   67.46 ms per token,    14.82 tokens per second)
llama_print_timings:       total time = 40347.63 ms
 50%|█████     | 1/2 [00:40<00:40, 40.36s/it]Llama.generate: prefix-match hit



As an entry-level Director of Medical Surgical Services, I foresee significant disruptions in my work with the widespread use of large language models (LLMs) or tools built on LLMs. Here are 10 areas that could be impacted:
1. Clinical Decision Support: With LLMs' ability to analyze vast amounts of medical literature, they could become a go-to resource for clinicians in need of quick and accurate decision-making support, potentially reducing the need for experienced clinicians to make certain judgments.
2. Medical Billing and Coding: LLMs can quickly process and accurately code medical records and generate billing information, streamlining administrative tasks and reducing errors.
3. Patient Communication: AI-powered tools could automate patient communication, such as appointment reminders, prescription refill requests, and post-procedure instructions, freeing up staff time for more critical tasks.
4. Supply Chain Management: With LLMs' ability to analyze trends and patterns in medica


llama_print_timings:        load time =  2115.84 ms
llama_print_timings:      sample time =   611.21 ms /   635 runs   (    0.96 ms per token,  1038.92 tokens per second)
llama_print_timings: prompt eval time =  4799.41 ms /    43 tokens (  111.61 ms per token,     8.96 tokens per second)
llama_print_timings:        eval time = 40711.69 ms /   634 runs   (   64.21 ms per token,    15.57 tokens per second)
llama_print_timings:       total time = 48173.42 ms
100%|██████████| 2/2 [01:28<00:00, 44.27s/it]


In [36]:
# NOTE: this cell's execution count (In [36]) is lower than the loop cell's
# (In [40]), so the output shown below comes from an earlier run of the loop,
# not from the loop cell as it appears above.
list_titles_llm_risk

[{'title': 0,
  'generation': '\nI can provide my perspective as an entry-level linguist. Here are ten potential areas of my work that could be disrupted by the widespread usage of large language models (LLMs) or tools built on LLMs:\n1. Language Translation: With the ability to generate human-like text, LLMs could potentially automate language translation tasks with high accuracy, making human translators redundant.\n2. Content Creation: LLMs can generate content in various formats, including articles, blog posts, and social media updates. This could disrupt the work of content creators, especially those who rely on formulaic writing.\n3. Copy Editing and Proofreading: AI-powered tools built on LLMs could automate copy editing and proofreading tasks with high accuracy, potentially making human editors redundant.\n4. Language Localization: With the ability to generate translated text in various languages, LLMs could disrupt the language localization industry by providing high-quality t

## Run on all O*NET titles

In [13]:
# Load the O*NET occupation table (SOC code, title, description) from Excel.
df_onet_titles = pd.read_excel("../data/db_27_3_excel/Occupation Data.xlsx")

In [14]:
df_onet_titles.head(5)

Unnamed: 0,O*NET-SOC Code,Title,Description
0,11-1011.00,Chief Executives,Determine and formulate policies and provide o...
1,11-1011.03,Chief Sustainability Officers,"Communicate and coordinate with management, sh..."
2,11-1021.00,General and Operations Managers,"Plan, direct, or coordinate the operations of ..."
3,11-1031.00,Legislators,"Develop, introduce, or enact laws and statutes..."
4,11-2011.00,Advertising and Promotions Managers,"Plan, direct, or coordinate advertising polici..."


In [49]:
df_onet_titles.shape

(1016, 3)

In [49]:
# Quiet model instance for the long batch runs: same settings as the
# temperature=1 `llm`, but with verbose=False and no callback manager attached.
llm_no_verbose = LlamaCpp(
    # Model file and context/batch sizes
    model_path=f"/Users/lawrence.wu/Documents/github/llama.cpp/{MODEL_LLAMA2_13B}",
    n_ctx=2048,
    n_batch=2048,  # Should be between 1 and n_ctx, consider the amount of RAM of your Apple Silicon Chip.
    # Generation settings
    temperature=1,
    top_p=1,
    repeat_penalty=1.1,
    max_tokens=2048,
    n=-1,  # NOTE(review): meaning is not evident from this notebook - confirm against the LlamaCpp docs.
    # Hardware settings
    n_threads=8,
    n_gpu_layers=n_gpu_layers,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    # Logging: `callback_manager` is deliberately not passed here.
    verbose=False,
)

llama.cpp: loading model from /Users/lawrence.wu/Documents/github/llama.cpp/llama-2-13b-chat.ggmlv3.q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 5120
llama_model_load_internal: n_mult     = 256
llama_model_load_internal: n_head     = 40
llama_model_load_internal: n_head_kv  = 40
llama_model_load_internal: n_layer    = 40
llama_model_load_internal: n_rot      = 128
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 13824
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 13B
llama_model_load_internal: ggml ctx size =    0.11 MB
llama_model_load_internal: mem required  = 7349.72 MB (+ 1600.00 MB per state)
llama_new_context_with

In [62]:
# Collect one {"title", "generation"} record per O*NET occupation, in table order.
list_onet_titles_llm_risk = []

onet_rows_with_progress = tqdm(df_onet_titles.iterrows(), total=df_onet_titles.shape[0])
for _, onet_row in onet_rows_with_progress:
    TITLE = onet_row['Title']
    prompt = f"""As a {TITLE}, what are 10 areas of your work that you think will get disrupted by the widespread usage of LLMs or tools built on LLMs?"""

    # Generate with the quiet model and keep the whitespace-stripped text.
    list_onet_titles_llm_risk.append(
        {
            "title": TITLE,
            "generation": llm_no_verbose(prompt).strip(),
        }
    )


 72%|███████▏  | 731/1016 [10:02:49<3:55:01, 49.48s/it]


KeyboardInterrupt: 

In [71]:
print(list_onet_titles_llm_risk[100]['generation'])

As a Financial Risk Specialist, I have identified the following 10 areas where the widespread usage of large language models (LLMs) or tools built on LLMs could potentially disrupt my work:
1. Credit risk assessment: LLMs could improve credit risk assessment by analyzing vast amounts of financial data to identify patterns and predict future credit events. This could lead to more accurate and efficient credit risk assessments, potentially reducing the need for human analysts.
2. Fraud detection: LLMs could be trained on historical fraud data to identify patterns and anomalies that indicate potential fraudulent activity. This could help financial institutions detect fraud earlier and with greater accuracy, reducing financial losses.
3. Compliance monitoring: LLMs could be used to monitor compliance with financial regulations, such as Anti-Money Laundering (AML) and Know Your Customer (KYC) requirements. This could help identify potential regulatory violations more effectively and efficie

In [74]:
len(list_onet_titles_llm_risk)

731

In [65]:
import pickle 
import json
# Alternative (disabled): persist the raw Python list with pickle.

# with open('../data/list_onet_titles_llm_risk.pkl', 'wb') as f:
#     pickle.dump(list_onet_titles_llm_risk, f)

# Serialize the list of {"title", "generation"} dicts into a pretty-printed
# JSON string (4-space indent). `out` is a str, not a Python list.
out = json.dumps(list_onet_titles_llm_risk, indent=4)

In [67]:
# write json out to disk
# `out` is already a JSON document (built with json.dumps(..., indent=4)), so
# it is written verbatim. Passing it through json.dump again would encode it a
# second time and store one escaped string instead of the indented list.
with open('../data/list_onet_titles_llm_risk.json', 'w', encoding='utf-8') as outfile:
    outfile.write(out)

In [72]:
# part 2
# Look up this title's row index in the O*NET table to decide where to resume
# (presumably the last title generated before the interrupt - confirm).
df_onet_titles.query("Title == 'Insulation Workers, Mechanical'")

Unnamed: 0,O*NET-SOC Code,Title,Description
730,47-2132.00,"Insulation Workers, Mechanical",Apply insulating materials to pipes or ductwor...


In [76]:
df_onet_titles[731:]

Unnamed: 0,O*NET-SOC Code,Title,Description
731,47-2141.00,"Painters, Construction and Maintenance","Paint walls, equipment, buildings, bridges, an..."
732,47-2142.00,Paperhangers,Cover interior walls or ceilings of rooms with...
733,47-2151.00,Pipelayers,"Lay pipe for storm or sanitation sewers, drain..."
734,47-2152.00,"Plumbers, Pipefitters, and Steamfitters","Assemble, install, alter, and repair pipelines..."
735,47-2152.04,Solar Thermal Installers and Technicians,Install or repair solar energy systems designe...
...,...,...,...
1011,55-3014.00,Artillery and Missile Crew Members,"Target, fire, and maintain weapons used to des..."
1012,55-3015.00,Command and Control Center Specialists,"Operate and monitor communications, detection,..."
1013,55-3016.00,Infantry,Operate weapons and equipment in ground combat...
1014,55-3018.00,Special Forces,"Implement unconventional operations by air, la..."


In [77]:
# Second pass: generate the records for the O*NET rows not covered by the
# interrupted first pass.
list_onet_titles_llm_risk_p2 = []

# Resume right after the rows already collected in `list_onet_titles_llm_risk`
# instead of hard-coding the offset (731 for the interrupted run shown above).
resume_at = len(list_onet_titles_llm_risk)
remaining_onet_titles = df_onet_titles.iloc[resume_at:]

for index, row in tqdm(remaining_onet_titles.iterrows(), total=len(remaining_onet_titles)):
    TITLE = row['Title']
    prompt = f"""As a {TITLE}, what are 10 areas of your work that you think will get disrupted by the widespread usage of LLMs or tools built on LLMs?"""
    llm_generation = llm_no_verbose(prompt)

    # store title and generation as a dict
    llm_risk_dict = {
        "title": TITLE,
        "generation": llm_generation.strip()
    }

    # append to list
    list_onet_titles_llm_risk_p2.append(llm_risk_dict)


100%|██████████| 285/285 [3:50:48<00:00, 48.59s/it]  


In [78]:
out_p2 = json.dumps(list_onet_titles_llm_risk_p2, indent=4)

In [79]:
# write json out to disk
# `out_p2` is already a JSON document (built with json.dumps(..., indent=4)),
# so it is written verbatim. Passing it through json.dump again would encode it
# a second time and store one escaped string instead of the indented list.
with open('../data/list_onet_titles_llm_risk_p2.json', 'w', encoding='utf-8') as outfile:
    outfile.write(out_p2)

In [82]:
# Combine the two in-memory result lists (first pass + second pass) into one
# list; the length check below should equal the number of O*NET rows.
list_onet_titles_llm_risk_all = list_onet_titles_llm_risk + list_onet_titles_llm_risk_p2
len(list_onet_titles_llm_risk_all)

1016

In [83]:
out_all = json.dumps(list_onet_titles_llm_risk_all, indent=4)

In [84]:
# write json out to disk
with open('../data/list_onet_titles_llm_risk_all.json', 'w') as outfile:
    json.dump(out_all, outfile)

In [1]:
import numpy as np
import json

In [2]:
# read in json file as a list of dictionaries
with open('../data/list_onet_titles_llm_risk_all.json', 'r') as f:
    data = json.load(f)

In [3]:
list_onet_titles_llm_risk_all = json.loads(data)
# list_onet_titles_llm_risk_all[0]


In [15]:
df_onet_titles.head(1)

Unnamed: 0,O*NET-SOC Code,Title,Description
0,11-1011.00,Chief Executives,Determine and formulate policies and provide o...


In [18]:
df_onet_titles[df_onet_titles['Title'].str.contains("Data")]

Unnamed: 0,O*NET-SOC Code,Title,Description
117,15-1242.00,Database Administrators,"Administer, test, and implement computer datab..."
118,15-1243.00,Database Architects,"Design strategies for enterprise databases, da..."
119,15-1243.01,Data Warehousing Specialists,"Design, model, or implement corporate data war..."
142,15-2051.00,Data Scientists,Develop and implement a set of techniques or a...
144,15-2051.02,Clinical Data Managers,Apply knowledge of health care and database ma...
685,43-9021.00,Data Entry Keyers,"Operate data entry device, such as keyboard or..."


In [19]:
df_onet_titles.query("Title == 'Data Scientists'")

Unnamed: 0,O*NET-SOC Code,Title,Description
142,15-2051.00,Data Scientists,Develop and implement a set of techniques or a...


In [35]:
# index = np.random.choice(142)
# 142 is the row index of "Data Scientists" in df_onet_titles; the results list
# was built in table order, so the same position holds that title's record.
index=142
print(f"Index: {index}")
print(f"Title: {list_onet_titles_llm_risk_all[index]['title']}")
# The "Description" label prints the model's generation for this title.
print(f"Description: {list_onet_titles_llm_risk_all[index]['generation']}")

Index: 142
Title: Data Scientists
Description: Below are ten potential areas of my work as a data scientist that could be impacted by the widespread adoption of large language models (LLMs) or tools built upon them:
1. Data preparation and cleaning: With LLMs' ability to process and generate text, they could assist in data preparation and cleaning tasks such as sentiment analysis, text classification, entity extraction, etc. Could potentially reduce the time spent on these tasks by human data scientists.
2. Feature engineering: LLMs can be used to generate new features or transform existing ones, which could lead to more accurate models and faster development cycles.
3. Model interpretation: LLMs can be used to generate insights and explanations for model predictions, which could help better understand how the models work and aid in decision-making.
4. Data visualization: LLMs can be used to generate high-quality visualizations of data, automating the process and potentially leading to

In [48]:
# Pick a random position in the results list and print that record.
index = np.random.choice(len(list_onet_titles_llm_risk_all))
# index=142
print(f"Index: {index}")
print(f"Title: {list_onet_titles_llm_risk_all[index]['title']}")
# The "Description" label prints the model's generation for this title.
print(f"Description: {list_onet_titles_llm_risk_all[index]['generation']}")

Index: 717
Title: Floor Sanders and Finishers
Description: * 1. Adhesion: With the ability to predict and simulate material behavior, the need for manual testing and quality control may be reduced or eliminated.
  * 2. Sanding and Finishing: AI-powered sanders and finishers could improve the efficiency and effectiveness of these processes by optimizing the amount of material removed and the type of finish used.
  * 3. Tool Life: Optimized toolpaths and tool life predictions can extend the lifespan of sanding and finishing tools, reducing waste and increasing productivity.
  * 4. Material Characterization: LLMs could help identify optimal sanding and finishing techniques for specific materials based on their properties and behaviors.
  * 5. Process Optimization: AI-powered systems can analyze data from past projects to optimize future processes, reducing the need for manual trial and error.
  * 6. Quality Control: LLMs can assist in predicting and preventing defects, reducing the need f

# LLaMA2 doesn't quite work with output parsing

In [22]:
# Define your desired data structure.
# Two-field schema handed to PydanticOutputParser below. It is documented with
# comments rather than a class docstring so the model's generated schema, which
# feeds the parser's format instructions, stays unchanged.
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    # You can add custom validation logic easily with Pydantic.
    @validator("setup")
    def question_ends_with_question_mark(cls, field):
        """Reject a `setup` that does not end with a question mark.

        Returns the value unchanged when valid; raises ValueError otherwise.
        """
        # endswith() also handles the empty string, where indexing field[-1]
        # would raise IndexError instead of the intended ValueError.
        if not field.endswith("?"):
            raise ValueError("Badly formed question!")
        return field


# And a query intended to prompt a language model to populate the data structure.
joke_query = "Tell me a joke."

# Set up a parser + inject instructions into the prompt template.
parser = PydanticOutputParser(pydantic_object=Joke)

# Rebinds `prompt` again: the parser's format instructions are filled in up
# front as a partial variable, leaving only `query` to supply at format time.
prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

_input = prompt.format_prompt(query=joke_query)

# Send the rendered prompt text to whichever `llm` is currently bound.
output = llm(_input.to_string())

# In the recorded run this raises OutputParserException because the completion
# was plain prose rather than the requested JSON object.
parser.parse(output)

Llama.generate: prefix-match hit


Please respond with your setup, and I will provide the punchline for the joke


llama_print_timings:        load time =  2115.84 ms
llama_print_timings:      sample time =    16.87 ms /    19 runs   (    0.89 ms per token,  1126.33 tokens per second)
llama_print_timings: prompt eval time = 18890.75 ms /   219 tokens (   86.26 ms per token,    11.59 tokens per second)
llama_print_timings:        eval time =  7297.68 ms /    18 runs   (  405.43 ms per token,     2.47 tokens per second)
llama_print_timings:       total time = 26322.54 ms


OutputParserException: Failed to parse Joke from completion Please respond with your setup, and I will provide the punchline for the joke. Got: Expecting value: line 1 column 1 (char 0)