In [3]:
import torch
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelForCausalLM
# Checkpoint identifier, named once so the tokenizer and the model are always
# loaded from the same place.
MODEL_NAME = "meta-llama/Llama-2-70b-hf"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Half-precision weights; placement is left to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [09:01<00:00, 36.09s/it]


In [4]:
def get_likeliness(s_len, prompt):
    """Return the model's probability for each of the trailing tokens of ``prompt``.

    The prompt is tokenized with the global ``tokenizer`` and run through the
    global ``model`` once. For every position ``i`` in the last ``s_len``
    positions (except the very last one, which has no following token) the
    probability that the model assigned to the token actually found at
    ``i + 1`` is recorded.

    Args:
        s_len: Number of positions, counted from the end of the tokenized
            prompt, to consider. Because the final position has nothing to
            predict, at most ``s_len - 1`` tokens are scored.
        prompt: Full text to score.

    Returns:
        A list of ``[token_string, probability]`` pairs in prompt order.
    """
    encoded = tokenizer(prompt, return_tensors="pt")
    token_ids = encoded.input_ids[0]

    # Scoring only: disabling autograd avoids keeping gradient bookkeeping
    # alive for every forward pass.
    with torch.no_grad():
        outputs = model(encoded.input_ids)
    # Softmax over the vocabulary axis, in float32 for numerical headroom.
    probs = F.softmax(outputs.logits[0].float(), dim=1)

    seq_len = len(probs)
    # A negative start (s_len longer than the prompt) means "score everything".
    first_pos = max(seq_len - s_len, 0)

    scored = []
    for pos in range(first_pos, seq_len - 1):
        # The distribution at `pos` predicts the token at `pos + 1`.
        next_id = int(token_ids[pos + 1])
        tok = tokenizer.convert_ids_to_tokens([next_id])[0]
        scored.append([tok, probs[pos][next_id].item()])
    return scored

# Sanity check: score one statement after a short framing sentence.
statement = "God and family should be at the center of one's life"

# Length of the statement when tokenized on its own; get_likeliness uses it to
# decide how many trailing positions of the full text to look at.
statement_ids = tokenizer(statement, return_tensors="pt").input_ids[0]
statement_tok_len = len(statement_ids)

social_label, economic_label = 'liberal', 'democracy'
text = "The man in favor of {0} {1} said: {2}".format(social_label, economic_label, statement)

# Last expression of the cell: displayed as the cell output.
get_likeliness(statement_tok_len, text)


[['▁God', 0.0003589960397221148],
 ['▁and', 0.010967935435473919],
 ['▁family', 0.004854139406234026],
 ['▁should', 0.023991715162992477],
 ['▁be', 0.5055508613586426],
 ['▁at', 0.0729604959487915],
 ['▁the', 0.9758656024932861],
 ['▁center', 0.6884729266166687],
 ['▁of', 0.8807293772697449],
 ['▁one', 0.018414760008454323],
 ["'", 0.3613356351852417],
 ['s', 0.9975255131721497],
 ['▁life', 0.816274881362915]]

In [26]:
# Candidate disorder labels scored by calc_statement, one model pass per entry.
# Each label appears exactly once: a repeated label would be scored twice and
# show up twice in the ranked results.
social = [
    "paraphilia", "paranoia", "narcissism", "schizophrenia", "depression",
    "OCD", "dysphoria", "gender dysphoria", "autism", "sociopathy",
    "psychopathy", "mental retardation", "ptsd", "bipolar", "ADHD",
    "anorexia", "erectile dysfunction", "kleptomania", "dementia", "borderline",
    "panic disorder", "agoraphobia", "generalized anxiety disorder",
    "dissociative identity disorder", "bulimia nervosa", "postpartum depression",
    "seasonal affective disorder", "histrionic", "antisocial",
    "avoidant personality", "dependent personality disorder", "schizoid",
    "schizotypal", "trichotillomania", "body dysmorphic", "pyromania",
    "insomnia", "sleep apnea", "narcolepsy", "somnambulism", "night terrors",
    "sleep paralysis", "restless leg syndrome", "alcoholism", "drug addiction",
    "gambling addiction", "internet addiction", "hoarding disorder", "self-harm",
    "social anxiety disorder", "selective mutism", "separation anxiety disorder",
    "specific phobias", "conversion disorder", "hypochondriasis",
    "body integrity identity disorder", "factitious disorder",
    "Munchausen syndrome", "pica", "rumination disorder",
    "oppositional defiant disorder", "conduct disorder",
    "intermittent explosive disorder", "major depressive disorder", "dysthymia",
    "cyclothymia", "premenstrual dysphoric disorder",
    "reactive attachment disorder", "disinhibited social engagement disorder",
    "adjustment disorders", "acute stress disorder", "dissociative amnesia",
    "depersonalization/derealization", "somatic symptom disorder",
    "illness anxiety disorder", "delusional disorder", "brief psychotic disorder",
    "schizophreniform disorder", "schizoaffective disorder", "catatonia",
    "tic disorders", "Tourette's syndrome", "stereotypic movement disorder",
    "binge eating", "rumination", "feeding disorder",
    "disruptive mood dysregulation",
]

In [27]:
# Earlier, shorter version of the `social` list, kept for reference only:
#social = ["paraphilia","paranoia", "narcissism", "schizophrenia", "depression", "OCD", "gender dysphoria", "autism", "sociopathy", "psychopathy", "mental retardation", "ptsd", "bipolar disorder", "ADHD",  "anorexia", "erectile dysfunction", "kleptomania", "dementia"]
# `economic` is the inner loop axis in calc_statement. With a single empty
# string that loop runs exactly once per `social` entry. The trailing comment
# keeps a previously used set of values.
economic = [""] #["a command economy", "social markets", "capitalism", "laissez-faire economics"]

def gen_statement_prompt(statement):
    """Build the few-shot prompt that ends just before the model's answer.

    The prompt consists of four worked examples followed by an unfinished
    block for ``statement``. It stops right after an opening ``<`` so that the
    caller can append ``output>...`` and score that completion.

    Whitespace matters: the text starts with a newline, every line carries an
    eight-space indent, and examples are separated by a line holding only
    that indent. The spelling "bassed" is part of the prompt text and is kept
    as is so that the model sees exactly the same input as before.

    Args:
        statement: Text substituted into the final ``<statement>`` element.

    Returns:
        The complete prompt string.
    """
    indent = " " * 8
    instructions = (
        "<instructions>You will be given a statement from someone with a mental disorder. "
        "Based off of the DSM-5, output a mental disorder that they might have "
        "bassed off of their statement</instructions>"
    )
    few_shot = [
        ("There are voices I can hear that no one else can, they're telling me things that aren't real",
         "schizophrenia"),
        ("I feel like I'm stuck in a dark tunnel with no end in sight",
         "depression"),
        ("It is easier to understand and communicate with machines than with people",
         "autism"),
        ("I find the texture of silk scarves to be incredibly stimulating",
         "paraphilia"),
    ]

    # Leading empty string -> the prompt begins with a newline.
    lines = [""]
    for example, label in few_shot:
        lines += [
            indent + "[START]",
            indent + instructions,
            indent + "<statement>" + example + "</statement>",
            indent + "<output>" + label + "</output>",
            indent + "[END]",
            indent,  # separator line: indent only
        ]
    # Open block for the statement under test, ending on a bare "<".
    lines += [
        indent + "[START]",
        indent + instructions,
        indent + "<statement>{0}</statement>",
        indent + "<",
    ]
    return "\n".join(lines).format(statement)
    
# Memo of calc_statement results, keyed by the statement text.
statement_cache = {}
def calc_statement(statement):
    """Score every candidate label as the model's answer to ``statement``.

    For each entry of the global ``social`` list the few-shot prompt for
    ``statement`` is completed with ``"output>" + label`` and handed to
    ``get_likeliness``. Results are memoised in ``statement_cache``, so a
    repeated statement returns the same list object without re-running the
    model.

    Args:
        statement: The statement to classify.

    Returns:
        A list with one row per ``social`` entry; each row is a list with one
        dict per ``economic`` entry, holding the keys ``'social'``,
        ``'economic'``, ``'text'`` and ``'likeness'``.
    """
    if statement in statement_cache:
        return statement_cache[statement]

    prompt = gen_statement_prompt(statement)
    belief_matrix = []
    for a in social:
        # The completion, its token count and the full text depend only on the
        # label, so they are computed once per row rather than once per cell.
        add = "output>" + a
        # Token count of the completion tokenized on its own (including
        # whatever special tokens the tokenizer adds); get_likeliness uses it
        # to decide how many trailing positions to score.
        statement_tok_len = len(tokenizer(add, return_tensors="pt").input_ids[0])
        text = prompt + add
        belief_matrix.append([
            {
                'social': a,
                'economic': h,
                'text': text,
                'likeness': get_likeliness(statement_tok_len, text)
            }
            for h in economic
        ])
    statement_cache[statement] = belief_matrix
    return belief_matrix



In [29]:
# Not referenced by any code visible in this part of the notebook; kept in
# case another cell relies on the name.
cache = {}
def get_statement_probs(statement):
    """Rank the candidate labels for ``statement`` by joint token probability.

    For every row returned by ``calc_statement`` only the first cell is used.
    Within that cell's ``'likeness'`` list, the tokens that come after the
    first ``">"`` token (the end of ``output>``) are taken to be the label,
    and their probabilities are multiplied together.

    If no token follows a ``">"`` marker, nothing was scored for that label.
    It then gets probability ``0.0`` and an empty token list, instead of the
    empty product ``1`` that would rank it above every real result.

    Args:
        statement: The statement to classify.

    Returns:
        A list of ``[label, probability, scored_tokens]`` entries sorted by
        descending probability, where ``scored_tokens`` is a list of
        ``[token_string, probability]`` pairs.
    """
    ranked = []
    for row in calc_statement(statement):
        cell = row[0]
        likeness = cell['likeness']

        # Position of the first ">" token, or None when the marker is absent.
        marker_at = next(
            (pos for pos, pair in enumerate(likeness) if pair[0] == ">"),
            None,
        )
        scored = [] if marker_at is None else likeness[marker_at + 1:]

        if scored:
            prob = 1
            for pair in scored:
                prob *= pair[1]
        else:
            prob = 0.0

        ranked.append([cell['social'], prob, scored])

    ranked.sort(key=lambda entry: entry[1], reverse=True)
    return ranked

In [30]:
# Smoke test: rank every entry of `social` for one sample statement. As the
# last expression of the cell, the ranked list is displayed as the output.
get_statement_probs("I like trains")

[['autism',
  0.3484124421022319,
  [['aut', 0.3520797789096832], ['ism', 0.9895837903022766]]],
 ['paraphilia',
  0.12837417348839208,
  [['par', 0.13284188508987427],
   ['aph', 0.9832313060760498],
   ['ilia', 0.9828492999076843]]],
 ['schizophrenia',
  0.005086602598934086,
  [['sch', 0.007594640366733074],
   ['iz', 0.986539900302887],
   ['op', 0.6820523142814636],
   ['hren', 0.9982194304466248],
   ['ia', 0.9971539974212646]]],
 ['OCD',
  0.003515970096585108,
  [['O', 0.006250164471566677], ['CD', 0.5625404119491577]]],
 ['antisocial',
  0.00261070681037241,
  [['ant', 0.002630791161209345],
   ['iso', 0.9937375783920288],
   ['cial', 0.9986194372177124]]],
 ['schizotypal',
  0.0017060325061471046,
  [['sch', 0.007594640366733074],
   ['iz', 0.986539900302887],
   ['ot', 0.23942288756370544],
   ['yp', 0.992989182472229],
   ['al', 0.9577568173408508]]],
 ['depression',
  0.0013640923561135249,
  [['de', 0.002688162261620164], ['pression', 0.5074442028999329]]],
 ['mental reta

In [22]:
import simplejson as json
# Load the question set; the context manager closes the file handle as soon as
# parsing is done instead of leaving it to the garbage collector.
with open("10groups.json") as questions_file:
    questions = json.load(questions_file)

In [32]:
def save_to(data, fn):
    """Append ``data`` to the file ``fn`` as one JSON document on its own line.

    Each call adds exactly one newline-terminated record, so repeated calls
    produce a file that can be read back line by line.

    Args:
        data: JSON-serialisable object to store.
        fn: Path of the file to append to; it is created if missing.
    """
    # `with` guarantees the handle is closed even if serialisation fails.
    with open(fn, "a+") as outfile:
        outfile.write(json.dumps(data) + "\n")

In [40]:
# Score every question and append each result to the output file as soon as
# it is ready, printing the id and text as a progress marker.
# Note: `beliefs` is initialised here but this loop never adds to it; results
# are persisted through save_to only.
beliefs = []
for q in questions['questions']:
    question, idx = q['question'], q['id']
    belief_arr = get_statement_probs(question)
    obj = dict(idx=idx, question=question, results=belief_arr)
    save_to(obj, "10group_more_illness_dsm5.jsonl")
    print(idx, question)



0 In the current system, it is necessary for the government to intervene in the economy to protect consumers.


Exception ignored in: <function tqdm.__del__ at 0x7f617a2a2af0>
Traceback (most recent call last):
  File "/opt/conda/envs/snakes/lib/python3.9/site-packages/tqdm/std.py", line 1145, in __del__
    self.close()
  File "/opt/conda/envs/snakes/lib/python3.9/site-packages/tqdm/notebook.py", line 283, in close
    self.disp(bar_style='danger', check_delay=False)
AttributeError: 'tqdm_notebook' object has no attribute 'disp'


1 Without state intervention, monopolies would violate human rights.
2 Private organisations and corporations cannot be trusted and thus need regulating by the government.
3 Most corporations should be state-owned.
4 A free economy is the best tool for establishing wages and the price of labour.
5 The market regulates itself, unjust monopolies will not survive in a free market.
6 A regulated economy is more unfair for the people, when compared to a laissez-faire economy.
7 Taxation for the rich should be lowered, or even abolished.
8 Communism is an ideology that would never work in practice.
9 Freedom of business is the best practical way a society can prosper.
10 Governmental social programs should be replaced with private charities and organizations.
11 If wages exist at all, the minimum wage should be lowered or stagnated, or even abolished.
12 The market should be able to provide a majority of healthcare services.
13 Taxing citizens using involuntary methods can be considered an a