# OpenAI

In [1]:
import openai
import os
import tiktoken
import pandas as pd
from tqdm import tqdm
openai.api_key = os.getenv('OPENAI_API_KEY')

In [3]:
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0): 
    messages = [{"role": "user", "content": prompt}]
    # count = num_tokens_from_string(prompt)
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature, 
    )
    # print(response)
    return response.choices[0].message["content"], response.usage.total_tokens

def scammer_check(input_text):
    
    # prompt = f"""
    #     I want you to act like sms scammer detector. I will give you a sentence. You must answer whether that sentence is scam or not. You must only answer the label: "1" for scam or "0". No explain. the text is:
    #     "{input_text}""."
    # """
    prompt = f"""
    Act as SMS scam detector. 1 for scam, 0 otherwise. Text:"{input_text}"."
    """
    
    response, count = get_completion(prompt)
    return response, count

def num_tokens_from_string(string: str) -> int:
    """Returns the number of tokens in a text string."""
    encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
    num_tokens = len(encoding.encode(string))
    return num_tokens

In [16]:
scammer_check("Fair enough, anything going on?")

('0', 38)

In [9]:
scammer_check("XXXMobileMovieClub: To use your credit, click the WAP link in the next txt message or click here>> http://wap. xxxmobilemovieclub.com?n=QJKGIGHJJGCBL")

('1', 74)

In [15]:


df = pd.read_csv("data/sms_spam.csv",encoding = "ISO-8859-1")
df.head

<bound method NDFrame.head of         v1                                                 v2 Unnamed: 2  \
0      ham  Go until jurong point, crazy.. Available only ...        NaN   
1      ham                      Ok lar... Joking wif u oni...        NaN   
2     spam  Free entry in 2 a wkly comp to win FA Cup fina...        NaN   
3      ham  U dun say so early hor... U c already then say...        NaN   
4      ham  Nah I don't think he goes to usf, he lives aro...        NaN   
...    ...                                                ...        ...   
5567  spam  This is the 2nd time we have tried 2 contact u...        NaN   
5568   ham              Will Ì_ b going to esplanade fr home?        NaN   
5569   ham  Pity, * was in mood for that. So...any other s...        NaN   
5570   ham  The guy did some bitching but I acted like i'd...        NaN   
5571   ham                         Rofl. Its true to its name        NaN   

     Unnamed: 3 Unnamed: 4  
0           NaN        NaN  

In [20]:

res = pd.DataFrame(columns=['text', 'truth', 'predict', 'tokens'])
for index, row in df[1:10].iterrows():
    label = row['v1']
    text = row['v2']
    print(label, text)
    # predict, tokens = scammer_check(text)
    # res = res.append({'text':text, 'truth':label, 'predict':predict, 'tokens':tokens}, ignore_index=True)
    # res = pd.concat([res, pd.DataFrame([[text, label, predict, tokens]], columns=['text', 'truth', 'predict', 'tokens'])],axis=0)
    


ham Ok lar... Joking wif u oni...
spam Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's
ham U dun say so early hor... U c already then say...
ham Nah I don't think he goes to usf, he lives around here though
spam FreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you up for it still? Tb ok! XxX std chgs to send, å£1.50 to rcv
ham Even my brother is not like to speak with me. They treat me like aids patent.
ham As per your request 'Melle Melle (Oru Minnaminunginte Nurungu Vettam)' has been set as your callertune for all Callers. Press *9 to copy your friends Callertune
spam WINNER!! As a valued network customer you have been selected to receivea å£900 prize reward! To claim call 09061701461. Claim code KL341. Valid 12 hours only.
spam Had your mobile 11 months or more? U R entitled to Update to the latest colour mobiles with camera for Free! Call T

In [6]:
res.head

<bound method NDFrame.head of                                                 text truth predict tokens
0  Go until jurong point, crazy.. Available only ...   ham       0     58
0                      Ok lar... Joking wif u oni...   ham       0     42
0  Free entry in 2 a wkly comp to win FA Cup fina...  spam       1     81
0  U dun say so early hor... U c already then say...   ham       0     44>

## Normalize sms_spam_res

In [9]:
res_file = 'data/sms_spam_res.txt'
res_df = pd.DataFrame(columns=['ID', 'truth', 'predict', 'tokens'])
with open(res_file, 'r') as f:
    res = f.readlines()
    res = [x.strip() for x in res]

for i in range(len(res)):
    ID = i
    token = res[i].split(',')[-1]
    try:
        predict = res[i].split(',')[-2]
        if '0' in predict:
            predict = '0'
        else:
            predict = '1'
    except:
        print(res[i])
    truth = res[i].split(',')[-3]
    res_df = pd.concat([res_df, pd.DataFrame([[ID, truth, predict, token]], columns=['ID', 'truth', 'predict', 'tokens'])],axis=0)

res_df.to_csv('data/openai_sms_spam_norm.csv', index=False)


## Result

In [17]:
res_df = pd.read_csv("data/test.csv")
res_df.head

<bound method NDFrame.head of      text truth predict  tokens
0     NaN   ham       0      58
1     NaN   ham       0      42
2     NaN  spam       1      81
3     NaN   ham       0      44
4     NaN   ham       0      48
...   ...   ...     ...     ...
5567  NaN  spam       1      81
5568  NaN   ham       0      44
5569  NaN   ham       0      47
5570  NaN   ham       0      59
5571  NaN   ham       0      40

[5572 rows x 4 columns]>

In [22]:
res_df.loc[(res_df['truth'] == 'ham') & (res_df['predict']=='1')]

Unnamed: 0,text,truth,predict,tokens
6,,ham,1,48
22,,ham,1,47
41,,ham,1,70
55,,ham,1,54
66,,ham,1,61
...,...,...,...,...
5459,,ham,1,66
5474,,ham,1,64
5496,,ham,1,47
5503,,ham,1,48


In [23]:
res_df.loc[(res_df['truth'] == 'ham') & (res_df['predict']=='1 (scam)')]

Unnamed: 0,text,truth,predict,tokens
17,,ham,1 (scam),58
44,,ham,1 (scam),59
46,,ham,1 (scam),45
89,,ham,1 (scam),48
124,,ham,1 (scam),49
...,...,...,...,...
5416,,ham,1 (scam),62
5440,,ham,1 (scam),46
5476,,ham,1 (scam),52
5522,,ham,1 (scam),45


In [25]:
res_df.loc[(res_df['truth'] == 'spam') & (res_df['predict']=='0')]

Unnamed: 0,text,truth,predict,tokens
19,,spam,0,87
54,,spam,0,62
68,,spam,0,52
190,,spam,0,50
269,,spam,0,59
...,...,...,...,...
5449,,spam,0,45
5456,,spam,0,61
5467,,spam,0,68
5487,,spam,0,74


# Bard API

In [7]:
from bardapi import Bard

token = 'WwgVaiTy3rvuiJXunITntxgBdvu8YfQvhLIh3DvoGa7ZCQdGVXntSU8_pDx3EJ4B3GVXOg.'
bard = Bard(token=token)
bard.get_answer("나와 내 동년배들이 좋아하는 뉴진스에 대해서 알려줘")['content']



"뉴진스는 2022년 7월 데뷔한 한국의 걸그룹입니다. 민지, 하니, 다니엘, 해린, 혜인으로 구성되어 있습니다. 뉴진스는 데뷔와 동시에 큰 인기를 얻었고, 현재 4세대 걸그룹 중 가장 주목받는 그룹 중 하나입니다.\n\n뉴진스의 인기 요인은 여러 가지가 있습니다. 첫째, 뉴진스는 멤버 전원이 10대 소녀로 구성되어 있습니다. 이 때문에 뉴진스는 10대 팬들의 공감을 얻고 있습니다. 둘째, 뉴진스의 음악은 밝고 경쾌하며, 10대들의 감성을 자극합니다. 셋째, 뉴진스의 패션은 트렌디하고 세련되어 10대들의 롤 모델로 자리 잡고 있습니다.\n\n뉴진스는 데뷔 이후 '어텐션', 'Ditto', 'OMG', 'Hype boy' 등의 곡으로 큰 사랑을 받았습니다. 또한, 뉴진스는 '더 쇼', '뮤직뱅크', '인기가요' 등의 음악 방송에서 1위를 차지하며, 그 인기를 입증했습니다.\n\n뉴진스는 앞으로도 10대들의 대표 아이콘으로 자리 잡을 것으로 기대됩니다."

In [13]:
def scam_bard_checker(input_text):
    
    prompt = f"""
    I want you to act like sms scammer detector. I will give you a sentence. You must answer whether that sentence is scam or not. You must only answer the label: "1" for scam or "0". No explain. the text is:
        "{input_text}""."
    """
    
    return bard.get_answer(prompt)['content'][0]

In [15]:
scam_bard_checker("Thanks for your subscription to Ringtone UK your mobile will be charged å£5/month Please confirm by replying YES or NO. If you reply NO you will not be charged")

'1'