### Clone Dataset and read it

In [1]:
!git clone https://github.com/spraakbanken/multiged-2023.git

fatal: destination path 'multiged-2023' already exists and is not an empty directory.


In [64]:
import pandas as pd

df=pd.read_csv('multiged-2023/english/en_fce_train.tsv',sep='\t', header=None)
df.columns = ['word', 'label']
df['label'] = df['label'].fillna('c')
df

Unnamed: 0,word,label
0,Dear,c
1,Sir,c
2,or,c
3,Madam,c
4,",",c
...,...,...
454725,darkness,c
454726,which,c
454727,was,c
454728,nearing,c


### Make traning set ready for lanuage models

In [65]:
import nltk

def find_sentence_firsts(word_list):
    sentence_first = [0]
    lookup = [1]
    sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
    for i in range(len(word_list)):
        lookup.append(len(sent_detector.tokenize(' '.join(word_list[sentence_first[-1]:i+1]))))
        if lookup[i+1] > lookup[i]:
            sentence_first.append(i)
    sentence_first.append(i)
    return sentence_first
sentence_firsts = find_sentence_firsts(df.word)

In [66]:
prompts = []
responces = []
for i in range(1, len(sentence_firsts)):
    prompts.append(" ".join(df.word[sentence_firsts[i-1]:sentence_firsts[i]]))
    responces.append(df[sentence_firsts[i-1]:sentence_firsts[i]].to_csv(sep='\t', index=False, header=False))

df_train = pd.DataFrame({'prompt': prompts, 'completion': responces})
df_train.to_csv('my_train_data.csv', index=False)

In [67]:
df_train

Unnamed: 0,prompt,completion
0,"Dear Sir or Madam , I am writing in order to e...","Dear\tc\nSir\tc\nor\tc\nMadam\tc\n,\tc\nI\tc\n..."
1,I saws the show 's advertisement hanging up of...,I\tc\nsaws\ti\nthe\tc\nshow\tc\n's\tc\nadverti...
2,I convinced them to go there with me because I...,I\tc\nconvinced\tc\nthem\tc\nto\tc\ngo\tc\nthe...
3,"The problems started in the box office , where...",The\tc\nproblems\tc\nstarted\tc\nin\ti\nthe\tc...
4,"Moreover , the show was delayed forty - five m...","Moreover\tc\n,\tc\nthe\tc\nshow\tc\nwas\tc\nde..."
...,...,...
25496,Immediately she recognized her beloved teacher...,Immediately\tc\nshe\tc\nrecognized\tc\nher\tc\...
25497,She was so angry and felt so betrayed by Mr Wh...,She\tc\nwas\tc\nso\tc\nangry\tc\nand\tc\nfelt\...
25498,The next day Mr White was charged for abuses a...,The\tc\nnext\tc\nday\tc\nMr\tc\nWhite\tc\nwas\...
25499,The trial was long and had a wide coverage by ...,The\tc\ntrial\tc\nwas\tc\nlong\tc\nand\tc\nhad...


### Using Few-shot Learning

In [200]:
import openai

fewshot_input = " ".join(df_train.prompt[0:10])
fewshot_output = " ".join(df_train.completion[0:10])

openai.api_key = "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"
output_batches = 10

df_output = pd.DataFrame()
i = 0
while output_batches > 0:
    i += 1
    data = {"word": [], "label_GPT3": [], "label_ChatGPT_prompt": [], "label_ChatGPT_chat": [], "label_Ground_truth": []}
    input_text = " ".join(df_train.prompt[i*10:i*10 + 10])
    prompt = f"""In following I want to find grammatical mistakes, where {{c}} means grammatically correct and {{i}} means grammatically incorrect.
    If input be ***{{{fewshot_input}}}*** the output is ***{{{fewshot_output}}}***
    If input be ***{{{input_text}}}*** the output is ***{{"""

    data["word"] = list(df[sentence_firsts[i*10]: sentence_firsts[i*10 + 10]].word)
    data["label_Ground_truth"] = list(df[sentence_firsts[i*10]: sentence_firsts[i*10 + 10]].label)

    #Using GPT3
    response_GPT3 = openai.Completion.create(
      model="text-davinci-003",
      prompt=prompt,
      temperature=0.5,
      max_tokens=1000,
      top_p=1.0,
      frequency_penalty=0.0,
      presence_penalty=0.0
    )
    output_GPT3 = response_GPT3.choices[0].text.split('}***')[0]
    lines = output_GPT3.split("\n")  # split text into lines

    # split each line into two columns and create a dictionary
    for line in lines:
        if line.strip():  # skip empty lines
            columns = line.split("\t")
            try:
                data["label_GPT3"].append(columns[1].strip())
            except IndexError:
                data["label_GPT3"].append("")
    if len(data["word"]) != len(data["label_GPT3"]):
        continue

    #Using ChatGPT
    response_ChatGPT_prompt = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": prompt}
        ]
    )
    output_ChatGPT_prompt = response_ChatGPT_prompt.choices[0].message.content.split('}***')[0]

    # split each line into two columns and create a dictionary
    lines = output_ChatGPT_prompt.split("\n")  # split text into lines
    for line in lines:
        if line.strip():  # skip empty lines
            columns = line.split("\t")
            try:
                data["label_ChatGPT_prompt"].append(columns[1].strip())
            except IndexError:
                data["label_ChatGPT_prompt"].append("")
    if len(data["word"]) != len(data["label_ChatGPT_prompt"]):
        continue

    response_ChatGPT_chat = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"In following I want to find grammatical mistakes in sentences. {{c}} means grammatically correct and {{i}} means grammatically incorrect. If input be ***{{{fewshot_input}}}*** the output is ***{{{fewshot_output}}}***"},
            {"role": "assistant", "content": "What is the sentences to label the grammar mistakes?"},
            {"role": "user", "content": f"Label grammatical mistakes of ***{{{fewshot_input}}}***"},
            {"role": "assistant", "content": fewshot_output},
            {"role": "user", "content": f"Label grammatical mistakes of ***{{{input_text}}}***"}
        ]
    )
    output_ChatGPT_chat = response_ChatGPT_chat.choices[0].message.content.split('}***')[0]
    lines = output_ChatGPT_chat.split("\n")  # split text into lines

    # split each line into two columns and create a dictionary
    for line in lines:
        if line.strip():  # skip empty lines
            columns = line.split("\t")
            try:
                data["label_ChatGPT_chat"].append(columns[1].strip())
            except IndexError:
                data["label_ChatGPT_chat"].append("")
    if len(data["word"]) != len(data["label_ChatGPT_chat"]):
        continue

    # convert dictionary to DataFrame
    df_dictionary = pd.DataFrame(data)
    df_output = pd.concat([df_output, df_dictionary], ignore_index=True)
    output_batches -= 1

Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600)

In [227]:
df_output

Unnamed: 0.1,Unnamed: 0,word,label_GPT3,label_ChatGPT_prompt,label_ChatGPT_chat,label_Ground_truth
0,0,10,c,c,c,c
1,1,June,c,c,c,c
2,2,2000,c,c,c,c
3,3,Dear,c,c,c,c
4,4,Manager,c,c,c,c
...,...,...,...,...,...,...
2117,2117,as,c,c,c,c
2118,2118,soon,c,c,c,c
2119,2119,as,c,c,c,c
2120,2120,possible,c,c,c,c


In [226]:
for i in range(len(df_output.label_GPT3)):
    if df_output.label_GPT3[i] not in {'c', 'i'}:
        df_output.label_GPT3[i] = 'c'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_output.label_GPT3[i] = 'c'


In [229]:
df_output.to_csv('output.csv')

In [295]:
df_output

Unnamed: 0.1,Unnamed: 0,word,label_GPT3,label_ChatGPT_prompt,label_ChatGPT_chat,label_Ground_truth
0,0,10,0,0,0,0
1,1,June,0,0,0,0
2,2,2000,0,0,0,0
3,3,Dear,0,0,0,0
4,4,Manager,0,0,0,0
...,...,...,...,...,...,...
2117,2117,as,0,0,0,0
2118,2118,soon,0,0,0,0
2119,2119,as,0,0,0,0
2120,2120,possible,0,0,0,0


In [297]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, fbeta_score
df_output = df_output.replace({'c':0, 'i':1})

# true labels
y_true = list(df_output.label_Ground_truth)

# Evaluate GPT3
y_pred = list(df_output.label_GPT3)
print("Evaluation of GPT3")
print(f"Accuracy: {accuracy_score(y_true, y_pred)}")
print(f"Precision: {precision_score(y_true, y_pred)}")
print(f"Recall: {recall_score(y_true, y_pred)}")
print(f"F0.5: {fbeta_score(y_true, y_pred, beta=0.5)}")
print("******")


y_pred = list(df_output.label_ChatGPT_prompt)
print("Evaluation of ChatGPT by prompt")
print(f"Accuracy: {accuracy_score(y_true, y_pred)}")
print(f"Precision: {precision_score(y_true, y_pred)}")
print(f"Recall: {recall_score(y_true, y_pred)}")
print(f"F0.5: {fbeta_score(y_true, y_pred, beta=0.5)}")
print("******")


y_pred = list(df_output.label_ChatGPT_chat)
print("Evaluation of ChatGPT by chat")
print(f"Accuracy: {accuracy_score(y_true, y_pred)}")
print(f"Precision: {precision_score(y_true, y_pred)}")
print(f"Recall: {recall_score(y_true, y_pred)}")
print(f"F0.5: {fbeta_score(y_true, y_pred,beta=0.5)}")

Evaluation of GPT3
Accuracy: 0.9293119698397738
Precision: 0.5789473684210527
Recall: 0.20754716981132076
F0.5: 0.42635658914728686
******
Evaluation of ChatGPT by prompt
Accuracy: 0.9335532516493874
Precision: 0.725
Recall: 0.18238993710691823
F0.5: 0.45454545454545453
******
Evaluation of ChatGPT by chat
Accuracy: 0.9274269557021678
Precision: 0.5409836065573771
Recall: 0.20754716981132076
F0.5: 0.4094292803970223


In [323]:
from sklearn.naive_bayes import BernoulliNB

clf = BernoulliNB()
clf.fit(df_output[['label_GPT3', 'label_ChatGPT_prompt', 'label_ChatGPT_chat']], df_output['label_Ground_truth'])

BernoulliNB()

In [324]:
y_pred = clf.predict(df_output[['label_GPT3', 'label_ChatGPT_prompt', 'label_ChatGPT_chat']])
y_true = df_output.label_Ground_truth
print("Evaluation of Naive Bayes")
print(f"Accuracy: {accuracy_score(y_true, y_pred)}")
print(f"Precision: {precision_score(y_true, y_pred)}")
print(f"Recall: {recall_score(y_true, y_pred)}")
print(f"F0.5: {fbeta_score(y_true, y_pred, beta=0.5)}")

Evaluation of Naive Bayes
Accuracy: 0.9363807728557965
Precision: 0.75
Recall: 0.22641509433962265
F0.5: 0.5128205128205129


In [316]:
# true labels
y_true = list(df_output.label_Ground_truth)[1500:]

# Evaluate GPT3
y_pred = list(df_output.label_GPT3[1500:])
print("Evaluation of GPT3")
print(f"Accuracy: {accuracy_score(y_true, y_pred)}")
print(f"Precision: {precision_score(y_true, y_pred)}")
print(f"Recall: {recall_score(y_true, y_pred)}")
print(f"F0.5: {fbeta_score(y_true, y_pred, beta=0.5)}")
print("******")


y_pred = list(df_output.label_ChatGPT_prompt)[1500:]
print("Evaluation of ChatGPT by prompt")
print(f"Accuracy: {accuracy_score(y_true, y_pred)}")
print(f"Precision: {precision_score(y_true, y_pred)}")
print(f"Recall: {recall_score(y_true, y_pred)}")
print(f"F0.5: {fbeta_score(y_true, y_pred, beta=0.5)}")
print("******")


y_pred = list(df_output.label_ChatGPT_chat)[1500:]
print("Evaluation of ChatGPT by chat")
print(f"Accuracy: {accuracy_score(y_true, y_pred)}")
print(f"Precision: {precision_score(y_true, y_pred)}")
print(f"Recall: {recall_score(y_true, y_pred)}")
print(f"F0.5: {fbeta_score(y_true, y_pred,beta=0.5)}")

Evaluation of GPT3
Accuracy: 0.9019292604501608
Precision: 0.6470588235294118
Recall: 0.16666666666666666
F0.5: 0.4104477611940298
******
Evaluation of ChatGPT by prompt
Accuracy: 0.9067524115755627
Precision: 0.7222222222222222
Recall: 0.19696969696969696
F0.5: 0.4710144927536232
******
Evaluation of ChatGPT by chat
Accuracy: 0.9067524115755627
Precision: 0.7
Recall: 0.21212121212121213
F0.5: 0.4794520547945206


In [317]:
import pickle
pickle.dump(clf, open('classifier.sav', 'wb'))

In [270]:
pickle.dump([fewshot_input, fewshot_output], open('fewshot_data.sav', 'wb'))

In [322]:
for i in range(len(y_pred)):
    if y_pred[i] == 1 and df_output.label_Ground_truth[i] == 0:
        print(i, df_output.word[i])

KeyError: 221

In [321]:
for i in range(len(y_pred)):
    if y_pred[i] == 0 and df_output.label_Ground_truth[i] == 1:
        print(i, df_output.word[i])

49 .
95 starting
106 waited
113 so
155 already
184 rage
198 designer
368 prohibit
370 to
371 go
374 discoes
416 Look
429 was
431 Circle
435 '
447 trully
448 dissapointed
458 've
490 to
491 help
500 were
520 until
528 make
539 big
548 throat
550 Other
556 is
560 in
567 dried
584 as
592 looks
618 enjoyed
619 when


In [299]:
for i in range(len(y_pred)):
    if y_pred[i] == 0 and df_output.label_Ground_truth[i] == 1:
        if df_output.label_GPT3[i] == 1:
            print(i, df_output.word[i])

368 prohibit
804 participated
1270 are
1285 such
1299 idea
1337 mean
1814 stay


In [302]:
clf.predict([[0,0,0],
             [1,0,0],
             [0,1,0],
             [0,0,1],
             [1,1,0],
             [1,0,1],
             [0,1,1],
             [1,1,1]])



array([0, 0, 1, 0, 1, 1, 1, 1])