In [1]:
import os
import json
import gc
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
from transformers import AutoTokenizer
from typing import Dict, List, Set, Tuple, NamedTuple, Callable
import scipy
import scml
from scml import pandasx as pdx
from daigt.preprocess import en as pen
tim = scml.Timer()
tim.start()
os.environ["TOKENIZERS_PARALLELISM"] = "false"
percentiles=[.01, .05, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95, .99]
pd.set_option("use_inf_as_na", True)
pd.set_option("max_info_columns", 9999)
pd.set_option("display.max_columns", 9999)
pd.set_option("display.max_rows", 9999)
pd.set_option('max_colwidth', 9999)
tqdm.pandas()
scml.seed_everything()
info = np.iinfo(np.int16)
print(f"int16, min={info.min}, max={info.max}")

int16, min=-32768, max=32767


In [2]:
min_len, max_len = 773, 32_767  # P01 cutoff

In [3]:
#tokenizer = AutoTokenizer.from_pretrained("huggingface/microsoft/deberta-v3-base", is_fast=True)
#print(f"{repr(tokenizer)}\nmodel_input_names={tokenizer.model_input_names}")

# Combine data sources

In [4]:
edf = pd.read_csv("input/train_essays.csv", low_memory=False)
edf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1378 entries, 0 to 1377
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   id         1378 non-null   object
 1   prompt_id  1378 non-null   int64 
 2   text       1378 non-null   object
 3   generated  1378 non-null   int64 
dtypes: int64(2), object(2)
memory usage: 43.2+ KB


In [5]:
pdf = pd.read_csv("input/train_prompts.csv", low_memory=False)
pdf = pdf.set_index("prompt_id")
pdf.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 0 to 1
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   prompt_name   2 non-null      object
 1   instructions  2 non-null      object
 2   source_text   2 non-null      object
dtypes: object(3)
memory usage: 64.0+ bytes


In [6]:
df = pd.read_csv("input/train_drcat_04.csv", low_memory=False)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44206 entries, 0 to 44205
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   essay_id  44206 non-null  object
 1   text      44206 non-null  object
 2   label     44206 non-null  int64 
 3   source    44206 non-null  object
 4   prompt    12911 non-null  object
 5   fold      44206 non-null  int64 
dtypes: int64(2), object(4)
memory usage: 2.0+ MB


In [7]:
cols = ["essay_id", "text", "label", "source", "prompt"]
df = df[cols]
df = df.rename(columns={"essay_id": "id", "label": "generated"})
df["prompt"] = df["prompt"].fillna("na")

In [8]:
def get_prompt(row)->str:
    p=pdf.loc[row["prompt_id"]]
    name=p["prompt_name"]
    inst=p["instructions"]
    return f"{name} {inst}"
    

edf["prompt"] = edf.progress_apply(get_prompt, axis=1)
edf["source"] = "train_essays"
df = pd.concat([edf, df], ignore_index=True)
cols = ["id", "generated", "source", "prompt", "text"]
df = df[cols].copy()
cols = ["generated"]
df[cols] = df[cols].astype(np.int8)
df.info()

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1378/1378 [00:00<00:00, 79436.92it/s]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45584 entries, 0 to 45583
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   id         45584 non-null  object
 1   generated  45584 non-null  int8  
 2   source     45584 non-null  object
 3   prompt     45584 non-null  object
 4   text       45584 non-null  object
dtypes: int8(1), object(4)
memory usage: 1.4+ MB





In [9]:
more = len(df)
df = df.drop_duplicates(["text"], ignore_index=True)
print(f"{more - len(df)} rows dropped: duplicates")

1429 rows dropped: duplicates


# Preprocess Text

In [10]:
bsc = pen.BasicPreprocessor()
bow = pen.BowPreprocessor()

def preprocess_bsc(col) -> Callable:
    def fn(row) -> str:
        return bsc(row[col])
    
    return fn

def preprocess_bow(col) -> Callable:
    def fn(row) -> str:
        return bow(row[col], drop_stopword=False)
    
    return fn

In [11]:
cols = ["text", "prompt"]
for col in cols:
    print(col)
    new_col = f"{col}_bsc" 
    df[new_col] = df.progress_apply(preprocess_bsc(col), axis=1)
    new_col = f"{col}_bow" 
    df[new_col] = df.progress_apply(preprocess_bow(col), axis=1)
    col1, col2 = new_col, f"{col}_bow_len"
    df[col2] = df[col1].str.len()
    df[col2] = df[col2].astype(np.int16)
    #x = tokenizer(df[new_col].tolist(), truncation=False, add_special_tokens=False)
    #len_col = f"{new_col}_len" 
    #df[len_col] = [len(s) for s in x["input_ids"]]
    #df[len_col] = df[len_col].astype(np.int16) 

text


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 44155/44155 [00:11<00:00, 3793.55it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 44155/44155 [18:21<00:00, 40.10it/s]


prompt


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 44155/44155 [00:01<00:00, 25673.15it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 44155/44155 [00:56<00:00, 781.58it/s]


# Filter Invalid Data

In [12]:
more = len(df)
df = df.drop_duplicates(["text_bow"], ignore_index=True)
print(f"{more - len(df)} rows dropped: duplicates")

221 rows dropped: duplicates


In [13]:
df.describe(percentiles=percentiles)

Unnamed: 0,generated,text_bow_len,prompt_bow_len
count,43934.0,43934.0,43934.0
mean,0.328038,2166.630423,103.456275
std,0.469504,964.377584,169.065939
min,0.0,234.0,2.0
1%,0.0,782.33,2.0
5%,0.0,966.0,2.0
10%,0.0,1127.0,2.0
20%,0.0,1393.0,2.0
30%,0.0,1609.0,2.0
40%,0.0,1806.0,2.0


In [14]:
more = len(df)
df = df[(df["text_bow_len"]>=min_len) & (df["text_bow_len"]<=max_len)].copy()
print(f"{more - len(df)} rows dropped: text length must be between {min_len} and {max_len}")

405 rows dropped: duplicates


# Review Data

In [15]:
df = df.reset_index(drop=True) 
df = df.drop(columns=["id"])
df["essay_id"] = df.index
df["essay_id"] = df["essay_id"].astype(np.int32)
cols = list(df.columns)
cols = [cols[-1]] + cols[:-1]
df = df[cols]
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43529 entries, 0 to 43528
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   essay_id        43529 non-null  int32 
 1   generated       43529 non-null  int8  
 2   source          43529 non-null  object
 3   prompt          43529 non-null  object
 4   text            43529 non-null  object
 5   text_bsc        43529 non-null  object
 6   text_bow        43529 non-null  object
 7   text_bow_len    43529 non-null  int16 
 8   prompt_bsc      43529 non-null  object
 9   prompt_bow      43529 non-null  object
 10  prompt_bow_len  43529 non-null  int16 
dtypes: int16(2), int32(1), int8(1), object(7)
memory usage: 2.7+ MB


In [16]:
df.describe(percentiles=percentiles)

Unnamed: 0,essay_id,generated,text_bow_len,prompt_bow_len
count,43529.0,43529.0,43529.0,43529.0
mean,21764.0,0.323853,2180.673321,102.230743
std,12565.884271,0.46795,957.700396,168.787156
min,0.0,0.0,773.0,2.0
1%,435.28,0.0,841.0,2.0
5%,2176.4,0.0,996.0,2.0
10%,4352.8,0.0,1152.0,2.0
20%,8705.6,0.0,1409.0,2.0
30%,13058.4,0.0,1622.0,2.0
40%,17411.2,0.0,1816.0,2.0


In [17]:
cols=["generated"]
pdx.value_counts(df[cols])

Unnamed: 0_level_0,count,percent
generated,Unnamed: 1_level_1,Unnamed: 2_level_1
0,29432,0.676147
1,14097,0.323853


In [18]:
cols=["prompt_bow"]
pdx.value_counts(df[cols]).head(50)

Unnamed: 0_level_0,count,percent
prompt_bow,Unnamed: 1_level_1,Unnamed: 2_level_1
na,29713,0.682602
car free cities write an explanatory essay to inform fellow citizens about the advantages of limiting car usage your essay must be based on ideas and information that can be found in the passage set manage your time carefully so that you can read the passages plan your response write your response and revise and edit your response be sure to use evidence from multiple sources and avoid overly relying on one source your response should be in the form of a multiparagraph essay write your essay in the space provided,708,0.016265
does the electoral college work write a letter to your state senator in which you argue in favor of keeping the electoral college or changing to election by popular vote for the president of the united states use the information from the texts in your essay manage your time carefully so that you can read the passages plan your response write your response and revise and edit your response be sure to include a claim address counterclaims use evidence from multiple sources and avoid overly relying on one source your response should be in the form of a multiparagraph essay write your response in the space provided,670,0.015392
when people ask for advice they sometimes talk to more than one person explain why seeking multiple opinions can help someone make a better choice use specific details and examples in your response,144,0.003308
some schools offer distance learning as an option for students to attend classes from home by way of online or video conferencing do you think students would benefit from being able to attend classes from home take a position on this issue support your response with reasons and examples,143,0.003285
your principal has decided that all students must participate in at least one extracurricular activity for example students could participate in sports work on the yearbook or serve on the student council do you agree or disagree with this decision use specific details and examples to convince others to support your position,142,0.003262
in the article making mona lisa smile the author describes how a new technology called the facial action coding system enables computers to identify human emotions using details from the article write an essay arguing whether the use of this technology to read the emotional expressions of students in a classroom is valuable,139,0.003193
you have read the article unmasking the face on mars imagine you are a scientist at nasa discussing the face with someone who thinks it was created by aliens using information in the article write an argumentative essay to convince someone that the face is just a natural landform be sure to include claims to support your argument that the face is a natural landform evidence from the article to support your claims an introduction a body and a conclusion to your argumentative essay,135,0.003101
today the majority of humans own and operate cell phones on a daily basis in essay form explain if drivers should or should not be able to use cell phones in any capacity while operating a vehicle,134,0.003078
you have just read the article a cowboy who rode the waves luke s participation in the seagoing cowboys program allowed him to experience adventures and visit many unique places using information from the article write an argument from luke s point of view convincing others to participate in the seagoing cowboys program be sure to include reasons to join the program details from the article to support luke s claims an introduction a body and a conclusion to your essay,132,0.003032


In [19]:
cols=["source"]
pdx.value_counts(df[cols])

Unnamed: 0_level_0,count,percent
source,Unnamed: 1_level_1,Unnamed: 2_level_1
persuade_corpus,25793,0.592547
mistral7binstruct_v1,2420,0.055595
llama2_chat,2418,0.055549
mistral7binstruct_v2,2409,0.055342
original_moth,2264,0.052011
chat_gpt_moth,2124,0.048795
train_essays,1378,0.031657
llama_70b_v1,1172,0.026925
falcon_180b_v1,1051,0.024145
darragh_claude_v6,1000,0.022973


In [20]:
df.groupby(["source", "generated"])["essay_id"].count()

source                generated
chat_gpt_moth         1             2124
darragh_claude_v6     1             1000
darragh_claude_v7     1             1000
falcon_180b_v1        1             1051
llama2_chat           1             2418
llama_70b_v1          1             1172
mistral7binstruct_v1  1             2420
mistral7binstruct_v2  1             2409
original_moth         0             2264
persuade_corpus       0            25793
radek_500             1              500
train_essays          0             1375
                      1                3
Name: essay_id, dtype: int64

In [21]:
df.sample(5).head()

Unnamed: 0,essay_id,generated,source,prompt,text,text_bsc,text_bow,text_bow_len,prompt_bsc,prompt_bow,prompt_bow_len
18893,18893,0,persuade_corpus,na,"Dear State Senator,\n\nIn this letter I would like to explame the importants of Electoral College. This'll explain what it is,its defenses to the attacks on getting rid of it,and finally why it was created in the first place.\n\nThe Electoral College was a process created by the four fathers in the Constitution to established the election of the ""President by a vote in Congress and election of the President by a popular vote of qualified citizens.""-\n\nSource 1: What Is the Electrol College,paragraph 1.\n\nThe Electoral College consists of 538 electors. Majority electorcal votes for President is 270. ""Congressional delegtion: On for each member in the House of Representatives plus two for your Senators.""-\n\nParagrah 2,What Is the Electoral College.\n\nYes this involves in the 23rd Amendment of the Contitution that Washington D.C or District or Columbia is allocated 3 electors and is treated like a state in the Electoral College. The electors for a candidate running for President has a group of electors generally by his/her's political party.\n\nMany believe there is something wrong with the Eledtoral College. Even some like ""Richard Nixon,Jimmy Carter, Bob Dole, the U.S Chamber of Commerce, And the AFL-CIO""-\n\nSource 2: The Indefensible Electoral College: Why even the best-laid defenses of the system are wrong.\n\nThey all have in common is that they want to abolish the Electoral College. However the party will get to ""select a slate of electors trusted to vote for the party's nominee.""-\n\nSource 3 paragraph 2.\n\nHowever rarely do you actually see it's possible for the winner to get eletoral vote but not popular vote. in 2000, when Gore had more popular votes then Bush. But the agurement falls apart since it happens rarely. It was the frist time since 1888 that's about 112 years that this has happened. It's even highly unlikely it will end in a tie-as 538 is an even number it is divied to the states.\n\nIt was started by the founding fathers in order to have a informed government. It is rumored that they believed that the common person the person who votes for popularity is to uninformed to choose who will be President. So in the Constitution they put it as a ""pompromise between election of the President by a vote in Congress by popular vote of qulified citizens.","Dear State Senator, In this letter I would like to explame the importants of Electoral College. This'll explain what it is,its defenses to the attacks on getting rid of it,and finally why it was created in the first place. The Electoral College was a process created by the four fathers in the Constitution to established the election of the ""President by a vote in Congress and election of the President by a popular vote of qualified citizens.""- Source 1: What Is the Electrol College,paragraph 1. The Electoral College consists of 538 electors. Majority electorcal votes for President is 270. ""Congressional delegtion: On for each member in the House of Representatives plus two for your Senators.""- Paragrah 2,What Is the Electoral College. Yes this involves in the 23rd Amendment of the Contitution that Washington D.C or District or Columbia is allocated 3 electors and is treated like a state in the Electoral College. The electors for a candidate running for President has a group of electors generally by his/her's political party. Many believe there is something wrong with the Eledtoral College. Even some like ""Richard Nixon,Jimmy Carter, Bob Dole, the U.S Chamber of Commerce, And the AFL-CIO""- Source 2: The Indefensible Electoral College: Why even the best-laid defenses of the system are wrong. They all have in common is that they want to abolish the Electoral College. However the party will get to ""select a slate of electors trusted to vote for the party's nominee.""- Source 3 paragraph 2. However rarely do you actually see it's possible for the winner to get eletoral vote but not popular vote. in 2000, when Gore had more popular votes then Bush. But the agurement falls apart since it happens rarely. It was the frist time since 1888 that's about 112 years that this has happened. It's even highly unlikely it will end in a tie-as 538 is an even number it is divied to the states. It was started by the founding fathers in order to have a informed government. It is rumored that they believed that the common person the person who votes for popularity is to uninformed to choose who will be President. So in the Constitution they put it as a ""pompromise between election of the President by a vote in Congress by popular vote of qulified citizens.",dear state senator in this letter i would like to explame the importants of electoral college this will explain what it is its defenses to the attacks on getting rid of it and finally why it was created in the first place the electoral college was a process created by the four fathers in the constitution to established the election of the president by a vote in congress and election of the president by a popular vote of qualified citizens source 1 what is the electrol college paragraph 1 the electoral college consists of 538 electors majority electorcal votes for president is 270 congressional delegtion on for each member in the house of representatives plus two for your senators paragrah 2 what is the electoral college yes this involves in the 23rd amendment of the contitution that washington d c or district or columbia is allocated 3 electors and is treated like a state in the electoral college the electors for a candidate running for president have a group of electors generally by his her s political party many believe there is something wrong with the eledtoral college even some like richard nixon jimmy carter bob dole the u s chamber of commerce and the afl cio source 2 the indefensible electoral college why even the best laid defenses of the system are wrong they all have in common is that they want to abolish the electoral college however the party will get to select a slate of electors trusted to vote for the party s nominee source 3 paragraph 2 however rarely do you actually see it is possible for the winner to get eletoral vote but not popular vote in 2000 when gore have more popular votes then bush but the agurement falls apart since it happens rarely it was the frist time since 1888 that is about 112 years that this have happened it is even highly unlikely it will end in a tie as 538 is an even number it is divied to the states it was started by the founding fathers in order to have a informed government it is rumored that they believed that the common person the person who votes for popularity is to uninformed to choose who will be president so in the constitution they put it as a pompromise between election of the president by a vote in congress by popular vote of qulified citizens,2234,na,na,2
12302,12302,1,falcon_180b_v1,"The use of standardized testing in schools has been a topic of debate for many years. Write an essay stating your position on whether standardized tests are an accurate measure of student ability, using specific reasons and examples to support your argument.","""Standardized Test: An Accurate Measure of Student Ability?""\n\nStandardized testing in schools is a controversial subject. Supporters argue that standardized tests provide an accurate and objective measure of student ability. However, others question whether these tests truly measure a student's potential and instead argue that there are better and more effective ways to evaluate students. In this essay, I will discuss why standardized tests are not always accurate measures of student ability.\n\nFirstly, standardized tests often do not account for the different learning styles of each student. Some students may excel in multiple-choice format while others might struggle with them due to the fact that they do not have an opportunity to express their thoughts and ideas. For example, a student who may excel at presenting information orally may struggle when asked to write an exam. This does not mean that they are any less knowledgeable or capable than other students; it simply means that standardized tests do not always allow students to exhibit their best work and knowledge.\n\nSecondly, standardized tests often do not account for the cultural differences of each student. These exams are usually designed with a specific demographic in mind, often ignoring the diversity of our classrooms. For example, a standardized exam may use language or phrases that a student whose first language is not English may not be familiar with, thereby putting them at a disadvantage. This is especially true of students from minority backgrounds, who may not have the same educational opportunities as others.\n\nLastly, standardized tests often do not take into account the unique challenges that each student may be facing. There may be students who are dealing with personal struggles like illness or family problems that affect their academic performance. In cases like these, standardized tests may not accurately reflect a student's actual level of knowledge or skill.\n\nTherefore, it can be concluded that standardized testing is not always an accurate measure of student ability. While it is a convenient way to measure a broad range of knowledge quickly and efficiently, it does have several limitations and shortcomings that need to be taken into account. Instead, it may be more beneficial to use a variety of different assessment methods, including classroom-based assessments and performance assignments, to gain a more comprehensive understanding of a student's abilities and skills. This way, we can be sure that each student is receiving a fair and equal opportunity to demonstrate their knowledge, regardless of their cultural background, learning style or personal challenges they may be facing.","""Standardized Test: An Accurate Measure of Student Ability?"" Standardized testing in schools is a controversial subject. Supporters argue that standardized tests provide an accurate and objective measure of student ability. However, others question whether these tests truly measure a student's potential and instead argue that there are better and more effective ways to evaluate students. In this essay, I will discuss why standardized tests are not always accurate measures of student ability. Firstly, standardized tests often do not account for the different learning styles of each student. Some students may excel in multiple-choice format while others might struggle with them due to the fact that they do not have an opportunity to express their thoughts and ideas. For example, a student who may excel at presenting information orally may struggle when asked to write an exam. This does not mean that they are any less knowledgeable or capable than other students; it simply means that standardized tests do not always allow students to exhibit their best work and knowledge. Secondly, standardized tests often do not account for the cultural differences of each student. These exams are usually designed with a specific demographic in mind, often ignoring the diversity of our classrooms. For example, a standardized exam may use language or phrases that a student whose first language is not English may not be familiar with, thereby putting them at a disadvantage. This is especially true of students from minority backgrounds, who may not have the same educational opportunities as others. Lastly, standardized tests often do not take into account the unique challenges that each student may be facing. There may be students who are dealing with personal struggles like illness or family problems that affect their academic performance. In cases like these, standardized tests may not accurately reflect a student's actual level of knowledge or skill. Therefore, it can be concluded that standardized testing is not always an accurate measure of student ability. While it is a convenient way to measure a broad range of knowledge quickly and efficiently, it does have several limitations and shortcomings that need to be taken into account. Instead, it may be more beneficial to use a variety of different assessment methods, including classroom-based assessments and performance assignments, to gain a more comprehensive understanding of a student's abilities and skills. This way, we can be sure that each student is receiving a fair and equal opportunity to demonstrate their knowledge, regardless of their cultural background, learning style or personal challenges they may be facing.",standardized test an accurate measure of student ability standardized testing in schools is a controversial subject supporters argue that standardized tests provide an accurate and objective measure of student ability however others question whether these tests truly measure a student s potential and instead argue that there are better and more effective ways to evaluate students in this essay i will discuss why standardized tests are not always accurate measures of student ability firstly standardized tests often do not account for the different learning styles of each student some students may excel in multiple choice format while others might struggle with them due to the fact that they do not have an opportunity to express their thoughts and ideas for example a student who may excel at presenting information orally may struggle when asked to write an exam this do not mean that they are any less knowledgeable or capable than other students it simply means that standardized tests do not always allow students to exhibit their best work and knowledge secondly standardized tests often do not account for the cultural differences of each student these exams are usually designed with a specific demographic in mind often ignoring the diversity of our classrooms for example a standardized exam may use language or phrases that a student whose first language is not english may not be familiar with thereby putting them at a disadvantage this is especially true of students from minority backgrounds who may not have the same educational opportunities as others lastly standardized tests often do not take into account the unique challenges that each student may be facing there may be students who are dealing with personal struggles like illness or family problems that affect their academic performance in cases like these standardized tests may not accurately reflect a student s actual level of knowledge or skill therefore it can be concluded that standardized testing is not always an accurate measure of student ability while it is a convenient way to measure a broad range of knowledge quickly and efficiently it do have several limitations and shortcomings that need to be taken into account instead it may be more beneficial to use a variety of different assessment methods including classroom based assessments and performance assignments to gain a more comprehensive understanding of a student s abilities and skills this way we can be sure that each student is receiving a fair and equal opportunity to demonstrate their knowledge regardless of their cultural background learning style or personal challenges they may be facing,2655,"The use of standardized testing in schools has been a topic of debate for many years. Write an essay stating your position on whether standardized tests are an accurate measure of student ability, using specific reasons and examples to support your argument.",the use of standardized testing in schools have been a topic of debate for many years write an essay stating your position on whether standardized tests are an accurate measure of student ability using specific reasons and examples to support your argument,256
2242,2242,0,persuade_corpus,na,"The article ""Making Mona Lisa Smile"" focuses on the complex software that can analyze a face and even read emotions. The author claims this could be very useful in teaching, for being able to acount for the student's emotions could very well help the computer adapt and change the lesson more efficiently. While this could be true, there are a lot of questions that should be asked before this technology can be implemented, such as it's effectiveness and privacy.\n\nThe sixth paragraph says how the computer can know if you're happy or sad, and gives an example ""If you smile when a Web ad appears on your screen, a similar ad may follow. But if you frown, the next ad will be different."" This seems to be very helpful, but what if said person was smiling about something else? what if the person remembers something funny that happened the other day and suddenly they notice they keep getting uninteresting ads that pop up? There would be no way for the computer to tell if it was really the ad that made the person smile. Another issue is how it is used in class. Paragraph six states ""A classroom computer could recognize if a student is becoming confused or bored. Then it could modify the lesson, like an effective human instructor."" How would the computer tell what degree a pupil is confused? If a student looks a little confused, the computer may read this and respond as if the student was very confused, thus wasting the student's time with more help than they need. One more way to look at it is the privacy violations the software may bring. What if a student is opposed to a teacher being able to technologically and truthfully read the students emotions? does the computer take a log of the faces of the students? There is too many suspicious questions in regard to the morality and safety of the use of such software.\n\nThe software definitely shows promise. It seems to show how far we have come to understanding the relation to facial expressions and emotion. That being said, there is still too much to account for before this new technology can be integrated into classrooms, such as efficiency and morality.","The article ""Making Mona Lisa Smile"" focuses on the complex software that can analyze a face and even read emotions. The author claims this could be very useful in teaching, for being able to acount for the student's emotions could very well help the computer adapt and change the lesson more efficiently. While this could be true, there are a lot of questions that should be asked before this technology can be implemented, such as it's effectiveness and privacy. The sixth paragraph says how the computer can know if you're happy or sad, and gives an example ""If you smile when a Web ad appears on your screen, a similar ad may follow. But if you frown, the next ad will be different."" This seems to be very helpful, but what if said person was smiling about something else? what if the person remembers something funny that happened the other day and suddenly they notice they keep getting uninteresting ads that pop up? There would be no way for the computer to tell if it was really the ad that made the person smile. Another issue is how it is used in class. Paragraph six states ""A classroom computer could recognize if a student is becoming confused or bored. Then it could modify the lesson, like an effective human instructor."" How would the computer tell what degree a pupil is confused? If a student looks a little confused, the computer may read this and respond as if the student was very confused, thus wasting the student's time with more help than they need. One more way to look at it is the privacy violations the software may bring. What if a student is opposed to a teacher being able to technologically and truthfully read the students emotions? does the computer take a log of the faces of the students? There is too many suspicious questions in regard to the morality and safety of the use of such software. The software definitely shows promise. It seems to show how far we have come to understanding the relation to facial expressions and emotion. That being said, there is still too much to account for before this new technology can be integrated into classrooms, such as efficiency and morality.",the article making mona lisa smile focuses on the complex software that can analyze a face and even read emotions the author claims this could be very useful in teaching for being able to acount for the student s emotions could very well help the computer adapt and change the lesson more efficiently while this could be true there are a lot of questions that should be asked before this technology can be implemented such as it is effectiveness and privacy the sixth paragraph says how the computer can know if you are happy or sad and gives an example if you smile when a web ad appears on your screen a similar ad may follow but if you frown the next ad will be different this seems to be very helpful but what if said person was smiling about something else what if the person remembers something funny that happened the other day and suddenly they notice they keep getting uninteresting ads that pop up there would be no way for the computer to tell if it was really the ad that made the person smile another issue is how it is used in class paragraph six states a classroom computer could recognize if a student is becoming confused or bored then it could modify the lesson like an effective human instructor how would the computer tell what degree a pupil is confused if a student looks a little confused the computer may read this and respond as if the student was very confused thus wasting the student s time with more help than they need one more way to look at it is the privacy violations the software may bring what if a student is opposed to a teacher being able to technologically and truthfully read the students emotions do the computer take a log of the faces of the students there is too many suspicious questions in regard to the morality and safety of the use of such software the software definitely shows promise it seems to show how far we have come to understanding the relation to facial expressions and emotion that being said there is still too much to account for before this new technology can be integrated into classrooms such as efficiency and morality,2086,na,na,2
32044,32044,0,persuade_corpus,na,"Many school systems now offer distance learning, an option for students to learn at home through online methods.\n\nStudents are able to video conference with teachers, watch prerecorded lectures, and receive and submit assignments on sites like Google Classroom and Blackboard. The question that should be asked is if students actually benefit from online classes. Some students with long term illnesses and specific learning styles would learn better, but overall most students should choose to avoid online school because of fewer social interactions with peers, more learning distractions at home, and additional costs for students' families and schools.\n\nStudents will have less social interaction which may put them behind when it comes to going to college and entering the workforce. If students who chose to attend an online school throughout high school decide to go to a traditional college they may be at a disadvantage. Depending on what they major in, it could be very similar to their experience with online school. Other students would be faced with new challenges like labs and group projects. If a students has never done group work before they may struggle with communication and teamwork aspect that it takes to complete difficult group projects. It is possible that these students may struggle during their first year or two at college purely because they have trouble interacting with their peers. Online students may also struggle when entering the workforce. They may lack certain social skills necessary to shine during interviews. It is wise to send student to traditional school so they can absorb some of the culture that is needed to get far in life.\n\nStudents need to be able to get close to their peers through shared experiences at traditional schools in order to really excel in life.\n\nStudents learn better in school environments because they are faced with less distractions. Students who switch to online school may see a drop in grades because they are faced with new distractions like family members, pets, and poor learning spaces. If a student stays at home all day they may have to interact with stay-at-home parents and family pets who could be very distracting. Students who feel that they can not tell a pesky parent to stop bothering them will end up juggling their schoolwork, family life, and walking the dog. In traditional schools, students learn in classrooms everyday. When students attend online school they could do their work anywhere. Locations such as bedrooms, living rooms and next to the television are not good environments for learning. Students should find a spot which is only used for their schoolwork. Students at traditional schools will be free from these distractions and poor learning environments which may improve their grades.\n\nOnline schools could end up costing families and schools systems more. If families have to make sure their student has a working laptop computer everyday then they will spend more money on care and maintenance. Some families might not be able to spend extra money on a new device. Kids could be stuck with old devices that malfunction and do not have updated software. Old computers may not be able to keep up with all the programs students are expected to use. School systems will also have to hire staff so they can operate online schools. New teachers and technological aides would cause taxes to be increased. Families may not be able to keep up with this change. Online schools may seem like a budget friendly option because of lower transportation costs, but for some families it might be catastrophic.\n\nOnline school could benefit some groups of students like those with long term illnesses and some certain types of learning styles. Students that can not come to a traditional school because of illness but still have enough time and energy to continue learning should look into online schools. That way student who may have to miss months at a time can still keep up with their classmates. Another good use of online school would be for students with different learning styles. Some kids can not sit through long block scheduled classes and would thrive if they could split up the work in a different way. These students could have more freedom with the way they wanted to work and might be more interested in learning. Online school could contribute to certain groups of students continuing to grow intellectually and good learning habits, but overall would not work for the majority of students.\n\nOnline school currently has many problems, but if those problems can be fixed, online schools would be a great resource depending on what the future holds. Online schools may be more popular in the future, but for the present traditional schools are more effective for the growth of students. In conclusion online schools are better for some students, but right now most students should stay at a traditional school because of more social interactions, fewer distractions, and fewer costs for schools and families.","Many school systems now offer distance learning, an option for students to learn at home through online methods. Students are able to video conference with teachers, watch prerecorded lectures, and receive and submit assignments on sites like Google Classroom and Blackboard. The question that should be asked is if students actually benefit from online classes. Some students with long term illnesses and specific learning styles would learn better, but overall most students should choose to avoid online school because of fewer social interactions with peers, more learning distractions at home, and additional costs for students' families and schools. Students will have less social interaction which may put them behind when it comes to going to college and entering the workforce. If students who chose to attend an online school throughout high school decide to go to a traditional college they may be at a disadvantage. Depending on what they major in, it could be very similar to their experience with online school. Other students would be faced with new challenges like labs and group projects. If a students has never done group work before they may struggle with communication and teamwork aspect that it takes to complete difficult group projects. It is possible that these students may struggle during their first year or two at college purely because they have trouble interacting with their peers. Online students may also struggle when entering the workforce. They may lack certain social skills necessary to shine during interviews. It is wise to send student to traditional school so they can absorb some of the culture that is needed to get far in life. Students need to be able to get close to their peers through shared experiences at traditional schools in order to really excel in life. Students learn better in school environments because they are faced with less distractions. Students who switch to online school may see a drop in grades because they are faced with new distractions like family members, pets, and poor learning spaces. If a student stays at home all day they may have to interact with stay-at-home parents and family pets who could be very distracting. Students who feel that they can not tell a pesky parent to stop bothering them will end up juggling their schoolwork, family life, and walking the dog. In traditional schools, students learn in classrooms everyday. When students attend online school they could do their work anywhere. Locations such as bedrooms, living rooms and next to the television are not good environments for learning. Students should find a spot which is only used for their schoolwork. Students at traditional schools will be free from these distractions and poor learning environments which may improve their grades. Online schools could end up costing families and schools systems more. If families have to make sure their student has a working laptop computer everyday then they will spend more money on care and maintenance. Some families might not be able to spend extra money on a new device. Kids could be stuck with old devices that malfunction and do not have updated software. Old computers may not be able to keep up with all the programs students are expected to use. School systems will also have to hire staff so they can operate online schools. New teachers and technological aides would cause taxes to be increased. Families may not be able to keep up with this change. Online schools may seem like a budget friendly option because of lower transportation costs, but for some families it might be catastrophic. Online school could benefit some groups of students like those with long term illnesses and some certain types of learning styles. Students that can not come to a traditional school because of illness but still have enough time and energy to continue learning should look into online schools. That way student who may have to miss months at a time can still keep up with their classmates. Another good use of online school would be for students with different learning styles. Some kids can not sit through long block scheduled classes and would thrive if they could split up the work in a different way. These students could have more freedom with the way they wanted to work and might be more interested in learning. Online school could contribute to certain groups of students continuing to grow intellectually and good learning habits, but overall would not work for the majority of students. Online school currently has many problems, but if those problems can be fixed, online schools would be a great resource depending on what the future holds. Online schools may be more popular in the future, but for the present traditional schools are more effective for the growth of students. In conclusion online schools are better for some students, but right now most students should stay at a traditional school because of more social interactions, fewer distractions, and fewer costs for schools and families.",many school systems now offer distance learning an option for students to learn at home through online methods students are able to video conference with teachers watch prerecorded lectures and receive and submit assignments on sites like google classroom and blackboard the question that should be asked is if students actually benefit from online classes some students with long term illnesses and specific learning styles would learn better but overall most students should choose to avoid online school because of fewer social interactions with peers more learning distractions at home and additional costs for students families and schools students will have less social interaction which may put them behind when it comes to going to college and entering the workforce if students who chose to attend an online school throughout high school decide to go to a traditional college they may be at a disadvantage depending on what they major in it could be very similar to their experience with online school other students would be faced with new challenges like labs and group projects if a students have never done group work before they may struggle with communication and teamwork aspect that it takes to complete difficult group projects it is possible that these students may struggle during their first year or two at college purely because they have trouble interacting with their peers online students may also struggle when entering the workforce they may lack certain social skills necessary to shine during interviews it is wise to send student to traditional school so they can absorb some of the culture that is needed to get far in life students need to be able to get close to their peers through shared experiences at traditional schools in order to really excel in life students learn better in school environments because they are faced with less distractions students who switch to online school may see a drop in grades because they are faced with new distractions like family members pets and poor learning spaces if a student stays at home all day they may have to interact with stay at home parents and family pets who could be very distracting students who feel that they can not tell a pesky parent to stop bothering them will end up juggling their schoolwork family life and walking the dog in traditional schools students learn in classrooms everyday when students attend online school they could do their work anywhere locations such as bedrooms living rooms and next to the television are not good environments for learning students should find a spot which is only used for their schoolwork students at traditional schools will be free from these distractions and poor learning environments which may improve their grades online schools could end up costing families and schools systems more if families have to make sure their student have a working laptop computer everyday then they will spend more money on care and maintenance some families might not be able to spend extra money on a new device kids could be stuck with old devices that malfunction and do not have updated software old computers may not be able to keep up with all the programs students are expected to use school systems will also have to hire staff so they can operate online schools new teachers and technological aides would cause taxes to be increased families may not be able to keep up with this change online schools may seem like a budget friendly option because of lower transportation costs but for some families it might be catastrophic online school could benefit some groups of students like those with long term illnesses and some certain types of learning styles students that can not come to a traditional school because of illness but still have enough time and energy to continue learning should look into online schools that way student who may have to miss months at a time can still keep up with their classmates another good use of online school would be for students with different learning styles some kids can not sit through long block scheduled classes and would thrive if they could split up the work in a different way these students could have more freedom with the way they wanted to work and might be more interested in learning online school could contribute to certain groups of students continuing to grow intellectually and good learning habits but overall would not work for the majority of students online school currently have many problems but if those problems can be fixed online schools would be a great resource depending on what the future holds online schools may be more popular in the future but for the present traditional schools are more effective for the growth of students in conclusion online schools are better for some students but right now most students should stay at a traditional school because of more social interactions fewer distractions and fewer costs for schools and families,4951,na,na,2
39574,39574,1,llama2_chat,"Task: Evaluate the potential pros and cons of three year high school graduation programs for high school students and analyze how this option would affect academics, extracurriculars, and student life.","Hey, I'm just a regular 8th grader, but I'm gonna give it my best shot! Here's my essay on the potential pros and cons of three year high school graduation programs for high school students.\n\nSo, the thing is, some people think that high school should only last three years instead of the usual four. And I can see why. I mean, think about it, four years is a long time, right? And some kids might get bored or lose interest in school after a while. But then again, there are some pretty big cons to consider too.\n\nOne pro is that three year high school graduation programs would give students more time to focus on their passions and interests. Like, imagine being able to take more advanced classes in a specific subject, or even start a business or club. It would be pretty cool to have more time to explore your interests and make them a bigger part of your high school experience.\n\nAnother pro is that it could help students who are struggling in school. If high school only lasted three years, students might have more time to catch up on their work or get extra help from teachers. And it could also help students who are ahead of the game, because they could take more advanced classes and get a head start on college or their future careers.\n\nBut there are also some pretty big cons to consider. One of them is that it could be really hard to fit in all the necessary classes and credits in just three years. Like, you'd have to take a lot of classes at once, and it might be hard to balance schoolwork with extracurriculars and other stuff. And if you don't get the right schedule, you might end up missing out on important classes or activities.\n\nAnother con is that it could be really tough on students who are not as academically inclined. If high school only lasts three years, they might not have as much time to catch up or get the help they need. And it could also be hard on students who are really into their extracurriculars, because they might not have as much time to participate in sports, clubs, or other activities.\n\nSo, overall, I think three year high school graduation programs could be a pretty interesting idea, but there are definitely some pros and cons to consider. It","Hey, I'm just a regular 8th grader, but I'm gonna give it my best shot! Here's my essay on the potential pros and cons of three year high school graduation programs for high school students. So, the thing is, some people think that high school should only last three years instead of the usual four. And I can see why. I mean, think about it, four years is a long time, right? And some kids might get bored or lose interest in school after a while. But then again, there are some pretty big cons to consider too. One pro is that three year high school graduation programs would give students more time to focus on their passions and interests. Like, imagine being able to take more advanced classes in a specific subject, or even start a business or club. It would be pretty cool to have more time to explore your interests and make them a bigger part of your high school experience. Another pro is that it could help students who are struggling in school. If high school only lasted three years, students might have more time to catch up on their work or get extra help from teachers. And it could also help students who are ahead of the game, because they could take more advanced classes and get a head start on college or their future careers. But there are also some pretty big cons to consider. One of them is that it could be really hard to fit in all the necessary classes and credits in just three years. Like, you'd have to take a lot of classes at once, and it might be hard to balance schoolwork with extracurriculars and other stuff. And if you don't get the right schedule, you might end up missing out on important classes or activities. Another con is that it could be really tough on students who are not as academically inclined. If high school only lasts three years, they might not have as much time to catch up or get the help they need. And it could also be hard on students who are really into their extracurriculars, because they might not have as much time to participate in sports, clubs, or other activities. So, overall, I think three year high school graduation programs could be a pretty interesting idea, but there are definitely some pros and cons to consider. It",hey i am just a regular 8th grader but i am going to give it my best shot here is my essay on the potential pros and cons of three year high school graduation programs for high school students so the thing is some people think that high school should only last three years instead of the usual four and i can see why i mean think about it four years is a long time right and some kids might get bored or lose interest in school after a while but then again there are some pretty big cons to consider too one pro is that three year high school graduation programs would give students more time to focus on their passions and interests like imagine being able to take more advanced classes in a specific subject or even start a business or club it would be pretty cool to have more time to explore your interests and make them a bigger part of your high school experience another pro is that it could help students who are struggling in school if high school only lasted three years students might have more time to catch up on their work or get extra help from teachers and it could also help students who are ahead of the game because they could take more advanced classes and get a head start on college or their future careers but there are also some pretty big cons to consider one of them is that it could be really hard to fit in all the necessary classes and credits in just three years like you would have to take a lot of classes at once and it might be hard to balance schoolwork with extracurriculars and other stuff and if you do not get the right schedule you might end up missing out on important classes or activities another con is that it could be really tough on students who are not as academically inclined if high school only lasts three years they might not have as much time to catch up or get the help they need and it could also be hard on students who are really into their extracurriculars because they might not have as much time to participate in sports clubs or other activities so overall i think three year high school graduation programs could be a pretty interesting idea but there are definitely some pros and cons to consider it,2163,"Task: Evaluate the potential pros and cons of three year high school graduation programs for high school students and analyze how this option would affect academics, extracurriculars, and student life.",task evaluate the potential pros and cons of three year high school graduation programs for high school students and analyze how this option would affect academics extracurriculars and student life,197


In [22]:
%%time
df.to_parquet(f"output/preprocess.parquet", index=False)
assert df.notna().all(axis=None)

CPU times: user 603 ms, sys: 64 ms, total: 667 ms
Wall time: 676 ms


In [23]:
tim.stop()
print(f"Total time taken {str(tim.elapsed)}")

Total time taken 0:19:32.812609
