In [1]:
import os
import json
import gc
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm import tqdm
from transformers import AutoTokenizer
from typing import Dict, List, Set, Tuple, NamedTuple, Callable
import scipy
import scml
from scml import pandasx as pdx
from daigt.preprocess import en as pen
# Notebook-wide setup: timer, environment, display options, progress bars, seeding.
# NOTE(review): scml.Timer presumably measures total notebook run time — confirm.
tim = scml.Timer()
tim.start()
# Set before any tokenizer is created (the tokenizer cell is currently commented out).
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Percentile grid passed to DataFrame.describe() in the review section.
percentiles=[.01, .05, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95, .99]
# NOTE(review): "use_inf_as_na" is deprecated in recent pandas releases — confirm
# against the pinned pandas version before upgrading.
pd.set_option("use_inf_as_na", True)
# Very large display limits so info()/describe()/value_counts output is not truncated.
pd.set_option("max_info_columns", 9999)
pd.set_option("display.max_columns", 9999)
pd.set_option("display.max_rows", 9999)
pd.set_option('max_colwidth', 9999)
# Registers DataFrame.progress_apply, which later cells rely on.
tqdm.pandas()
# NOTE(review): seed_everything is called without an explicit seed; presumably it
# applies a library default — confirm.
scml.seed_everything()
# Print the int16 range; its maximum is the value used for max_len in the next cell.
info = np.iinfo(np.int16)
print(f"int16, min={info.min}, max={info.max}")

int16, min=-32768, max=32767


In [2]:
# Inclusive bounds applied to `text_bow_len` in the filtering section.
# 773 equals the 1% row of the describe() output shown later in this notebook;
# 32_767 equals the int16 maximum printed by the first cell.
min_len, max_len = 773, 32_767  # P01 cutoff

In [3]:
#tokenizer = AutoTokenizer.from_pretrained("huggingface/microsoft/deberta-v3-base", is_fast=True)
#print(f"{repr(tokenizer)}\nmodel_input_names={tokenizer.model_input_names}")

# Combine data sources

In [4]:
# Load the essay table (columns per the info() output: id, prompt_id, text, generated).
edf = pd.read_csv("input/train_essays.csv", low_memory=False)
edf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1378 entries, 0 to 1377
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   id         1378 non-null   object
 1   prompt_id  1378 non-null   int64 
 2   text       1378 non-null   object
 3   generated  1378 non-null   int64 
dtypes: int64(2), object(2)
memory usage: 43.2+ KB


In [5]:
# Load the prompt table and index it by prompt_id so get_prompt() can look rows up with .loc.
pdf = pd.read_csv("input/train_prompts.csv", low_memory=False)
pdf = pdf.set_index("prompt_id")
pdf.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, 0 to 1
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   prompt_name   2 non-null      object
 1   instructions  2 non-null      object
 2   source_text   2 non-null      object
dtypes: object(3)
memory usage: 64.0+ bytes


In [6]:
# Load the external essay collection (columns per the info() output:
# essay_id, text, label, source, prompt, fold; prompt has missing values).
df = pd.read_csv("input/train_drcat_02.csv", low_memory=False)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39785 entries, 0 to 39784
Data columns (total 6 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   essay_id  39785 non-null  object
 1   text      39785 non-null  object
 2   label     39785 non-null  int64 
 3   source    39785 non-null  object
 4   prompt    9490 non-null   object
 5   fold      39785 non-null  int64 
dtypes: int64(2), object(4)
memory usage: 1.8+ MB


In [7]:
# Keep only the columns needed downstream (this drops `fold`), rename them to
# match the essay table's `id` / `generated`, and replace missing prompts with
# the "na" placeholder.
cols = ["essay_id", "text", "label", "source", "prompt"]
df = (
    df.loc[:, cols]
    .rename(columns={"essay_id": "id", "label": "generated"})
    .assign(prompt=lambda frame: frame["prompt"].fillna("na"))
)

In [8]:
def get_prompt(row)->str:
    """Build the prompt text for one essay row.

    Looks up ``row["prompt_id"]`` in the module-level ``pdf`` frame (indexed by
    prompt id) and returns that entry's ``prompt_name`` and ``instructions``
    joined by a single space.
    """
    record = pdf.loc[row["prompt_id"]]
    title, instructions = record["prompt_name"], record["instructions"]
    return f"{title} {instructions}"
    

# Give the essay table the same `prompt` / `source` columns as the external table.
edf["prompt"] = edf.progress_apply(get_prompt, axis=1)
edf["source"] = "train_essays"
# Stack both tables; ignore_index=True renumbers the rows from 0.
df = pd.concat([edf, df], ignore_index=True)
# Select the working columns in a fixed order (this drops prompt_id).
cols = ["id", "generated", "source", "prompt", "text"]
df = df[cols].copy()
# `cols` is reused here for the label column only; store the label as int8.
cols = ["generated"]
df[cols] = df[cols].astype(np.int8)
df.info()

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1378/1378 [00:00<00:00, 75769.21it/s]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41163 entries, 0 to 41162
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   id         41163 non-null  object
 1   generated  41163 non-null  int8  
 2   source     41163 non-null  object
 3   prompt     41163 non-null  object
 4   text       41163 non-null  object
dtypes: int8(1), object(4)
memory usage: 1.3+ MB





In [9]:
# Remove rows whose raw `text` is an exact duplicate (first occurrence kept by default).
# `more` holds the row count before the drop so the difference can be reported.
more = len(df)
df = df.drop_duplicates(["text"], ignore_index=True)
print(f"{more - len(df)} rows dropped: duplicates")

1429 rows dropped: duplicates


# Preprocess Text

In [10]:
# Shared preprocessor instances from daigt.preprocess.en; they are called by the
# row functions returned from preprocess_bsc / preprocess_bow in this cell.
bsc = pen.BasicPreprocessor()
bow = pen.BowPreprocessor()

def preprocess_bsc(col) -> Callable:
    """Return a row function that runs the module-level ``bsc`` preprocessor on ``row[col]``.

    The returned callable is meant for ``DataFrame.apply(..., axis=1)``.
    """
    def apply_to_row(row) -> str:
        raw_value = row[col]
        return bsc(raw_value)

    return apply_to_row

def preprocess_bow(col) -> Callable:
    """Return a row function that runs the module-level ``bow`` preprocessor on ``row[col]``.

    The preprocessor is always called with ``drop_stopword=False``. The returned
    callable is meant for ``DataFrame.apply(..., axis=1)``.
    """
    def apply_to_row(row) -> str:
        raw_value = row[col]
        return bow(raw_value, drop_stopword=False)

    return apply_to_row

In [11]:
# For each raw column, derive the basic-cleaned text (`<col>_bsc`), the
# bag-of-words text (`<col>_bow`) and the character length of the latter
# (`<col>_bow_len`).
cols = ["text", "prompt"]
for col in cols:
    print(col)
    new_col = f"{col}_bsc"
    df[new_col] = df.progress_apply(preprocess_bsc(col), axis=1)
    new_col = f"{col}_bow"
    df[new_col] = df.progress_apply(preprocess_bow(col), axis=1)
    col1, col2 = new_col, f"{col}_bow_len"
    # Store lengths as int32, not int16: this cast runs BEFORE the max_len
    # filter, and an int16 cast silently wraps any length above 32_767 to a
    # wrong (possibly negative) value, so the upper bound could never reject it.
    df[col2] = df[col1].str.len().astype(np.int32)
    #x = tokenizer(df[new_col].tolist(), truncation=False, add_special_tokens=False)
    #len_col = f"{new_col}_len"
    #df[len_col] = [len(s) for s in x["input_ids"]]
    #df[len_col] = df[len_col].astype(np.int16)

text


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39734/39734 [00:10<00:00, 3682.15it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39734/39734 [16:42<00:00, 39.62it/s]


prompt


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39734/39734 [00:01<00:00, 27325.94it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39734/39734 [00:43<00:00, 921.40it/s]


# Filter Invalid Data

In [12]:
# Second de-duplication pass, this time on the preprocessed `text_bow` column, so
# essays that differ only in what preprocessing changes are treated as duplicates.
# `more` holds the row count before the drop.
more = len(df)
df = df.drop_duplicates(["text_bow"], ignore_index=True)
print(f"{more - len(df)} rows dropped: duplicates")

219 rows dropped: duplicates


In [13]:
# Keep only essays whose bag-of-words text length lies within
# [min_len, max_len]; Series.between is inclusive on both ends by default,
# matching the original >= / <= comparison.
more = len(df)
df = df[df["text_bow_len"].between(min_len, max_len)].copy()
# Report the actual reason for the drop (this cell filters by length, it does
# not remove duplicates).
print(f"{more - len(df)} rows dropped: text_bow_len outside [{min_len}, {max_len}]")

0 rows dropped: duplicates


# Review Data

In [14]:
# Replace the source-specific `id` with a dense integer `essay_id` (0..n-1 after
# the index reset) and move that column to the front.
df = df.reset_index(drop=True).drop(columns=["id"])
df["essay_id"] = np.arange(len(df), dtype=np.int32)
cols = ["essay_id"] + [name for name in df.columns if name != "essay_id"]
df = df[cols]
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39515 entries, 0 to 39514
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   essay_id        39515 non-null  int64 
 1   generated       39515 non-null  int8  
 2   source          39515 non-null  object
 3   prompt          39515 non-null  object
 4   text            39515 non-null  object
 5   text_bsc        39515 non-null  object
 6   text_bow        39515 non-null  object
 7   text_bow_len    39515 non-null  int32 
 8   prompt_bsc      39515 non-null  object
 9   prompt_bow      39515 non-null  object
 10  prompt_bow_len  39515 non-null  int32 
dtypes: int32(2), int64(1), int8(1), object(7)
memory usage: 2.8+ MB


In [15]:
# Summary statistics of the numeric columns at the percentile grid defined in the first cell.
df.describe(percentiles=percentiles)

Unnamed: 0,essay_id,generated,text_bow_len,prompt_bow_len
count,39515.0,39515.0,39515.0,39515.0
mean,19757.0,0.252891,2195.112818,87.917854
std,11407.14228,0.434675,1006.48068,161.0615
min,0.0,0.0,234.0,2.0
1%,395.14,0.0,773.0,2.0
5%,1975.7,0.0,951.0,2.0
10%,3951.4,0.0,1100.0,2.0
20%,7902.8,0.0,1359.0,2.0
30%,11854.2,0.0,1582.0,2.0
40%,15805.6,0.0,1792.0,2.0


In [16]:
# Class balance of the label; the rendered output shows a count and a percent column.
cols=["generated"]
pdx.value_counts(df[cols])

Unnamed: 0_level_0,count,percent
generated,Unnamed: 1_level_1,Unnamed: 2_level_1
0,29522,0.747109
1,9993,0.252891


In [17]:
# Most frequent preprocessed prompts (first 50 rows of the table); "na" marks
# rows that had no prompt.
cols=["prompt_bow"]
pdx.value_counts(df[cols]).head(50)

Unnamed: 0_level_0,count,percent
prompt_bow,Unnamed: 1_level_1,Unnamed: 2_level_1
na,28769,0.728053
car free cities write an explanatory essay to inform fellow citizens about the advantages of limiting car usage your essay must be based on ideas and information that can be found in the passage set manage your time carefully so that you can read the passages plan your response write your response and revise and edit your response be sure to use evidence from multiple sources and avoid overly relying on one source your response should be in the form of a multiparagraph essay write your essay in the space provided,708,0.017917
does the electoral college work write a letter to your state senator in which you argue in favor of keeping the electoral college or changing to election by popular vote for the president of the united states use the information from the texts in your essay manage your time carefully so that you can read the passages plan your response write your response and revise and edit your response be sure to include a claim address counterclaims use evidence from multiple sources and avoid overly relying on one source your response should be in the form of a multiparagraph essay write your response in the space provided,670,0.016956
the legalization of marijuana have been a controversial topic in recent years write an essay arguing for or against the legalization of marijuana providing reasons and examples to support your position,77,0.001949
some schools offer distance learning as an option for students to attend classes from home by way of online or video conferencing do you think students would benefit from being able to attend classes from home take a position on this issue support your response with reasons and examples,77,0.001949
the role of libraries in the digital age have been a topic of discussion write an essay arguing for or against the continued importance of libraries providing reasons and examples to support your position,75,0.001898
write an explanatory essay to inform fellow citizens about the advantages of limiting car usage your essay must be based on ideas and information that can be found in the passage set manage your time carefully so that you can read the passages plan your response write your response and revise and edit your response be sure to use evidence from multiple sources and avoid overly relying on one source your response should be in the form of a multiparagraph essay write your essay in the space provided,74,0.001873
the role of zoos in conservation and education have been a topic of debate argue for or against the continued operation of zoos providing reasons and examples to justify your position,72,0.001822
the use of single use plastic products have become a controversial topic due to its impacts on the environment write an essay arguing for or against the ban of single use plastics providing reasons and examples to support your position,71,0.001797
in the article making mona lisa smile the author describes how a new technology called the facial action coding system enables computers to identify human emotions using details from the article write an essay arguing whether the use of this technology to read the emotional expressions of students in a classroom is valuable,69,0.001746


In [18]:
# Number and share of essays per originating data source.
cols=["source"]
pdx.value_counts(df[cols])

Unnamed: 0_level_0,count,percent
source,Unnamed: 1_level_1,Unnamed: 2_level_1
persuade_corpus,25848,0.654131
chat_gpt_moth,2421,0.061268
llama2_chat,2421,0.061268
llammistral7binstruct,2421,0.061268
original_moth,2299,0.05818
train_essays,1378,0.034873
llama_70b_v1,1172,0.02966
falcon_180b_v1,1055,0.026699
radek_500,500,0.012653


In [19]:
# Show five random rows for a visual check; sample(5) already returns at most
# five rows, so the trailing .head() was a no-op and has been removed.
df.sample(5)

Unnamed: 0,essay_id,generated,source,prompt,text,text_bsc,text_bow,text_bow_len,prompt_bsc,prompt_bow,prompt_bow_len
31613,31613,1,llammistral7binstruct,na,"Failure is an inevitable part of life that can have a significant impact on people's ability to achieve success. While failure can be discouraging and demotivating, it can also be beneficial in showing maturity and helping with enthusiasm.\n\nFirstly, failure can be a valuable learning experience that helps individuals develop resilience and perseverance. When people fail, they are forced to re-evaluate their approach and find new ways to solve problems. This process can help them become more resourceful and adaptable, which are essential skills for success. For example, Thomas Edison failed thousands of times before finally inventing the light bulb. However, he used each failure as an opportunity to learn and improve, eventually leading to his success.\n\nSecondly, failure can help individuals develop a growth mindset, which is the belief that abilities and intelligence can be developed through hard work and dedication. When people fail, they are forced to acknowledge their weaknesses and work towards improving them. This process can help them become more self-aware and confident, which are crucial for achieving success. For example, Michael Jordan missed over 9,000 shots in his career, but he used this failure as motivation to become an even better basketball player.\n\nFinally, failure can be a source of motivation and enthusiasm. When people fail, they are often driven to work harder and smarter to avoid future failures. This increased effort and focus can lead to greater success. For example, J.K. Rowling faced numerous rejections before finally finding a publisher for her Harry Potter series. However, she used this failure as motivation to continue writing and eventually achieved tremendous success.\n\nIn conclusion, failure can play a significant role in people's lives in achieving success. 
While failure can be discouraging, it can also be a valuable learning experience that helps individuals develop resilience and perseverance. Failure can also help individuals develop a growth mindset, which is essential for achieving success. Finally, failure can be a source of motivation and enthusiasm that drives individuals to work harder and smarter to avoid future failures.","Failure is an inevitable part of life that can have a significant impact on people's ability to achieve success. While failure can be discouraging and demotivating, it can also be beneficial in showing maturity and helping with enthusiasm. Firstly, failure can be a valuable learning experience that helps individuals develop resilience and perseverance. When people fail, they are forced to re-evaluate their approach and find new ways to solve problems. This process can help them become more resourceful and adaptable, which are essential skills for success. For example, Thomas Edison failed thousands of times before finally inventing the light bulb. However, he used each failure as an opportunity to learn and improve, eventually leading to his success. Secondly, failure can help individuals develop a growth mindset, which is the belief that abilities and intelligence can be developed through hard work and dedication. When people fail, they are forced to acknowledge their weaknesses and work towards improving them. This process can help them become more self-aware and confident, which are crucial for achieving success. For example, Michael Jordan missed over 9,000 shots in his career, but he used this failure as motivation to become an even better basketball player. Finally, failure can be a source of motivation and enthusiasm. When people fail, they are often driven to work harder and smarter to avoid future failures. This increased effort and focus can lead to greater success. For example, J.K. 
Rowling faced numerous rejections before finally finding a publisher for her Harry Potter series. However, she used this failure as motivation to continue writing and eventually achieved tremendous success. In conclusion, failure can play a significant role in people's lives in achieving success. While failure can be discouraging, it can also be a valuable learning experience that helps individuals develop resilience and perseverance. Failure can also help individuals develop a growth mindset, which is essential for achieving success. Finally, failure can be a source of motivation and enthusiasm that drives individuals to work harder and smarter to avoid future failures.",failure is an inevitable part of life that can have a significant impact on people s ability to achieve success while failure can be discouraging and demotivating it can also be beneficial in showing maturity and helping with enthusiasm firstly failure can be a valuable learning experience that helps individuals develop resilience and perseverance when people fail they are forced to re evaluate their approach and find new ways to solve problems this process can help them become more resourceful and adaptable which are essential skills for success for example thomas edison failed thousands of times before finally inventing the light bulb however he used each failure as an opportunity to learn and improve eventually leading to his success secondly failure can help individuals develop a growth mindset which is the belief that abilities and intelligence can be developed through hard work and dedication when people fail they are forced to acknowledge their weaknesses and work towards improving them this process can help them become more self aware and confident which are crucial for achieving success for example michael jordan missed over 9 000 shots in his career but he used this failure as motivation to become an even better basketball player finally failure can be a source of motivation and 
enthusiasm when people fail they are often driven to work harder and smarter to avoid future failures this increased effort and focus can lead to greater success for example j k rowling faced numerous rejections before finally finding a publisher for her harry potter series however she used this failure as motivation to continue writing and eventually achieved tremendous success in conclusion failure can play a significant role in people s lives in achieving success while failure can be discouraging it can also be a valuable learning experience that helps individuals develop resilience and perseverance failure can also help individuals develop a growth mindset which is essential for achieving success finally failure can be a source of motivation and enthusiasm that drives individuals to work harder and smarter to avoid future failures,2156,na,na,2
13797,13797,0,persuade_corpus,na,"I'm going to type an essay about an arugement for this technology to read students emotional expressions. The newest software update is called Facial Action Coding System. I really feel like this type of technogoly would be really helpful.\n\nThere is a lot of prons and cons to this technogoly because some students may not want you to know how they are feeling or what there emtions are, I feel as if some students need the help or may feel, let's say lonely but don't like saying they are so they fake a smile but those are the students who really need someone to know. So I'm going ot start out in my first paragraph saying why I think it would be good to have this type of system.\n\nMy first paragraph is going to be my first reason why I feel that it would help and a little bit about the software. The software update has promising applications for a variety of industries. The computer constrcuts a 3-D computer model of the face all 44 major muscels in the model must move like human muscles. I feel that it will help a lot more with young teenagers in our generation.\n\nA lot of people in our generation are going through depression and they don't like to telling people about it, so if you have this software it would help kids from sucide thoughts or some things like that.\n\nAnother reason why it woud be helpful is because you could use it on anyone not just students. My first reason why I think it would be a brillant idea would be an example such as this, if you went to class one day and seen your friend upset you would be able to look at her and know shes upset but if you asked her and she said no I'm fine, she would be lying. If you had this software update and you used it on your friend it would say she is 70 percent upset and 30 percent mad. So now you know she is upset most of the time and you could change that by talking about things with her letting her parents know. 
Little things like that could change someones life forever.\n\nMy last reason I think it would be helpful is because you would know how everyones feeling. The Mona Lisa is to demonstration is really intended to bring a smile to your face, while showoing how much a computer can do. It also says in paragraph 6 that ""\n\nA classroom computer could recogonize when a student is becoming bored or confused"". That could also help the teacher out a lot. Some students don't always let the teacher know if they are bored or confused because maybe they are scared or embarssed, but now your teacher would know if you were confused about something. She would be able to help you or do something a little more exciting.\n\nMy conclusion paragraph is going to wrap up my essay. As I said in the introduction I think this would be a really good idea for this software update to be in classrooms for teacher and students. I think it would help with teenagers. I gave a couple reasons why I would want this software update or why it would help. I have seen a lot of students falling asleep in class and the teacher just keeps going. I have also been one of those students were I'm really confused but I don't want to say in front of the class so I either don't say anything or I say something to my buddy next to me. Those are my reasons why I am for this Facial Action Coding System.","I'm going to type an essay about an arugement for this technology to read students emotional expressions. The newest software update is called Facial Action Coding System. I really feel like this type of technogoly would be really helpful. There is a lot of prons and cons to this technogoly because some students may not want you to know how they are feeling or what there emtions are, I feel as if some students need the help or may feel, let's say lonely but don't like saying they are so they fake a smile but those are the students who really need someone to know. 
So I'm going ot start out in my first paragraph saying why I think it would be good to have this type of system. My first paragraph is going to be my first reason why I feel that it would help and a little bit about the software. The software update has promising applications for a variety of industries. The computer constrcuts a 3-D computer model of the face all 44 major muscels in the model must move like human muscles. I feel that it will help a lot more with young teenagers in our generation. A lot of people in our generation are going through depression and they don't like to telling people about it, so if you have this software it would help kids from sucide thoughts or some things like that. Another reason why it woud be helpful is because you could use it on anyone not just students. My first reason why I think it would be a brillant idea would be an example such as this, if you went to class one day and seen your friend upset you would be able to look at her and know shes upset but if you asked her and she said no I'm fine, she would be lying. If you had this software update and you used it on your friend it would say she is 70 percent upset and 30 percent mad. So now you know she is upset most of the time and you could change that by talking about things with her letting her parents know. Little things like that could change someones life forever. My last reason I think it would be helpful is because you would know how everyones feeling. The Mona Lisa is to demonstration is really intended to bring a smile to your face, while showoing how much a computer can do. It also says in paragraph 6 that "" A classroom computer could recogonize when a student is becoming bored or confused."" That could also help the teacher out a lot. Some students don't always let the teacher know if they are bored or confused because maybe they are scared or embarssed, but now your teacher would know if you were confused about something. 
She would be able to help you or do something a little more exciting. My conclusion paragraph is going to wrap up my essay. As I said in the introduction I think this would be a really good idea for this software update to be in classrooms for teacher and students. I think it would help with teenagers. I gave a couple reasons why I would want this software update or why it would help. I have seen a lot of students falling asleep in class and the teacher just keeps going. I have also been one of those students were I'm really confused but I don't want to say in front of the class so I either don't say anything or I say something to my buddy next to me. Those are my reasons why I am for this Facial Action Coding System.",i am going to type an essay about an arugement for this technology to read students emotional expressions the newest software update is called facial action coding system i really feel like this type of technogoly would be really helpful there is a lot of prons and cons to this technogoly because some students may not want you to know how they are feeling or what there emtions are i feel as if some students need the help or may feel let us say lonely but do not like saying they are so they fake a smile but those are the students who really need someone to know so i am going ot start out in my first paragraph saying why i think it would be good to have this type of system my first paragraph is going to be my first reason why i feel that it would help and a little bit about the software the software update have promising applications for a variety of industries the computer constrcuts a 3 d computer model of the face all 44 major muscels in the model must move like human muscles i feel that it will help a lot more with young teenagers in our generation a lot of people in our generation are going through depression and they do not like to telling people about it so if you have this software it would help kids from sucide thoughts or some things like 
that another reason why it woud be helpful is because you could use it on anyone not just students my first reason why i think it would be a brillant idea would be an example such as this if you went to class one day and seen your friend upset you would be able to look at her and know she s upset but if you asked her and she said no i am fine she would be lying if you have this software update and you used it on your friend it would say she is 70 percent upset and 30 percent mad so now you know she is upset most of the time and you could change that by talking about things with her letting her parents know little things like that could change someones life forever my last reason i think it would be helpful is because you would know how everyones feeling the mona lisa is to demonstration is really intended to bring a smile to your face while showoing how much a computer can do it also says in paragraph 6 that a classroom computer could recogonize when a student is becoming bored or confused that could also help the teacher out a lot some students do not always let the teacher know if they are bored or confused because maybe they are scared or embarssed but now your teacher would know if you were confused about something she would be able to help you or do something a little more exciting my conclusion paragraph is going to wrap up my essay as i said in the introduction i think this would be a really good idea for this software update to be in classrooms for teacher and students i think it would help with teenagers i gave a couple reasons why i would want this software update or why it would help i have seen a lot of students falling asleep in class and the teacher just keeps going i have also been one of those students were i am really confused but i do not want to say in front of the class so i either do not say anything or i say something to my buddy next to me those are my reasons why i am for this facial action coding system,3228,na,na,2
11155,11155,0,persuade_corpus,na,"If a school was to require a summer break they should allow the students to create their summer projects because it will be more creative, teachers just make useless busy work and the students are more likely to personally learn from the experience.\n\nStudents being involved in their summer project will allow students to get creative and think for themselves. Summer projects get students in a negative state of mind because they would prefer to enjoy their break and hang out with friends. If they are given the creativity to make their own summer project they might collaborate with friends and make a really cool project with all the free time they have. If students were given their own opportunities to make fun projects with friends over the break they will create more interactive, and impressive work.\n\nIf a teacher was to make an assignment over the break it would be super boring and most likely just busy work. If it were a math assignment it would just be a packet or some problems to get you back into the unit before summer starts. English teachers often just make kids read books that are super boring and lack their interest over the summer. Teachers tend to make work over the break pointless and when the students do work over the break it makes them hate the class before they've even started it. It also adds an extra level of stress when summer is supposed to be free time. Some kids come back to school and have a forty-five page packet due in math class by the first week of school. Teachers overall make school harder than it has to be by assigning pointless busy work over summer break.\n\nOne way this could have a bad turnout if kids made their own projects due to the fact that they won't want design a creative and fun project. 
Some if not most kids will see that they have to make an assignment over the break, they'll buy a poster board, they'll copy and paste a few articles about a boring topic and glue them to the poster board. They won't put any effort into a summer project because they don't want to do work over their break. They find a project over the break to be very counter intuitive because they've just done work for nine months straight and do not want to keep doing work. Kids do not want to do a project over the break in the first place and if they see that it is easy and has no positive effect on their schooling they'll make it quick and easy and never worry about the project again.\n\nThe final way a student made project will have a better outcome is they'll actually personally learn from the experience. If a student is given their own choice about a summer project in englsh class. They're more likely to pick a book of their interest and actually finish the book instead of just skim reading it, looking up a summary or even watching the movie if that is an option. If the student picks their own book of interest they'll read the whole thing, gain important information about the book and be ready to discuss it once school starts again. If the students have summer project in history and really like World War II but the teacher assigns them the Civil Rights movement they're more likely to not do the project. If they're allowed to pick their own topic and write a report about World War II like they're personally interested in they're gonna do the project and do it right. They'll gain from this personal experience and have a good time while doing it.\n\nStudents' learning should not be done through busy work or packets with fifty pages. It should be done through personal interest and personal involvment. When a student learns a packet is the most boring and pointless thing a student can receive. 
Given their own choice on summer projects students are more likely to get involved, genuinely learn from the experience and not be handed premade expected projects where they have to follow a boring rubric to the tea and lack creativity.","If a school was to require a summer break they should allow the students to create their summer projects because it will be more creative, teachers just make useless busy work and the students are more likely to personally learn from the experience. Students being involved in their summer project will allow students to get creative and think for themselves. Summer projects get students in a negative state of mind because they would prefer to enjoy their break and hang out with friends. If they are given the creativity to make their own summer project they might collaborate with friends and make a really cool project with all the free time they have. If students were given their own opportunities to make fun projects with friends over the break they will create more interactive, and impressive work. If a teacher was to make an assignment over the break it would be super boring and most likely just busy work. If it were a math assignment it would just be a packet or some problems to get you back into the unit before summer starts. English teachers often just make kids read books that are super boring and lack their interest over the summer. Teachers tend to make work over the break pointless and when the students do work over the break it makes them hate the class before they've even started it. It also adds an extra level of stress when summer is supposed to be free time. Some kids come back to school and have a forty-five page packet due in math class by the first week of school. Teachers overall make school harder than it has to be by assigning pointless busy work over summer break. One way this could have a bad turnout if kids made their own projects due to the fact that they won't want design a creative and fun project. 
Some if not most kids will see that they have to make an assignment over the break, they'll buy a poster board, they'll copy and paste a few articles about a boring topic and glue them to the poster board. They won't put any effort into a summer project because they don't want to do work over their break. They find a project over the break to be very counter intuitive because they've just done work for nine months straight and do not want to keep doing work. Kids do not want to do a project over the break in the first place and if they see that it is easy and has no positive effect on their schooling they'll make it quick and easy and never worry about the project again. The final way a student made project will have a better outcome is they'll actually personally learn from the experience. If a student is given their own choice about a summer project in englsh class. They're more likely to pick a book of their interest and actually finish the book instead of just skim reading it, looking up a summary or even watching the movie if that is an option. If the student picks their own book of interest they'll read the whole thing, gain important information about the book and be ready to discuss it once school starts again. If the students have summer project in history and really like World War II but the teacher assigns them the Civil Rights movement they're more likely to not do the project. If they're allowed to pick their own topic and write a report about World War II like they're personally interested in they're gonna do the project and do it right. They'll gain from this personal experience and have a good time while doing it. Students' learning should not be done through busy work or packets with fifty pages. It should be done through personal interest and personal involvment. When a student learns a packet is the most boring and pointless thing a student can receive. 
Given their own choice on summer projects students are more likely to get involved, genuinely learn from the experience and not be handed premade expected projects where they have to follow a boring rubric to the tea and lack creativity.",if a school was to require a summer break they should allow the students to create their summer projects because it will be more creative teachers just make useless busy work and the students are more likely to personally learn from the experience students being involved in their summer project will allow students to get creative and think for themselves summer projects get students in a negative state of mind because they would prefer to enjoy their break and hang out with friends if they are given the creativity to make their own summer project they might collaborate with friends and make a really cool project with all the free time they have if students were given their own opportunities to make fun projects with friends over the break they will create more interactive and impressive work if a teacher was to make an assignment over the break it would be super boring and most likely just busy work if it were a math assignment it would just be a packet or some problems to get you back into the unit before summer starts english teachers often just make kids read books that are super boring and lack their interest over the summer teachers tend to make work over the break pointless and when the students do work over the break it makes them hate the class before they have even started it it also adds an extra level of stress when summer is supposed to be free time some kids come back to school and have a forty five page packet due in math class by the first week of school teachers overall make school harder than it have to be by assigning pointless busy work over summer break one way this could have a bad turnout if kids made their own projects due to the fact that they will not want design a creative and fun project some if not most 
kids will see that they have to make an assignment over the break they will buy a poster board they will copy and paste a few articles about a boring topic and glue them to the poster board they will not put any effort into a summer project because they do not want to do work over their break they find a project over the break to be very counter intuitive because they have just done work for nine months straight and do not want to keep doing work kids do not want to do a project over the break in the first place and if they see that it is easy and have no positive effect on their schooling they will make it quick and easy and never worry about the project again the final way a student made project will have a better outcome is they will actually personally learn from the experience if a student is given their own choice about a summer project in englsh class they are more likely to pick a book of their interest and actually finish the book instead of just skim reading it looking up a summary or even watching the movie if that is an option if the student picks their own book of interest they will read the whole thing gain important information about the book and be ready to discuss it once school starts again if the students have summer project in history and really like world war ii but the teacher assigns them the civil rights movement they are more likely to not do the project if they are allowed to pick their own topic and write a report about world war ii like they are personally interested in they are going to do the project and do it right they will gain from this personal experience and have a good time while doing it students learning should not be done through busy work or packets with fifty pages it should be done through personal interest and personal involvment when a student learns a packet is the most boring and pointless thing a student can receive given their own choice on summer projects students are more likely to get involved genuinely learn from 
the experience and not be handed premade expected projects where they have to follow a boring rubric to the tea and lack creativity,3892,na,na,2
1838,1838,0,persuade_corpus,na,"Cars are one of the most important transportation in the world today. The reason the number of cars being sold is going down is because they are getting too expensive. Some people that do not have jobs are not able to afford getting a new car. There are people that have to sell their cars just so they can get a house to live in. Todays society does not have many cars because people have more technology today like cellphones and laptops. Several people that dont have a car either can't afford it, use a cellphone and walk places, or ride a bike places. Not everyone has the joy of driving a car so they don't bother trying to buy one. The newer cars are way over priced and noone is going to want to over pay for a vehicle. Ever since 2005 the amount of cars getting sold has been steadily dropping. When 2013 hit, the numbers of miles driven per person dropped 9% and was equal to the amount in 1995.\n\nFirst, The percentage of people driving on the road went down because of the expense of the newer vehicles. The ones that were unemployed had no way of getting a new car because they did not work. Some people just did not have a good paying job to where they could be able to afford a new car. Every year more and more people loose their job so when they need a new car they will not be able to get one because they will have no money. Because of the expense of cars now the rate of ownership per household went down severely. The more the price the cars go up the lower the rate for people buying cars. Car dealerships are never going to sell any cars if they keep raising the prices.\n\nSecondly, the people that have to sell their cars to buy a house should not have to do that. Everyone needs to have some kind of transportation but they cant with the prices of everything now a days. The prices should not be so high so people can afford to have a car for transportation and a home to live in. 
The world is not getting treated fairly especially the ones that are trying to get a job and noone will hire thems so they are not able to afford a car and a car payment. Noone should have to walk places to get anything because people make fun of them and there shouldn't be any judging. There are reasons the people without transportation have to walk and i believe that should change. The world should be better than what it is right now, there are so many disrespectful people in this world that think they are rich. People should never have to give up their vehicles or house for anything.\n\nFinally, Todays society is going down because the prices shot up like a rocket these past few years. People are severely suffering because of them going up extremely quick and that should have never happened and i believe it needs to be fixed as fast as possible. The ones that are trying to get a job deserve to be able to afford a car but since noone wants to hire anyone they cant afford anything. Then, the ones selling their cars is not good they need to have a way of getting around especially if they have kids that needs to go to school or the doctors. The people suffering should not be suffering since they are trying the best they can.","Cars are one of the most important transportation in the world today. The reason the number of cars being sold is going down is because they are getting too expensive. Some people that do not have jobs are not able to afford getting a new car. There are people that have to sell their cars just so they can get a house to live in. Todays society does not have many cars because people have more technology today like cellphones and laptops. Several people that dont have a car either can't afford it, use a cellphone and walk places, or ride a bike places. Not everyone has the joy of driving a car so they don't bother trying to buy one. The newer cars are way over priced and noone is going to want to over pay for a vehicle. 
Ever since 2005 the amount of cars getting sold has been steadily dropping. When 2013 hit, the numbers of miles driven per person dropped 9% and was equal to the amount in 1995. First, The percentage of people driving on the road went down because of the expense of the newer vehicles. The ones that were unemployed had no way of getting a new car because they did not work. Some people just did not have a good paying job to where they could be able to afford a new car. Every year more and more people loose their job so when they need a new car they will not be able to get one because they will have no money. Because of the expense of cars now the rate of ownership per household went down severely. The more the price the cars go up the lower the rate for people buying cars. Car dealerships are never going to sell any cars if they keep raising the prices. Secondly, the people that have to sell their cars to buy a house should not have to do that. Everyone needs to have some kind of transportation but they cant with the prices of everything now a days. The prices should not be so high so people can afford to have a car for transportation and a home to live in. The world is not getting treated fairly especially the ones that are trying to get a job and noone will hire thems so they are not able to afford a car and a car payment. Noone should have to walk places to get anything because people make fun of them and there shouldn't be any judging. There are reasons the people without transportation have to walk and i believe that should change. The world should be better than what it is right now, there are so many disrespectful people in this world that think they are rich. People should never have to give up their vehicles or house for anything. Finally, Todays society is going down because the prices shot up like a rocket these past few years. 
People are severely suffering because of them going up extremely quick and that should have never happened and i believe it needs to be fixed as fast as possible. The ones that are trying to get a job deserve to be able to afford a car but since noone wants to hire anyone they cant afford anything. Then, the ones selling their cars is not good they need to have a way of getting around especially if they have kids that needs to go to school or the doctors. The people suffering should not be suffering since they are trying the best they can.",cars are one of the most important transportation in the world today the reason the number of cars being sold is going down is because they are getting too expensive some people that do not have jobs are not able to afford getting a new car there are people that have to sell their cars just so they can get a house to live in todays society do not have many cars because people have more technology today like cellphones and laptops several people that do nt have a car either can not afford it use a cellphone and walk places or ride a bike places not everyone have the joy of driving a car so they do not bother trying to buy one the newer cars are way over priced and noone is going to want to over pay for a vehicle ever since 2005 the amount of cars getting sold have been steadily dropping when 2013 hit the numbers of miles driven per person dropped 9 and was equal to the amount in 1995 first the percentage of people driving on the road went down because of the expense of the newer vehicles the ones that were unemployed have no way of getting a new car because they do not work some people just do not have a good paying job to where they could be able to afford a new car every year more and more people loose their job so when they need a new car they will not be able to get one because they will have no money because of the expense of cars now the rate of ownership per household went down severely the more the price the cars go up 
the lower the rate for people buying cars car dealerships are never going to sell any cars if they keep raising the prices secondly the people that have to sell their cars to buy a house should not have to do that everyone needs to have some kind of transportation but they ca nt with the prices of everything now a days the prices should not be so high so people can afford to have a car for transportation and a home to live in the world is not getting treated fairly especially the ones that are trying to get a job and noone will hire thems so they are not able to afford a car and a car payment noone should have to walk places to get anything because people make fun of them and there should not be any judging there are reasons the people without transportation have to walk and i believe that should change the world should be better than what it is right now there are so many disrespectful people in this world that think they are rich people should never have to give up their vehicles or house for anything finally todays society is going down because the prices shot up like a rocket these past few years people are severely suffering because of them going up extremely quick and that should have never happened and i believe it needs to be fixed as fast as possible the ones that are trying to get a job deserve to be able to afford a car but since noone wants to hire anyone they ca nt afford anything then the ones selling their cars is not good they need to have a way of getting around especially if they have kids that needs to go to school or the doctors the people suffering should not be suffering since they are trying the best they can,3109,na,na,2
30534,30534,0,persuade_corpus,na,"I am against the driverless cars . These cars require you to put all your trust into this machinery, to be able to trust in them to get you there safe and back safely. What happens if the car breaks down? or the system crashes? I could be seriously injured in this accident. This states that there will be a computer system that will run this car . But what if this computer system gets hacked and you can easily reprogram it to go to a different areas which could lead to kidnappings. Or just have a bunch of crashes occur all around the world . Sure there are sensors to help with the saftey. but there isn't a 100% assurance on the capibility on hacking into the system. I believe that if you are too lazy to drive somewhere then you shouldn't be driving. Do not put someone elses life in danger due to the lack of effort to drive. There are many ways to get around without having to drive yourself. These are some examples; Carpooling , Uber drivers, Taxi , Bus services, or if possible walk or ride a bike. By doing this you are causing less pollution and you can save money by walking or riding a bike.\n\nKeep in mind that everyone has somewhere to be take your time and be mindful of the people arond you. You have to pay attention, there is this saying, ""You have to drive for yourself AND others"". Safety is key.","I am against the driverless cars . These cars require you to put all your trust into this machinery, to be able to trust in them to get you there safe and back safely. What happens if the car breaks down? or the system crashes? I could be seriously injured in this accident. This states that there will be a computer system that will run this car . But what if this computer system gets hacked and you can easily reprogram it to go to a different areas which could lead to kidnappings. Or just have a bunch of crashes occur all around the world . Sure there are sensors to help with the saftey. 
but there isn't a 100% assurance on the capibility on hacking into the system. I believe that if you are too lazy to drive somewhere then you shouldn't be driving. Do not put someone elses life in danger due to the lack of effort to drive. There are many ways to get around without having to drive yourself. These are some examples; Carpooling , Uber drivers, Taxi , Bus services, or if possible walk or ride a bike. By doing this you are causing less pollution and you can save money by walking or riding a bike. Keep in mind that everyone has somewhere to be take your time and be mindful of the people arond you. You have to pay attention, there is this saying, ""You have to drive for yourself AND others."" Safety is key.",i am against the driverless cars these cars require you to put all your trust into this machinery to be able to trust in them to get you there safe and back safely what happens if the car breaks down or the system crashes i could be seriously injured in this accident this states that there will be a computer system that will run this car but what if this computer system gets hacked and you can easily reprogram it to go to a different areas which could lead to kidnappings or just have a bunch of crashes occur all around the world sure there are sensors to help with the saftey but there is not a 100 assurance on the capibility on hacking into the system i believe that if you are too lazy to drive somewhere then you should not be driving do not put someone elses life in danger due to the lack of effort to drive there are many ways to get around without having to drive yourself these are some examples carpooling uber drivers taxi bus services or if possible walk or ride a bike by doing this you are causing less pollution and you can save money by walking or riding a bike keep in mind that everyone have somewhere to be take your time and be mindful of the people arond you you have to pay attention there is this saying you have to drive for yourself and 
others safety is key,1288,na,na,2


In [20]:
%%time
df.to_parquet(f"output/preprocess.parquet", index=False)
assert df.notna().all(axis=None)

CPU times: user 545 ms, sys: 48.7 ms, total: 594 ms
Wall time: 595 ms


In [21]:
# Stop the notebook-wide timer (started in the first cell) and report the total runtime.
tim.stop()
print(f"Total time taken {tim.elapsed!s}")

Total time taken 0:17:39.943724
