# AutoQ: Improving reading comprehension through automatic question generation
W210.5

Julia Buffinton, Saurav Datta, Joanna Huang, Kathryn Plath
 

## 0. Set up

In [1]:
import json
import itertools
import pandas as pd
import ast
import string
import operator
from collections import defaultdict
import re
import os
import nltk
import math
import nltk
import time
import torch
#import spacy
#from spacy.tokenizer import Tokenizer
#nlp = spacy.load('en_core_web_sm')

# Uncomment if you need to download these!
#nltk.download('stopwords')
#nltk.download('punkt')

### 0.1 Data File Directories

In [2]:
# All folders below are expected to exist already; create them first if they do not.
data_files = './wikipedia_data'

# Input: folders of files, each file listing several Wikipedia articles
wiki_dump = '{}/wikipedia_dump/'.format(data_files)

# Outputs of the individual pipeline stages (expected to start out empty)
wiki_squad = '{}/wikipedia_squad/'.format(data_files)
labeled_sents = '{}/labeled_sentences/'.format(data_files)
unlabeled_sents = '{}/unlabeled_sentences/'.format(data_files)
questions = '{}/questions/'.format(data_files)
answers = '{}/answers/'.format(data_files)

# Kept together so that corresponding files can be manipulated across all output directories
directories = [
    wiki_squad,
    labeled_sents,
    unlabeled_sents,
    questions,
    answers,
]

### 0.2 Make sure data file directories (except wiki dump) are empty to start

In [3]:
# We should remove any existing files from the folders, before generating new data
for i,d in enumerate(directories):
    directory = d + '*'
    !rm -rf $directory

## 1. Wikipedia Article Pre-Processing
To move our data through the pipeline and ensure that it is suitable for our app, we will format it like the SQuAD dataset.

In [4]:
# Convert every file of the Wikipedia dump into a SQuAD-style JSON file:
# {"data": [{"title": ..., "paragraphs": [{"context": ..., "qas": []}, ...]}, ...], "version": 1.0}
# One output file is written per input file, under the same folder/file name in `wiki_squad`.

# Articles whose body is not longer than this many characters are dropped.
# The threshold is arbitrary; it should eliminate "disambiguation" articles, etc.
MIN_ARTICLE_CHARS = 1000

total_articles = 0
total_skipped = 0

for foldername in os.listdir(wiki_dump):

    input_subfolder = wiki_dump + foldername
    output_subfolder = wiki_squad + foldername

    if not os.path.exists(output_subfolder):
        os.mkdir(output_subfolder)

    # these are not files, just folders
    print("Processing files in {} folder...".format(foldername))
    num_articles = 0
    num_skipped = 0

    # each file represents several (variable #) wikipedia articles
    for filename in os.listdir(input_subfolder):

        # each file of articles will become a separate .json of articles
        # this helps if we run into issues, we can just discard a whole file and move on

        # set up json format for squad-like listing of articles
        wikipedia_data_dict = {"data": [], "version" : 1.0}

        # save this to the 'wikipedia_squad' folder of correctly-formatted dicts of wikipedia articles
        output_file = output_subfolder + '/' + filename

        # `with` guarantees the input handle is closed even if a line fails to parse
        with open(input_subfolder + '/' + filename) as f:

            # each line represents a different wikipedia article
            # we will ignore the id and url for now, not needed
            for line in f:
                line_dict = ast.literal_eval(line)
                title = line_dict['title']

                # for some reason, empty articles are included. They should be disregarded
                try:
                    # title is duplicated within text as well, so keep only what follows it
                    text = line_dict['text'].split("\n\n",1)[1]
                except (IndexError, KeyError, AttributeError, TypeError):
                    # no body after the title (or no usable 'text' field at all)
                    num_skipped += 1
                    print("Skipping article:",title)
                    continue

                if len(text) > MIN_ARTICLE_CHARS:
                    num_articles += 1
                    # Break text up into paragraphs, each with an (initially empty) question list
                    context = [{'context': para.rstrip(), 'qas' : []} for para in text.split("\n\n")]
                    wikipedia_data_dict['data'].append({'title' : title, 'paragraphs' : context})

        # in case we don't have any articles in the file to add
        if wikipedia_data_dict['data']:
            with open(output_file, 'w') as outfile:
                json.dump(wikipedia_data_dict, outfile)

    total_articles += num_articles
    total_skipped += num_skipped

    print("Completing files in {} folder... {} articles processed, {} skipped.\n".format(foldername,num_articles,num_skipped))

print("Reformatting complete. Total {} articles processed for question generation, {} skipped.".format(total_articles,total_skipped))

Processing files in AB folder...
Skipping article: Oxycodone
Skipping article: Kyoto Protocol
Skipping article: Index of philosophy articles (A–C)
Skipping article: History of Christianity
Skipping article: Klaus Barbie
Completing files in AB folder... 5022 articles processed, 5 skipped.

Processing files in AA folder...
Skipping article: Floccinaucinihilipilification
Skipping article: List of German proverbs
Completing files in AA folder... 5280 articles processed, 2 skipped.

Reformatting complete. Total 10302 articles processed for question generation, 7 skipped.


## 2. Sentence Selection

In [5]:
def sents_and_weights(d,paragraphs):
    """Collect candidate sentences of one article and build its word distribution.

    Each sentence is stripped of digits and punctuation, lower-cased, tokenized,
    filtered for English stop words and stemmed. Every stemmed token of every
    sentence contributes to the word distribution, but only the first and last
    sentence of a paragraph are kept as candidates.

    Args:
        d: identifier of the article (the caller passes the article's index within
            its file); used as the first element of every sentence key
        paragraphs: sequence of dicts, each holding the paragraph text under 'context'
    Return:
        labeled_sentences: dict, (d, paragraph #, sentence #) -> original sentence
            with newlines replaced by spaces
        stemmed_sentences: dict with the same keys -> list of stemmed tokens
        word_distr: defaultdict(int), stemmed token -> relative frequency
    """
    # Create dictionaries for labeled & stemmed sentences to populate
    labeled_sentences = {}
    stemmed_sentences = {}

    # Create word distribution dictionaries
    # these need to be default dicts so they return 0 if a word is not found
    word_cts = defaultdict(int)
    word_distr = defaultdict(int)
    total_count = 0

    # initialize for stemming; a set makes the per-token stop-word test O(1)
    stop_words = set(nltk.corpus.stopwords.words('english'))
    stemmer = nltk.stem.PorterStemmer()
    sent_splitter = nltk.data.load('tokenizers/punkt/english.pickle')

    # need to go through each 'paragraph' (context) to gather info about sentences
    for i,context in enumerate(paragraphs):

        paragraph = context['context']

        # NOTE(review): `break` stops at the first paragraph of 75 characters or fewer,
        # so no later paragraph of the article is examined. Confirm this is intended
        # (rather than `continue`) before changing it.
        if len(paragraph) <= 75:
            break

        # split paragraph into sentences, make sure to keep digits, etc. together
        sentences = sent_splitter.tokenize(paragraph.strip())

        # iterate through sentences to tokenize, calculate overall word distribution
        for j,original_sentence in enumerate(sentences):

            # Remove all digits and punctuation (OK to include periods since it's been split)
            sentence = ''.join([x for x in original_sentence
                                if not x.isdigit() and x not in string.punctuation])

            # Lowercase everything, then split into words & rejoin (remove extra spaces)
            sentence = ' '.join(sentence.strip().lower().split())

            tokenized_stemmed_sent = []
            for word in nltk.tokenize.word_tokenize(sentence):
                if word in stop_words:
                    continue
                # Count the *stem*: sentence weights are looked up by stemmed token,
                # so the distribution has to be keyed the same way.
                stem = stemmer.stem(word)
                word_cts[stem] += 1
                total_count += 1
                tokenized_stemmed_sent.append(stem)

            # keep track of tokenized for calculating sentence weight in next step
            # save list of unprocessed sentences for later
            # but we're only selecting from the first and last sentences in the paragraphs
            if (original_sentence == sentences[0]) or (original_sentence == sentences[-1]):
                if not original_sentence.startswith('[[File:'):
                    labeled_sentences[(d,i,j)] = original_sentence.replace('\n', ' ')
                    stemmed_sentences[(d,i,j)] = tokenized_stemmed_sent

    # turn absolute counts into relative frequencies
    for word, ct in word_cts.items():
        word_distr[word] = ct / total_count

    return labeled_sentences,stemmed_sentences,word_distr

In [6]:
def calc_sent_weight(word_dist, stemmed_sents):
    """Score every sentence by the mean distribution value of its tokens.

    Args:
        word_dist: mapping token -> weight; it is indexed with `[]`, so it must
            yield a value for tokens it has not seen (e.g. a defaultdict)
        stemmed_sents: dict, sentence key -> list of tokens
    Return:
        list of (sentence key, weight) tuples ordered from highest to lowest
        weight; a sentence without tokens gets weight 0
    """
    def _mean_weight(tokens):
        # An empty sentence has nothing to average over
        if len(tokens) == 0:
            return 0
        return sum([word_dist[tok] for tok in tokens]) / len(tokens)

    scored = [(sent_key, _mean_weight(tokens)) for sent_key, tokens in stemmed_sents.items()]

    # Highest weight first; sentences with equal weight keep their input order
    return sorted(scored, key=lambda pair: pair[1], reverse=True)

In [7]:
def topically_important_sentence(sentences_weight, labeled_sentences):
    """Select the topically important sentences and normalise their text.

    The leading 20% (rounded up) of `sentences_weight` is retained, so the list
    is expected to be ordered from most to least important.

    Args:
        sentences_weight: list of tuples, (sentence key, sentence weight)
        labeled_sentences: dict, sentence key -> original sentence text
    Return:
        final_sentences: dict, sentence key -> lower-cased sentence with wiki
            brackets removed, punctuation spaced out, parentheses written as
            -lrb- / -rrb- and double quotes written as `` / ''
    """
    # how many sentences to retain
    num_sentences_selected = math.ceil(float(0.20) * len(sentences_weight))

    # Read from the argument: the original body referenced a notebook-level
    # variable here and therefore only worked when that global happened to exist.
    selected_keys = [k for k,v in sentences_weight[0:num_sentences_selected]]

    final_sentences = {}

    for sent_key in selected_keys:
        processed_sentence = labeled_sentences[sent_key].lower() #lowercase

        # drop wiki-link brackets, double ones first
        for bracket in ('[[', ']]', ']', '['):
            processed_sentence = processed_sentence.replace(bracket, '')

        # put spaces around punctuation that does not directly follow a digit
        # NOTE(review): the trailing lookbehind can never fail (the character before
        # it is the punctuation mark itself); a lookahead `(?!\d)` may have been meant.
        processed_sentence = re.sub(r'(?<!\d)([.,!?()])(?<!\d)', r' \1 ', processed_sentence)
        processed_sentence = re.sub(r'\(','-lrb- ',processed_sentence) # replace left parens, add space after
        processed_sentence = re.sub(r'\)',' -rrb-',processed_sentence) # replace right parens, add space before
        # No parentheses are left at this point, so this substitution has no effect here
        processed_sentence = re.sub(r'\([^)]*\)', '',processed_sentence)
        processed_sentence = re.sub(r'(?<=\s)"','`` ',processed_sentence) # replace first double quotes with ``
        processed_sentence = re.sub(r'"', " ''", processed_sentence) # replace second double quote with two single quotes ''

        final_sentences[sent_key] = processed_sentence

    return final_sentences

In [8]:
# For every SQuAD-formatted file, pick the topically important sentences of each
# article and write them out twice under the same folder/file name:
#   * unlabeled_sents: one processed sentence per line (input for the question model)
#   * labeled_sents:   one ((doc #, context #, sentence #), sentence) tuple per line
total_skipped = 0
total_selected = 0

for foldername in os.listdir(wiki_squad):

    input_subfolder = wiki_squad + foldername
    output_subfolder_labeled = labeled_sents + foldername
    output_subfolder_unlabeled = unlabeled_sents + foldername

    if not os.path.exists(output_subfolder_labeled):
        os.mkdir(output_subfolder_labeled)

    if not os.path.exists(output_subfolder_unlabeled):
        os.mkdir(output_subfolder_unlabeled)

    # these are not files, just folders
    print("Selecting topical sentences for files in {} folder...".format(foldername))

    num_skipped = 0
    num_selected = 0

    # each file represents several (variable #) wikipedia articles
    for filename in os.listdir(input_subfolder):
        input_file = input_subfolder + '/' + filename

        with open(input_file) as json_file:
            data = json.load(json_file)

        # save these to different directories of labeled and unlabeled sentences;
        # `with` makes sure both files are flushed and closed before the next stage reads them
        with open(output_subfolder_labeled + '/' + filename, "w") as output_file_labeled, \
             open(output_subfolder_unlabeled + '/' + filename, "w") as output_file_unlabeled:

            # for each article in the file; `row` is the article's position in the file
            for row,value in enumerate(data['data']):

                try:
                    # here is where we clean and stem words, build word distribution
                    labeled_sentences, stemmed_sentences, word_distribution = sents_and_weights(row,value['paragraphs'])

                    # use this word distribution to get weights for each sentence and calculate most important sentences
                    sentence_weight = calc_sent_weight(word_distribution,stemmed_sentences)

                    # pull out most important sentences
                    # and keep track of where they came from: (doc #, context #, sentence #)
                    chosen_sentences = topically_important_sentence(sentence_weight,labeled_sentences)
                except Exception as err:
                    # best effort: one bad article must not stop the run, but say why it was dropped
                    num_skipped += 1
                    print("Skipping article:",value['title'],"-",repr(err))
                else:
                    num_selected += 1
                    for sents in chosen_sentences.items():

                        #save selected sentences directly to file, for onmt model
                        output_file_unlabeled.write(str(sents[1])+'\n')
                        # keep track of their locations, though
                        output_file_labeled.write(str(sents)+'\n')

    total_skipped += num_skipped
    total_selected += num_selected
    print("Completing files in {} folder... {} articles processed, {} articles skipped.\n".format(foldername,num_selected,num_skipped))

print("Sentence selection complete. Total {} articles processed, {} skipped for question generation.".format(total_selected, total_skipped))

Selecting topical sentences for files in AB folder...
Completing files in AB folder... 5022 articles processed, 0 articles skipped.

Selecting topical sentences for files in AA folder...
Completing files in AA folder... 5280 articles processed, 0 articles skipped.

Sentence selection complete. Total 10302 articles processed, 0 skipped for question generation.


## 3. Question Generation

This step uses the GenerationQ question-generation model (https://github.com/drewserles/GenerationQ) and assumes that the model has already been trained.

### 3.1 Question Generation

In [9]:
generationq_dir = '~/GenerationQ/model'
model = generationq_dir + '/trained/600rnn_step_16000.pt' # update depending on which model has lowest perplexity

start = time.time()
files = 0

for foldername in os.listdir(unlabeled_sents):
    
    input_subfolder = unlabeled_sents + foldername
    output_subfolder = questions + foldername
    
    if not os.path.exists(output_subfolder):
        os.mkdir(output_subfolder)

    # these are not files, just folders
    print("Beginning question generation for files in {} folder...".format(foldername))
        
    # each file represents several (variable #) wikipedia articles
    for filename in os.listdir(input_subfolder):

        
        # input file is unlabeled sentence
        input_file = input_subfolder + '/' + filename
              
        # save list of questions
        output_file = output_subfolder + '/' + filename
        
        
        !python -W ignore $generationq_dir/test.py -model $model -src $input_file -output $output_file \
        -replace_unk -beam_size 3 -gpu 0 -batch_size 30 2> /dev/null
        
        files += 1
        if files % 20 == 0:
            if files == 20:
                print("Progress: processed 20 files in {:.2f} minutes, total: {} files".format((time.time()-start)/60,files))
                chunk_time = time.time()
            else:
                print("Progress: processed 20 files in {:.2f} minutes, total: {} files".format((time.time()-chunk_time)/60),files)
            chunk_time = time.time()


print("\nProcessed {} total files in {:.2f} total minutes".format(files,(time.time()-start)/60))

Beginning question generation for files in AB folder...
PRED AVG SCORE: -0.6979, PRED PPL: 2.0095
PRED AVG SCORE: -0.6220, PRED PPL: 1.8626
PRED AVG SCORE: -0.6757, PRED PPL: 1.9654
PRED AVG SCORE: -0.6667, PRED PPL: 1.9479
PRED AVG SCORE: -0.6571, PRED PPL: 1.9291
PRED AVG SCORE: -0.6660, PRED PPL: 1.9464
PRED AVG SCORE: -0.6595, PRED PPL: 1.9338
PRED AVG SCORE: -0.7150, PRED PPL: 2.0443
PRED AVG SCORE: -0.6685, PRED PPL: 1.9512
PRED AVG SCORE: -0.6558, PRED PPL: 1.9266
PRED AVG SCORE: -0.6985, PRED PPL: 2.0108
PRED AVG SCORE: -0.6686, PRED PPL: 1.9515
PRED AVG SCORE: -0.6498, PRED PPL: 1.9152
PRED AVG SCORE: -0.6578, PRED PPL: 1.9306
PRED AVG SCORE: -0.6954, PRED PPL: 2.0046
PRED AVG SCORE: -0.6402, PRED PPL: 1.8968
PRED AVG SCORE: -0.6048, PRED PPL: 1.8309
PRED AVG SCORE: -0.6965, PRED PPL: 2.0067
PRED AVG SCORE: -0.6773, PRED PPL: 1.9685
PRED AVG SCORE: -0.6568, PRED PPL: 1.9285
Progress: processed 20 files in 9.92 minutes
PRED AVG SCORE: -0.6857, PRED PPL: 1.9852
PRED AVG SCORE: -

PRED AVG SCORE: -0.6670, PRED PPL: 1.9484
PRED AVG SCORE: -0.6883, PRED PPL: 1.9902
PRED AVG SCORE: -0.6952, PRED PPL: 2.0041
PRED AVG SCORE: -0.6857, PRED PPL: 1.9851
PRED AVG SCORE: -0.6960, PRED PPL: 2.0057
PRED AVG SCORE: -0.7060, PRED PPL: 2.0259
PRED AVG SCORE: -0.6267, PRED PPL: 1.8715
PRED AVG SCORE: -0.6483, PRED PPL: 1.9122
PRED AVG SCORE: -0.6839, PRED PPL: 1.9815
PRED AVG SCORE: -0.6872, PRED PPL: 1.9881
PRED AVG SCORE: -0.6604, PRED PPL: 1.9356
PRED AVG SCORE: -0.6088, PRED PPL: 1.8382
PRED AVG SCORE: -0.6800, PRED PPL: 1.9738
PRED AVG SCORE: -0.6822, PRED PPL: 1.9783
PRED AVG SCORE: -0.6470, PRED PPL: 1.9099
PRED AVG SCORE: -0.6884, PRED PPL: 1.9905
PRED AVG SCORE: -0.6750, PRED PPL: 1.9641
Progress: processed 200 files in 10.88 minutes

Processed 200 total files in 103.27 total minutes


### 3.2 Remove "problem" files

If the question generation model runs into an error and is unable to complete question generation for a file, it may output a blank file. Because we have many files/articles to choose from, it's OK to just throw these away. And, ideally, there are no files to remove anyway!

In [10]:
total_empty = 0
total_okay = 0
for foldername in os.listdir(questions):
    
    input_subfolder = questions + foldername
    #print(input_subfolder)
    # these are not files, just folders
    print("Identifying 'problem' files (with no generated questions) in {} folder...".format(foldername))
        
    num_empty = 0
    num_okay = 0
    # each file represents several (variable #) wikipedia articles
    for filename in os.listdir(input_subfolder):
        
        input_file = input_subfolder + '/' + filename
        
        if os.stat(input_file).st_size == 0:
            num_empty += 1
            print("Removing file {} from all corresponding directories".format(foldername + '/' + filename))
            
            path_to_file = '/' + foldername + '/' + filename
            for i,d in enumerate(directories):
                del_file = d + path_to_file
                !rm -rf $del_file
        else:
            num_okay += 1
            
    total_empty += num_empty
    total_okay += num_okay
            
    print('In folder {}, {} files remain and {} files were removed.\n'.format(foldername, num_okay, num_empty))
print('In total, {} files remain and {} files were removed.'.format(total_okay, total_empty))

Identifying 'problem' files (with no generated questions) in AB folder...
In folder AB, 100 files remain and 0 files were removed.

Identifying 'problem' files (with no generated questions) in AA folder...
In folder AA, 100 files remain and 0 files were removed.

In total, 200 files remain and 0 files were removed.


### 3.3 Label questions, add back to json file

In [11]:
# Attach each generated question to the paragraph its source sentence came from
# and write the updated SQuAD-formatted file back in place.
# NOTE(review): questions are appended, so running this cell twice duplicates them.
total_questions = 0

for foldername in os.listdir(labeled_sents):

    input_subfolder_q = questions + foldername
    input_subfolder_s = labeled_sents + foldername
    output_subfolder = wiki_squad + foldername

    print("Adding questions to squad-formatted Wikipedia files in {} folder...".format(foldername))

    num_questions = 0
    # each file represents several (variable #) wikipedia articles
    for filename in os.listdir(input_subfolder_q):

        # input file is questions with scores
        input_file_q = input_subfolder_q + '/' + filename
        input_file_s = input_subfolder_s + '/' + filename
        output_file = output_subfolder + '/' + filename

        with open(output_file) as json_file:
            data = json.load(json_file)
        wiki_dict = data['data']

        with open(input_file_q) as f:
            # start with the 1st line, take every other line
            pred_questions = f.read().splitlines()[::2]

        with open(input_file_s) as f2:
            # the n-th kept question line is paired with the n-th labeled sentence line
            for line_q,line_s in zip(pred_questions,f2):
                # each line is the text form of a ((doc #, context #, sentence #), sentence)
                # tuple; literal_eval parses it without executing arbitrary code
                line_tuple = ast.literal_eval(line_s)
                item_id = line_tuple[0]

                doc = wiki_dict[item_id[0]] # pull the whole document
                context = doc["paragraphs"][item_id[1]]

                num_questions += 1
                total_questions += 1
                context['qas'].append({"question": line_q.rstrip(), "answers": [], "id": str(item_id)})

        with open(output_file, 'w') as outfile:
            json.dump(data, outfile)

    print("Completed adding {} questions to squad-formatted Wikipedia files in {} folder.\n".format(num_questions, foldername))

print("Complete. Added {} total questions to squad-formatted files.".format(total_questions))

Adding questions to squad-formatted Wikipedia files in AB folder...
Completed adding 47209 questions to squad-formatted Wikipedia files in AB folder.

Adding questions to squad-formatted Wikipedia files in AA folder...
Completed adding 48454 questions to squad-formatted Wikipedia files in AA folder.

Complete. Added 95663 total questions to squad-formatted files.


## 4. Answer Questions

### 4.2 DrQA question answering

In [None]:
# preprocess: run DrQA's reader preprocess.py script on the file named by `sample_file_abbr`
# NOTE(review): `sample_file_abbr` is not defined anywhere in the visible part of this
# notebook; it has to be assigned before this cell can run on a fresh kernel.
! python ../DrQA/scripts/reader/preprocess.py $sample_file_abbr

In [None]:
qa_dir = '~/DrQA'
model = qa_dir + '/data/reader/single.mdl'
qa_predict = qa_dir + '/scripts/reader/predict.py' 

start = time.time()
files = 0

for foldername in os.listdir(wiki_squad):
    
    input_subfolder = wiki_squad + foldername
    output_subfolder = answers + foldername
    
    if not os.path.exists(output_subfolder):
        os.mkdir(output_subfolder)

    # these are not files, just folders
    print("Beginning question answering for files in {} folder...".format(foldername))
        
    # each file represents several (variable #) wikipedia articles
    for filename in os.listdir(input_subfolder):
        
        # input file is squad-formatted wikipedia articles, with questions sentence
        input_file = input_subfolder + '/' + filename
              
        # save list of questions
        output_file = output_subfolder + '/' + filenamw 
        
        !python -W ignore $qa_predict --model $model --out-dir $output_subfolder $input_file
        
        files += 1
        if files % 20 == 0:
            if files == 20:
                print("Progress: processed {} files in {:.2f} minutes".format(files,(time.time()-start)/60))
                chunk_time = time.time()
            else:
                print("Progress: processed {} files in {:.2f} minutes".format(files,(time.time()-chunk_time)/60))
            chunk_time = time.time()


print("\nProcessed {} total files in {:.2f} total minutes".format(files,(time.time()-start)/60))

In [7]:
# Run DrQA's pipeline predict.py script on the file named by `sample_squad_dev`.
# NOTE(review): `sample_squad_dev` is not defined in the visible part of this notebook,
# and the recorded run of this cell ended in `KeyError: 'question'` inside predict.py.
!python ../DrQA/scripts/pipeline/predict.py $sample_squad_dev

04/07/2019 02:55:24 PM: [ CUDA enabled (GPU -1) ]
04/07/2019 02:55:24 PM: [ Initializing pipeline... ]
04/07/2019 02:55:24 PM: [ Initializing document ranker... ]
04/07/2019 02:55:24 PM: [ Loading /home/julia_buffinton/DrQA/data/wikipedia/docs-tfidf-ngram=2-hash=16777216-tokenizer=simple.npz ]
04/07/2019 02:56:01 PM: [ Initializing document reader... ]
04/07/2019 02:56:01 PM: [ Loading model /home/julia_buffinton/DrQA/data/reader/multitask.mdl ]
04/07/2019 02:56:06 PM: [ Initializing tokenizers and document retrievers... ]
04/07/2019 02:56:09 PM: [ Loading queries from ../GenerationQ/model/data/squad-v1.1-dev.json ]
Traceback (most recent call last):
  File "../DrQA/scripts/pipeline/predict.py", line 109, in <module>
    queries.append(data['question'])
KeyError: 'question'


### 4.3 Add answers back to file

In [28]:
# Merge predicted answers into one sample SQuAD-formatted file and save the
# result as a new '-qa.json' file.
with open('./sample_data/AA/wiki_squad_00.json') as json_file:
    data = json.load(json_file)
wiki_dict = data['data']

with open('./sample_data/AA/wiki_squad_00-single.txt') as f:
    qa_data = json.load(f)

for key, value in qa_data.items():
    # keys are question ids, i.e. the text form of a (doc #, context #, sentence #)
    # tuple; literal_eval parses them without executing arbitrary code
    item_id = ast.literal_eval(key)

    doc = wiki_dict[item_id[0]] # pull the whole document
    context = doc["paragraphs"][item_id[1]] # find the appropriate paragraph

    # each paragraph may have several associated questions, so need to find the right one
    for question in context['qas']:
        if question['id'] == key:
            # presumably value[0][0] is the text of the first prediction - TODO confirm
            # against the prediction file format; 'answer_start' is a fixed placeholder
            question['answers'].append({'answer_start' : 0, 'text': value[0][0]})

with open('./sample_data/AA/wiki_squad_00-qa.json', 'w') as outfile:
    json.dump(data, outfile)

{'paragraphs': [{'qas': [], 'context': 'Anarchism is a political philosophy that advocates self-governed societies based on voluntary, cooperative institutions and the rejection of hierarchies those societies view as unjust. These institutions are often described as stateless societies, although several authors have defined them more specifically as institutions based on non-hierarchical or free associations. Anarchism holds capitalism, the state, and representative democracy to be undesirable, unnecessary, and harmful.'}, {'qas': [], 'context': 'While opposition to the state is central, many forms of anarchism specifically entail opposing authority or hierarchical organisation in the conduct of all human relations. Anarchism is often considered a far-left ideology, and much of anarchist economics and anarchist legal philosophy reflect anti-authoritarian interpretations of communism, collectivism, syndicalism, mutualism, or participatory economics.'}, {'qas': [], 'context': 'Anarchism 