In [1]:
import os
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string
import numpy as np
import pandas as pd
import random
import pickle
import re
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

# Creating a dataframe from the text files 

In [2]:
# Making a column of the .txt files path from the directory
import glob
directory = '/kaggle/input/dataset-ir-assignment1/text_files/'
txt_files = glob.glob(directory + '*.txt')
print(len(txt_files))
df = pd.DataFrame(columns = ["path"])
df["path"] = txt_files
df.head()

999


Unnamed: 0,path
0,/kaggle/input/dataset-ir-assignment1/text_file...
1,/kaggle/input/dataset-ir-assignment1/text_file...
2,/kaggle/input/dataset-ir-assignment1/text_file...
3,/kaggle/input/dataset-ir-assignment1/text_file...
4,/kaggle/input/dataset-ir-assignment1/text_file...


In [3]:
# Getting the contents of the .txt files from the directory to a parallel column in the previously created dataframe for easier data preprocessing
content_list = []
for index, row in df.iterrows():
    file_path = row['path']
    try:
        with open(file_path, 'r') as file:
            content_list.append(file.read())
    except FileNotFoundError:
        print(f"File '{file_path}' not found.")

print(len(content_list))


df['text'] = content_list
print(df['text'].shape)

df.head()

999
(999,)


Unnamed: 0,path,text
0,/kaggle/input/dataset-ir-assignment1/text_file...,The Amazon advertised pictures of this item is...
1,/kaggle/input/dataset-ir-assignment1/text_file...,I really like the simplicity of this bridge. I...
2,/kaggle/input/dataset-ir-assignment1/text_file...,"Truthfully, I had no idea that the even were u..."
3,/kaggle/input/dataset-ir-assignment1/text_file...,My ES*335 fit loose and needed some padding ad...
4,/kaggle/input/dataset-ir-assignment1/text_file...,I bought a used MIM strat that came with a bla...


In [4]:
df.to_csv("/kaggle/working/data.csv", index = False)

In [5]:
df = pd.read_csv("/kaggle/working/data.csv")
df = df.dropna().reset_index(drop =True)

# Q1. Data Preprocessing 

1. Perform the following preprocessing steps on each of the text files in the dataset linked
above.

a. Lowercase the text


b. Perform tokenization


c. Remove stopwords


d. Remove punctuations


e. Remove blank space tokens

In [6]:
def cleaning(data):
    corpus = []
    for i in range(0, len(data)):
        sentence = re.sub('[^a-zA-Z]', ' ', str(data[i])) # Removing Punctuation Marks
        sentence = sentence.lower() # Lowering the text
        sentence = sentence.split() # Tokenization
        
        # Remove blank space tokens
        sentence = [word for word in sentence if word.strip()]  # Removes empty strings
        
        all_stopwords = stopwords.words('english') # Removing the stopwords
        #all_stopwords.remove('not')
        
        sentence = [word for word in sentence if not word in set(all_stopwords)]
        sentence = ' '.join(sentence)
        corpus.append(sentence)
      
    return corpus

In [7]:
df['preprocessed_text'] = cleaning(df['text'])
df.head()
df.to_csv("/kaggle/working/preprocessed_dataset.csv", index = False)

### Sample of five examples: original text vs pre-processed text

In [8]:
def sample_cleaning(data):
    corpus = []
    for i in range(0, min(len(data), 5)):
        original_text = data[i]
        print("Original Text:")
        print(original_text)
        
        # Preprocessing steps
        sentence = re.sub('[^a-zA-Z]', ' ', str(data[i])) # Removing Punctuation Marks
        print("\nAfter Removing Punctuation Marks:")
        print(sentence)
        
        sentence = sentence.lower() # Lowering the text
        print("\nAfter Lowercasing:")
        print(sentence)
        
        # Tokenization
        sentence = sentence.split()
        print("\nAfter Tokenization:")
        print(sentence)
        
        # Remove blank space tokens
        sentence = [word for word in sentence if word.strip()]  
        print("\nAfter Removing Blank Space Tokens:")
        print(sentence)
        
        all_stopwords = stopwords.words('english') # Removing the stopwords
        sentence = [word for word in sentence if not word in set(all_stopwords)]
        sentence = ' '.join(sentence)
        
        # Print preprocessed text
        print("\nAfter Removing Stopwords:")
        print(sentence)
        print("\n")
        
        corpus.append(sentence)
      
    return corpus

cleaned_data = sample_cleaning(df['text'][:5])

Original Text:
The Amazon advertised pictures of this item is a Fender FTE3 pre amp. The description says it's a Fishman Isys III type. I googled it and found out there's two version of pre amp type of this T Bucket 300 CE series. The one I got today is a Fishman type pre amp which is for me much better that FTE3. Out of the box, I fell inlove with the color and design. Mine is a 3 tone sunburst. The action is just right for me. The tone/sound is awesome. I hooked it up in my Vox amp and I have to say this thing really rocks. Of course you can get a better sounding acoustic/electric guitar out there for a very much expensive price. I'm a family man with two kids and I am not gonna spend a thousand bucks just for a hobby. Amplified or not I love the tone of this baby. For it's appearance, sound quality, and of course THE PRICE, I'm giving it a five stars.

I uploaded some pictures (above) of the Fishman pre amp version of this T bucket series. :O)..

After Removing Punctuation Marks:
Th

# Q2. Unigram Inverted Index and Boolean Queries

## 1. Create a unigram inverted index (from scratch; No library allowed) of the dataset obtained from Q1 (after preprocessing).

In [9]:
words = df['preprocessed_text'].str.split()
all_words = [word for sublist in words for word in sublist]
print("Five Unique Words:", all_words[0:5])
unique_words = set(all_words)
num_unique_words = len(unique_words)
print("Number of unique words:", num_unique_words)

Five Unique Words: ['amazon', 'advertised', 'pictures', 'item', 'fender']
Number of unique words: 6009


In [10]:
def create_inverted_index(corpus):
    inverted_index = {}
    for doc_id, document in enumerate(corpus):
        terms = document.split()
        for term in terms:
            if term not in inverted_index:
                inverted_index[term] = set()
            inverted_index[term].add(doc_id)
    return inverted_index


inverted_index = create_inverted_index(df['preprocessed_text'])
print(len(inverted_index))
print()
print()
print("Inverted Index for the word:")
print("'amazon':", inverted_index['amazon'])

6009


Inverted Index for the word:
'amazon': {0, 769, 130, 386, 773, 7, 9, 270, 654, 144, 401, 530, 655, 534, 151, 920, 538, 157, 418, 420, 549, 295, 807, 170, 555, 300, 557, 693, 952, 954, 445, 957, 191, 64, 450, 70, 326, 327, 456, 330, 838, 466, 83, 595, 346, 476, 350, 354, 614, 616, 745, 878, 880, 881, 371, 764}


## 2. Use Python’s pickle module to save and load the unigram inverted index.

In [11]:
import pickle

with open('inverted_index.pickle', 'wb') as f:
    pickle.dump(inverted_index, f)

In [12]:
import pickle

with open('inverted_index.pickle', 'rb') as f:
    inverted_index = pickle.load(f)

print("Inverted Index for the word: 'amazon'", inverted_index['amazon'])

Inverted Index for the word: 'amazon' {0, 769, 130, 386, 773, 7, 9, 270, 654, 144, 401, 530, 655, 534, 151, 920, 538, 157, 418, 420, 549, 295, 807, 170, 555, 300, 557, 693, 952, 954, 445, 957, 191, 64, 450, 70, 326, 327, 456, 330, 838, 466, 83, 595, 346, 476, 350, 354, 614, 616, 745, 878, 880, 881, 371, 764}


## 3. Provide support for the following operations:
a. T1 AND T2


b. T1 OR T2


c. T1 AND NOT T2


d. T1 OR NOT T2

In [13]:
def create_inverted_index(corpus):
    inverted_index = {}
    doc_term_freq = {}  # Dictionary to store term frequencies per document
    for doc_id, document in enumerate(corpus):
        terms = document.split()
        doc_term_freq[doc_id] = {}
        for term in terms:
            if term not in inverted_index:
                inverted_index[term] = set()
            inverted_index[term].add(doc_id)
            doc_term_freq[doc_id][term] = doc_term_freq[doc_id].get(term, 0) + 1
    return inverted_index, doc_term_freq

def boolean_AND(inverted_index, T1, T2):
    if T1 in inverted_index and T2 in inverted_index:
        return inverted_index[T1].intersection(inverted_index[T2])
    else:
        return set()

def boolean_OR(inverted_index, T1, T2):
    if T1 in inverted_index and T2 in inverted_index:
        return inverted_index[T1].union(inverted_index[T2])
    elif T1 in inverted_index:
        return inverted_index[T1]
    elif T2 in inverted_index:
        return inverted_index[T2]
    else:
        return set()

def boolean_AND_NOT(inverted_index, doc_term_freq, T1, T2):
    result = set(inverted_index.get(T1, set()))
    if T2 in inverted_index:
        result.difference_update(inverted_index[T2])
    return result

def boolean_OR_NOT(inverted_index, doc_term_freq, T1, T2):
    result = set(inverted_index.get(T1, set()))
    if T2 in inverted_index:
        result.difference_update(inverted_index[T2])
    return result


inverted_index, doc_term_freq = create_inverted_index(df['preprocessed_text'])
result = boolean_AND(inverted_index, 'amazon', 'advertised')
#result = boolean_OR(inverted_index, 'T1', 'T2')
#result = boolean_AND_NOT(inverted_index, doc_term_freq, 'T1', 'T2')
#result = boolean_OR_NOT(inverted_index, doc_term_freq, 'T1', 'T2')
print(result)

{0, 807}


In [14]:
print(inverted_index["amazon"])

{0, 769, 130, 386, 773, 7, 9, 270, 654, 144, 401, 530, 655, 534, 151, 920, 538, 157, 418, 420, 549, 295, 807, 170, 555, 300, 557, 693, 952, 954, 445, 957, 191, 64, 450, 70, 326, 327, 456, 330, 838, 466, 83, 595, 346, 476, 350, 354, 614, 616, 745, 878, 880, 881, 371, 764}


In [15]:
def process_query(corpus, query):
    # Create inverted index and term frequency dictionary
    inverted_index = {}
    doc_term_freq = {}
    for doc_id, document in enumerate(corpus):
        terms = document.split()
        doc_term_freq[doc_id] = {}
        for term in terms:
            if term not in inverted_index:
                inverted_index[term] = set()
            inverted_index[term].add(doc_id)
            doc_term_freq[doc_id][term] = doc_term_freq[doc_id].get(term, 0) + 1
    
    # Defining boolean operations
    def boolean_AND(operands):
        result = set(range(len(corpus)))
        for operand in operands:
            result = result.intersection(operand)
        return result

    def boolean_OR(operands):
        result = set()
        for operand in operands:
            result = result.union(operand)
        return result

    def boolean_AND_NOT(operands):
        result = operands[0]
        for operand in operands[1:]:
            result = result.difference(operand)
        return result

    def boolean_OR_NOT(operands):
        result = operands[0]
        for operand in operands[1:]:
            result = result.union(result.difference(operand))
        return result

    # Processing query
    stack = []
    operators = set(['AND', 'OR', 'NOT'])
    query_terms = query.split()
    for term in query_terms:
        if term not in operators:
            stack.append(inverted_index.get(term, set()))
        elif term == 'NOT':
            if stack:
                operand = stack.pop()
                result = set(range(len(corpus))).difference(operand)
                stack.append(result)
        else:
            operands = []
            while stack and stack[-1] not in operators:
                operands.append(stack.pop())
            if operands:
                if term == 'AND':
                    stack.append(boolean_AND(operands))
                elif term == 'OR':
                    stack.append(boolean_OR(operands))
                elif term == 'AND_NOT':
                    stack.append(boolean_AND_NOT(operands))
                elif term == 'OR_NOT':
                    stack.append(boolean_OR_NOT(operands))
    
    return stack[-1]

result = process_query(df['preprocessed_text'], 'amazon AND advertised') 
print(result)

{0, 258, 740, 807, 713, 463, 306, 597, 856, 698, 251, 734}


In [16]:
print(list(df['preprocessed_text'][:1]))
print()
print(list(df['preprocessed_text'][1:2]))

['amazon advertised pictures item fender fte pre amp description says fishman isys iii type googled found two version pre amp type bucket ce series one got today fishman type pre amp much better fte box fell inlove color design mine tone sunburst action right tone sound awesome hooked vox amp say thing really rocks course get better sounding acoustic electric guitar much expensive price family man two kids gonna spend thousand bucks hobby amplified love tone baby appearance sound quality course price giving five stars uploaded pictures fishman pre amp version bucket series']

['really like simplicity bridge adjusts easy string height length comes mounting screws even small allen wrench make height adjustments provided mounting screws phillips head screws one thing missed ordered fact bridge string thru guitar body placement strings dead end stop flair plate included picture may help make others aware string thru applications biggie tho use another project']


In [17]:
def process_queries(corpus, input_data):
    # Create inverted index and term frequency dictionary
    inverted_index = {}
    doc_term_freq = {}
    for doc_id, document in enumerate(corpus):
        terms = document.split()
        doc_term_freq[doc_id] = {}
        for term in terms:
            if term not in inverted_index:
                inverted_index[term] = set()
            inverted_index[term].add(doc_id)
            doc_term_freq[doc_id][term] = doc_term_freq[doc_id].get(term, 0) + 1
    
    # Define boolean operations
    def boolean_AND(operands):
        result = set(range(len(corpus)))
        for operand in operands:
            result = result.intersection(operand)
        return result

    def boolean_OR(operands):
        result = set()
        for operand in operands:
            result = result.union(operand)
        return result

    def boolean_AND_NOT(operands):
        result = operands[0]
        for operand in operands[1:]:
            result = result.difference(operand)
        return result

    def boolean_OR_NOT(operands):
        result = operands[0]
        for operand in operands[1:]:
            result = result.union(result.difference(operand))
        return result

    # Process queries
    results = []
    num_queries = input_data["num_queries"]
    queries = input_data["queries"]
    operations = input_data["boolean_functions"]
    for i in range(num_queries):
        query = queries[i]
        operation = operations[i]
        # Process query
        stack = []
        query_terms = query.split()
        for term in query_terms:
            if term not in operations:
                stack.append(inverted_index.get(term, set()))
            elif term == 'NOT':
                if stack:
                    operand = stack.pop()
                    result = set(range(len(corpus))).difference(operand)
                    stack.append(result)
            else:
                operands = []
                while stack and stack[-1] not in operations:
                    operands.append(stack.pop())
                if operands:
                    if term == 'AND':
                        stack.append(boolean_AND(operands))
                    elif term == 'OR':
                        stack.append(boolean_OR(operands))
                    elif term == 'AND_NOT':
                        stack.append(boolean_AND_NOT(operands))
                    elif term == 'OR_NOT':
                        stack.append(boolean_OR_NOT(operands))
        results.append(stack[-1])

    return results

# Sample input
query1 = "amazon advertised pictures"
query2 = "really like simplicity"

input_data = {"num_queries": 2,
    "queries": [query1, query2],
    "boolean_functions": [['OR', 'AND NOT'], ['AND', 'OR NOT']]} 

# Process queries
results = process_queries(df['preprocessed_text'], input_data)



In [18]:
# Output results
for i, query in enumerate(input_data["queries"]):
    query_terms = query.split()
    boolean_function = input_data["boolean_functions"][i]
    
    # Ensure that the lengths match
    min_length = min(len(query_terms), len(boolean_function))
    
    # Construct the query with boolean operators applied to each term
    query_with_boolean = ' '.join([f"{query_terms[j]} {boolean_function[j]}" for j in range(min_length)])
    
    # Append the remaining terms of the query if any
    if min_length < len(query_terms):
        query_with_boolean += f" {' '.join(query_terms[min_length:])}"
    
    print(f"Query {i + 1}: {query_with_boolean}")
    print("Number of documents retrieved for query",i+1 ,": {}".format(len(results[i])))
    #print("Row indices of the documents rtrieved: {}".format(', '.join([str(doc_id) for doc_id in results[i]])))
    print("Name of the documents retrieved for query",i+1 ,":", end="")
    print(', '.join([re.search(r'/([^/]+)$', df.loc[doc_id, 'path']).group(1) for doc_id in results[i]]))
    print()
    
    print()

    


Query 1: amazon OR advertised AND NOT pictures
Number of documents retrieved for query 1 : 24
Name of the documents retrieved for query 1 :file314.txt, file24.txt, file125.txt, file634.txt, file511.txt, file982.txt, file859.txt, file22.txt, file28.txt, file108.txt, file420.txt, file226.txt, file518.txt, file422.txt, file542.txt, file640.txt, file68.txt, file118.txt, file352.txt, file918.txt, file232.txt, file289.txt, file172.txt, file208.txt


Query 2: really AND like OR NOT simplicity
Number of documents retrieved for query 2 : 3
Name of the documents retrieved for query 2 :file998.txt, file819.txt, file251.txt




# Q3. Positional Index and Phrase Queries

## 1. Create a positional index (from scratch; No library allowed) of the dataset obtained from Q1

In [19]:
# Function for creating positional index
def create_positional_index(data):
    positional_index = {}
    for index, text in enumerate(data):
        words = text.split()
        for position, word in enumerate(words):
            if word in positional_index:
                positional_index[word].append((index, position))
            else:
                positional_index[word] = [(index, position)]
    return positional_index

# Assuming data is the list of preprocessed texts obtained from Q1
positional_index = create_positional_index(df['preprocessed_text'])
count = 0

for word, positions in positional_index.items():
    count = count + 1
    print(word, ':', positions)
    if count == 5:
        break

amazon : [(0, 0), (7, 25), (9, 9), (64, 32), (64, 98), (70, 7), (83, 26), (130, 3), (144, 6), (151, 10), (157, 51), (170, 0), (170, 28), (191, 27), (191, 48), (270, 35), (295, 83), (300, 26), (300, 37), (326, 5), (326, 20), (327, 29), (330, 74), (346, 8), (350, 44), (354, 8), (371, 26), (371, 45), (386, 43), (401, 28), (401, 46), (418, 28), (418, 47), (420, 19), (445, 28), (445, 46), (445, 119), (450, 27), (450, 46), (450, 80), (456, 7), (466, 37), (476, 19), (530, 36), (534, 42), (538, 63), (549, 24), (555, 21), (557, 9), (595, 3), (614, 54), (616, 27), (616, 48), (654, 6), (655, 14), (693, 7), (745, 28), (745, 46), (764, 31), (764, 52), (769, 28), (769, 48), (773, 69), (807, 20), (838, 4), (838, 13), (878, 54), (880, 45), (880, 57), (881, 3), (881, 45), (920, 82), (952, 28), (952, 47), (954, 34), (957, 53)]
advertised : [(0, 1), (251, 99), (258, 8), (306, 8), (463, 3), (597, 1), (698, 1), (713, 0), (734, 1), (740, 19), (807, 22), (856, 2)]
pictures : [(0, 2), (0, 86), (8, 21), (43, 2

In [20]:
print(list(df['preprocessed_text'][:1]))
print()
print(list(df['preprocessed_text'][1:2]))
print()
print(list(df['preprocessed_text'][2:3]))

['amazon advertised pictures item fender fte pre amp description says fishman isys iii type googled found two version pre amp type bucket ce series one got today fishman type pre amp much better fte box fell inlove color design mine tone sunburst action right tone sound awesome hooked vox amp say thing really rocks course get better sounding acoustic electric guitar much expensive price family man two kids gonna spend thousand bucks hobby amplified love tone baby appearance sound quality course price giving five stars uploaded pictures fishman pre amp version bucket series']

['really like simplicity bridge adjusts easy string height length comes mounting screws even small allen wrench make height adjustments provided mounting screws phillips head screws one thing missed ordered fact bridge string thru guitar body placement strings dead end stop flair plate included picture may help make others aware string thru applications biggie tho use another project']

['truthfully idea even ukul

## 2. Use Python’s pickle module to save and load the positional index.

In [21]:
import pickle

with open('positional_index.pkl', 'wb') as file:
    pickle.dump(positional_index, file)

with open('positional_index.pkl', 'rb') as file:
    loaded_positional_index = pickle.load(file)

In [22]:
# Function for retrieving documents based on query using positional index
def retrieve_documents(positional_index, query):
    if isinstance(query, str):
        query_terms = query.split()
    else:
        query_terms = query
    document_candidates = {}
    
    # Initialize document_candidates with all documents
    for term in query_terms:
        if term in positional_index:
            for document_position in positional_index[term]:
                document_id = document_position[0]
                if document_id in document_candidates:
                    document_candidates[document_id].append(document_position)
                else:
                    document_candidates[document_id] = [document_position]

    # Filtering documents where all terms appear in correct order
    retrieved_documents = []
    for document_id, positions in document_candidates.items():
        positions.sort()
        match = True
        for i in range(len(positions) - 1):
            if positions[i+1][0] != positions[i][0] or positions[i+1][1] - positions[i][1] != 1:
                match = False
                break
        if match:
            retrieved_documents.append(document_id)
    
    return retrieved_documents



In [23]:
queries = [3, "amazon", "simplicity", "fender"]

# Extract the number of queries
num_queries = queries[0]

# Process queries and retrieve documents
for query_number, query_text in enumerate(queries[1:], start=1):
    print(f"Query {query_number}: {query_text}")
    retrieved_docs = retrieve_documents(loaded_positional_index, query_text)
    print(f"Number of documents retrieved for Query {query_number} using positional index: {len(retrieved_docs)}")
    print("Names of documents retrieved for Query",query_number, 'using positional index:', end=" ")
    for doc_id in retrieved_docs:
        row_index = df.index[doc_id]
        column_value = df.at[row_index, "path"]
        column_value = re.search(r'/([^/]+)$', column_value).group(1)
        print(column_value, end=", ")
    print() 

Query 1: amazon
Number of documents retrieved for Query 1 using positional index: 38
Names of documents retrieved for Query 1 using positional index: file314.txt, file65.txt, file988.txt, file343.txt, file669.txt, file965.txt, file205.txt, file241.txt, file412.txt, file656.txt, file33.txt, file363.txt, file835.txt, file305.txt, file96.txt, file908.txt, file916.txt, file866.txt, file571.txt, file896.txt, file324.txt, file706.txt, file114.txt, file637.txt, file905.txt, file311.txt, file145.txt, file583.txt, file915.txt, file514.txt, file408.txt, file176.txt, file347.txt, file593.txt, file216.txt, file684.txt, file813.txt, file133.txt, 
Query 2: simplicity
Number of documents retrieved for Query 2 using positional index: 3
Names of documents retrieved for Query 2 using positional index: file998.txt, file819.txt, file251.txt, 
Query 3: fender
Number of documents retrieved for Query 3 using positional index: 45
Names of documents retrieved for Query 3 using positional index: file314.txt, fi

In [24]:
# Support for Phrase Queries
import re
def retrieve_documents(positional_index, query):
    if isinstance(query, str):
        query_terms = query.split()
    else:
        query_terms = query
    document_candidates = {}
    
    # Initialize document_candidates with all documents
    for term in query_terms:
        if term in positional_index:
            for document_position in positional_index[term]:
                document_id = document_position[0]
                if document_id in document_candidates:
                    document_candidates[document_id].append(document_position)
                else:
                    document_candidates[document_id] = [document_position]

    # Filter documents where at least one term appears and all terms appear
    retrieved_documents = []
    for document_id, positions in document_candidates.items():
        if any(doc_pos[0] == document_id for doc_pos in positions):
            retrieved_documents.append(document_id)
        elif all(term in [pos[0] for pos in positions] for term in query_terms):
            retrieved_documents.append(document_id)
    
    return list(set(retrieved_documents))
queries = [
    "simplicity", 
    "fender amazon"]

# Process queries and retrieve documents
for i, query in enumerate(queries, 1):
    print(i, query)
    retrieved_docs = retrieve_documents(loaded_positional_index, query)
    print(f"Number of documents retrieved for query {i} using positional index: {len(retrieved_docs)}")
    print(f"Number of documents retrieved for query {i} using positional index:")
    for doc_id in retrieved_docs:
        row_index = df.index[doc_id]
        column_value = df.at[row_index, "path"]
        column_value = re.search(r'/([^/]+)$', column_value).group(1)
        print(column_value, end=", ")
    print() 

1 simplicity
Number of documents retrieved for query 1 using positional index: 3
Number of documents retrieved for query 1 using positional index:
file998.txt, file819.txt, file251.txt, 
2 fender amazon
Number of documents retrieved for query 2 using positional index: 113
Number of documents retrieved for query 2 using positional index:
file314.txt, file65.txt, file988.txt, file706.txt, file11.txt, file114.txt, file637.txt, file558.txt, file905.txt, file439.txt, file311.txt, file145.txt, file652.txt, file346.txt, file742.txt, file343.txt, file669.txt, file46.txt, file174.txt, file583.txt, file302.txt, file604.txt, file995.txt, file313.txt, file741.txt, file907.txt, file915.txt, file102.txt, file278.txt, file627.txt, file965.txt, file432.txt, file514.txt, file408.txt, file245.txt, file205.txt, file197.txt, file241.txt, file73.txt, file412.txt, file256.txt, file460.txt, file883.txt, file176.txt, file183.txt, file150.txt, file913.txt, file623.txt, file301.txt, file440.txt, file617.txt, fi