# Task 1: Implementing a custom TfidfVectorizer

In [1]:
# Toy corpus for Task 1: four short lower-case sentences, one string per document.
corpus1 = [
     'this is the first document',
     'this document is the second document',
     'and this is the third one',
     'is this the first document',
]

In [2]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
from tqdm import tqdm
import os

In [3]:
from collections import Counter
from tqdm import tqdm
from scipy.sparse import csr_matrix
import math
import operator
from sklearn.preprocessing import normalize
import numpy

In [4]:
from tqdm import tqdm # tqdm is a library that helps us to visualize the runtime of for loop. refer this to know more about tqdm
#https://tqdm.github.io/

# it accepts only a list of sentences
def fit(corpus1):
    """Build a vocabulary from a list of pre-processed sentences.

    Parameters
    ----------
    corpus1 : list of str
        One string per document. Anything that is not a ``list`` is rejected.

    Returns
    -------
    dict or None
        Maps every distinct word of two or more characters to its column
        index; indices follow the alphabetical order of the words.
        ``None`` (after printing a message) when ``corpus1`` is not a list.
    """
    if not isinstance(corpus1, list):
        print("you need to pass list of sentance")
        return None

    unique_words = set()
    for row in corpus1:
        # split() with no argument splits on any run of whitespace, exactly
        # like transform() does. split(" ") kept tab/newline-joined words as
        # one token, producing vocabulary entries transform() could never match.
        for word in row.split():
            if len(word) < 2:
                # single-character tokens are ignored
                continue
            unique_words.add(word)

    return {word: index for index, word in enumerate(sorted(unique_words))}

In [5]:
# https://stackoverflow.com/questions/9919604/efficiently-calculate-word-frequency-in-a-string
# https://docs.scipy.org/doc/scipy-0.19.0/reference/generated/scipy.sparse.csr_matrix.html
# note that we need to pass already pre-processed text here; the preprocessing itself is not included in this function

def transform(corpus1,vocab):
    """Count vocabulary words per document and return the counts as a sparse matrix.

    Parameters
    ----------
    corpus1 : list of str
        Documents to vectorise; a non-list prints a message and yields ``None``.
    vocab : dict
        Word -> column index mapping, e.g. the one produced by ``fit``.

    Returns
    -------
    scipy.sparse.csr_matrix or None
        Matrix of shape ``(len(corpus1), len(vocab))`` whose entry (i, j) is
        the number of times word j occurs in document i.
    """
    if not isinstance(corpus1, (list,)):
        print("you need to pass list of strings")
        return None

    doc_ids, word_ids, counts = [], [], []
    for doc_id, document in enumerate(tqdm(corpus1)):
        # Counter gives {word: occurrences} for this document
        for token, count in Counter(document.split()).items():
            if len(token) < 2:
                continue
            # -1 marks a token that is not part of the vocabulary
            word_id = vocab.get(token, -1)
            if word_id == -1:
                continue
            doc_ids.append(doc_id)
            word_ids.append(word_id)
            counts.append(count)

    matrix_shape = (len(corpus1), len(vocab))
    return csr_matrix((counts, (doc_ids, word_ids)), shape=matrix_shape)

In [6]:
# Learn the vocabulary, then vectorise the corpus ONCE; the dense array is
# derived from the sparse result instead of running transform() a second time.
vocab = fit(corpus1)
words_corpus = list(vocab.keys())
print(words_corpus)
spar_matrix1 = transform(corpus1, vocab) # working of this transform() function is same as used in CountVectorizer
trans_matrix = spar_matrix1.toarray()
print(trans_matrix)
print(type(spar_matrix1))

['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']


100%|████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<?, ?it/s]


[[0 1 1 1 0 0 1 0 1]
 [0 2 0 1 0 1 1 0 1]
 [1 0 0 1 1 0 1 1 1]
 [0 1 1 1 0 0 1 0 1]]


100%|██████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 3097.71it/s]


<class 'scipy.sparse.csr.csr_matrix'>


In [7]:
# applying CountVectorizer on same data by using predefined fit() and transform() functions
from sklearn.feature_extraction.text import CountVectorizer
c_corpus = corpus1
vec = CountVectorizer(analyzer='word')

vec.fit(c_corpus)
feature_matrix = vec.transform(c_corpus)
print(feature_matrix.toarray())
# get_feature_names() was removed in scikit-learn 1.2; prefer the replacement
# and fall back to the old method on releases that do not have it yet.
if hasattr(vec, "get_feature_names_out"):
    feature_names = [str(name) for name in vec.get_feature_names_out()]
else:
    feature_names = vec.get_feature_names()
print(feature_names)

[[0 1 1 1 0 0 1 0 1]
 [0 2 0 1 0 1 1 0 1]
 [1 0 0 1 1 0 1 1 1]
 [0 1 1 1 0 0 1 0 1]]
['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', 'this']


In [8]:
# Comparing results with countvectorizer
import numpy as np
print(type(feature_matrix))
print('='*50)
print(type(spar_matrix1))
print('='*50)
print(feature_matrix[0])
print('='*50)
print(spar_matrix1[0])
# how to compare two sparse matrices :https://stackoverflow.com/questions/30685024/check-if-two-scipy-sparse-csr-matrix-are-equal
print('='*50)
# .toarray() is the supported way to densify; the `.A` shortcut is deprecated
# and no longer available in recent SciPy releases.
if np.allclose(spar_matrix1.toarray(), feature_matrix.toarray()):
    print("implementation of CountVectorizer is correct")
else:
    print("wrong implementation")

<class 'scipy.sparse.csr.csr_matrix'>
<class 'scipy.sparse.csr.csr_matrix'>
  (0, 1)	1
  (0, 2)	1
  (0, 3)	1
  (0, 6)	1
  (0, 8)	1
  (0, 1)	1
  (0, 2)	1
  (0, 3)	1
  (0, 6)	1
  (0, 8)	1
implementation of CountVectorizer is correct


In [10]:
def length(p):
    """Return the whitespace-separated tokens of document number ``p`` in ``corpus1``."""
    return corpus1[p].split()

# Calculating  tf values 
# doing implementaion for getting tf values
# i.e TF(t)=Number of times term t appears in a document/Total number of terms in the document
def fit2(trans_matrix):
    """Turn a matrix of raw counts into term frequencies.

    Each non-zero count is divided by the number of tokens of the matching
    document, obtained through ``length(row_index)``; zero counts stay the
    integer ``0``.

    Parameters
    ----------
    trans_matrix : sequence of rows of counts, one row per document.

    Returns
    -------
    list of list
        Same layout as ``trans_matrix``, holding the term frequencies.
    """
    tf_features = []
    for doc_index, counts in enumerate(trans_matrix):
        tf_row = []
        for count in counts:
            if count == 0:
                tf_row.append(0)
            else:
                # the document is only tokenised when there is something to divide
                tf_row.append(count / len(length(doc_index)))
        tf_features.append(tf_row)
    return tf_features
# Term frequencies of the toy corpus, as a nested list (one inner list per document).
tf_features = fit2(trans_matrix)
print(tf_features)
# converting the nested list into a sparse matrix
tf_features = csr_matrix(tf_features)
# show the stored (row, column) value entries of the first document
print(tf_features[0])

[[0, 0.2, 0.2, 0.2, 0, 0, 0.2, 0, 0.2], [0, 0.3333333333333333, 0, 0.16666666666666666, 0, 0.16666666666666666, 0.16666666666666666, 0, 0.16666666666666666], [0.16666666666666666, 0, 0, 0.16666666666666666, 0.16666666666666666, 0, 0.16666666666666666, 0.16666666666666666, 0.16666666666666666], [0, 0.2, 0.2, 0.2, 0, 0, 0.2, 0, 0.2]]
  (0, 1)	0.2
  (0, 2)	0.2
  (0, 3)	0.2
  (0, 6)	0.2
  (0, 8)	0.2


In [11]:
# print unique words from a string : https://stackoverflow.com/questions/40389820/how-to-print-unique-words-from-an-inputted-string
# printing unique words from each string in corpus1 to calculate no of words appearing in documents in next cell
# one list of distinct tokens per document (order inside each list follows set iteration)
unique = [list(set(sentence.split(' '))) for sentence in corpus1]
print(unique)

[['this', 'document', 'the', 'is', 'first'], ['this', 'second', 'document', 'the', 'is'], ['third', 'this', 'one', 'the', 'is', 'and'], ['this', 'document', 'the', 'is', 'first']]


In [12]:
# Calculating in how many documents a word is existing 

# Every inner list of `unique` holds each word of a document exactly once, so
# counting occurrences over all inner lists gives the number of documents a
# word appears in. A single Counter pass replaces the word x document x token
# triple loop, and the never-used `lst` of tokenised documents is gone.
doc_counts = Counter(word for doc_words in unique for word in doc_words)
d = {word: doc_counts[word] for word in words_corpus}
lst2 = list(d.values())
print(d)
print(lst2)
# d    : dictionary word -> number of documents containing that word
# lst2 : the same counts only, in the order of words_corpus

{'and': 1, 'document': 3, 'first': 2, 'is': 4, 'one': 1, 'second': 1, 'the': 4, 'third': 1, 'this': 4}
[1, 3, 2, 4, 1, 1, 4, 1, 4]


In [13]:
#Calculating idf values
import math
n_docs = len(corpus1)
# idf(t) = 1 + ln((1 + number of documents) / (1 + documents containing t))
lst3 = [1 + math.log((1 + n_docs) / (1 + doc_freq)) for doc_freq in d.values()]

# making dictionary from two different lists :https://stackoverflow.com/questions/209840/convert-two-lists-into-a-dictionary
keys = words_corpus
values = lst3
dict2 = dict(zip(keys, values))
print(dict2) # dictionary of idf values: key is the word, value is its idf

{'and': 1.916290731874155, 'document': 1.2231435513142097, 'first': 1.5108256237659907, 'is': 1.0, 'one': 1.916290731874155, 'second': 1.916290731874155, 'the': 1.0, 'third': 1.916290731874155, 'this': 1.0}


In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Reference values: fit scikit-learn's TfidfVectorizer (default arguments) on the same corpus.
vectorizer = TfidfVectorizer()
vectorizer.fit(corpus1)
skl_output = vectorizer.transform(corpus1)
# last expression of the cell, so the notebook displays the learned idf array
vectorizer.idf_

array([1.91629073, 1.22314355, 1.51082562, 1.        , 1.91629073,
       1.91629073, 1.        , 1.91629073, 1.        ])

In [15]:
# Comparing our implentation of idf values with actual use of TfidfVectorizer
l = vectorizer.idf_
l1 = l
lst1 = list(dict2.values())
lst1 = numpy.asarray(lst1)
print(type(l1))
print(type(lst1))
print('='*50)
print(len(l1))
print(len(lst1))
print('='*50)
print(l)
print(lst1)
# Verify numerically instead of comparing the two printed arrays by eye,
# in the same way the CountVectorizer comparison is checked.
if numpy.allclose(l1, lst1):
    print("implementation of idf values is correct")
else:
    print("wrong implementation")

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
9
9
[1.91629073 1.22314355 1.51082562 1.         1.91629073 1.91629073
 1.         1.91629073 1.        ]
[1.91629073 1.22314355 1.51082562 1.         1.91629073 1.91629073
 1.         1.91629073 1.        ]


In [16]:
# Term frequencies as a nested list. The previous inline loop divided every
# count by len(q), i.e. by the vocabulary size, rather than by the number of
# tokens in the document; fit2 uses the document length.
tf_features = fit2(trans_matrix)

# column numbers holding non-zero values in each row of trans_matrix
# trans_matrix = (n_corpus, vocab).toarray() :see above for results of trans_matrix
f_column1 = [[j for j, count in enumerate(q) if count != 0] for q in trans_matrix]
print(f_column1)

# words corresponding to those columns, row by row
lst4 = [[words_corpus[j] for j in columns] for columns in f_column1]
print(lst4[0])

[[1, 2, 3, 6, 8], [1, 3, 5, 6, 8], [0, 3, 4, 6, 7, 8], [1, 2, 3, 6, 8]]
['document', 'first', 'is', 'the', 'this']


In [17]:
# Term-frequency rows as a plain nested list from fit2; transform2 indexes it as tf_features[row][column].
tf_features = fit2(trans_matrix)

In [18]:
# Function to calculate dictinary for each column values and words appearing in tf_features
def fun(j):
    """Return ``{word: column index}`` for the words recorded for row ``j``.

    Words come from ``lst4[j]`` and their column numbers from ``f_column1[j]``;
    the two lists are paired position by position.
    """
    return dict(zip(lst4[j], f_column1[j]))

In [19]:
# Zero-filled nested list with one row per document and one column per
# vocabulary word; it receives the tf*idf values.
# The dimensions are taken from the data instead of being typed in by hand.
# https://stackoverflow.com/questions/6667201/how-to-define-a-two-dimensional-array-in-python/38159679
w,h = len(words_corpus),len(corpus1)
tf_idf = [[0 for x in range(w)] for y in range(h)]

print(len(tf_idf))
print(len(tf_idf[0]))

4
9


In [20]:
# Function calculating tf*idf values for each row of tf_features(tf values) and their corresponding idf values(from dict2)

def transform2(tf_features):
    """Multiply every non-zero term frequency by the idf of its word.

    Parameters
    ----------
    tf_features : list of list
        Term frequencies, one inner list per document.

    Returns
    -------
    list of list
        A freshly built matrix with the same layout as ``tf_features``:
        tf * idf where ``fun(row)`` reports a word, ``0`` elsewhere.

    The result is built locally rather than written into the module-level
    ``tf_idf``; that name is rebound to a sparse matrix right after the call,
    so writing into it made a second run of the cell fail.
    """
    tf_idf = [[0] * len(row) for row in tf_features]
    for index, row in enumerate(tf_features):
        # fun(index) -> {word: column} for the words present in this document
        for key, value in fun(index).items():
            tf_idf[index][value] = row[value] * dict2[key]
    return tf_idf

# tf*idf values as a nested list
tf_idf = transform2(tf_features)

# Converting obtained matrix into sparse matrix
tf_idf = csr_matrix(tf_idf)

# L2-normalising the sparse matrix (tf_idf)
tf_idf = normalize(tf_idf,norm = 'l2')
print('='*50)
# stored entries of the first document
print(tf_idf[0])

  (0, 1)	0.4697913855799205
  (0, 2)	0.580285823684436
  (0, 3)	0.3840852409148149
  (0, 6)	0.3840852409148149
  (0, 8)	0.3840852409148149


In [21]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Fit scikit-learn's TfidfVectorizer on corpus1 and keep its tf-idf matrix in
# skl_output for the comparison that follows.
vectorizer = TfidfVectorizer()
vectorizer.fit(corpus1)
skl_output = vectorizer.transform(corpus1)

In [22]:
# Comparing skl_output and tf_idf
print(type(skl_output))
print(type(tf_idf))
print("0th row")
print(skl_output[0])
print('='*50)
print(tf_idf[0])
print("1st row ")
print(skl_output[1])
print('='*50)
print(tf_idf[1])
print('='*50)
# Compare the complete matrices numerically instead of inspecting two rows by eye.
if numpy.allclose(skl_output.toarray(), tf_idf.toarray()):
    print("implementation of TfidfVectorizer is correct")
else:
    print("wrong implementation")

<class 'scipy.sparse.csr.csr_matrix'>
<class 'scipy.sparse.csr.csr_matrix'>
0th row
  (0, 8)	0.38408524091481483
  (0, 6)	0.38408524091481483
  (0, 3)	0.38408524091481483
  (0, 2)	0.5802858236844359
  (0, 1)	0.46979138557992045
  (0, 1)	0.4697913855799205
  (0, 2)	0.580285823684436
  (0, 3)	0.3840852409148149
  (0, 6)	0.3840852409148149
  (0, 8)	0.3840852409148149
1st row 
  (0, 8)	0.281088674033753
  (0, 6)	0.281088674033753
  (0, 5)	0.5386476208856763
  (0, 3)	0.281088674033753
  (0, 1)	0.6876235979836938
  (0, 1)	0.6876235979836937
  (0, 3)	0.2810886740337529
  (0, 5)	0.5386476208856762
  (0, 6)	0.2810886740337529
  (0, 8)	0.2810886740337529


In [23]:
# Hence our custom implementation gives the same result as sklearn's TfidfVectorizer on this corpus
# Task 1 is complete

# Task 2

In [24]:
import pickle
# NOTE: pickle.load can execute arbitrary code, so only open a file from a trusted source.
with open('cleaned_strings', 'rb') as f:
    corpus = pickle.load(f)

# printing the length of the corpus loaded
print("Number of documents in corpus = ",len(corpus))
# a few sample documents, separated by a ruler line
for position, doc_id in enumerate((1, 50, 100, 210, 325, 450, 625)):
    if position:
        print('='*50)
    print(corpus[doc_id])

Number of documents in corpus =  746
not sure lost flat characters audience nearly half walked
directing cinematography not quite good
awful
vivian schilling excellent job script
full unconvincing cardboard characters blandly written edward chodorov also produced surprisingly directed jean negulesco one would expect great deal
believe every one see movie think people outside south africa understand past attempted truth reconciliation process
pretty surprising wonderful film made hollywood generally collective heads sand concerning black white issues time


In [25]:
# NOTE(review): the original note read "does contain"; presumably the corpus does NOT contain
# special characters or short forms such as "won't" / "shouldn't" - confirm against the data.
# It only contains short filler words like "oh", which is why "oh" is appended to this stop-word list.
stopwords= ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've",\
            "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', \
            'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their',\
            'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', \
            'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', \
            'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', \
            'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after',\
            'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further',\
            'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more',\
            'most', 'other', 'some', 'such', 'only', 'own', 'same', 'so', 'than', 'too', 'very', \
            's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've", 'now', 'd', 'll', 'm', 'o', 're', \
            've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't", 'doesn', "doesn't", 'hadn',\
            "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn', "mightn't", 'mustn',\
            "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't", 'weren', "weren't", \
            'won', "won't", 'wouldn', "wouldn't","oh"]

In [26]:
from tqdm import tqdm
# a set gives constant-time membership tests for the stop-word filter
stopword_set = set(stopwords)
corpus2 = []
for i in (tqdm(corpus)):
    # https://gist.github.com/sebleier/554280
    # Lower-case BEFORE filtering: the stop-word list is all lower-case, so
    # filtering first let capitalised stop words ("The", "Oh", ...) through.
    kept_words = [e for e in i.lower().split() if e not in stopword_set]
    corpus2.append(' '.join(kept_words))

100%|█████████████████████████████████████████████████████████████████████████████| 746/746 [00:00<00:00, 31615.78it/s]


In [27]:
# corpus after preprocessing
# the same sample documents as shown for the raw corpus, separated by a ruler line
for position, doc_id in enumerate((1, 50, 100, 210, 325, 450, 625)):
    if position:
        print('='*50)
    print(corpus2[doc_id])

not sure lost flat characters audience nearly half walked
directing cinematography not quite good
awful
vivian schilling excellent job script
full unconvincing cardboard characters blandly written edward chodorov also produced surprisingly directed jean negulesco one would expect great deal
believe every one see movie think people outside south africa understand past attempted truth reconciliation process
pretty surprising wonderful film made hollywood generally collective heads sand concerning black white issues time


In [28]:
# Learn the vocabulary of the pre-processed corpus, then vectorise it ONCE;
# the dense array is derived from the sparse result instead of running
# transform() over all documents a second time.
vocab  = fit(corpus2)
words_corpus = list(vocab.keys())
print(words_corpus)
spar_matrix1 = transform(corpus2, vocab) # working of this transform() function is same as used in CountVectorizer
trans_matrix = spar_matrix1.toarray()
print(trans_matrix)
print(type(spar_matrix1))






100%|█████████████████████████████████████████████████████████████████████████████| 746/746 [00:00<00:00, 46950.93it/s]


[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


100%|█████████████████████████████████████████████████████████████████████████████| 746/746 [00:00<00:00, 48399.06it/s]


<class 'scipy.sparse.csr.csr_matrix'>


In [29]:
def length(p):
    """Return the whitespace-separated tokens of document number ``p`` in ``corpus2``."""
    d = corpus2[p].split()
    return d

def find_dict(keys,values):
    """Pair ``keys`` with ``values`` position by position and return the resulting dict."""
    dictionary = dict(zip(keys,values))
    return dictionary

# Function to select the top features ranked by their idf values (50 by default)
def fit3(trans_matrix, top_k=50):
    """Select the ``top_k`` highest-idf words and build their count and tf matrices.

    Reads two module-level names: ``words_corpus`` (the vocabulary, in the
    column order of ``trans_matrix``) and ``corpus2`` (the documents, in the
    row order of ``trans_matrix``).

    Parameters
    ----------
    trans_matrix : 2-D array-like
        Raw counts, one row per document and one column per vocabulary word.
    top_k : int, default 50
        Number of words to keep.

    Returns
    -------
    dict_with_50_features : dict
        word -> idf for the selected words, highest idf first; words with
        equal idf keep their vocabulary order (the sort is stable).
    new_tf_features : list of list
        Per document, count / number of tokens in the document for every
        selected word; the integer ``0`` where the count is zero.
    new_trans_matrix : list of list
        Raw counts restricted to the selected words, in selection order.
    new_word_corpus : list
        The selected words, in selection order.
    """
    # local imports keep the function usable on its own
    import math
    from collections import Counter

    # Document frequency: each document contributes a word at most once.
    doc_freq = Counter(word for doc in corpus2 for word in set(doc.split(' ')))

    # Calculating idf values: 1 + ln((1 + n_docs) / (1 + document frequency))
    n_docs = len(corpus2)
    idf = {word: 1 + math.log((1 + n_docs) / (1 + doc_freq[word]))
           for word in words_corpus}

    # dictionary containing the top words sorted by idf, highest first
    ranked = sorted(idf.items(), key=lambda item: item[1], reverse=True)
    dict_with_50_features = dict(ranked[:top_k])
    new_word_corpus = list(dict_with_50_features)

    # Counts restricted to the selected words: look each column up once
    # instead of scanning the whole vocabulary for every word of every row.
    column_of = {word: col for col, word in enumerate(words_corpus)}
    selected_columns = [column_of[word] for word in new_word_corpus]
    new_trans_matrix = [[row[col] for col in selected_columns]
                        for row in trans_matrix]

    # Term frequencies of the selected words; the denominator is the token
    # count of the whole document, not only of the selected words.
    new_tf_features = []
    for index, row in enumerate(new_trans_matrix):
        n_tokens = len(corpus2[index].split())
        new_tf_features.append(
            [count / n_tokens if count != 0 else 0 for count in row]
        )

    return  dict_with_50_features,new_tf_features,new_trans_matrix,new_word_corpus
# Unpack, in order: idf dict of the selected words, their tf rows, their count rows, and the selected words.
dict_with_new_features,new_tf_features,new_trans_matrix,new_word_corpus = fit3(trans_matrix)
print(dict_with_new_features)

{'aailiyah': 6.922918004572872, 'abandoned': 6.922918004572872, 'abroad': 6.922918004572872, 'abstruse': 6.922918004572872, 'academy': 6.922918004572872, 'accents': 6.922918004572872, 'accessible': 6.922918004572872, 'acclaimed': 6.922918004572872, 'accolades': 6.922918004572872, 'accurate': 6.922918004572872, 'accurately': 6.922918004572872, 'achille': 6.922918004572872, 'ackerman': 6.922918004572872, 'actions': 6.922918004572872, 'adams': 6.922918004572872, 'add': 6.922918004572872, 'added': 6.922918004572872, 'admins': 6.922918004572872, 'admiration': 6.922918004572872, 'admitted': 6.922918004572872, 'adrift': 6.922918004572872, 'adventure': 6.922918004572872, 'aesthetically': 6.922918004572872, 'affected': 6.922918004572872, 'affleck': 6.922918004572872, 'afternoon': 6.922918004572872, 'aged': 6.922918004572872, 'ages': 6.922918004572872, 'agree': 6.922918004572872, 'agreed': 6.922918004572872, 'aimless': 6.922918004572872, 'aired': 6.922918004572872, 'akasha': 6.922918004572872, '

In [30]:
# column numbers holding non-zero values in each row of new_trans_matrix
# (the unused `lst3` and the commented-out loop header were dropped)
f_column1 = [[j for j, count in enumerate(q) if count != 0] for q in new_trans_matrix]

# words of new_word_corpus corresponding to those columns, row by row
lst4 = [[new_word_corpus[j] for j in columns] for columns in f_column1]

In [31]:
# Function to calculate dictinary for each column values and words appearing in tf_features
def fun(j):
    """Return ``{word: column index}`` for the words recorded for row ``j``.

    ``lst4[j]`` supplies the words and ``f_column1[j]`` their column numbers;
    both are read at call time and paired position by position.
    """
    words_in_row = lst4[j]
    columns_in_row = f_column1[j]
    return dict(zip(words_in_row, columns_in_row))

In [44]:
# Zero-filled nested list with one row per document and one column per
# selected feature; it receives the tf*idf values.
# The dimensions are taken from the data instead of being typed in by hand.
# https://stackoverflow.com/questions/6667201/how-to-define-a-two-dimensional-array-in-python/38159679
w,h = len(new_word_corpus),len(new_tf_features)
tf_idf = [[0 for x in range(w)] for y in range(h)]

# Function calculating tf*idf values for each row of new_tf_features (tf values)
# and their corresponding idf values (from dict_with_new_features)
def transform3(new_tf_features):
    """Multiply every non-zero term frequency by the idf of its word.

    Parameters
    ----------
    new_tf_features : list of list
        Term frequencies of the selected features, one inner list per document.

    Returns
    -------
    list of list
        A freshly built matrix with the same layout as ``new_tf_features``:
        tf * idf where ``fun(row)`` reports a word, ``0`` elsewhere.

    The result is built locally rather than written into the module-level
    ``tf_idf``; that name is rebound to a sparse matrix right after the call,
    so writing into it made a second run of the cell fail.
    """
    tf_idf = [[0] * len(row) for row in new_tf_features]
    for index, row in enumerate(new_tf_features):
        # fun(index) -> {word: column} for the selected words present in this document
        for key, value in fun(index).items():
            tf_idf[index][value] = row[value] * dict_with_new_features[key]
    return tf_idf

# tf*idf values of the selected features as a nested list
tf_idf = transform3(new_tf_features)
# Converting obtained matrix into sparse matrix
tf_idf = csr_matrix(tf_idf)

# L2-normalising the sparse matrix (tf_idf)
tf_idf = normalize(tf_idf,norm = 'l2')
print('='*50)
# prints only the stored (non-zero) entries as (row, column) value
print(tf_idf)

  (0, 30)	1.0
  (68, 24)	1.0
  (72, 29)	1.0
  (74, 31)	1.0
  (119, 33)	1.0
  (135, 3)	0.37796447300922725
  (135, 10)	0.37796447300922725
  (135, 18)	0.37796447300922725
  (135, 20)	0.37796447300922725
  (135, 36)	0.37796447300922725
  (135, 40)	0.37796447300922725
  (135, 41)	0.37796447300922725
  (176, 49)	1.0
  (181, 13)	1.0
  (192, 21)	1.0
  (193, 23)	1.0
  (216, 2)	1.0
  (222, 47)	1.0
  (225, 19)	1.0
  (227, 17)	1.0
  (241, 44)	1.0
  (270, 1)	1.0
  (290, 25)	1.0
  (333, 26)	1.0
  (334, 15)	1.0
  (341, 43)	1.0
  (344, 42)	1.0
  (348, 8)	1.0
  (377, 37)	1.0
  (409, 5)	1.0
  (430, 39)	1.0
  (457, 45)	1.0
  (461, 4)	1.0
  (465, 38)	1.0
  (475, 35)	1.0
  (493, 6)	1.0
  (500, 48)	1.0
  (548, 0)	0.7071067811865475
  (548, 32)	0.7071067811865475
  (608, 14)	1.0
  (612, 11)	1.0
  (620, 46)	1.0
  (632, 7)	1.0
  (644, 12)	0.7071067811865475
  (644, 27)	0.7071067811865475
  (664, 28)	1.0
  (667, 22)	1.0
  (691, 34)	1.0
  (697, 9)	1.0
  (722, 16)	1.0
