In [None]:
# Sample ordinance text held in memory; the LSA and BART cells read `original_text` directly. The commented-out assignment is an alternative sample kept for quick swapping.
# original_text = "Zoning Administrator to be notified of Violations: Whenever a violation of this ordinance is known or suspected to exist or expected to be committed, any person may so notify the zoning administrator. All officers and agencies of the City of Rochester shall notify the zoning administrator of any information which suggests that a violation exists or is expected to be committed."
original_text = "Subdivision 1. It is the purpose of this chapter to provide for the regulation of uses, buildings, structures or lots which lawfully existed prior to the effective date of this ordinance but which fail to comply with one or more of the applicable regulations or standards established by this ordinance or subsequent amendment of this ordinance, or which have been rendered nonconforming due to circumstances which were not self-created. It is the intent of these regulations to specify those circumstances and conditions under which such nonconformities shall be permitted to continue. Buildings or structures which are now in existence and which were constructed in compliance with the terms of the regulations of some other public entity but became nonconforming upon the annexation to the City, and which are not in compliance with the terms of this code are hereby designated as legal nonconforming buildings or structures. Subd. 2. A municipality may, by ordinance, permit an expansion or impose upon nonconformities reasonable regulations to prevent  and abate nuisances and to protect the public health, welfare, or safety. Subd. 3. The basic policy of this chapter is to allow the continuation of any nonconformity and the normal repair, replacement, restoration, maintenance, or improvement thereof, and to encourage their move toward conformity when the opportunity arises through discontinuance or destruction. In certain cases nonconformities may be permitted to be upgraded when it can be shown that such action will not be harmful and will be beneficial to the surrounding properties, the neighborhood, or the community; and that the goals of local plans will not be impeded by the continuation of the nonconformity."

In [None]:
#################################################
###########    Extraction w/ NLTK   #############
#################################################

import nltk
from nltk.corpus import stopwords
from nltk.cluster.util import cosine_distance
import numpy as np
import networkx as nx

def read_article(file_name):
    """Read a text file and split it into sentence strings.

    The whole file is read (not only its first line), runs of whitespace
    including line breaks are collapsed to single spaces, and the text is
    split on the ". " delimiter.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A list of non-empty sentence strings in document order. Because the
        split delimiter is ". ", the final sentence keeps its closing period
        while the others do not. An empty or whitespace-only file yields [].

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even if reading raises.
    with open(file_name, "r") as file:
        filedata = file.read()

    # Collapse newlines/tabs/repeated spaces so that a sentence wrapped across
    # lines stays one sentence and no stray "\n" ends up in the output.
    normalized = " ".join(filedata.split())

    # Drop empty fragments so downstream similarity scoring never receives a
    # sentence with no words.
    return [sentence for sentence in normalized.split(". ") if sentence]

def sentence_similarity(sent1, sent2, stopwords=None):
    """Return the cosine similarity of two sentences' word-count vectors.

    Args:
        sent1: First sentence, either a string (split on whitespace into
            words) or an iterable of word tokens.
        sent2: Second sentence, same accepted forms as ``sent1``.
        stopwords: Optional iterable of lower-case words to ignore. ``None``
            means no words are ignored.

    Returns:
        A float: 1.0 for identical word distributions, 0.0 when the sentences
        share no counted words. 0.0 is also returned when either sentence has
        no counted words at all (empty, or made up only of stop words), where
        the cosine is otherwise undefined.
    """
    # A set gives constant-time membership tests regardless of list length.
    ignored = set(stopwords) if stopwords else set()

    def _word_counts(sentence):
        # Iterating a raw string yields characters, which would silently turn
        # this into a character-frequency comparison; split it into words.
        tokens = sentence.split() if isinstance(sentence, str) else sentence
        counts = {}
        for token in tokens:
            word = token.lower()
            if word in ignored:
                continue
            counts[word] = counts.get(word, 0) + 1
        return counts

    counts1 = _word_counts(sent1)
    counts2 = _word_counts(sent2)

    # Shared vocabulary fixes one vector position per distinct word.
    vocabulary = sorted(set(counts1) | set(counts2))
    vector1 = np.array([counts1.get(word, 0) for word in vocabulary], dtype=float)
    vector2 = np.array([counts2.get(word, 0) for word in vocabulary], dtype=float)

    norm_product = np.linalg.norm(vector1) * np.linalg.norm(vector2)
    if norm_product == 0:
        # A zero vector would make the cosine a 0/0 division (NaN).
        return 0.0

    # Clamp away floating-point overshoot such as 1.0000000000000002.
    return min(1.0, float(np.dot(vector1, vector2) / norm_product))
 
def build_similarity_matrix(sentences, stop_words):
    """Build the square matrix of pairwise sentence similarities.

    Entry ``[r][c]`` holds ``sentence_similarity(sentences[r], sentences[c],
    stop_words)`` for every ordered pair with ``r != c``; the diagonal is
    left at zero.

    Args:
        sentences: Sequence of sentences to compare with one another.
        stop_words: Stop-word collection forwarded to ``sentence_similarity``.

    Returns:
        A ``len(sentences)`` x ``len(sentences)`` NumPy array of floats.
    """
    size = len(sentences)
    matrix = np.zeros((size, size))

    for row, left in enumerate(sentences):
        for col, right in enumerate(sentences):
            # A sentence is never scored against itself.
            if row != col:
                matrix[row][col] = sentence_similarity(left, right, stop_words)

    return matrix


# Basically just finds sentences that are the most similar & spits them back out as the "summary"
def generate_summary(file_name, top_n=5):
    """Print an extractive summary of the text stored in ``file_name``.

    Sentences are read with ``read_article``, compared pairwise with
    ``build_similarity_matrix``, and ranked by running PageRank over the graph
    built from that matrix. The highest-scoring sentences are joined with
    ". " and printed.

    Args:
        file_name: Path of the text file to summarize.
        top_n: Maximum number of sentences to print; capped at the number of
            sentences available.

    Returns:
        None. The summary is written to standard output.
    """
    # Make sure the NLTK stop-word corpus is available before loading it.
    nltk.download("stopwords")
    english_stop_words = stopwords.words('english')

    # Read the text and split it into sentences.
    sentences = read_article(file_name)

    # Score every sentence pair, then rank sentences on the resulting graph.
    similarity = build_similarity_matrix(sentences, english_stop_words)
    graph = nx.from_numpy_array(similarity)
    scores = nx.pagerank(graph)

    # Highest score first; equal scores fall back to comparing sentence text.
    ranked = [(scores[position], text) for position, text in enumerate(sentences)]
    ranked.sort(reverse=True)

    # Never ask for more sentences than exist.
    keep = min(top_n, len(ranked))
    chosen = [ranked[k][1] for k in range(keep)]

    # Output the summarized text.
    print("Summarize Text: \n", ". ".join(chosen))

# Summarize the legislative-intent sample, keeping the two top-ranked sentences.
generate_summary("non-conformatitie_legislative_intent.txt", top_n=2)
# generate_summary("zoning-enforcement.txt", top_n=5)
# generate_summary("zoning-use-type.txt", top_n=2)

In [None]:
#################################################
###########    Extraction w/ LSA   ##############
#################################################

# Import the summarizer
from sumy.summarizers.lsa import LsaSummarizer

# Parsing the text string using PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.parsers.plaintext import PlaintextParser
# Parse the in-memory sample text into a sumy document with the 'english' tokenizer.
parser = PlaintextParser.from_string(original_text, Tokenizer('english'))

# Build the LSA summarizer and request a three-sentence summary.
lsa_summarizer = LsaSummarizer()
lsa_summary = lsa_summarizer(parser.document, 3)

# Print each selected sentence on its own line.
for sentence in lsa_summary:
    print(sentence)

In [None]:
#################################################
##########   Abstraction w/ BART    #############
#################################################

# Importing the model
from transformers import BartForConditionalGeneration, BartTokenizer, BartConfig

# Loading the model and tokenizer for bart-large-cnn

tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')

# Encode the input as a batch of one. Truncating to the model's position
# limit keeps a longer text from overrunning the position embeddings;
# input already within the limit is encoded unchanged.
inputs = tokenizer(
    [original_text],
    return_tensors='pt',
    truncation=True,
    max_length=model.config.max_position_embeddings,
)

# Pass the attention mask explicitly so generation does not have to infer
# which positions are real tokens.
summary_ids = model.generate(
    inputs['input_ids'],
    attention_mask=inputs['attention_mask'],
    early_stopping=True,
)

# Decode the first (only) generated sequence and print the summary.
bart_summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print(bart_summary)