In [None]:
import pandas as pd

In [None]:
# Mount Google Drive at /content/drive so the data files read below are reachable.
# This relies on the google.colab package.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the scored emails from Drive into a DataFrame (presumably BM25 retrieval
# results, judging by the file name — TODO confirm). The preview shown below lists
# the columns Unnamed: 0.1, Unnamed: 0, Document_No, Score and Text.
extracted_emails = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/data/result_df_bm25_day.csv')

In [None]:
# Preview the loaded frame; pandas elides the middle rows in the rendered output.
extracted_emails

Unnamed: 0.1,Unnamed: 0,Document_No,Score,Text
0,689,689,5.102736,"Terry, please plan on attending. Thanks. Lynn ..."
1,688,688,5.017902,FYI. Thanks. Lynn Attendees Lynn Blair Steve J...
2,7151,7151,4.369324,"Mitch, This afternoon we have forwarded to you..."
3,9232,9232,3.393102,Specifications REMINDER COLUMBIA GULF TRANSMIS...
4,7690,7690,3.262622,I'm forwarding the attached as an update. Am p...
...,...,...,...,...
101,6566,6566,0.000000,eSource presents SDC Platinum training SDC Pla...
102,6628,6628,0.000000,"Enron Law Conference San Antonio, Texas May 24..."
103,6671,6671,0.000000,David Marshall (ECT risk management) insurance...
104,6857,6857,0.000000,COLUMBIA GAS TRANSMISSION CORPORATION NOTICE T...


In [None]:
from bs4 import BeautifulSoup

def clean_email_df(df):
    '''
    Remove symbols, markup and character patterns from the ``Text`` column that
    don't aid in producing a good summary, then drop rows left without text.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Text`` column holding the email bodies.

    Returns
    -------
    pandas.DataFrame
        A cleaned copy of ``df``. The frame passed in is not modified. Empty
        strings anywhere in the frame become NaN and rows whose ``Text`` is
        NaN are removed.
    '''
    # Work on a copy so the caller's frame is not changed as a side effect.
    df = df.copy()

    # Removing strings related to attachments and certain non-numerical characters.
    # Every entry is a regular expression and is applied in this order. regex=True
    # is passed explicitly because the default of Series.str.replace differs
    # between pandas versions; a bare "+" or "?" is not a valid regex, so those
    # metacharacters are escaped.
    # NOTE(review): '"."' is kept as a regex (a quote, any single character, a
    # quote). If only the literal text "." was intended, escape the dot.
    patterns = [r"\[IMAGE\]", r"-", r"_", r"\*", r"\+", r'"."', r"=3D", r"\?", r"\|"]
    text = df['Text']
    for pattern in patterns:
        text = text.str.replace(pattern, "", regex=True)

    # Remove multiple spaces.
    text = text.replace(r'\s+', ' ', regex=True)
    # Strip HTML markup. Missing values are passed through untouched because
    # BeautifulSoup only accepts markup text and would raise on NaN.
    text = text.apply(
        lambda x: BeautifulSoup(x, 'html.parser').text if isinstance(x, str) else x
    )
    df['Text'] = text

    # Blanks are replaced with NaN in the whole dataframe. Then rows with a NaN
    # in the text are dropped. (np.nan is used because the np.NaN alias no
    # longer exists in NumPy 2.)
    df = df.replace('', np.nan)
    df = df.dropna(subset=['Text'])

    # Remove all Duplicate emails (disabled)
    #df = df.drop_duplicates(subset='body')
    return df

In [None]:
# numpy has to be imported before clean_email_df is called: the function refers
# to `np` when it replaces blank strings with NaN.
import numpy as np
# Clean the email text and rebind the name to the returned frame, then preview it.
extracted_emails = clean_email_df(extracted_emails)
extracted_emails

Unnamed: 0.1,Unnamed: 0,Document_No,Score,Text
0,689,689,5.102736,"Terry, please plan on attending. Thanks. Lynn ..."
1,688,688,5.017902,FYI. Thanks. Lynn Attendees Lynn Blair Steve J...
2,7151,7151,4.369324,"Mitch, This afternoon we have forwarded to you..."
3,9232,9232,3.393102,Specifications REMINDER COLUMBIA GULF TRANSMIS...
4,7690,7690,3.262622,I'm forwarding the attached as an update. Am p...
...,...,...,...,...
101,6566,6566,0.000000,eSource presents SDC Platinum training SDC Pla...
102,6628,6628,0.000000,"Enron Law Conference San Antonio, Texas May 24..."
103,6671,6671,0.000000,David Marshall (ECT risk management) insurance...
104,6857,6857,0.000000,COLUMBIA GAS TRANSMISSION CORPORATION NOTICE T...


In [None]:
# Show the full text of the first (positionally) cleaned email.
extracted_emails.Text.iloc[0]

'Terry, please plan on attending. Thanks. Lynn Attached is the Commission\'s "Order Imposing Reporting Requirement on Natural Gas Sales to California Market," which was issued late yesterday. In the order, the Commission finds that if does have the authority to request the information as set out in the May 18 order proposing the requirements. The information is to cover August 1, 2001 through Sept. 30, 2002 (the end date coincides with the end of the Commission\'s mitigation plan re: wholesal prices in California and the West). Specific info gas sellers and LDCs file concernign purchase and sales transactions is exempt from FOIA disclosure Also, respondents may request privileged treatment of "other portions of their responses subject to the.....Commission\'s regulations" Some of the questions have been modified based on comments received based on comments received on the May 18th proposal Transaction by transaction data is required; FERC will aggregate the information The information 

In [None]:
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
import re
def remove_stopwords(sen, stop_words=None):
    '''
    This function removes stopwords from a tokenized sentence.

    Parameters
    ----------
    sen : iterable of str
        The words of one sentence.
    stop_words : iterable of str, optional
        Words to drop. When omitted, NLTK's English stopword list is used.

    Returns
    -------
    str
        The remaining words, in their original order, joined by single spaces.
    '''
    if stop_words is None:
        stop_words = stopwords.words('english')
    # A set gives constant-time membership tests; with the plain list every
    # word of the sentence would trigger a scan of the whole stopword list.
    stop_words = frozenset(stop_words)
    sen_new = " ".join(i for i in sen if i not in stop_words)
    return sen_new

def tokenize_email(text):
    '''
    Break an email body into sentences and normalize each one.

    Each sentence has every character other than ASCII letters and spaces
    removed, is lowercased, split on whitespace and handed to
    remove_stopwords. Returns the cleaned sentences as a list of strings,
    one entry per sentence.
    '''
    cleaned = []
    for sentence in sent_tokenize(text, language='english'):
        # Strip punctuation, digits and special characters, then lowercase.
        letters_only = re.sub('[^a-zA-Z ]', '', sentence).lower()
        cleaned.append(remove_stopwords(letters_only.split()))
    return cleaned

In [None]:
import nltk
# Resources needed by sent_tokenize and stopwords.words. Newer NLTK releases load
# the 'punkt_tab' package for sentence tokenization; 'punkt' is kept so older
# releases keep working. 'stopwords' stays last so the cell's result is unchanged.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
#This tokenizing will be the extracted sentences that may be chosen to form the email summaries.
extracted_emails['extractive_sentences'] = extracted_emails['Text'].apply(sent_tokenize)
#Splitting the text in emails into cleaned sentences
extracted_emails['tokenized_body'] = extracted_emails['Text'].apply(tokenize_email)
#Tokenizing the bodies might have revealed more duplicate emails that should be dropped.
#The token lists are unhashable, so rows are compared on their string form. Only the key
#column is converted, and the first row of each group is kept through a boolean mask, so
#the result does not depend on the index labels being unique.
extracted_emails = extracted_emails.loc[
    lambda frame: ~frame['tokenized_body'].astype(str).duplicated(keep='first')
]

In [None]:
# Preview the frame with the added extractive_sentences and tokenized_body columns.
extracted_emails

Unnamed: 0.1,Unnamed: 0,Document_No,Score,Text,extractive_sentences,tokenized_body
0,689,689,5.102736,"Terry, please plan on attending. Thanks. Lynn ...","[Terry, please plan on attending., Thanks., Ly...","[terry please plan attending, thanks, lynn att..."
1,688,688,5.017902,FYI. Thanks. Lynn Attendees Lynn Blair Steve J...,"[FYI., Thanks., Lynn Attendees Lynn Blair Stev...","[fyi, thanks, lynn attendees lynn blair steve ..."
2,7151,7151,4.369324,"Mitch, This afternoon we have forwarded to you...","[Mitch, This afternoon we have forwarded to yo...",[mitch afternoon forwarded fax machine copies ...
3,9232,9232,3.393102,Specifications REMINDER COLUMBIA GULF TRANSMIS...,[Specifications REMINDER COLUMBIA GULF TRANSMI...,[specifications reminder columbia gulf transmi...
4,7690,7690,3.262622,I'm forwarding the attached as an update. Am p...,"[I'm forwarding the attached as an update., Am...","[im forwarding attached update, presently rese..."
...,...,...,...,...,...,...
101,6566,6566,0.000000,eSource presents SDC Platinum training SDC Pla...,[eSource presents SDC Platinum training SDC Pl...,[esource presents sdc platinum training sdc pl...
102,6628,6628,0.000000,"Enron Law Conference San Antonio, Texas May 24...","[Enron Law Conference San Antonio, Texas May 2...",[enron law conference san antonio texas may we...
103,6671,6671,0.000000,David Marshall (ECT risk management) insurance...,[David Marshall (ECT risk management) insuranc...,[david marshall ect risk management insurance ...
104,6857,6857,0.000000,COLUMBIA GAS TRANSMISSION CORPORATION NOTICE T...,[COLUMBIA GAS TRANSMISSION CORPORATION NOTICE ...,[columbia gas transmission corporation notice ...


In [None]:
# Save the tokenized frame to Drive as CSV. The index is written too (to_csv default),
# and the list-valued columns are stored as their string representation.
# NOTE(review): the input file is named ..._day.csv but this output is ..._pay.csv —
# confirm the file name is intended.
extracted_emails.to_csv('/content/drive/MyDrive/Colab Notebooks/data/extracted_df_pay.csv', sep=',')

In [None]:
#!pip install evaluate
!pip install pyrouge
!pip install rouge-score

Collecting pyrouge
  Downloading pyrouge-0.1.3.tar.gz (60 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/60.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━[0m [32m51.2/60.5 kB[0m [31m1.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.5/60.5 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyrouge
  Building wheel for pyrouge (setup.py) ... [?25l[?25hdone
  Created wheel for pyrouge: filename=pyrouge-0.1.3-py3-none-any.whl size=191605 sha256=6dd7fc48c4e852e441bd6e0a7d8f2daa95ab5c57015fb94eae71c8b212111caf
  Stored in directory: /root/.cache/pip/wheels/9a/67/12/c5dd8ef8b4152bb8789eafd2a74a734e2dc7bb9eae02b768e7
Successfully built pyrouge
Installing collected packages: pyrouge
Successfully installed pyrouge-0.1.3
Collecting rouge-score
  Downloadin

In [None]:
!pip install sentence_transformers

[0m

In [None]:
import pandas as pd
import numpy as np
#import rouge
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx
import seaborn as sns
import matplotlib.pyplot as plt
from operator import itemgetter
from sentence_transformers import SentenceTransformer

In [None]:
# Take the text of the first ten emails, join them with single spaces into one
# string, and print the combined text.
documents = extracted_emails['Text'].iloc[:10].tolist()
all_documents = str.join(' ', documents)
print(all_documents)

Terry, please plan on attending. Thanks. Lynn Attached is the Commission's "Order Imposing Reporting Requirement on Natural Gas Sales to California Market," which was issued late yesterday. In the order, the Commission finds that if does have the authority to request the information as set out in the May 18 order proposing the requirements. The information is to cover August 1, 2001 through Sept. 30, 2002 (the end date coincides with the end of the Commission's mitigation plan re: wholesal prices in California and the West). Specific info gas sellers and LDCs file concernign purchase and sales transactions is exempt from FOIA disclosure Also, respondents may request privileged treatment of "other portions of their responses subject to the.....Commission's regulations" Some of the questions have been modified based on comments received based on comments received on the May 18th proposal Transaction by transaction data is required; FERC will aggregate the information The information requ

In [None]:
# Collapse every run of whitespace in the combined text into one space, then show it.
original_text = re.compile(r'\s+').sub(' ', all_documents)
original_text

'Terry, please plan on attending. Thanks. Lynn Attached is the Commission\'s "Order Imposing Reporting Requirement on Natural Gas Sales to California Market," which was issued late yesterday. In the order, the Commission finds that if does have the authority to request the information as set out in the May 18 order proposing the requirements. The information is to cover August 1, 2001 through Sept. 30, 2002 (the end date coincides with the end of the Commission\'s mitigation plan re: wholesal prices in California and the West). Specific info gas sellers and LDCs file concernign purchase and sales transactions is exempt from FOIA disclosure Also, respondents may request privileged treatment of "other portions of their responses subject to the.....Commission\'s regulations" Some of the questions have been modified based on comments received based on comments received on the May 18th proposal Transaction by transaction data is required; FERC will aggregate the information The information 

In [None]:
from IPython.display import Markdown
# Render the normalized text through IPython's Markdown display.
# NOTE(review): the text is raw email content, so Markdown-special characters in it
# (e.g. `$$` or backslashes in file paths) may be interpreted as markup instead of
# being shown literally -- confirm this is acceptable.
display(Markdown(original_text))

Terry, please plan on attending. Thanks. Lynn Attached is the Commission's "Order Imposing Reporting Requirement on Natural Gas Sales to California Market," which was issued late yesterday. In the order, the Commission finds that if does have the authority to request the information as set out in the May 18 order proposing the requirements. The information is to cover August 1, 2001 through Sept. 30, 2002 (the end date coincides with the end of the Commission's mitigation plan re: wholesal prices in California and the West). Specific info gas sellers and LDCs file concernign purchase and sales transactions is exempt from FOIA disclosure Also, respondents may request privileged treatment of "other portions of their responses subject to the.....Commission's regulations" Some of the questions have been modified based on comments received based on comments received on the May 18th proposal Transaction by transaction data is required; FERC will aggregate the information The information request will NOT be expanded beyond California FERC is providing the reporting format as a data template to be available on RIMS. The order is attached here, and the appendix listing specific questions is below. APPENDIX Answers to all questions below that require a statement of volumes should set forth the requested volumes on an MMBtu basis. For Interstate Natural Gas Pipelines: 1. On a daily basis for the period August 1, 2001 to January 31, 2002, please provide the following information for each contract for transportation to the California border: a. the transaction or contract identification number; b. the terms and effective date of the contract; c. contract demand by shipper; d. the daily scheduled volume by shipper; e. the daily nominated volume by shipper; f. the daily delivered volume by shipper; g. whether the service is firm or interruptible; h. the rate charged in $$/MMbtu; i. primary receipt and delivery points associated with the contract; and, j. 
whether the shipper is affiliated with the pipeline. Along with the hard copy response, please provide a CDROM containing the response to this question. Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format. 2. For the period August 1, 2001 to January 31, 2002, please provide the following information for each capacity release transaction for transportation to the California border: a. the transaction or contract identification number, or offer number; (This number should tie to contract number reported in Question 1,a., above) b. the name of the releasing shipper; c. the name of the acquiring shipper; d. the contract quantity; e. the acquiring shipper's contract rate; and, f. the releasing shipper's contract rate. Along with the hard copy response, please provide a CDROM containing the response to this question. Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format. Docket No. RM019000 6 3. On a daily basis for the period August 1, 2001 to January 31, 2002, please provide the following system information: a. the maximum peak day design capacity; b the daily maximum flowing capacity; c the daily scheduled system volume; d. the daily delivered system volume; e. the daily scheduled volume at each California delivery point; f. an explanation of each instance that the daily maximum flowing capacity is below the maximum peak day design capacity; and, g. an explanation of any daily variance in the maximum flowing capacity. Along with the hard copy response, please provide a CDROM containing the response to this question. Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format. 4. On a daily basis for May 1999 and May 2000, please provide the following system information: a. the maximum peak day design capacity; b the daily maximum flowing capacity; c the daily scheduled system volume; d. the daily delivered system volume, and, e. 
the daily scheduled volume at each California delivery point. Along with the hard copy response, please provide a CDROM containing the response to this question. Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format. For Sellers of Natural Gas to the California Market: 1. State whether the seller is affiliated with an interstate or intrastate natural gas pipeline company or local distribution company, and, if so, give the name and address the affiliated company. 2. On a daily basis for the period August 1, 2001 to January 31, 2002, please provide the following information for each contract in which you sold natural gas and the gas is physically delivered at points on the California border or in California: a. the sales contract's identification number; b. the term of the sales contract (beginning and ending dates); c. the name of the buyer identifying whether the buyer is an energy marketer, local distribution company, or end user; d. the volumes sold (on a MMBtu basis); e. the price paid by buyer, and f. whether the price is fixed or indexed (identify the index). Along with the hard copy response, please provide a CDROM containing the response to this question. Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format. 3. For each sales contract, identify separately the transportation component and the gas commodity component of the price. If the sales contract specifies the transportation component of the price, the seller shall report that amount. If the sales contract only includes an overall price, then the seller shall report the transportation cost it incurred in moving the gas from the point where it purchased the gas to the point where it sold the gas and how it determined that amount. If the sale was made at the same point where the gas was purchased, and there is no transportation element in the sale, the seller shall respond "n.a." 
Along with the hard copy response, please provide a CDROM containing the response to this question. Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format. 4. For the period August 1, 2001 to January 31, 2002, please provide the following information on a daily basis for each of your gas purchase contracts associated with the sales contracts you identified in response to Question 2: a. the purchase contract's identification number; b. the pipeline upstream of the point of delivery; and the pipeline downstream of the point of delivery; c. the term of the purchase contract (beginning and ending dates); d. the daily volumes (on a MMBtu basis) purchased; e. the price paid; f. whether the price is fixed or indexed (identify the index), g . identify the entity from whom the responder purchased the gas; and, h. identify the point where responder took title to the gas. Along with the hard copy response, please provide a CDROM containing the response to this question. Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format. For Local Distribution Companies In California: 1. Provide your system's gas sales and transportation requirements, (i.e, contract demands and daily demands) by core, noncore, electric generation, and nonutility loads. Provide a break down of these demands by type of service (e.g., sales and transportation) and quality of service(firm/interruptible). Along with the hard copy response, please provide a CDROM containing the response to this question. Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format. 2. On a daily basis for the period August 1, 2001 to January 31, 2002, please provide the following information for each contract the local distribution company has with a transportation customer: a. contract demand by shipper; b. the daily scheduled volume by shipper; c. the daily delivered volume by shipper; d. 
whether the service is firm or interruptible; e. the rate charged; and, f. receipt and delivery points associated with the contract. Along with the hard copy response, please provide a CDROM containing the response to this question. Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format. 3. On a daily basis for the period August 1, 2001 to January 31, 2002, please provide the following information for each contract the local distribution company has with a sales customer: a. the contract demand by purchaser; b. the term of the sales contract (beginning and ending dates); c. the volumes (on a MMBtu basis) sold; and, d. the price paid by purchaser. Along with the hard copy response, please provide a CDROM containing the response to this question. Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format. 4. On a daily basis for the period August 1, 2001 to January 31, 2002, please provide the following information for each gas purchase contract: a. the purchase contract's identification number; b. the term of the purchase contract (beginning and ending dates); c. the volumes (on a MMBtu basis) bought; d. the price paid; e. whether the price is fixed or indexed (identify the index); and, f. identify the point where (name of local distribution company) took title to the gas. Along with the hard copy response, please provide a CDROM containing the response to this question. Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format. 5. On a daily basis for the period August 1, 2001 to January 31, 2002, please provide by interstate pipeline the type and quantity of transportation service your system has under contract. At each receipt point, provide maximum peak day design capacity, the daily maximum flowing capacity, the daily nominated capacity and the daily scheduled volumes of the local distribution system. 
Along with the hard copy response, please provide a CDROM containing the response to this question. Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format. 6. On a daily basis for the period August 1, 2001 to January 31, 2002, please provide on a systemwide basis your storage service rights i.e., capacity and deliverability rights. Additionally, provide daily storage balances, injections and withdrawls. Along with the hard copy response, please provide a CDROM containing the response to this question. Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format. 7. On a daily basis for the period August 1, 2001 to January 31, 2002, please provide how much of your system's gas supply was from intrastate production sources. Separately identify the sources, volumes, receipt points, and prices. Include the total system supply in your response. Along with the hard copy response, please provide a CDROM containing the response to this question. Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format. 8. Provide a summary of your system's gas purchases in the following categories: a. daily spot purchases; b. monthly; c. shortterm (more than 1 month and less than 1 year); d. mediumterm (13 years); and, e. longterm ( more than 3 years). by month for each of the last three years in the following format: a. price; b. volume; and, c. identify, by name, where these purchases were made (producing basin or at the California border). Along with the hard copy response, please provide a CDROM containing the response to this question. Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format. FYI. Thanks. 
Lynn Attendees Lynn Blair Steve January Hasan Kewdaii Joanne Bisbee Jeff may Mike Sullivan A representative with Cedar Falls Utility contacted the NNG measurement desk about volumes for the current gas day changing on the Operator Confirmation Summary. Issue: The daily PGas extract is including partial volume with an Incomplete flag for the current gas day (not closed). The MIPS extract did not process current gas day volumes. The Operator Confirmation Summary in TMS displays the incomplete volume to the TMS scheduler and to external customers thus impacting the imbalance calculation. Possible Solution: 1. Hold the data pole file until gas day is complete 2. Modify downstream system to process the incomplete flag to ignore current day volumes 3. Modify the PGas extract so that current day volume will not be included. Solution: PGas IT staff will modify the PGas extract so that current day (non closed) volumes will no longer be included in the file. This will make the extract identical to the MIPS daily extract file it replaces. TMS IT staff will modify the Operator Confirmation Summary in TMS to display the flow codes included in the extract. The flow codes are as follows: Blank Complete record I Incomplete record Complete but edited record Mike Sullivan with provide a work plan and implantation date. If you have any question call me at 7138531534. Thank you Dale Ratliff Mitch, This afternoon we have forwarded to your fax machine copies of the requested operational data as well as the CEMs Monitoring Plan. Regarding the Monitoring Plan, Ron sent you via email the updates to that plan necessitated by the EDR version 2.1 upgrade. Included in that email are the 6 upgrade attachments one for each unit. As I send you this note the CEMs Quality Assurance Plan is being scanned and sent via fax machine. Therefore, I believe I have provided all the info indicated on Gus's reply below. Regards, Pat All, This morning I provided the following info and documents. 
I will correspond with all of you but am sending the documents to Mitch Robinson's fax machine unless requested to do otherwise by Mitch. 1. I fax'd to Mitch and Stuart Zisman the results of the "Personal Property" inventory. 2. I fax'd Mitch the most recent water bill. To the best of my knowledge there is no contract with the Town of Caledonia for water. We utilize an industrial user account. The account number (14670) is on the bill. 3. I fax'd Mitch the Emergency Action Plan. This plan is under revision. I sent the existing plan as well as the portion of the plan where revisions are complete. 4. I also sent the SemiAnnual Excess Emissions Report. After lunch I will send the CEMs QA/QC documents and CEMs data which has been requested. Site personnel, under my direction conducted a spot check of spare parts utilizing the most recent inventory list provided by Jim Meyer. I certify that the spare parts inventory, based upon this spot check, is substantially complete including the highdollar components. Regards, Pat Cathers Answers to the questions below: Caledonia Copy of SO2 allowances for 2000 and 2001. Are these allowances transferrable Gus will provide by the end of the day SPCC plan Gus will provide a copy Emergency Response Plan Pat will provide a copy CEMS QA/AC Plan Pat will provide a copy CEMS Certification Test Report Should be on DealBench Copy of monthly log of fuel heat content, fuel usage, & total heat input per air permit for 1999 and 2000 There is no requirement for a monthly log, but we can provide the information for the calculated 365day rolling heat input. Pat, please email Ben a copy of the last semiannual report Copy of annual emissions reports (or similar) Gus will provide by the end of the day Any water supply agreements with local utilities. 
Pat will answer this question Copies of CEMS daily reports with all NOx, fuel, and MW values for each turbine for one operating day for the months of June, July, August, September and one winter or shoulder month. Pat will provide info Brownsville Copy of SO2 allowances for 2000 and 2001. Are these allowances transferrable Gus Will provide by the end of the day Copies of CEMS daily reports with all NOx, fuel, and MW values for each turbine for one operating day for the months of June, July, August, September and one winter or shoulder month. Mitchell will provide info Copies of well reports, logs, and drawdown reports for the 2 water wells. Mitchell will provide info Copies of any permits, certifications, approvals necessary to install and operate the wells. Dave, Mitchell Provide the proposed plan to further address the nois issue at the site. Ross, Mitch, Mitchell Explain permit efforts, if any, for "condensate" at plant. No permitting efforts at this time Specifications REMINDER COLUMBIA GULF TRANSMISSION COMPANY NOTICE TO ALL INTERESTED PARTIES JANUARY 23, 2002 Re: Gas Quality Specifications REMINDER Notice ID: 2435 5 NO RESPONSE REQUIRED REMINDER The notice posted below is still in effect at least until the end of February 2002. COLUMBIA GULF TRANSMISSION COMPANY NOTICE TO ALL INTERESTED PARTIES JANUARY 19, 2001 SUBJECT: NOTICE TO PARTIES DELIVERING OR CAUSING GAS TO BE DELIVERED TO COLUMBIA GULF TRANSMISSION COMPANY Pursuant to Section 25.2(a) of Columbia's tariff, Columbia is providing notice of the imposition of the following additional gas quality specification on all shippers, interconnecting pipelines, producers, and receipt meter operators, and all other parties delivering or causing gas to be delivered into Columbia's system: All gas tendered under Columbia's tariff shall have a gross heating value of no more than 1050 Btu per cubic foot. 
This additional gas quality specification will be imposed two (2) business days after this notice is posted on Columbia's internet electronic bulletin board. In addition to complying with this additional gas quality specification, all parties delivering or causing gas to be delivered into the Columbiaoperated Blue Water system on the Western Shoreline, the Southwestern Extension, or on the Header (west of the system null point) may be required to provide evidence to Columbia that such gas is being processed at the Blue Water Processing Plant to ensure that the composite gas stream at Egan, Louisiana has a gross heating value of 1050 Btu or less. Evidence of the gas processing agreements may be required upon Columbia's request. Failure of a party to comply with the additional gas quality specification set forth above and to provide Columbia Gulf with evidence, upon request, of a gas processing agreement for gas delivered to the Blue Water system will result in Columbia Gulf refusing to accept gas from such party and to make corresponding redeliveries. Imposition of this additional gas quality specification does not relieve any party delivering gas or causing gas to be delivered to Columbia's system from its existing obligations to comply with the gas quality specifications set forth in Section 25.1 of Columbia's tariff, to ensure the merchantability of the gas, and to ensure that gas has been properly processed before entering Columbia's system. Columbia, pursuant to the terms of Section 25.2(a) of its tariff, is imposing this additional gas quality specification as it has determined, in its reasonable judgment, that harm to its pipeline system and its operations may occur if it receives gas on its system that fails to meet this additional gas quality specification. 
Columbia reserves the right to revise this additional gas quality specification upon further analysis of the gas that is being received by Columbia and reevaluation of the potential harm to Columbia's pipeline system and its operations. You are further placed on notice that Columbia will hold you responsible and seek legal redress for any damages it incurs as a result of the higher Btu gas. Any questions about this notice may be directed to Gary Espey, (713) 2674761, or to John Pillion, (713) 2674764. I'm forwarding the attached as an update. Am presently researching the issue raised by Drew, and will let you know what I find. The Commercial Group has been discussing with PNM the possibility of entering into service agreements for firm capacity on the portion of PNM's system between Blanco and Rio Puerco. This would effectively operate as an expansion of TW's system from the San Juan Basin to the mainline. TW would acquire PNM capacity then post it as available on the EBB just like we post other capacity. Shippers could then use the capacity to transport gas from Blanco to any point on TW's system. The capacity that PNM has available at this time is seasonal (summer months) only, so it would be used to provide shortterm firm capacity or LFT to TW customers. The marketers anticipate that the additional capacity would mostly be used to ship gas to points on TW's mainline besides Rio Puerco in other words, the total path used by shippers would be a combination of PNM's system and TW's system. Shippers wouldn't necessarily know whether PNM capacity was being used to provide service on their contract, only that their gas is being shipped from A to B. In rereading the Texas Eastern line of cases, I believe we can distinguish TW's proposal from that in Northern's Docket No. 
97388 because it is more specific (we are asking for authority to acquire capacity on a specific pipeline) and because it is more in the nature of an expansion of TW's constrained San Juan lateral rather than a proposal to broker other pipelines' capacity. I may be overly optimistic, but I also believe we meet all the Texas Eastern criteria. I realize that I have not discussed this very extensively (or at all) with any of you, but I have gone ahead and drafted the proposal in the form of a letter to FERC. It seemed the most efficient way to provide a starting point for discussion, especially since I was able to borrow heavily from our successful Market Center Services filing. I'm not yet sure where we will include it in the tariff, but I propose tariff language along the lines of "Transporter may contract for firm transportation service on Public Service Company of New Mexico for use in providing transportation service from the Blanco receipt point on Transporter's system. The cost incurred for acquisition of any such services shall be separately recorded in Account No. . Transporter is at risk for recovery of costs associated with such services." After you've reviewed the attached, please let me know your overall substantive comments and let's talk about the best way to proceed. Thanks. this is what I expect to be paid out next Tuesday I'm buying all the baseload gas from Reliant Tallahassee 10,000 dth per day x 31 days x $3.57 $1,106,700 MCV 6,800 dth per day x 31 days x $3.57 $752,556 I will probably pay NX1 .05 or so for the Tallahassee gas because I'm buying point specific gas and NX1 flat to .02 for the Trunkline. I would prefer to be conservative so bump the prices up as you see fit. The attached file contains our daily volume requirements for 3/31 thru 4/3 . There are two worksheets: The one labelled with today's date contains the volumes that we need at each of the delivery points indicated. 
The schedulers will want to check this sheet to see how much gas is needed at a specific point. The AGL volumes are not current and should be ignored. The sheet labelled 'Daily Change' shows the change (increase /decrease ) in the requested daily volume relative to a prior value. For the current gas day, the change is an intraday change and is calculated using the previous day's volume request for the current day (e.g., If the current gas day is 3/28, then the change is computed with respect to the 3/27 request for 3/28). For tomorrow's gas day and any subsequent ones shown, the change is computed using the Firstofthemonth volumes requested for those days. (e.g., If today is 3/28, then the change for 3/29, 3/30, etc. is computed with respect to the firstofthemonth volumes for those days.) This sheet should be useful in determining prices for daily changes in gas volumes that we take or turn back. Doug Kinney Ph: 7035616339 Fax: 7035617317 033100daily volsAM.xls I found the reference I was remembering over the weekend. Check out the order issued to TriState Pipeline at Docket CP9961. FERC got all over CMS on the problems posed by "dual jurisdictional facilities" whatever that means. CMS tried to do something that sounds a lot like what PNM would be doing under the idea you guys had. The Commercial Group has been discussing with PNM the possibility of entering into service agreements for firm capacity on the portion of PNM's system between Blanco and Rio Puerco. This would effectively operate as an expansion of TW's system from the San Juan Basin to the mainline. TW would acquire PNM capacity then post it as available on the EBB just like we post other capacity. Shippers could then use the capacity to transport gas from Blanco to any point on TW's system. The capacity that PNM has available at this time is seasonal (summer months) only, so it would be used to provide shortterm firm capacity or LFT to TW customers. 
The marketers anticipate that the additional capacity would mostly be used to ship gas to points on TW's mainline besides Rio Puerco in other words, the total path used by shippers would be a combination of PNM's system and TW's system. Shippers wouldn't necessarily know whether PNM capacity was being used to provide service on their contract, only that their gas is being shipped from A to B. In rereading the Texas Eastern line of cases, I believe we can distinguish TW's proposal from that in Northern's Docket No. 97388 because it is more specific (we are asking for authority to acquire capacity on a specific pipeline) and because it is more in the nature of an expansion of TW's constrained San Juan lateral rather than a proposal to broker other pipelines' capacity. I may be overly optimistic, but I also believe we meet all the Texas Eastern criteria. I realize that I have not discussed this very extensively (or at all) with any of you, but I have gone ahead and drafted the proposal in the form of a letter to FERC. It seemed the most efficient way to provide a starting point for discussion, especially since I was able to borrow heavily from our successful Market Center Services filing. I'm not yet sure where we will include it in the tariff, but I propose tariff language along the lines of "Transporter may contract for firm transportation service on Public Service Company of New Mexico for use in providing transportation service from the Blanco receipt point on Transporter's system. The cost incurred for acquisition of any such services shall be separately recorded in Account No. . Transporter is at risk for recovery of costs associated with such services." After you've reviewed the attached, please let me know your overall substantive comments and let's talk about the best way to proceed. Thanks. The attached file contains our daily volume requirements for 8/1and the following day(s) as listed . 
There are three worksheets labelled as follows: 'Total Reqs': Contains the volumes that CES is requesting from Enron at each of the delivery points and dates indicated in the columns labelled "NOM". The Enron schedulers will want to check this sheet to see how much gas is needed at a specific point. The AGL volumes are not timely and should be ignored until further notice. CES schedulers note: the attached file is archived on P:\Energy Ops\Enron\May00\Daily\ .xls, so you don't have to save a duplicate copy elsewhere on the network. 'Daily Change' : shows the change (increase /decrease ) in the daily volume needed from Enron at the designated citygate delivery point relative to the supply volumes arranged previously with Enron prior to the current date. For the current gas day, the change is an intraday change and is calculated using the previous day's volume request for the current day (e.g., If the current gas day is 3/28, then the change is computed with respect to the 3/27 request for 3/28. Note that this procedure assumes that the prior day supply change was in fact effectuated). For tomorrow's gas day and any subsequent ones shown, the change is computed as the latest Dth Nomination requirement less the Firstofthemonth supply volume plus or minus any adjustments made since the firstofthemonth to the FOM supply for the (future) days shown. (e.g., If today is 3/28, then the change for 3/29, 3/30, etc. is computed with respect to the firstofthemonth volumes for those days adjusted for any additional purchases or sellbacks since the firstofthemonth thru 3/27.) This sheet should be useful in determining prices for daily changes in gas volumes that we take or turn back. The column labelled "ENA Daily Swing / vs FOM volume' shows the absolute magnitude of the daily swings allowed by ENA at prespecified prices. Prices for Swing volumes exceeding these magnitudes or for swings at delivery points without prespecified pricing must be negotiated. 
'Comments': Contains comments on selected items on various days. Hans Herzog ph: 703 561 6331 fax: 703 561 7317 080100 daily vols.xls The attached file contains our daily volume requirements for 5/26 and the following day(s) as listed . There are three worksheets labelled as follows: 'Total Reqs': Contains the volumes that CES is requesting from Enron at each of the delivery points and dates indicated in the columns labelled "NOM". The Enron schedulers will want to check this sheet to see how much gas is needed at a specific point. The AGL volumes are not timely and should be ignored until further notice. CES schedulers note: the attached file is archived on P:\Energy Ops\Enron\May00\Daily\ .xls, so you don't have to save a duplicate copy elsewhere on the network. 'Daily Change' : shows the change (increase /decrease ) in the daily volume needed from Enron at the designated citygate delivery point relative to the supply volumes arranged previously with Enron prior to the current date. For the current gas day, the change is an intraday change and is calculated using the previous day's volume request for the current day (e.g., If the current gas day is 3/28, then the change is computed with respect to the 3/27 request for 3/28. Note that this procedure assumes that the prior day supply change was in fact effectuated). For tomorrow's gas day and any subsequent ones shown, the change is computed as the latest Dth Nomination requirement less the Firstofthemonth supply volume plus or minus any adjustments made since the firstofthemonth to the FOM supply for the (future) days shown. (e.g., If today is 3/28, then the change for 3/29, 3/30, etc. is computed with respect to the firstofthemonth volumes for those days adjusted for any additional purchases or sellbacks since the firstofthemonth thru 3/27.) This sheet should be useful in determining prices for daily changes in gas volumes that we take or turn back. 
The column labelled "ENA Daily Swing / vs FOM volume' shows the absolute magnitude of the daily swings allowed by ENA at prespecified prices. Prices for Swing volumes exceeding these magnitudes or for swings at delivery points without prespecified pricing must be negotiated. 'Comments': Contains comments on selected items on various days. Hans Herzog ph: 703 561 6331 fax: 703 561 7317 052600 daily volsAM.xls

In [None]:
# Split the normalized text into sentences and keep them as a list.
original_sentences = list(nltk.sent_tokenize(original_text))
original_sentences

['Terry, please plan on attending.',
 'Thanks.',
 'Lynn Attached is the Commission\'s "Order Imposing Reporting Requirement on Natural Gas Sales to California Market," which was issued late yesterday.',
 'In the order, the Commission finds that if does have the authority to request the information as set out in the May 18 order proposing the requirements.',
 "The information is to cover August 1, 2001 through Sept. 30, 2002 (the end date coincides with the end of the Commission's mitigation plan re: wholesal prices in California and the West).",
 'Specific info gas sellers and LDCs file concernign purchase and sales transactions is exempt from FOIA disclosure Also, respondents may request privileged treatment of "other portions of their responses subject to the.....Commission\'s regulations" Some of the questions have been modified based on comments received based on comments received on the May 18th proposal Transaction by transaction data is required; FERC will aggregate the informat

In [None]:
from IPython.core.display import HTML
def visualize(title, sentence_list, best_sentences):
  """Render the text as HTML with the summary sentences highlighted.

  Args:
    title: Heading text; shown as "Summary - {title}" inside an <h1>.
    sentence_list: Sentences of the full text, in display order.
    best_sentences: Sentences chosen for the summary. A sentence from
      ``sentence_list`` is wrapped in <mark> only if it appears here verbatim.

  Returns:
    None. Two HTML fragments (heading, then body) are sent to display().
  """
  # Local import: `html` is not brought in by the surrounding import lines.
  from html import escape

  selected = set(best_sentences)
  display(HTML(f'<h1>Summary - {title}</h1>'))

  pieces = []
  for sentence in sentence_list:
    # Match on the raw sentence, but escape before emitting: the text is e-mail
    # content and may contain markup that would otherwise be rendered.
    safe = escape(str(sentence), quote=False)
    pieces.append(f"<mark>{safe}</mark>" if sentence in selected else safe)

  # Same layout as before: every sentence is preceded by a single space.
  text = ''.join(' ' + piece for piece in pieces)
  display(HTML(f""" {text} """))

In [None]:
!pip install sumy

[0m

In [None]:
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.luhn import LuhnSummarizer

In [None]:
# PlaintextParser.from_string expects one text string, not a list of sentences,
# so re-join the tokenized sentences before handing them to sumy.
parser = PlaintextParser.from_string(' '.join(original_sentences), Tokenizer('english'))

In [None]:
# Instantiate sumy's Luhn summarizer.
# NOTE: the name `summarizer` is rebound to a different summarizer in a later cell.
summarizer = LuhnSummarizer()

In [None]:
# Summarize the parsed document. 50 is passed as the sentence-count argument
# (presumably an upper bound on the number of sentences returned — confirm in sumy docs).
summary = summarizer(parser.document, 50)

In [None]:
# Convert each sentence object returned by the summarizer to a plain string.
best_sentences = [str(sentence) for sentence in summary]

In [None]:
# Show the full text with the Luhn-selected sentences highlighted.
visualize("Query Based Enron EMail", original_sentences, best_sentences)

In [None]:
!pip install bert-extractive-summarizer

Collecting bert-extractive-summarizer
  Downloading bert_extractive_summarizer-0.10.1-py3-none-any.whl (25 kB)
Installing collected packages: bert-extractive-summarizer
Successfully installed bert-extractive-summarizer-0.10.1


In [None]:
!pip install git+https://github.com/huggingface/transformers

In [None]:
!pip install sacremoses

[0m

In [None]:
!pip install transformers

In [None]:
from transformers import BigBirdTokenizer

In [None]:
from summarizer import Summarizer

In [None]:
# Build the extractive summarizer imported from the `summarizer` package and run it
# on `all_documents`.
# NOTE: this rebinds `summarizer` and `summary`, replacing the Luhn objects from earlier cells.
summarizer = Summarizer()
summary = summarizer(all_documents)

In [None]:
# Break the summary string back into individual sentences and display them.
summary_tokenized = list(nltk.sent_tokenize(summary))
summary_tokenized

['Lynn Attached is the Commission\'s "Order Imposing Reporting Requirement on Natural Gas Sales to California Market," which was issued late yesterday.',
 'APPENDIX Answers to all questions below that require a statement of volumes should set forth the requested volumes on an MMBtu basis.',
 'Along with the hard copy response, please provide a CDROM containing the response to this question.',
 'Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format.',
 'For Sellers of Natural Gas to the California Market: 1.',
 'For each sales contract, identify separately the transportation component and the gas commodity component of the price.',
 'identify the entity from whom the responder purchased the gas; and, h. identify the point where responder took title to the gas.',
 'On a daily basis for the period August 1, 2001 to January 31, 2002, please provide the following information for each contract the local distribution company has with a sales custome

In [None]:
# Join with a space so consecutive sentences are not fused together
# (e.g. "yesterday.APPENDIX") in the rendered Markdown.
summ = ' '.join(summary_tokenized)
display(Markdown(summ))

Lynn Attached is the Commission's "Order Imposing Reporting Requirement on Natural Gas Sales to California Market," which was issued late yesterday.APPENDIX Answers to all questions below that require a statement of volumes should set forth the requested volumes on an MMBtu basis.Along with the hard copy response, please provide a CDROM containing the response to this question.Please provide this information in Excel version 97 or 2000 or comma separated value (CSV) format.For Sellers of Natural Gas to the California Market: 1.For each sales contract, identify separately the transportation component and the gas commodity component of the price.identify the entity from whom the responder purchased the gas; and, h. identify the point where responder took title to the gas.On a daily basis for the period August 1, 2001 to January 31, 2002, please provide the following information for each contract the local distribution company has with a sales customer: a. the contract demand by purchaser; b. the term of the sales contract (beginning and ending dates); c. the volumes (on a MMBtu basis) sold; and, d. 
the price paid by purchaser.On a daily basis for the period August 1, 2001 to January 31, 2002, please provide how much of your system's gas supply was from intrastate production sources.Include the total system supply in your response.Lynn Attendees Lynn Blair Steve January Hasan Kewdaii Joanne Bisbee Jeff may Mike Sullivan A representative with Cedar Falls Utility contacted the NNG measurement desk about volumes for the current gas day changing on the Operator Confirmation Summary.The MIPS extract did not process current gas day volumes.Hold the data pole file until gas day is complete 2.This will make the extract identical to the MIPS daily extract file it replaces.If you have any question call me at 7138531534.Included in that email are the 6 upgrade attachments one for each unit.Therefore, I believe I have provided all the info indicated on Gus's reply below.I fax'd to Mitch and Stuart Zisman the results of the "Personal Property" inventory.The account number (14670) is on the bill.Evidence of the gas processing agreements may be required upon Columbia's request.This would effectively operate as an expansion of TW's system from the San Juan Basin to the mainline.Shippers could then use the capacity to transport gas from Blanco to any point on TW's system.The marketers anticipate that the additional capacity would mostly be used to ship gas to points on TW's mainline besides Rio Puerco in other words, the total path used by shippers would be a combination of PNM's system and TW's system.I may be overly optimistic, but I also believe we meet all the Texas Eastern criteria.It seemed the most efficient way to provide a starting point for discussion, especially since I was able to borrow heavily from our successful Market Center Services filing.Transporter is at risk for recovery of costs associated with such services."After you've reviewed the attached, please let me know your overall substantive comments and let's talk about the best way to proceed.I would prefer 
to be conservative so bump the prices up as you see fit.e.g., If today is 3/28, then the change for 3/29, 3/30, etc.This sheet should be useful in determining prices for daily changes in gas volumes that we take or turn back.Check out the order issued to TriState Pipeline at Docket CP9961.CMS tried to do something that sounds a lot like what PNM would be doing under the idea you guys had.The Enron schedulers will want to check this sheet to see how much gas is needed at a specific point.The AGL volumes are not timely and should be ignored until further notice.Daily Change' : shows the change (increase /decrease ) in the daily volume needed from Enron at the designated citygate delivery point relative to the supply volumes arranged previously with Enron prior to the current date.Note that this procedure assumes that the prior day supply change was in fact effectuated).For tomorrow's gas day and any subsequent ones shown, the change is computed as the latest Dth Nomination requirement less the Firstofthemonth supply volume plus or minus any adjustments made since the firstofthemonth to the FOM supply for the (future) days shown.( Comments': Contains comments on selected items on various days.

In [None]:
# Highlight the summary sentences inside the original text.
# NOTE(review): `summary_tokenized` derives from `all_documents`, while `original_sentences`
# was built from `original_text`; highlighting needs exact string matches — confirm they correspond.
visualize("Query Based Extractive Text Summarization", original_sentences, summary_tokenized)

In [None]:
!pip install pysummarization

Collecting pysummarization
  Downloading pysummarization-1.1.9.tar.gz (64 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/64.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m61.4/64.2 kB[0m [31m1.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.2/64.2 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pysummarization
  Building wheel for pysummarization (setup.py) ... [?25l[?25hdone
  Created wheel for pysummarization: filename=pysummarization-1.1.9-py3-none-any.whl size=82362 sha256=8f2401fb9fe167c8fad1edefadb9e997dabd4d0afce2571cab925491aa8feb0c
  Stored in directory: /root/.cache/pip/wheels/08/d8/9e/940eb35676a1f5dae2f4bb5dc4752e9a00a7affba172bae36c
Successfully built pysummarization
Installing collected packages: pysummarization
Successfu

In [None]:
from pysummarization.nlpbase.auto_abstractor import AutoAbstractor
from pysummarization.tokenizabledoc.simple_tokenizer import SimpleTokenizer
from pysummarization.abstractabledoc.top_n_rank_abstractor import TopNRankAbstractor

In [None]:
# Configure pysummarization's AutoAbstractor.
auto_abstractor = AutoAbstractor()
# Tokenizer used to split text into tokens.
auto_abstractor.tokenizable_doc = SimpleTokenizer()
# Sentence boundaries: a period or a newline.
auto_abstractor.delimiter_list = [".", "\n"]
# Abstractor object handed to summarize() in the next cell.
abstractable_doc = TopNRankAbstractor()

In [None]:
# Run pysummarization on `all_documents`. The result is indexed with
# 'summarize_result' in the following cell. Rebinds `summary` again.
summary = auto_abstractor.summarize(all_documents, abstractable_doc)

In [None]:
# Normalise each extracted sentence: collapse whitespace runs to one space and trim the ends.
best_sentences = [
  re.sub(r'\s+', ' ', sentence).strip()
  for sentence in summary['summarize_result']
]

In [None]:
# Display the cleaned summary sentences.
best_sentences

['Here is a brief summary of the latest DA proposal floating around the Senate.',
 'The provisions couldand likely willchange.',
 'The leader of the Senate (Burton) is still expressing opposition to DA, so its fate remains unclear.',
 "All customers must pay Edison undercollection, whether they've been DA or not.",
 'Customers going DA must first clear any payables to DWR for power previously consumed but not full paid for.',
 "Customers going DA must pay for DWR's stranded power costs caused by the customer leaving, UNLESS DWR has a net short position, in which case there would be no stranded costs and therefore no fees.",
 "The bill allows customers to file a complaint with the PUC regarding DWR's claims of stranded costs.",
 "Customers who choose green DA power (defined as 80% renewable) will be exempt from stranded cost fees so long as green DA load does not exceed DWR's net short position.",
 'The bill stabilization plan requires SDG&E to file comments, by September 30, addressing

In [None]:
# Join with a space so consecutive sentences are not fused together
# (e.g. "Senate.The provisions") in the rendered Markdown.
summ = ' '.join(best_sentences)
display(Markdown(summ))

Here is a brief summary of the latest DA proposal floating around the Senate.The provisions couldand likely willchange.The leader of the Senate (Burton) is still expressing opposition to DA, so its fate remains unclear.All customers must pay Edison undercollection, whether they've been DA or not.Customers going DA must first clear any payables to DWR for power previously consumed but not full paid for.Customers going DA must pay for DWR's stranded power costs caused by the customer leaving, UNLESS DWR has a net short position, in which case there would be no stranded costs and therefore no fees.The bill allows customers to file a complaint with the PUC regarding DWR's claims of stranded costs.Customers who choose green DA power (defined as 80% renewable) will be exempt from stranded cost fees so long as green DA load does not exceed DWR's net short position.The bill stabilization plan requires SDG&E to file comments, by September 30, addressing the implementation and transition of a Levelized Payment Plan (LPP) for all customers on an "opt out" basis.Assembly Women Davis stated that she would be going back to the legislation to pass the Davis/Alpert Bill establishing a cap for all customers.

In [None]:
# Highlight the pysummarization sentences inside the original text.
# NOTE(review): `best_sentences` derives from `all_documents`, while `original_sentences`
# was built from `original_text`; highlighting needs exact string matches — confirm they correspond.
visualize("Query Based Extractive Text Summarization", original_sentences, best_sentences)

0
5.85% Fixed Rate Rates Have Fallen Again!! ! DO NOT MISS OUT! ! LET BANKS COMPETE FOR YOUR BUSINESS!! ! ALL CREDIT WELCOME CLICK HERE FOR MORE INFO


In [None]:
# Build the extractive summary as one space-separated string and print it.
extract_sum = ' '.join(best_sentences)
print(extract_sum)

Here is a brief summary of the latest DA proposal floating around the Senate. The provisions couldand likely willchange. The leader of the Senate (Burton) is still expressing opposition to DA, so its fate remains unclear. All customers must pay Edison undercollection, whether they've been DA or not. Customers going DA must first clear any payables to DWR for power previously consumed but not full paid for. Customers going DA must pay for DWR's stranded power costs caused by the customer leaving, UNLESS DWR has a net short position, in which case there would be no stranded costs and therefore no fees. The bill allows customers to file a complaint with the PUC regarding DWR's claims of stranded costs. Customers who choose green DA power (defined as 80% renewable) will be exempt from stranded cost fees so long as green DA load does not exceed DWR's net short position. The bill stabilization plan requires SDG&E to file comments, by September 30, addressing the implementation and transition

In [None]:
import networkx as nx
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Step 1: Convert the documents into a matrix of token counts.
# Each row of `matrix` corresponds to one entry of extracted_emails['Text'].
vectorizer = CountVectorizer()
# NOTE(review): `all_document` is not used by the cells visible here — confirm whether it is still needed.
all_document =[all_documents]
matrix = vectorizer.fit_transform(extracted_emails['Text'])

In [None]:
# Step 2: Calculate pairwise cosine similarity between the rows of `matrix`
# (one row per 'Text' entry, so this compares e-mails rather than individual sentences).
similarity_matrix = cosine_similarity(matrix, matrix)

In [None]:
# Step 3: Build a graph from the similarity matrix: one node per row,
# with the matrix used as the weighted adjacency matrix.
graph = nx.from_numpy_array(similarity_matrix)


In [None]:

# Step 4: Apply the PageRank algorithm to score every node.
# `scores` is looked up by row position (scores[i]) in the next step.
scores = nx.pagerank(graph)


In [None]:
# Step 5: Pair every text with its PageRank score and order the pairs from highest to lowest.
ranked_sentences = [
    (scores[i], sentence)
    for i, sentence in enumerate(extracted_emails['Text'])
]
ranked_sentences.sort(reverse=True)
# Keep the top-scoring texts and put each on its own line.
num_sentences_in_summary = 10
summary = "\n".join(
    str(sentence) for _, sentence in ranked_sentences[:num_sentences_in_summary]
)

In [None]:
# Display the full list of (score, text) pairs, highest score first.
ranked_sentences

[(0.27323583576486793,
  'Clarifications: SDG&E\'s Bill cap is as follows Residential: $68 per mouth through 1/1/01 and $75 per mouth through 12/31/01 for those who consume 500kWh per month or less market prices for the others residential. Commercial: $220 per mouth through 1/1/01 and $240 per mouth through 12/31/01 for those who consume 1500 kWh per month or less market place for other commercial The decision intends to allow for future adjustment of the kWh monthly consumption for bill caps retroactive to June 1, 200 to ensure that customers in hotter inland regions have an appropriate level of usage cap and medium size commercial customers are provided relief. Duque\'s Decision passed 32 (Duque, Neeper, Bilas Lynch, Wood) Both Neeper and Bilas said that they were only voting for this decision because of pressure. They both felt that last meeting\'s decision provided sufficient relief and provided for a investigation to address longer term problems. Summary: The Decision institutes a

In [None]:

# Step 6: Select the top-ranked texts for the summary (here the top 10).
# Nothing is printed in this cell; printing happens in the next one.
num_sentences_in_summary = 10
summary_sentences = [sentence for _, sentence in ranked_sentences[:num_sentences_in_summary]]

In [None]:
# Put each selected text on its own line and print the result under a "Summary:" heading.
summary_paragraph = "\n".join(summary_sentences)
print("Summary:", summary_paragraph, sep="\n")

Summary:
Clarifications: SDG&E's Bill cap is as follows Residential: $68 per mouth through 1/1/01 and $75 per mouth through 12/31/01 for those who consume 500kWh per month or less market prices for the others residential. Commercial: $220 per mouth through 1/1/01 and $240 per mouth through 12/31/01 for those who consume 1500 kWh per month or less market place for other commercial The decision intends to allow for future adjustment of the kWh monthly consumption for bill caps retroactive to June 1, 200 to ensure that customers in hotter inland regions have an appropriate level of usage cap and medium size commercial customers are provided relief. Duque's Decision passed 32 (Duque, Neeper, Bilas Lynch, Wood) Both Neeper and Bilas said that they were only voting for this decision because of pressure. They both felt that last meeting's decision provided sufficient relief and provided for a investigation to address longer term problems. Summary: The Decision institutes a rate stabilization 

In [None]:
# Render the PageRank-based summary as Markdown.
display(Markdown(summary_paragraph))

Clarifications: SDG&E's Bill cap is as follows Residential: $68 per mouth through 1/1/01 and $75 per mouth through 12/31/01 for those who consume 500kWh per month or less market prices for the others residential. Commercial: $220 per mouth through 1/1/01 and $240 per mouth through 12/31/01 for those who consume 1500 kWh per month or less market place for other commercial The decision intends to allow for future adjustment of the kWh monthly consumption for bill caps retroactive to June 1, 200 to ensure that customers in hotter inland regions have an appropriate level of usage cap and medium size commercial customers are provided relief. Duque's Decision passed 32 (Duque, Neeper, Bilas Lynch, Wood) Both Neeper and Bilas said that they were only voting for this decision because of pressure. They both felt that last meeting's decision provided sufficient relief and provided for a investigation to address longer term problems. Summary: The Decision institutes a rate stabilization for residential and commercial customers Residential: $68 through 1/1/01 and $75 through 12/31/01 for those who consume 500kW or less market prices for the others residential Commercial: $220 through 1/1/01 and $240 through 12/31/01 for those who consume 1500 kW or less market place for other commercial Other rate schedules: Subject to market prices Implementation date: Retroactive to 6/1/00 SDG&E will provide a credit for those customers that are eligible no latter than September 30th The decision intends to allow for future adjustment of the kWh rate caps retroactive to June 1, 200 to ensure that customers in hotter inland regions have an appropriate level of usage cap and medium size commercial customers are provided relief. The bill stabilization plan requires SDG&E to file comments, by September 30, addressing the implementation and transition of a Levelized Payment Plan (LPP) for all customers on an "opt out" basis. 
However, the procedure for the implementation of the LLP will allow customers to readily exercise choice, while those that do not exercise choice will default into the LPP. The Commission will also further study the bill stabilization plan as to direct access customers. Revenue shortfalls are to be booked into the TCBA for future recovery of the net shortfall. Discussion: Senator Alpert, Assembly Women Davis, Mayor Golding , the former mayor, several members of the City and County of San Diego Board of Supervisors made a public appearance. All thanked the Commission for having this special session. All supported the Wood decision. All stated that it did not go far enough and insisted that all customers should be subject to the cap. Assembly Women Davis stated that she would be going back to the legislation to pass the Davis/Alpert Bill establishing a cap for all customers. However, Asseblymember Wright, chairman of the Utilities and Commerce Committee, has been open and has shown opposition to the Alpert/Davis bill. If you have any questions, please call me at 4157827810 Bruno Gaillard Government Affairs
Here is a brief summary of the latest DA proposal floating around the Senate. The provisions couldand likely willchange. The leader of the Senate (Burton) is still expressing opposition to DA, so its fate remains unclear. No suspension of DA. All customers must pay Edison undercollection, whether they've been DA or not. Customers going DA must first clear any payables to DWR for power previously consumed but not full paid for. Customers going DA must pay for DWR's stranded power costs caused by the customer leaving, UNLESS DWR has a net short position, in which case there would be no stranded costs and therefore no fees. The bill allows customers to file a complaint with the PUC regarding DWR's claims of stranded costs. Customers who choose green DA power (defined as 80% renewable) will be exempt from stranded cost fees so long as green DA load does not exceed DWR's net short position.
Chinese Wall training of one hour has been scheduled on the dates listed below. The training is mandatory and allows EWS to continue operating allits businesses including equity trading without violating the securities laws. Please register for one of the four onehour sessions listed below. Eachsession is tailored to a particular commercial group, and it would be preferable if you could attend the session for your group. (Your particular group is the one highlighted in bold on the list below.) Monday, March 5, 2001, 10:00 a.m. ) Resource Group Monday, March 5, 2001, 11:00 a.m. ) Origination/Business Development Monday, March 5, 2001, 3:30 p.m. ) Financial Trading Group Monday, March 5, 2001, 4:30 p.m. ) Heads of Trading Desks Each of the above sessions will be held at the downtown Hyatt Regency Hotelin Sandalwood Rooms A & B. Alternatively, two makeup sessions are scheduled for Tuesday, March 13, 2001 at 3:30 p.m. and 4:30 p.m. Location information for the makeup sessions will be announced later. Please confirm your attendance at one of these sessions with Brenda Whitehead by emailing her at brenda.whitehead@enron.com or calling her at extension35438. Mark Frevert and Mark Haedicke
<html> <head> <title>5.85% Fixed Rate</title> </head> <body> <table border= width="68%" bordercolor="#008000" bordercolorlight="#000080" bordercolordark="#0000FF"> <tr> <td width="100%"><p align="center"><font face="Arial Black" color="#FF0000"><big><big><big>5.85% Fixed Rate</big></big></big></font></p> <p align="center"><font face="Arial Black" color="#008040"><img src="http://www.domainstop.org/mort/invest/manmoney.jpg" width="120" height="180" alt="1log.jpg"></font> <font face="Arial" color="#000080"> <br> </font> <p align="center"><b><font face="arial" size= color="#FF0000">Rates Have Fallen Again!!!</font></b></p> <p align="center"><b><font face="arial" size= color="#0000FF">DO NOT MISS OUT!!</font></b></p> <p align="center"><b><font size= face="arial" color="#000080">LET BANKS COMPETE FOR YOUR BUSINESS!!!</font></b></p> <p align="center"><b><font size= face="arial" color="#000080">ALL CREDIT WELCOME</font></p> </b> <p align="center"><font face="Arial" color="#000080"> <strong><a href="http://www.networkpride.net/morta/" target="blank">CLICK HERE FOR MORE INFO</a> </strong></font> <p align="center">&nbsp;</td> </tr> </table> <p>&nbsp;</p> </body> </html>

In [None]:
# Install the libraries for the abstractive (T5 / BART) cells below.
# NOTE(review): transformers==2.8.0 and torch==1.4.0 are old pins that differ from the
# unpinned transformers installs in earlier cells. The recorded outputs further down
# (e.g. model.safetensors downloads) suggest a newer transformers was actually in use —
# confirm which versions are intended.
# NOTE(review): openai and streamlit are not used by the cells visible here — confirm they are needed.
!pip install transformers==2.8.0
!pip install torch==1.4.0
!pip install pip --upgrade
!pip install pyopenssl --upgrade
!pip install openai streamlit

Collecting transformers==2.8.0
  Downloading transformers-2.8.0-py3-none-any.whl (563 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m563.8/563.8 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers==0.5.2 (from transformers==2.8.0)
  Downloading tokenizers-0.5.2.tar.gz (64 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.6/64.6 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting boto3 (from transformers==2.8.0)
  Downloading boto3-1.33.6-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.3/139.3 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
Collecting botocore<1.34.0,>=1.33.6 (from boto3->transformers==2.8.0)
  Downloading botocore-1.33.6-py3-none-any.whl (11.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
#Import Torch and transformers
import warnings
warnings.filterwarnings('ignore')
import os
import csv
import pandas as pd
import torch
import json
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config

In [None]:
#Function takes in three parameters: the text, then the max length and the min length of the summary output
def T5_summarize(text_ps,required_max_len,required_min_len):
    """Summarize one or more texts with the pretrained 't5-small' model.

    Args:
        text_ps: A single string, or a list/tuple of strings (one summary is
            produced per element). Any other type yields an empty result.
        required_max_len: Passed to ``model.generate`` as ``max_length``.
        required_min_len: Passed to ``model.generate`` as ``min_length``.

    Returns:
        list[str]: The decoded summaries, in input order.
    """
    if isinstance(text_ps, str):
        text_ps_list = [text_ps]
    elif isinstance(text_ps, (list, tuple)):
        text_ps_list = list(text_ps)
    else:
        text_ps_list = []

    # Nothing to summarize: return before the slow model download/load.
    if not text_ps_list:
        return []

    model = T5ForConditionalGeneration.from_pretrained('t5-small')
    tokenizer = T5Tokenizer.from_pretrained('t5-small')
    device = torch.device('cpu')

    summarized_text = []
    for text in text_ps_list:
        # Newlines become spaces so words on adjacent lines are not fused together.
        preprocess_text = text.strip().replace("\n", " ")
        # T5 selects the task from a text prefix.
        t5_prepared_Text = "summarize: " + preprocess_text
        # NOTE(review): the input is not truncated or chunked; the recorded output shows a
        # tokenizer warning (1729 > 512 tokens) — consider splitting long inputs.
        tokenized_text = tokenizer.encode(t5_prepared_Text, return_tensors="pt").to(device)

        # Beam search; no_repeat_ngram_size forbids repeating any 4-gram in the output.
        summary_ids = model.generate(tokenized_text,
                                     num_beams=8,
                                     no_repeat_ngram_size=4,
                                     min_length=required_min_len,
                                     max_length=required_max_len,
                                     early_stopping=True)

        output = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        summarized_text.append(output)
    return summarized_text

In [None]:
# Summarize `all_documents` with T5: max summary length 100, min length 30.
sum_list = T5_summarize (all_documents,100,30)

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Token indices sequence length is longer than the specified maximum sequence length for this model (1729 > 512). Running this sequence through the model will result in indexing errors


In [None]:
# Display the list of T5 summaries.
sum_list

["customers going DA must first clear any payables to DWR for power previously consumed but not full paid for. customers who choose green DA power (defined as 80% renewable) will be exempt from stranded cost fees so long as green DA load does not exceed DWR's net short position."]

In [None]:
# Join with a space so that multiple summaries would not be fused together
# when rendered as Markdown.
summ = ' '.join(sum_list)
display(Markdown(summ))

customers going DA must first clear any payables to DWR for power previously consumed but not full paid for. customers who choose green DA power (defined as 80% renewable) will be exempt from stranded cost fees so long as green DA load does not exceed DWR's net short position.

In [None]:
from transformers import BartTokenizer, BartForConditionalGeneration
# Text to summarize with BART, and the matching pretrained tokenizer.
# NOTE: `tokenizer` is a module-level name here; T5_summarize uses its own local tokenizer.
input_text=all_documents
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

In [None]:
# Load the pretrained BART summarization checkpoint (the recorded download below is ~1.63 GB).
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

In [None]:
# Tokenize to PyTorch tensors; input longer than 1024 tokens is truncated,
# so text beyond that point does not reach the model.
inputs = tokenizer(input_text, return_tensors='pt', max_length=1024, truncation=True)

In [None]:
# Generate the summary token ids with 4-beam search, capped at 150 tokens.
# Only input_ids are passed (no attention mask).
summary_ids = model.generate(inputs['input_ids'], max_length=150, length_penalty=2.0, num_beams=4, early_stopping=True)

In [None]:
# Decode the first generated sequence back to text, dropping special tokens.
# Rebinds `summary` once more.
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

In [None]:
# Print the input text followed by the BART summary, each under its own heading.
print(f"Original Text:\n{input_text}\n\nGenerated Summary:\n{summary}")

Original Text:
Here is a brief summary of the latest DA proposal floating around the Senate. The provisions couldand likely willchange. The leader of the Senate (Burton) is still expressing opposition to DA, so its fate remains unclear. No suspension of DA. All customers must pay Edison undercollection, whether they've been DA or not. Customers going DA must first clear any payables to DWR for power previously consumed but not full paid for. Customers going DA must pay for DWR's stranded power costs caused by the customer leaving, UNLESS DWR has a net short position, in which case there would be no stranded costs and therefore no fees. The bill allows customers to file a complaint with the PUC regarding DWR's claims of stranded costs. Customers who choose green DA power (defined as 80% renewable) will be exempt from stranded cost fees so long as green DA load does not exceed DWR's net short position. Chinese Wall training of one hour has been scheduled on the dates listed below. The tr

In [None]:
# Render the BART summary as Markdown.
display(Markdown(summary))

The leader of the Senate (Burton) is still expressing opposition to DA, so its fate remains unclear. All customers must pay Edison undercollection, whether they've been DA or not. Green DA power (defined as 80% renewable) will be exempt from stranded cost fees so long as green DA load does not exceed DWR's net short position.