# **Meeting Minutes Summarizer using the T5 Model**

### Upload the Dataset

In [16]:
# Open Colab's file picker and keep whatever files.upload() returns in `uploaded`.
# NOTE: the recorded output of this cell shows "meeting_minutes_cleaned.csv"
# being saved as "meeting_minutes_cleaned (2).csv", i.e. a file with the
# original name already existed and the new upload landed under another name.
from google.colab import files
uploaded = files.upload()




Saving meeting_minutes_cleaned.csv to meeting_minutes_cleaned (2).csv


## Install Required Libraries

In [17]:
!pip install transformers sentencepiece




# Load the Dataset

In [18]:
import io

import pandas as pd

# Expected dataset name; also used as the on-disk fallback.
DATA_FILENAME = "meeting_minutes_cleaned.csv"
_data_stem = DATA_FILENAME.rsplit(".", 1)[0]

# The upload cell's output shows the CSV being stored as
# "meeting_minutes_cleaned (2).csv" because the original name was taken, so
# reading the fixed filename would silently load an older copy. Prefer the
# bytes returned by files.upload() and only fall back to the file on disk
# when no matching upload is available in this session.
_uploaded_files = globals().get("uploaded") or {}
_matching_names = [
    name for name in _uploaded_files
    if name.startswith(_data_stem) and name.endswith(".csv")
]

if _matching_names:
    df = pd.read_csv(io.BytesIO(_uploaded_files[_matching_names[-1]]))
else:
    df = pd.read_csv(DATA_FILENAME)

# Preview data
df.head()


Unnamed: 0,transcript,summary
0,project manager introduced upcoming project te...,project manager introduced upcoming project te...
1,project manager briefed team new requirement c...,project manager briefed team new requirement c...
2,project manager recapped decision made previou...,project manager recapped decision made previou...
3,project manager recapped decision made previou...,project manager recapped decision made previou...
4,team member introduced name role project proje...,team member introduced name role project proje...


# Load Pretrained T5 Model for Summarization

In [19]:
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Pretrained checkpoint shared by the tokenizer and the model.
T5_CHECKPOINT = "t5-small"

# NOTE: the Pegasus and BART sections further down rebind these same two
# names, so the T5 cells must run before those sections are executed.
tokenizer = T5Tokenizer.from_pretrained(T5_CHECKPOINT)
model = T5ForConditionalGeneration.from_pretrained(T5_CHECKPOINT)


# Define the Summarization Function

In [20]:
def generate_summary(text, max_input_tokens=512, max_summary_tokens=100, num_beams=4):
    """Summarize ``text`` with the globally loaded T5 ``tokenizer`` and ``model``.

    Args:
        text: Transcript to summarize. Non-string values (e.g. the NaN pandas
            uses for a missing cell) and blank strings yield ``""``.
        max_input_tokens: Inputs longer than this many tokens are truncated.
        max_summary_tokens: Upper bound on the generated sequence length.
        num_beams: Beam width used during generation.

    Returns:
        The decoded summary with special tokens removed, or ``""`` when there
        is nothing to summarize.
    """
    # A missing transcript would otherwise fail on the string concatenation
    # below with TypeError, aborting the whole DataFrame.apply run.
    if not isinstance(text, str) or not text.strip():
        return ""

    # The "summarize: " prefix tells the T5 checkpoint which task to perform.
    encoded = tokenizer(
        "summarize: " + text,
        return_tensors="pt",
        max_length=max_input_tokens,
        truncation=True,
    )
    # Hand the attention mask to generate() explicitly instead of letting it
    # be inferred from the input ids.
    output = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=max_summary_tokens,
        num_beams=num_beams,
        early_stopping=True,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)


# Apply Model on Your Dataset

In [21]:
# Only the first 10 rows are summarized to keep the run short;
# take the whole frame instead of the slice for a full evaluation.
df_sample = df.iloc[:10].copy()

# Run the T5 summarizer over every transcript in the sample.
df_sample["generated_summary"] = df_sample["transcript"].map(generate_summary)

# Side-by-side view: input, reference summary, model output.
df_sample[["transcript", "summary", "generated_summary"]]


Unnamed: 0,transcript,summary,generated_summary
0,project manager introduced upcoming project te...,project manager introduced upcoming project te...,project manager introduced upcoming project te...
1,project manager briefed team new requirement c...,project manager briefed team new requirement c...,project manager briefed team new requirement c...
2,project manager recapped decision made previou...,project manager recapped decision made previou...,project manager recapped decision made previou...
3,project manager recapped decision made previou...,project manager recapped decision made previou...,project manager recapped decision made previou...
4,team member introduced name role project proje...,team member introduced name role project proje...,team member introduced name role project proje...
5,project manager recapped event previous meetin...,project manager recapped event previous meetin...,project manager recapped event previous meetin...
6,project manager recapped decision made previou...,project manager recapped decision made previou...,project manager recapped decision made previou...
7,project manager recapped decision made previou...,project manager recapped decision made previou...,project manager recapped decision made previou...
8,project manager gave introduction goal project...,project manager gave introduction goal project...,project manager gave introduction goal project...
9,industrial designer gave presentation basic fu...,industrial designer gave presentation basic fu...,industrial designer gave presentation basic fu...


# Evaluate Summary Quality Using ROUGE

In [22]:
!pip install rouge-score

from rouge_score import rouge_scorer

scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

def compute_rouge_scores(row):
    scores = scorer.score(row["summary"], row["generated_summary"])
    return pd.Series({
        "rouge1": scores["rouge1"].fmeasure,
        "rougeL": scores["rougeL"].fmeasure
    })

df_scores = df_sample.apply(compute_rouge_scores, axis=1)
df_sample = pd.concat([df_sample, df_scores], axis=1)

df_sample[["summary", "generated_summary", "rouge1", "rougeL"]]




Unnamed: 0,summary,generated_summary,rouge1,rougeL
0,project manager introduced upcoming project te...,project manager introduced upcoming project te...,0.9,0.9
1,project manager briefed team new requirement c...,project manager briefed team new requirement c...,0.337615,0.337615
2,project manager recapped decision made previou...,project manager recapped decision made previou...,0.712446,0.712446
3,project manager recapped decision made previou...,project manager recapped decision made previou...,0.550336,0.550336
4,team member introduced name role project proje...,team member introduced name role project proje...,0.929577,0.929577
5,project manager recapped event previous meetin...,project manager recapped event previous meetin...,0.446809,0.446809
6,project manager recapped decision made previou...,project manager recapped decision made previou...,0.881188,0.881188
7,project manager recapped decision made previou...,project manager recapped decision made previou...,0.851485,0.851485
8,project manager gave introduction goal project...,project manager gave introduction goal project...,0.651852,0.651852
9,industrial designer gave presentation basic fu...,industrial designer gave presentation basic fu...,0.410714,0.410714


## Save Results to CSV

In [23]:
# Write the scored sample (index column omitted), then trigger a browser download.
t5_output_path = "summarized_output.csv"
df_sample.to_csv(t5_output_path, index=False)
files.download(t5_output_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# **Meeting Minutes Summarizer using the Pegasus Model**

# Load Pegasus Tokenizer & Model

In [24]:
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# Checkpoint used for both the tokenizer and the model.
model_name = "google/pegasus-xsum"

# These assignments replace the T5 objects previously bound to the same names.
# NOTE(review): the recorded output warns that the embed_positions weights
# were "newly initialized" - presumably these are fixed position embeddings
# and the warning is harmless, but confirm before relying on the results.
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)


tokenizer_config.json:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.52M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/259 [00:00<?, ?B/s]

# Define Summarization Function

In [25]:
def generate_pegasus_summary(text, max_summary_tokens=60, num_beams=5):
    """Summarize ``text`` with the globally loaded Pegasus ``tokenizer``/``model``.

    Args:
        text: Transcript to summarize. Non-string values (e.g. the NaN pandas
            uses for a missing cell) and blank strings yield ``""``.
        max_summary_tokens: Upper bound on the generated sequence length.
        num_beams: Beam width used during generation.

    Returns:
        The decoded summary with special tokens removed, or ``""`` when there
        is nothing to summarize.

    Note:
        In the recorded run most outputs were news-style boilerplate
        ("BBC News takes a look at ..."), so this checkpoint appears to be a
        poor match for these transcripts.
    """
    # Skip missing/blank transcripts instead of letting the tokenizer raise
    # and abort the whole DataFrame.apply run.
    if not isinstance(text, str) or not text.strip():
        return ""

    inputs = tokenizer(text, truncation=True, padding="longest", return_tensors="pt")
    # Pass the attention mask produced by the tokenizer along with the ids.
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=max_summary_tokens,
        num_beams=num_beams,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


# Apply Summarization on Dataset

In [26]:
# Small slice for a quick trial run; this rebinds df_sample, so the columns
# added for the T5 run are not carried over.
df_sample = df.iloc[:10].copy()

# Summarize each transcript with Pegasus.
df_sample["generated_summary"] = df_sample["transcript"].map(generate_pegasus_summary)

# Inspect inputs, references, and model outputs together.
df_sample[["transcript", "summary", "generated_summary"]]


Unnamed: 0,transcript,summary,generated_summary
0,project manager introduced upcoming project te...,project manager introduced upcoming project te...,"To License This Clip, Click Here:"
1,project manager briefed team new requirement c...,project manager briefed team new requirement c...,Project manager briefed team new requirement c...
2,project manager recapped decision made previou...,project manager recapped decision made previou...,Key points of the meeting
3,project manager recapped decision made previou...,project manager recapped decision made previou...,A look back at some of the key moments in the ...
4,team member introduced name role project proje...,team member introduced name role project proje...,BBC Sport takes a look at some of the key stor...
5,project manager recapped event previous meetin...,project manager recapped event previous meetin...,BBC News takes a look at some of the key stori...
6,project manager recapped decision made previou...,project manager recapped decision made previou...,Key points of the meeting:
7,project manager recapped decision made previou...,project manager recapped decision made previou...,Project manager re decision made previous meet...
8,project manager gave introduction goal project...,project manager gave introduction goal project...,BBC News takes a look at some of the key stori...
9,industrial designer gave presentation basic fu...,industrial designer gave presentation basic fu...,How to make a remote control more user-friendly:


# Evaluate with ROUGE

In [27]:
!pip install rouge-score

from rouge_score import rouge_scorer

scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

def compute_rouge_scores(row):
    scores = scorer.score(row["summary"], row["generated_summary"])
    return pd.Series({
        "rouge1": scores["rouge1"].fmeasure,
        "rougeL": scores["rougeL"].fmeasure
    })

df_scores = df_sample.apply(compute_rouge_scores, axis=1)
df_sample = pd.concat([df_sample, df_scores], axis=1)

df_sample[["summary", "generated_summary", "rouge1", "rougeL"]]




Unnamed: 0,summary,generated_summary,rouge1,rougeL
0,project manager introduced upcoming project te...,"To License This Clip, Click Here:",0.0,0.0
1,project manager briefed team new requirement c...,Project manager briefed team new requirement c...,0.156556,0.156556
2,project manager recapped decision made previou...,Key points of the meeting,0.012903,0.012903
3,project manager recapped decision made previou...,A look back at some of the key moments in the ...,0.008511,0.008511
4,team member introduced name role project proje...,BBC Sport takes a look at some of the key stor...,0.0,0.0
5,project manager recapped event previous meetin...,BBC News takes a look at some of the key stori...,0.0,0.0
6,project manager recapped decision made previou...,Key points of the meeting:,0.016949,0.016949
7,project manager recapped decision made previou...,Project manager re decision made previous meet...,0.375,0.375
8,project manager gave introduction goal project...,BBC News takes a look at some of the key stori...,0.010204,0.010204
9,industrial designer gave presentation basic fu...,How to make a remote control more user-friendly:,0.021918,0.021918


# Export Results to CSV

In [28]:
# Write the Pegasus sample (index column omitted), then trigger a browser download.
pegasus_output_path = "pegasus_summarized_output.csv"
df_sample.to_csv(pegasus_output_path, index=False)
files.download(pegasus_output_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# **Meeting Minutes Summarizer using the BART Model**

# Install Required Libraries

In [29]:
!pip install transformers




## Load the Dataset

In [30]:
import io

import pandas as pd

# Expected dataset name; also used as the on-disk fallback.
DATA_FILENAME = "meeting_minutes_cleaned.csv"
_data_stem = DATA_FILENAME.rsplit(".", 1)[0]

# The upload cell's recorded output shows the CSV being stored as
# "meeting_minutes_cleaned (2).csv" because the original name was taken, so
# the fixed filename may point at an older copy. Use the bytes returned by
# files.upload() when this session has them; otherwise read the file on disk.
_uploaded_files = globals().get("uploaded") or {}
_matching_names = [
    name for name in _uploaded_files
    if name.startswith(_data_stem) and name.endswith(".csv")
]

if _matching_names:
    df = pd.read_csv(io.BytesIO(_uploaded_files[_matching_names[-1]]))
else:
    df = pd.read_csv(DATA_FILENAME)

df.head()


Unnamed: 0,transcript,summary
0,project manager introduced upcoming project te...,project manager introduced upcoming project te...
1,project manager briefed team new requirement c...,project manager briefed team new requirement c...
2,project manager recapped decision made previou...,project manager recapped decision made previou...
3,project manager recapped decision made previou...,project manager recapped decision made previou...
4,team member introduced name role project proje...,team member introduced name role project proje...


# Load BART Tokenizer & Model

In [31]:
from transformers import BartForConditionalGeneration, BartTokenizer

# Pretrained checkpoint shared by the tokenizer and the model.
BART_CHECKPOINT = "facebook/bart-large-cnn"

# These assignments replace whatever tokenizer/model an earlier section
# bound to the same names.
tokenizer = BartTokenizer.from_pretrained(BART_CHECKPOINT)
model = BartForConditionalGeneration.from_pretrained(BART_CHECKPOINT)


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

# Define Summarization Function

In [32]:
def generate_bart_summary(text, max_input_tokens=1024, max_summary_tokens=100, num_beams=4):
    """Summarize ``text`` with the globally loaded BART ``tokenizer``/``model``.

    Args:
        text: Transcript to summarize. Non-string values (e.g. the NaN pandas
            uses for a missing cell) and blank strings yield ``""``.
        max_input_tokens: Inputs longer than this many tokens are truncated.
        max_summary_tokens: Upper bound on the generated sequence length.
        num_beams: Beam width used during generation.

    Returns:
        The decoded summary with special tokens removed, or ``""`` when there
        is nothing to summarize.
    """
    # Skip missing/blank transcripts instead of letting the tokenizer raise
    # and abort the whole DataFrame.apply run.
    if not isinstance(text, str) or not text.strip():
        return ""

    # Call the tokenizer directly: batch_encode_plus is deprecated in favour
    # of __call__, which accepts the same one-element batch.
    inputs = tokenizer(
        [text],
        return_tensors="pt",
        max_length=max_input_tokens,
        truncation=True,
    )
    # Pass the attention mask produced by the tokenizer along with the ids.
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        num_beams=num_beams,
        max_length=max_summary_tokens,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


# Apply to Dataset

In [33]:
# Keep the run short: work on a copy of the first 10 rows only.
df_sample = df.iloc[:10].copy()

# Summarize each transcript with BART.
df_sample["generated_summary"] = df_sample["transcript"].map(generate_bart_summary)

# Show input, reference summary, and model output next to each other.
df_sample[["transcript", "summary", "generated_summary"]]


Unnamed: 0,transcript,summary,generated_summary
0,project manager introduced upcoming project te...,project manager introduced upcoming project te...,project manager introduced upcoming project te...
1,project manager briefed team new requirement c...,project manager briefed team new requirement c...,project manager briefed team new requirement c...
2,project manager recapped decision made previou...,project manager recapped decision made previou...,project manager recapped decision made previou...
3,project manager recapped decision made previou...,project manager recapped decision made previou...,project manager recapped decision made previou...
4,team member introduced name role project proje...,team member introduced name role project proje...,team member introduced name role project proje...
5,project manager recapped event previous meetin...,project manager recapped event previous meetin...,project manager recapped event previous meetin...
6,project manager recapped decision made previou...,project manager recapped decision made previou...,project manager recapped decision made previou...
7,project manager recapped decision made previou...,project manager recapped decision made previou...,project manager recapped decision made previou...
8,project manager gave introduction goal project...,project manager gave introduction goal project...,project manager gave introduction goal project...
9,industrial designer gave presentation basic fu...,industrial designer gave presentation basic fu...,industrial designer gave presentation basic fu...


# Evaluate with ROUGE

In [34]:
!pip install rouge-score

from rouge_score import rouge_scorer

scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

def compute_rouge_scores(row):
    scores = scorer.score(row["summary"], row["generated_summary"])
    return pd.Series({
        "rouge1": scores["rouge1"].fmeasure,
        "rougeL": scores["rougeL"].fmeasure
    })

df_scores = df_sample.apply(compute_rouge_scores, axis=1)
df_sample = pd.concat([df_sample, df_scores], axis=1)

df_sample[["summary", "generated_summary", "rouge1", "rougeL"]]




Unnamed: 0,summary,generated_summary,rouge1,rougeL
0,project manager introduced upcoming project te...,project manager introduced upcoming project te...,0.890756,0.890756
1,project manager briefed team new requirement c...,project manager briefed team new requirement c...,0.230469,0.230469
2,project manager recapped decision made previou...,project manager recapped decision made previou...,0.584906,0.584906
3,project manager recapped decision made previou...,project manager recapped decision made previou...,0.451613,0.451613
4,team member introduced name role project proje...,team member introduced name role project proje...,0.924242,0.924242
5,project manager recapped event previous meetin...,project manager recapped event previous meetin...,0.35493,0.35493
6,project manager recapped decision made previou...,project manager recapped decision made previou...,0.654762,0.654762
7,project manager recapped decision made previou...,project manager recapped decision made previou...,0.635294,0.635294
8,project manager gave introduction goal project...,project manager gave introduction goal project...,0.470588,0.470588
9,industrial designer gave presentation basic fu...,industrial designer gave presentation basic fu...,0.385488,0.385488


# Save Results

In [35]:
from google.colab import files

# Write the BART sample (index column omitted), then trigger a browser download.
bart_output_path = "bart_summarized_output.csv"
df_sample.to_csv(bart_output_path, index=False)
files.download(bart_output_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>