In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('Structured_Meeting_Notes.csv')
df.head()

Unnamed: 0,date,speaker,note,project,action_item,deadline
0,2025-03-28,Jordan,[Project: AI assistant integration] Jordan agr...,AI assistant integration,Set up Airflow DAGs,Friday
1,2025-04-04,Jordan,[Project: Internal documentation cleanup] Jord...,Internal documentation cleanup,Test Gemini with internal queries,Friday
2,2025-03-24,Jordan,[Project: Data pipeline improvement] Jordan hi...,Data pipeline improvement,Organize guild workshop,Monday
3,2025-02-13,Omar,[Project: Knowledge sharing guilds] Omar highl...,Knowledge sharing guilds,Summarize feedback for product,Thursday
4,2025-03-27,Anaïs,[Project: Dashboard redesign] Anaïs requested ...,Dashboard redesign,Summarize feedback for product,Thursday


### Check for duplicates

In [3]:
df_clean = df.dropna(subset=['note'])


### Capital letter for strings in ```speaker``` and ```project```

In [4]:
df_clean['speaker'] = df_clean['speaker'].str.title()
df_clean['project'] = df_clean['project'].str.title()
df_clean.head()

Unnamed: 0,date,speaker,note,project,action_item,deadline
0,2025-03-28,Jordan,[Project: AI assistant integration] Jordan agr...,Ai Assistant Integration,Set up Airflow DAGs,Friday
1,2025-04-04,Jordan,[Project: Internal documentation cleanup] Jord...,Internal Documentation Cleanup,Test Gemini with internal queries,Friday
2,2025-03-24,Jordan,[Project: Data pipeline improvement] Jordan hi...,Data Pipeline Improvement,Organize guild workshop,Monday
3,2025-02-13,Omar,[Project: Knowledge sharing guilds] Omar highl...,Knowledge Sharing Guilds,Summarize feedback for product,Thursday
4,2025-03-27,Anaïs,[Project: Dashboard redesign] Anaïs requested ...,Dashboard Redesign,Summarize feedback for product,Thursday


### Truncate text to avoid overflow or confuse the model

In [5]:
df_clean['note_short'] = df_clean['note'].apply(lambda x: x[:300])


### Map the day of the week to the actual deadline date in datetime

In [6]:
from datetime import datetime, timedelta

def convert_weekday_to_date(row):
    weekdays = {
        "Monday": 0, "Tuesday": 1, "Wednesday": 2,
        "Thursday": 3, "Friday": 4, "Saturday": 5, "Sunday": 6
    }
    try:
        note_date = datetime.strptime(row["date"], "%Y-%m-%d") + timedelta(days=1)  # Shift by 1 day
        target_weekday = weekdays.get(row["deadline"])
        if target_weekday is None:
            return None
        days_ahead = (target_weekday - note_date.weekday()) % 7
        return (note_date + timedelta(days=days_ahead)).strftime("%Y-%m-%d")
    except:
        return None

df_clean.head()


Unnamed: 0,date,speaker,note,project,action_item,deadline,note_short
0,2025-03-28,Jordan,[Project: AI assistant integration] Jordan agr...,Ai Assistant Integration,Set up Airflow DAGs,Friday,[Project: AI assistant integration] Jordan agr...
1,2025-04-04,Jordan,[Project: Internal documentation cleanup] Jord...,Internal Documentation Cleanup,Test Gemini with internal queries,Friday,[Project: Internal documentation cleanup] Jord...
2,2025-03-24,Jordan,[Project: Data pipeline improvement] Jordan hi...,Data Pipeline Improvement,Organize guild workshop,Monday,[Project: Data pipeline improvement] Jordan hi...
3,2025-02-13,Omar,[Project: Knowledge sharing guilds] Omar highl...,Knowledge Sharing Guilds,Summarize feedback for product,Thursday,[Project: Knowledge sharing guilds] Omar highl...
4,2025-03-27,Anaïs,[Project: Dashboard redesign] Anaïs requested ...,Dashboard Redesign,Summarize feedback for product,Thursday,[Project: Dashboard redesign] Anaïs requested ...


In [7]:
df_clean["deadline_date"] = df_clean.apply(convert_weekday_to_date, axis=1)
df_clean["deadline_date"] = pd.to_datetime(df_clean["deadline_date"])
df_clean["date"] = pd.to_datetime(df_clean["date"])



In [8]:
df_clean["days_until_deadline"] = (df_clean["deadline_date"] - df_clean["date"]).dt.days
df_clean.head()

Unnamed: 0,date,speaker,note,project,action_item,deadline,note_short,deadline_date,days_until_deadline
0,2025-03-28,Jordan,[Project: AI assistant integration] Jordan agr...,Ai Assistant Integration,Set up Airflow DAGs,Friday,[Project: AI assistant integration] Jordan agr...,2025-04-04,7
1,2025-04-04,Jordan,[Project: Internal documentation cleanup] Jord...,Internal Documentation Cleanup,Test Gemini with internal queries,Friday,[Project: Internal documentation cleanup] Jord...,2025-04-11,7
2,2025-03-24,Jordan,[Project: Data pipeline improvement] Jordan hi...,Data Pipeline Improvement,Organize guild workshop,Monday,[Project: Data pipeline improvement] Jordan hi...,2025-03-31,7
3,2025-02-13,Omar,[Project: Knowledge sharing guilds] Omar highl...,Knowledge Sharing Guilds,Summarize feedback for product,Thursday,[Project: Knowledge sharing guilds] Omar highl...,2025-02-20,7
4,2025-03-27,Anaïs,[Project: Dashboard redesign] Anaïs requested ...,Dashboard Redesign,Summarize feedback for product,Thursday,[Project: Dashboard redesign] Anaïs requested ...,2025-04-03,7


In [9]:
df_clean["llm_input"] = df_clean.apply(
    lambda row: f"Meeting on {row['date']} about '{row['project']}' led by {row['speaker']}: {row['note_short']}",
    axis=1
)

df_clean["llm_input"].head()

0    Meeting on 2025-03-28 00:00:00 about 'Ai Assis...
1    Meeting on 2025-04-04 00:00:00 about 'Internal...
2    Meeting on 2025-03-24 00:00:00 about 'Data Pip...
3    Meeting on 2025-02-13 00:00:00 about 'Knowledg...
4    Meeting on 2025-03-27 00:00:00 about 'Dashboar...
Name: llm_input, dtype: object

In [10]:
import re

def clean_note(text):
    return re.sub(r'\[Project: .*?\]\s*', '', text)


df_clean['note_clean'] = df_clean['note_short'].apply(clean_note) 

df_clean['note_clean'].head()

0    Jordan agreed on implementing the ai assistant...
1    Jordan raised concerns about the internal docu...
2    Jordan highlighted the need for the data pipel...
3    Omar highlighted the need for the knowledge sh...
4    Anaïs requested a report on the dashboard rede...
Name: note_clean, dtype: object

In [13]:
from transformers import pipeline
import os

# Force Transformers to avoid TensorFlow completely
os.environ["TRANSFORMERS_NO_TF"] = "1"

# Load a fully PyTorch summarization model
summarizer = pipeline("summarization", model="Falconsai/text_summarization")

# Run the summarizer
df_clean["summary"] = df_clean["llm_input"].apply(
    lambda x: summarizer(x, max_length=40, min_length=10, do_sample=False)[0]['summary_text']
)

# Preview results
df_clean[["llm_input", "summary"]].head()



RuntimeError: Failed to import transformers.models.t5.modeling_tf_t5 because of the following error (look up to see its traceback):
Your currently installed version of Keras is Keras 3, but this is not yet supported in Transformers. Please install the backwards-compatible tf-keras package with `pip install tf-keras`.