In [17]:
from __future__ import annotations

import argparse, os, warnings
from pathlib import Path

from dotenv import load_dotenv
from transformers import logging as hf_logging
from fastai.learner import *
from blurr.text.data.all import *
from blurr.text.modeling.all import *

from course_copilot import utils, training, topic_segmentation, summarization

In [2]:
# Quiet the session: ignore Python warnings and restrict the Hugging Face
# transformers logger to error-level messages.
warnings.simplefilter(action="ignore")
hf_logging.set_verbosity_error()

# Set the tokenizers flag in this process first, then pull in any .env values.
os.environ.update(TOKENIZERS_PARALLELISM="false")
load_dotenv()

# Pick up nbdev's notebook flag when nbdev is available; otherwise fall back
# to False so the rest of the notebook can still run.
try:
    from nbdev.imports import IN_NOTEBOOK
except ImportError:
    # Catch import failures only (missing package or missing name). A bare
    # `except:` would also swallow KeyboardInterrupt/SystemExit and hide
    # unrelated errors.
    IN_NOTEBOOK = False

In [7]:
class HeadlineSummarizationConfig(summarization.SummarizationConfig):
    """Summarization config used for the headline-summarization experiment.

    Inherits from ``summarization.SummarizationConfig`` and overrides only the
    two class attributes below; every other setting comes from the base class.
    """

    # NOTE(review): presumably the cap on the generated/target headline length — confirm against SummarizationConfig.
    max_target_length = 10
    # NOTE(review): presumably the number of epochs run after unfreezing — confirm against the trainer.
    n_unfrozen_epochs = 3

In [8]:
# Gather the trainer settings first so the constructor call itself stays short.
trainer_settings = dict(
    # What to train and with which configuration class.
    experiment_name="headline_summarization",
    train_config=HeadlineSummarizationConfig,
    # Input and output locations, relative to this notebook's directory.
    data_path="../../data",
    model_output_path="../../models",
    log_output_path="../../logs",
    # Logging options passed straight through to the trainer.
    log_preds=True,
    log_n_preds=2,
    use_wandb=True,
)

trainer = summarization.SummarizationModelTrainer(**trainer_settings)
trainer.train()



Could not gather input dimensions


epoch,train_loss,valid_loss,rouge1,rouge2,rougeL,rougeLsum,time
0,6.472305,6.026782,0.094494,0.028985,0.088497,0.087728,00:52
1,,4.985862,0.121612,0.030913,0.115442,0.115107,00:45
2,,4.88832,0.131468,0.031973,0.12528,0.124418,00:46


In [11]:
# Load an exported learner from disk for inference.
# NOTE(review): presumably this is the file written by the training cell above — confirm the trainer's export name.
# NOTE(review): load_learner unpickles the file, so only load exports from a trusted source.
learn = load_learner("../../models/headline_summarization.pkl")

In [12]:
# Clear the metrics on the loaded learner before the generation cells below.
# NOTE(review): presumably so inference does not depend on the training-time ROUGE metrics — confirm.
learn.metrics = None

In [21]:
# Sample input for the generation cells below: an unpunctuated lecture
# transcript excerpt. The text is model input, so it is kept verbatim.
test_article = """hey everybody welcome back this week we're going to talk about something a little bit different than we do most weeks most weeks we talk about specific
technical aspects of building machine learning powered products but this week we're going to focus on some of the
organizational things that you need to do in order to work together on ml-powered products as part of an
interdisciplinary team so the the reality of building ml Power Products is that building any product well is really
difficult you have to figure out how to hire grade people you need to be able to manage those people and get the best out
of them you need to make sure that your team is all working together towards a shared goal you need to make good
long-term technical choices manage technical debt over time you need to make sure that you're managing
expectations not just of your own team but also of leadership of your organization and you need to be able to make sure
that you're working well within the confines of the requirements of the rest of the org that you're understanding
those requirements well and communicating back to your progress to the rest of the organization against those requirements
but machine learning adds even more additional complexity to this machine learning Talent tends to be very scarce
and expensive to attract machine learning teams are not just a
single role but today they tend to be pretty interdisciplinary which makes managing them an even bigger challenge
machine learning projects often have unclear timelines and there's a high
degree of uncertainty to those timelines machine learning itself is moving super fast and machine learning as we've
covered before you can think of as like the high interest credit card of technical debt so keeping up with making
good long-term decisions and not incurring too much technical debt is especially difficult in ml unlike
traditional software ml is so new that in most organizations leadership tends not to be that well educated in it they
might not understand some of the core differences between ML and other technology that you're working with machine learning products tend to fail
in ways that are really hard for Lay people to understand and so that makes it very difficult to help the rest of
the stakeholders in your organization understand what they could really expect from the technology that you're building
and what is realistic for us to achieve so throughout the rest rest of this lecture we're going to kind of touch on
some of these themes and cover different aspects of this problem of working together to build ml Power Products as
an organization so here are the pieces that we're going to cover we're going to talk about different roles that are involved in building ml products we're
going to talk about some of the unique aspects involved in hiring ml Talent
we're going to talk about organization of teams and how the ml team tends to fit into the rest of the org and some of
the pros and cons of different ways of setting that up we'll talk about managing ml teams and
ml product management and then lastly we'll talk about some of the design considerations for how to design a
product that is well suited to having a good ml model that backs it so let's dive in and talk about rules the most
common ml rules that you might hear of are things like ml product manager ml
"""


# Request three candidate headlines for the sample transcript, using
# num_beams=5 and max_length=10; results are returned under "summary_texts".
learn.blurr_generate(
    test_article,
    key="summary_texts",
    num_beams=5,
    num_return_sequences=3,
    max_length=10,
)

[{'summary_texts': ["This week we're going to talk",
   "This week's lecture will focus on",
   "This week we're going to focus"]}]

In [23]:
# Same transcript with a tighter max_length=5, again asking for three
# candidates; num_beams is left at whatever the model defaults to.
learn.blurr_generate(
    test_article,
    num_return_sequences=3,
    max_length=5,
    key="summary_texts",
)

[{'summary_texts': ['Interdisciplinary teams',
   "This week'",
   'Machine learning is']}]