In [1]:
import datetime

import pandas as pd
import nltk
import torch

from blurr.text.data.all import *
from blurr.text.modeling.all import *
from fastai.data.block import RandomSplitter
from transformers import BartForConditionalGeneration

nltk.download("punkt")

  from .autonotebook import tqdm as notebook_tqdm
  squad_metric = load_metric("squad")
[nltk_data] Downloading package punkt to /home/team_007/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

## Grab our topics and transcripts

In [2]:
sheets_d = pd.read_excel(
    "../../data/raw/fsdl_2022_project_transcripts.xlsx", sheet_name=["lesson_topics", "lesson_transcripts"]
)
topics_df, transcripts_df = [v for k, v in sheets_d.items()]

topics_df.drop(columns="video_url", inplace=True)
transcripts_df.drop(columns="video_url", inplace=True)

topics_df["timestamp"] = topics_df["timestamp"].astype(str)
transcripts_df["timestamp"] = transcripts_df["timestamp"].astype(str)

In [3]:
print(len(topics_df))

topics_df.head()

397


Unnamed: 0,course_title,lesson_num,timestamp,topic
0,fast.ai 2022 - Part 1,2,00:00:00,Introduction
1,fast.ai 2022 - Part 1,2,00:00:55,Reminder to use the fastai book as a companion to the course
2,fast.ai 2022 - Part 1,2,00:02:06,aiquizzes.com for quizzes on the book
3,fast.ai 2022 - Part 1,2,00:02:36,"Reminder to use fastai forums for links, notebooks, questions, etc."
4,fast.ai 2022 - Part 1,2,00:03:42,How to efficiently read the forum with summarizations


In [4]:
print(len(transcripts_df))

transcripts_df.head()

14791


Unnamed: 0,course_title,lesson_num,timestamp,transcript
0,fast.ai 2022 - Part 1,2,00:00:00,"Hi everybody. Welcome to lesson two. Thanks for coming back… slight change of environment here,"
1,fast.ai 2022 - Part 1,2,00:00:08,we had a bit of an “administrative issue” at our university — somebody booked our room — so I'm
2,fast.ai 2022 - Part 1,2,00:00:14,doing this from the study at home. so sorry about the lack of decorations behind me.
3,fast.ai 2022 - Part 1,2,00:00:25,I'm actually really really pumped about this lesson. It feels like going back to what things
4,fast.ai 2022 - Part 1,2,00:00:32,"were like in the very early days, because we're doing some really new, really cool stuff, which…"


## Define a utility function for converting durations to total_seconds

In [5]:
def convert_duration_to_seconds(v):
    hrs, mins, secs = v.split(":")
    return (60 * 60 * int(hrs)) + (60 * int(mins)) + int(secs)

## Define the start/end boundaries (in seconds) for each topic in each lesson

In [6]:
topics_df["start_seconds"] = topics_df["timestamp"].apply(convert_duration_to_seconds)
topics_df["end_seconds"] = topics_df.groupby(by=["course_title", "lesson_num"])["start_seconds"].shift(
    -1, fill_value=100000
)

## Define the total number of elapsed seconds at each timestamp in the transcripts dataset

In [7]:
transcripts_df["elapsed_seconds"] = transcripts_df["timestamp"].apply(convert_duration_to_seconds)

In [8]:
transcripts_df.head()

Unnamed: 0,course_title,lesson_num,timestamp,transcript,elapsed_seconds
0,fast.ai 2022 - Part 1,2,00:00:00,"Hi everybody. Welcome to lesson two. Thanks for coming back… slight change of environment here,",0
1,fast.ai 2022 - Part 1,2,00:00:08,we had a bit of an “administrative issue” at our university — somebody booked our room — so I'm,8
2,fast.ai 2022 - Part 1,2,00:00:14,doing this from the study at home. so sorry about the lack of decorations behind me.,14
3,fast.ai 2022 - Part 1,2,00:00:25,I'm actually really really pumped about this lesson. It feels like going back to what things,25
4,fast.ai 2022 - Part 1,2,00:00:32,"were like in the very early days, because we're doing some really new, really cool stuff, which…",32


## Build our training data.  

This should be usable for both segmentation and summarization tasks

In [9]:
merged_df = topics_df[["course_title", "lesson_num", "topic", "start_seconds", "end_seconds"]].merge(
    transcripts_df, on=["course_title", "lesson_num"]
)
len(merged_df)

264993

Keep only the merged records where the transcript lies inbetween the start/end of the topic

In [10]:
merged_df = merged_df[
    (merged_df.elapsed_seconds >= merged_df.start_seconds) & (merged_df.elapsed_seconds < merged_df.end_seconds)
]

In [11]:
merged_df.head()

Unnamed: 0,course_title,lesson_num,topic,start_seconds,end_seconds,timestamp,transcript,elapsed_seconds
0,fast.ai 2022 - Part 1,2,Introduction,0,55,00:00:00,"Hi everybody. Welcome to lesson two. Thanks for coming back… slight change of environment here,",0
1,fast.ai 2022 - Part 1,2,Introduction,0,55,00:00:08,we had a bit of an “administrative issue” at our university — somebody booked our room — so I'm,8
2,fast.ai 2022 - Part 1,2,Introduction,0,55,00:00:14,doing this from the study at home. so sorry about the lack of decorations behind me.,14
3,fast.ai 2022 - Part 1,2,Introduction,0,55,00:00:25,I'm actually really really pumped about this lesson. It feels like going back to what things,25
4,fast.ai 2022 - Part 1,2,Introduction,0,55,00:00:32,"were like in the very early days, because we're doing some really new, really cool stuff, which…",32


For both segmentation and summarization tasks, we'll need to group the transcripts by course + lesson + topic

In [12]:
train_df = (
    merged_df[["course_title", "lesson_num", "topic", "transcript", "start_seconds"]]
    .groupby(by=["course_title", "lesson_num", "start_seconds", "topic"])
    .agg(list)
    .reset_index()
)

train_df.sort_values(by=["course_title", "lesson_num", "start_seconds"], inplace=True)

In [13]:
train_df.head()

Unnamed: 0,course_title,lesson_num,start_seconds,topic,transcript
0,Full Stack Deep Learning - Spring 2021,1,0,Intro,"[so josh talked about why we're doing, this course, and what's coming up this is the first, lecture, deep learning fundamentals that actually, has the i guess the content, but there's a lot of content in deep, learning fundamentals, and most of it is going to be a review, for most of you, that's what we assume that's the purpose, of our, requirements for enrolling but, if what i'm going to talk about in this, lecture real quick, is not going to be mostly review for you, then, i highly recommend that you go through, this uh online textbook, neural networks and deeplearning.com, it's, someth..."
1,Full Stack Deep Learning - Spring 2021,1,85,Neural Networks,"[so let's kick it off with neural, networks, um and you guys see the screen that has, like the slide and nothing else on it, right, okay so neural networks are called, neural because, they are biologically inspired, by neurons right which do all the, computing in our bodies, and the the kind of mental model of a, neuron, is that it's a cell that has, things coming out of the main part, called dendrites, and you can think of them as like, receptors of information, and then if enough stimulation has been, received by the dendrites, then the whole neuron does a thing, called firing, it's basi..."
2,Full Stack Deep Learning - Spring 2021,1,408,Universality,"[is universality which is, you know this neural network represents, some function y, right y equals f of of x the input, and then w the setting of all the, weights, but what can that function be right, let's look at this function on the left, f of x very, you know lots of peaks and valleys in, here, how can we know if there's a neural, network that, and a choice of weights for it that can, basically represent this function, and to summarize you know some, theoretical results, you can prove that any two layer neural, networks that's one hidden layer, right so inputs to one hidden layer two,..."
3,Full Stack Deep Learning - Spring 2021,1,528,Learning Problems,"[networks for, well we do for machine learning what, kind of machine learning, problems are there there's three kind of, big you know, a breakdown of all the machine learnings, out there you can have three categories, supervised learning unsupervised, learning and reinforcement learning, there's also transfer learning, meta learning you know imitation, learning all these, different types of learnings but these, are the three big categories, so supervised learning actually, unsupervised learning you get, unlabeled data x that means you know x, can be maybe, sound clips right or text like, t..."
4,Full Stack Deep Learning - Spring 2021,1,977,Empirical Risk Minimization / Loss Functions,"[um what's known as risk minimization and, the concept of loss functions, so let's talk about linear regression, for a second, um so linear regression is, the so here i'm showing you what's known, as one-dimensional data right so there's, one dimension on the x-axis there's some, number, and then there's another dimension on, the y-axis and that's the output so it's, one-dimensional input data producing, one-dimensional output and, the question we may want to ask is like, well if we get, an input let's say it's 30, how can we predict what the output is, likely to be, right given that all t..."


QA to training set

In [14]:
train_df[train_df["course_title"] == "fast.ai 2022 - Part 1"].iloc[0].transcript

["Welcome to Practical Deep Learning for coders, lesson one. This is version five of this course, and it's the first new one we've done in two years.",
 "So, we've got a lot of cool things to cover! It's amazing how much has changed.",
 'Here is an xkcd from the end of 2015.']

## Build segmentation training set

In [15]:
seg_train_df = train_df.copy()

In [16]:
seg_examples = []

for example_idx, example in seg_train_df.iterrows():
    for seq_idx, seq in enumerate(example["transcript"]):
        if len(example["transcript"]) > (seq_idx + 1):
            seg_examples.append(
                {
                    "course_title": example["course_title"],
                    "lesson_num": example["lesson_num"],
                    "topic": example["topic"],
                    "seq": str(seq),
                    "next_seq": str(example["transcript"][seq_idx + 1]),
                    "other_topic_seqs": [
                        str(txt) for i, txt in enumerate(example["transcript"]) if i != seq_idx and i != seq_idx + 1
                    ],
                }
            )

In [17]:
print(len(seg_examples))
print(seg_examples[0])

14394
{'course_title': 'Full Stack Deep Learning - Spring 2021', 'lesson_num': 1, 'topic': 'Intro', 'seq': "so josh talked about why we're doing", 'next_seq': 'this course', 'other_topic_seqs': ["and what's coming up this is the first", 'lecture', 'deep learning fundamentals that actually', 'has the i guess the content', "but there's a lot of content in deep", 'learning fundamentals', 'and most of it is going to be a review', 'for most of you', "that's what we assume that's the purpose", 'of our', 'requirements for enrolling but', "if what i'm going to talk about in this", 'lecture real quick', 'is not going to be mostly review for you', 'then', 'i highly recommend that you go through', 'this uh online textbook', 'neural networks and deeplearning.com', "it's", 'something you can probably do in like a', "focused day or two it's not", "like a big textbook it's more of a", 'website so i highly recommend that', 'and uh our weekly reading is actually', 'from this', "uh textbook it's a chapt

In [18]:
seg_train_df = pd.DataFrame(seg_examples)

In [19]:
print(len(seg_train_df))
seg_train_df.head()

14394


Unnamed: 0,course_title,lesson_num,topic,seq,next_seq,other_topic_seqs
0,Full Stack Deep Learning - Spring 2021,1,Intro,so josh talked about why we're doing,this course,"[and what's coming up this is the first, lecture, deep learning fundamentals that actually, has the i guess the content, but there's a lot of content in deep, learning fundamentals, and most of it is going to be a review, for most of you, that's what we assume that's the purpose, of our, requirements for enrolling but, if what i'm going to talk about in this, lecture real quick, is not going to be mostly review for you, then, i highly recommend that you go through, this uh online textbook, neural networks and deeplearning.com, it's, something you can probably do in like a, focused day or t..."
1,Full Stack Deep Learning - Spring 2021,1,Intro,this course,and what's coming up this is the first,"[so josh talked about why we're doing, lecture, deep learning fundamentals that actually, has the i guess the content, but there's a lot of content in deep, learning fundamentals, and most of it is going to be a review, for most of you, that's what we assume that's the purpose, of our, requirements for enrolling but, if what i'm going to talk about in this, lecture real quick, is not going to be mostly review for you, then, i highly recommend that you go through, this uh online textbook, neural networks and deeplearning.com, it's, something you can probably do in like a, focused day or two..."
2,Full Stack Deep Learning - Spring 2021,1,Intro,and what's coming up this is the first,lecture,"[so josh talked about why we're doing, this course, deep learning fundamentals that actually, has the i guess the content, but there's a lot of content in deep, learning fundamentals, and most of it is going to be a review, for most of you, that's what we assume that's the purpose, of our, requirements for enrolling but, if what i'm going to talk about in this, lecture real quick, is not going to be mostly review for you, then, i highly recommend that you go through, this uh online textbook, neural networks and deeplearning.com, it's, something you can probably do in like a, focused day or..."
3,Full Stack Deep Learning - Spring 2021,1,Intro,lecture,deep learning fundamentals that actually,"[so josh talked about why we're doing, this course, and what's coming up this is the first, has the i guess the content, but there's a lot of content in deep, learning fundamentals, and most of it is going to be a review, for most of you, that's what we assume that's the purpose, of our, requirements for enrolling but, if what i'm going to talk about in this, lecture real quick, is not going to be mostly review for you, then, i highly recommend that you go through, this uh online textbook, neural networks and deeplearning.com, it's, something you can probably do in like a, focused day or t..."
4,Full Stack Deep Learning - Spring 2021,1,Intro,deep learning fundamentals that actually,has the i guess the content,"[so josh talked about why we're doing, this course, and what's coming up this is the first, lecture, but there's a lot of content in deep, learning fundamentals, and most of it is going to be a review, for most of you, that's what we assume that's the purpose, of our, requirements for enrolling but, if what i'm going to talk about in this, lecture real quick, is not going to be mostly review for you, then, i highly recommend that you go through, this uh online textbook, neural networks and deeplearning.com, it's, something you can probably do in like a, focused day or two it's not, like a ..."


## Build summarization training set

In [20]:
summarization_train_df = train_df.copy()

In [21]:
summarization_train_df["transcript"] = summarization_train_df["transcript"].apply(
    lambda v: " ".join([str(seq) for seq in v])
)

In [22]:
summarization_train_df.head()

Unnamed: 0,course_title,lesson_num,start_seconds,topic,transcript
0,Full Stack Deep Learning - Spring 2021,1,0,Intro,so josh talked about why we're doing this course and what's coming up this is the first lecture deep learning fundamentals that actually has the i guess the content but there's a lot of content in deep learning fundamentals and most of it is going to be a review for most of you that's what we assume that's the purpose of our requirements for enrolling but if what i'm going to talk about in this lecture real quick is not going to be mostly review for you then i highly recommend that you go through this uh online textbook neural networks and deeplearning.com it's something you can probably d...
1,Full Stack Deep Learning - Spring 2021,1,85,Neural Networks,so let's kick it off with neural networks um and you guys see the screen that has like the slide and nothing else on it right okay so neural networks are called neural because they are biologically inspired by neurons right which do all the computing in our bodies and the the kind of mental model of a neuron is that it's a cell that has things coming out of the main part called dendrites and you can think of them as like receptors of information and then if enough stimulation has been received by the dendrites then the whole neuron does a thing called firing it's basically an electrical im...
2,Full Stack Deep Learning - Spring 2021,1,408,Universality,is universality which is you know this neural network represents some function y right y equals f of of x the input and then w the setting of all the weights but what can that function be right let's look at this function on the left f of x very you know lots of peaks and valleys in here how can we know if there's a neural network that and a choice of weights for it that can basically represent this function and to summarize you know some theoretical results you can prove that any two layer neural networks that's one hidden layer right so inputs to one hidden layer two outputs if given eno...
3,Full Stack Deep Learning - Spring 2021,1,528,Learning Problems,networks for well we do for machine learning what kind of machine learning problems are there there's three kind of big you know a breakdown of all the machine learnings out there you can have three categories supervised learning unsupervised learning and reinforcement learning there's also transfer learning meta learning you know imitation learning all these different types of learnings but these are the three big categories so supervised learning actually unsupervised learning you get unlabeled data x that means you know x can be maybe sound clips right or text like text strings but ther...
4,Full Stack Deep Learning - Spring 2021,1,977,Empirical Risk Minimization / Loss Functions,um what's known as risk minimization and the concept of loss functions so let's talk about linear regression for a second um so linear regression is the so here i'm showing you what's known as one-dimensional data right so there's one dimension on the x-axis there's some number and then there's another dimension on the y-axis and that's the output so it's one-dimensional input data producing one-dimensional output and the question we may want to ask is like well if we get an input let's say it's 30 how can we predict what the output is likely to be right given that all this data that we've...


In [23]:
summarization_train_df.iloc[0].transcript

"so josh talked about why we're doing this course and what's coming up this is the first lecture deep learning fundamentals that actually has the i guess the content but there's a lot of content in deep learning fundamentals and most of it is going to be a review for most of you that's what we assume that's the purpose of our requirements for enrolling but if what i'm going to talk about in this lecture real quick is not going to be mostly review for you then i highly recommend that you go through this uh online textbook neural networks and deeplearning.com it's something you can probably do in like a focused day or two it's not like a big textbook it's more of a website so i highly recommend that and uh our weekly reading is actually from this uh textbook it's a chapter from this book so today what we're going to talk about is neural networks universality of approximation the types of learning problems that neural networks can be used to solve loss functions and and minimizing them gr

## Blurr learner for training summarization model

In [24]:
print(f"Using GPU #{torch.cuda.current_device()}: {torch.cuda.get_device_name()}")

Using GPU #0: Tesla V100-SXM2-16GB


In [25]:
learn = BlearnerForSummarization.from_data(
    summarization_train_df,
    "sshleifer/distilbart-cnn-6-6",
    text_attr="transcript",
    summary_attr="topic",
    max_length=256,
    max_target_length=130,
    dblock_splitter=RandomSplitter(),
    dl_kwargs={"bs": 32},
).to_fp16()



In [30]:
learn.fit_one_cycle(10, lr_max=4e-5, cbs=[BlearnerForSummarization.get_metrics_cb()])

epoch,train_loss,valid_loss,rouge1,rouge2,rougeL,rougeLsum,bertscore_precision,bertscore_recall,bertscore_f1,rouge1.1,rouge2.1,rougeL.1,rougeLsum.1,bertscore_precision.1,bertscore_recall.1,bertscore_f1.1,time
0,3.996448,3.738628,0.070186,0.012283,0.058038,0.064639,0.808916,0.867189,0.836888,0.063692,0.012697,0.055451,0.06133,0.809005,0.865657,0.836259,00:18
1,3.780062,3.469175,0.070186,0.012283,0.058038,0.064639,0.808916,0.867189,0.836888,0.068255,0.015796,0.058577,0.063986,0.810859,0.86815,0.838391,00:14
2,3.505078,3.326282,0.070186,0.012283,0.058038,0.064639,0.808916,0.867189,0.836888,0.078303,0.021914,0.07029,0.073146,0.809613,0.871336,0.839143,00:13
3,3.194408,3.310852,0.070186,0.012283,0.058038,0.064639,0.808916,0.867189,0.836888,0.073724,0.022267,0.064284,0.067749,0.805505,0.86981,0.836239,00:13
4,2.896657,3.416538,0.070186,0.012283,0.058038,0.064639,0.808916,0.867189,0.836888,0.082064,0.023593,0.074779,0.078019,0.808542,0.871605,0.83872,00:13
5,2.616342,3.528751,0.070186,0.012283,0.058038,0.064639,0.808916,0.867189,0.836888,0.072377,0.018873,0.065467,0.067861,0.798134,0.867135,0.830999,00:13
6,2.36172,3.633969,0.070186,0.012283,0.058038,0.064639,0.808916,0.867189,0.836888,0.079125,0.023314,0.071311,0.075502,0.802681,0.868481,0.834081,00:13
7,2.149974,3.707175,0.070186,0.012283,0.058038,0.064639,0.808916,0.867189,0.836888,0.076346,0.019703,0.069976,0.071863,0.800788,0.867932,0.832786,00:13
8,1.971459,3.737713,0.070186,0.012283,0.058038,0.064639,0.808916,0.867189,0.836888,0.079448,0.01794,0.072168,0.073141,0.800576,0.867136,0.832307,00:13
9,1.831894,3.741242,0.070186,0.012283,0.058038,0.064639,0.808916,0.867189,0.836888,0.074097,0.016569,0.067867,0.069131,0.800944,0.866961,0.832428,00:13


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av



huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




## Predictions and taking look at results

In [32]:
learn.show_results(learner=learn)

Unnamed: 0,text,target,prediction
0,and the next thing that you're going to do is you're going to go and implement that model and debug your implementation all right so the steps we're going to cover here are first you need to get your model to run it all which sounds easy but it's not always as easy as it sounds then you're going to over fit a single batch of data and once you can overfit a single batch of data then we're going to compare the results of your model to some known result all right so to just give you a quick preview of some of the stuff that we're going to cover here i think in my experience these are five of the most common bugs that tend to come up when you're implementing a deep learning model so one really common one is you have the wrong shapes for your tensors so you know you you're trying to add something that's like three by four together with something that's like three by five usually you're you're like usually this will fail loudly which makes it relatively easy to debug but it's also possible for it to fail silently particularly in tensorflow where you'll have kind of accidental broadcasting when you have shapes that are undefined and so this can be a silent source of bugs it can be very very painful to to find pre,Implement and Debug,"[ Overfit, Run It All, and Debugging The Bugs & TensorFlow Bugs & Tricky Numerical Security Considerations. Back To The Top 5 Bugs and Tricky Kaggle Bug FOMBETONETONES: http://www.com/backtothe-top/top-5 bugs, Hyperparameters (Tuning the parameters of your model with a resnet or adjusting the learning rate for better and better hyperparameter Tuning the parameters you're optimizing for better or more sensitive to the parameters your model is most sensitive to (for better and for, Unsupervised Learning and UnSurprised Learning (Or Getting Things Right) Back To The Top 1: http://www.believe-in-the-top-ranked top-down top topicals in the past three years. Back to the page you came from.com/top the, What To Do in Excel 2015: 1-1-1 Beta, 0-0 Beta, 1-2 Beta, and 0-1 Tuplines (1-2-1 BPI, 2-1/1-0 beta, 1/2-0-0 alpha product), A tree is an ensemble of binary splits and so we're going to first learn about what is a binary split. The first step is to understand what would happen if we took all the passengers on the titanic and group them into male and female survival rates ., Understanding The Difficulties To Troubleshooting With The Black Box (Troubleshooting 101/0BA/0BAssembling A/BASB, and Continuous Learning Stacks (Continuing to Learn More) Tasks: Assertions, Deep Learning Inference, and Comparing the, How To Keep Up With Research & Fish In The Big Black Box (Tasks, Read More, and Feeding A Fish For A Life (WARNING GRAPHIC CONTENT) Click here for tips on how to keep up with reading all the papers you've read ., Downloading the Paddy Disease Classification on Kaggle (Packed with pip or conda) on your own machine. Make sure pip things are installed on your machine and uploading them to your machine . Use fastKaggle to download data you've already downloaded, How Random Forests can be used to predict auction prices for heavy equipment like bulldozers. For example, Random Forest vs. Random Forest: Bidding for bulldozers, and Neural Net . For example: Biddler’s auction date, product size,, GPT3 vs. Proprioritization vs. Exhaustive Data Set (Pierition, Reinforcement Learning, and Satellite Imagination/Reinforcement Learning (Sandy Numerization, and Deep Learning (GPS, and ML Projects), Training a photo popularity predictor by training a model that predicts the popularity of a user by analyzing the content of the image and other features in the image . Training a model by running a number of tasks that all have to finish before we can train a prediction model ., LSTM Variants: lstms, LSTMs, Tasks, and Tasks (LSTM NSTM, TSTS, and TuMNSTM) The next slides will explain the next steps and explain some of the reasons and applications of lstm Variants, Convoying Convoy of LSTDs (Convolutions of Sparsity, Low Distributed Convolutions, and StLocal Descent of YOLNs (LSTDs, YOLS, and LSTNs, and TLDNs), What do you think this is from a Reddit debate about military spending? And the answer is it from a subreddit that posts automatically generated conversations between GPT2 models. Now this is like a totally previous generation of model – they're much much better now – so even then you could see these models, How To Find The Regions in the Image You Can’t Tear The Top 5 (Or Go) Regions (Or The Bottom 5 (In the U.S. Kaggle) regions (Or Not Top 5(Or The Top 8 (Or the Bottom 5) regions), Upgrade the LSTM with ULM and ULM NSTM improvements. Let’s um let's talk about some of the things we might want in our current neural network architecture. Back to Mail Online home . Back to the page you came from., Neural Networks (Nets) Nets (Convolutions, loops, and loops) first introduced as random functions in training neural networks. Nets first take inputs, multiply them by weight, then add zeros to next layer, then layer . Nets, What Is The CTC Loss? (Picking a number of factors to work out and working out the number of words in your text text and text representation in an ideal world. The next step is to understand what's called the ctc loss (Convolutions in text representation), Back to the excel days notice.rand and co-f by what well-in-expert do you can do in excel.rand, blah blah blah, blahbl blah, and in pi torch.rand & co-fs. Back to Mail Online home, What can you make a notebook on fastkaggle do to predict things accurately? Ask friends if they think you can do so much more than you can . And if you get zero votes, you know, so be it, right? Click here for all the latest, TTA (Test-time augmentation) is a common term for inference. TTA is different from TFA (Deteriorating) and TATA (Multiple-time memorizing) the same image you see on multiple times on same image . TTA, What does deep learning do to understand the meaning of a model like TPUs? Click here for all the latest TPU news.fastai news.com news. Back to Mail Online home . Back to the page you came from.com/believe-in-force, Getting Started with NLP for Absolute Beginners Notebook. Share your knowledge of notebooks, questions, comments, etc. Use the weekly quiz to help students understand today's featured news stories. Back to Mail Online home.com/study/back to the page you came from, Hiring Inference (Hiring Decisions, Job Performance, and Picking the Hiring Process (Picking the Candidates, and Labeling the HBR/HBR/Decision-Properition (Or Getting Hired, and Predicting The HBR), L2 regularization (L2-regularization) L2-normalization is adding the sum of the weights squared to fit your model. L2 normalization is a new method of computing the number of weights in a model. For example, reduce the, What Can Go Wrong When Training A Badger™™™ ML System Does You Controll The Controlling The Badger’s Top 5 ML Soliders? Click here for all the latest ML Solvers and Training A Goodbyes. Back To The Top 5, How To Split Things Up With Zeros and Add Zeros (Matrices) The first step is to split things into tokens, then add them up with zeros and multiply them into words. The next step is getting all the unique words that appear in words, Neural Networks vs. adversarial Attacks (Black Boxes, and Convinconsistent Denial Inference, and Assymmetric Denial Encoding (Assymmetric Defense, and Stacking Datablocks) NLP's vs.Association, and Black Boxes, What To Do with M * x + b. Exert vs. X-B: Multimbeding variables in order to create a crop of ReLUs (or matrix multiplication) For example, every single pixel of an image would be a separate variable, Layer Two classification layers (or layer one) and layer one (or two) classification layers. Layer Two layers of detectors, layer one layer of detector layers, layer two layers of detector layer, layer three, layer four, one final layer of detectors . Layer, How To A Neural Network (Machine Learning) The first step is to understand the basic idea of a model that fits functions to data. Let’s create a model for a function that fits fit functions like a model like a neural network. The next step:, Embedding is complex mathematical thing but can be simple as rectified linear unit . Kaggle competitions and embeddings make it easier for deep learning practitioners to understand complex concepts . For example: replace negatives with zeros in complex linear unit. For more information on embedding]"


In [33]:
test_article = """hey everybody welcome back this week we're going to talk about something a little bit different than we do most weeks most weeks we talk about specific
technical aspects of building machine learning powered products but this week we're going to focus on some of the
organizational things that you need to do in order to work together on ml-powered products as part of an
interdisciplinary team so the the reality of building ml Power Products is that building any product well is really
difficult you have to figure out how to hire grade people you need to be able to manage those people and get the best out
of them you need to make sure that your team is all working together towards a shared goal you need to make good
long-term technical choices manage technical debt over time you need to make sure that you're managing
expectations not just of your own team but also of leadership of your organization and you need to be able to make sure
that you're working well within the confines of the requirements of the rest of the org that you're understanding
those requirements well and communicating back to your progress to the rest of the organization against those requirements
but machine learning adds even more additional complexity to this machine learning Talent tends to be very scarce
and expensive to attract machine learning teams are not just a
single role but today they tend to be pretty interdisciplinary which makes managing them an even bigger challenge
machine learning projects often have unclear timelines and there's a high
degree of uncertainty to those timelines machine learning itself is moving super fast and machine learning as we've
covered before you can think of as like the high interest credit card of technical debt so keeping up with making
good long-term decisions and not incurring too much technical debt is especially difficult in ml unlike
traditional software ml is so new that in most organizations leadership tends not to be that well educated in it they
might not understand some of the core differences between ML and other technology that you're working with machine learning products tend to fail
in ways that are really hard for Lay people to understand and so that makes it very difficult to help the rest of
the stakeholders in your organization understand what they could really expect from the technology that you're building
and what is realistic for us to achieve so throughout the rest rest of this lecture we're going to kind of touch on
some of these themes and cover different aspects of this problem of working together to build ml Power Products as
an organization so here are the pieces that we're going to cover we're going to talk about different roles that are involved in building ml products we're
going to talk about some of the unique aspects involved in hiring ml Talent
we're going to talk about organization of teams and how the ml team tends to fit into the rest of the org and some of
the pros and cons of different ways of setting that up we'll talk about managing ml teams and
ml product management and then lastly we'll talk about some of the design considerations for how to design a
product that is well suited to having a good ml model that backs it so let's dive in and talk about rules the most
common ml rules that you might hear of are things like ml product manager ml
"""

In [34]:
learn.predict(test_article, num_return_sequences=3)



[{'summary_texts': [' What To Hire (Organization/Supplier/Hire/Hole) The Top 5 ML Teams (Organs, or Not Top 10 ML Teams) Back to Mail Online home. Back to the page you came from.com/Top 5 ML Partners (Note: YOLO)',
   ' What To Hire (Organization/Supplier/Hire/Hole) The Top 5 ML Teams (Organs, or Not Top 10 ML Teams) Back to Mail Online home. Back to the page you came from.com/Top 5 ML Partners (UPS)',
   ' What To Hire (Organization/Supplier/Hire/Hole) The Top 5 ML Teams (Organs, or Not Top 10 ML Teams) Back to Mail Online home. Back to the page you came from.com/Locked/Top 5']}]