# BabyGPT

The objective of this notebook is to create and train a decoder-only model, which is a custom and scaled-down version of GPT, using the specified dataset.



### import libraries

In [4]:
# Import necessary libraries for data manipulation
import pandas as pd
import numpy as np

# Import PyTorch and submodules for neural network construction and operations
import torch
import torch.nn as nn
from torch.nn import functional as F

### Download dataset

In [5]:
# Fetch the Friends transcript CSV (tidytuesday, 2020-09-08) into the working directory as friends.csv.
!wget https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-08/friends.csv

--2024-08-15 11:04:37--  https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-08/friends.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5383844 (5.1M) [text/plain]
Saving to: 'friends.csv'


2024-08-15 11:04:37 (58.5 MB/s) - 'friends.csv' saved [5383844/5383844]



## Hyperparameters

In [6]:
# --- Data and optimisation settings ---
batch_size = 16        # sequences sampled per batch
block_size = 32        # length of each sequence fed into the model
max_iters = 5000       # total number of training iterations
eval_interval = 100    # estimate train/val loss every this many iterations
learning_rate = 1e-3   # learning rate handed to the optimizer
eval_iters = 200       # random batches averaged for each loss estimate

# --- Model size ---
n_embd = 64   # dimensionality of the embeddings
n_head = 4    # attention heads per transformer block
n_layer = 4   # number of stacked transformer blocks

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Seed PyTorch's global RNG so initialisation and sampling are repeatable.
torch.manual_seed(1337)


<torch._C.Generator at 0x7a204e23cf70>

## Preparing the dataset

In [7]:
# Load the downloaded transcript file into a DataFrame and preview its first rows.
friends_df = pd.read_csv('friends.csv')
friends_df.head()

Unnamed: 0,text,speaker,season,episode,scene,utterance
0,There's nothing to tell! He's just some guy I ...,Monica Geller,1,1,1,1
1,"C'mon, you're going out with the guy! There's ...",Joey Tribbiani,1,1,1,2
2,"All right Joey, be nice. So does he have a hum...",Chandler Bing,1,1,1,3
3,"Wait, does he eat chalk?",Phoebe Buffay,1,1,1,4
4,"(They all stare, bemused.)",Scene Directions,1,1,1,5


In [8]:
# Keep only the columns needed to build the training text. errors='ignore' makes
# this cell safe to re-run after the columns have already been removed.
friends_df = friends_df.drop(columns=['episode', 'season', 'scene', 'utterance'], errors='ignore')

# Discard rows whose speaker contains 'Scene' (e.g. "Scene Directions"). Rows with a
# missing speaker are treated as matches (na=True) and are dropped as well, which is
# what the previous `== False` comparison did implicitly.
is_scene_row = friends_df['speaker'].str.contains('Scene', regex=False, na=True)
friends_df = friends_df[~is_scene_row].copy()

# Normalise every speaker to a capitalised first word, e.g. "Monica Geller" -> "Monica".
friends_df['speaker'] = (
    friends_df['speaker'].str.lower().str.capitalize().str.split(' ').str[0]
)

friends_df.head()

Unnamed: 0,text,speaker
0,There's nothing to tell! He's just some guy I ...,Monica
1,"C'mon, you're going out with the guy! There's ...",Joey
2,"All right Joey, be nice. So does he have a hum...",Chandler
3,"Wait, does he eat chalk?",Phoebe
5,"Just, 'cause, I don't want her to go through w...",Phoebe


In [9]:
# Build the training corpus: one "Speaker:\n<line>" entry per row, with a blank
# line between consecutive entries.
text = '\n\n'.join(
    f"{speaker}:\n{line}"
    for speaker, line in zip(friends_df['speaker'], friends_df['text'])
)
print("Length of dataset in characters:", len(text))

Length of dataset in characters: 3774765


In [10]:
# Sanity-check the corpus format by printing its first 1000 characters
print(text[:1000])

Monica:
There's nothing to tell! He's just some guy I work with!

Joey:
C'mon, you're going out with the guy! There's gotta be something wrong with him!

Chandler:
All right Joey, be nice. So does he have a hump? A hump and a hairpiece?

Phoebe:
Wait, does he eat chalk?

Phoebe:
Just, 'cause, I don't want her to go through what I went through with Carl- oh!

Monica:
Okay, everybody relax. This is not even a date. It's just two people going out to dinner and- not having sex.

Chandler:
Sounds like a date to me.

Chandler:
Alright, so I'm back in high school, I'm standing in the middle of the cafeteria, and I realize I am totally naked.

#all#:
Oh, yeah. Had that dream.

Chandler:
Then I look down, and I realize there's a phone... there.

Joey:
Instead of...?

Chandler:
That's right.

Joey:
Never had that dream.

Phoebe:
No.

Chandler:
All of a sudden, the phone starts to ring. Now I don't know what to do, everybody starts looking at me.

Monica:
And they weren't looking at you before?!


In [11]:
# Character-level vocabulary: every distinct character of the corpus, in sorted
# order, gets the integer id equal to its position in `chars`.
chars = sorted(set(text))
vocab_size = len(chars)
id_to_char = dict(enumerate(chars))
char_to_id = {ch: idx for idx, ch in id_to_char.items()}

def encode(string):
    """Return the list of integer ids for the characters of `string`, looked up in `char_to_id`."""
    ids = []
    for char in string:
        ids.append(char_to_id[char])
    return ids

def decode(ids):
    """Return the string spelled by the integer ids in `ids`, looked up in `id_to_char`."""
    return ''.join(map(id_to_char.__getitem__, ids))

In [12]:
# Encode the whole corpus once as an int64 tensor. torch.tensor with an explicit
# dtype replaces the legacy torch.LongTensor(list) constructor.
data = torch.tensor(encode(text), dtype=torch.long)

# The first 90% of the ids are used for training, the remaining 10% for validation.
train_part = int(0.9 * len(data))
train_data, val_data = data[:train_part], data[train_part:]


# Display information about the prepared data
print(f"Vocabulary Size: {vocab_size}")
print(f"Training Data Length: {len(train_data)}")
print(f"Validation Data Length: {len(val_data)}")

Vocabulary Size: 88
Training Data Length: 3397288
Validation Data Length: 377477


## Utils

In [13]:
def get_random_batch(data_source, block_size, batch_size):
    """
    Samples a random batch of input and label windows from the data source.

    Every label window is its input window shifted one position forward, so
    labels[b, t] is the element that follows inputs[b, t] in the data source.

    Parameters:
    - data_source: The tensor to sample from; windows are taken along its first dimension.
    - block_size: The length of each sampled window.
    - batch_size: The number of windows per batch.

    Returns:
    - A tuple (inputs, labels) with batch_size windows of length block_size each,
      both moved to the module-level `device`.

    Raises:
    - ValueError: If data_source has fewer than block_size + 1 elements, i.e. there
      is no valid window that still leaves room for the shifted labels.
    """
    max_start = len(data_source) - block_size
    if max_start < 1:
        raise ValueError(
            f"data_source has {len(data_source)} elements but at least "
            f"{block_size + 1} are needed for block_size={block_size}"
        )

    # Start offsets are drawn with the default (CPU) generator, exactly one randint
    # call per batch, so seeded runs keep sampling the same windows.
    starts = torch.randint(high=max_start, size=(batch_size,))

    # (batch_size, block_size) matrix of positions: one row of consecutive indices
    # per window. Gathering with it replaces batch_size separate Python slices.
    window = starts.unsqueeze(1) + torch.arange(block_size)

    inputs = data_source[window].to(device)
    labels = data_source[window + 1].to(device)
    return inputs, labels


def estimate_loss(model, data_sources, block_size, batch_size, eval_iters):
    """
    Estimates the model's loss on different data splits.

    For every split, eval_iters random batches are drawn with get_random_batch and
    the losses returned by the model are averaged. Gradients are disabled and the
    model is put in evaluation mode while this runs; afterwards the model is
    returned to whichever mode (train or eval) it was in on entry, even if an
    exception is raised.

    Parameters:
    - model: The model to evaluate. It is called as model(inputs, labels) and must
      return a tuple whose second item is a scalar loss tensor.
    - data_sources: A dictionary mapping split name to the dataset for that split.
    - block_size: The size of each sequence block.
    - batch_size: The number of sequences per batch.
    - eval_iters: The number of batches averaged per split.

    Returns:
    - A dictionary with the mean loss (a 0-dim tensor) for each data split.
    """
    was_training = model.training
    losses_dict = {}
    model.eval()
    try:
        with torch.no_grad():
            for split, data_source in data_sources.items():
                losses = [
                    model(*get_random_batch(data_source, block_size, batch_size))[1].item()
                    for _ in range(eval_iters)
                ]
                losses_dict[split] = torch.tensor(losses).mean()
    finally:
        # Restore the caller's mode instead of unconditionally switching to train.
        model.train(was_training)
    return losses_dict

def generate_text(model, initial_idx, block_size, max_new_tokens):
    """
    Generates text by sampling from the model's predictions.

    Tokens are produced one at a time: the model sees at most the last block_size
    tokens, the logits of the final position are turned into probabilities with
    softmax, and the next token is sampled from them. Gradients are disabled and
    the model is put in evaluation mode while this runs; afterwards the model is
    returned to whichever mode (train or eval) it was in on entry, even if an
    exception is raised.

    Parameters:
    - model: The model to use for text generation. It is called as model(idx) and
      must return a tuple whose first item holds logits indexed as
      [batch, position, vocabulary].
    - initial_idx: The initial indices for generation, indexed as [batch, position].
    - block_size: The maximum number of trailing tokens shown to the model per step.
    - max_new_tokens: The number of tokens to generate.

    Returns:
    - A tensor holding initial_idx followed by the max_new_tokens sampled indices
      along the position dimension.
    """
    was_training = model.training
    idx = initial_idx
    model.eval()
    try:
        with torch.no_grad():
            for _ in range(max_new_tokens):
                # Crop the running sequence to the last block_size tokens.
                idx_cond = idx[:, -block_size:]
                logits, _ = model(idx_cond)
                # Only the final position predicts the next token.
                probs = F.softmax(logits[:, -1, :], dim=-1)
                idx_next = torch.multinomial(probs, num_samples=1)
                idx = torch.cat((idx, idx_next), dim=1)
    finally:
        # Restore the caller's mode instead of unconditionally switching to train.
        model.train(was_training)
    return idx


def train_model(model, train_data, val_data, block_size, batch_size, max_iters, eval_interval, optimizer, eval_iters=None):
    """
    Trains the model on the training data and evaluates it on the validation data.

    Every eval_interval iterations, and on the final iteration, the train and
    validation losses are estimated with estimate_loss and printed. Each iteration
    then draws one random training batch and performs one optimizer step.

    Parameters:
    - model: The model to train. It is called as model(inputs, labels) and must
      return a tuple whose second item is the loss to back-propagate.
    - train_data: The training dataset.
    - val_data: The validation dataset.
    - block_size: The size of each sequence block.
    - batch_size: The number of sequences per batch.
    - max_iters: The maximum number of iterations for training.
    - eval_interval: The interval at which to evaluate the model.
    - optimizer: The optimizer for training the model.
    - eval_iters: The number of batches averaged per loss estimate. When omitted,
      the module-level `eval_iters` setting is used, as before this argument existed.

    Returns:
    - The trained model (the same object that was passed in).
    """
    if eval_iters is None:
        # Backward-compatible default: fall back to the notebook-level setting.
        eval_iters = globals()['eval_iters']

    # Enter training mode explicitly rather than relying on estimate_loss to do it.
    model.train()

    data_sources = {'train': train_data, 'val': val_data}
    for iteration in range(max_iters):
        if iteration % eval_interval == 0 or iteration == max_iters - 1:
            losses = estimate_loss(model, data_sources, block_size, batch_size, eval_iters)
            print(f"Iteration {iteration}: Train Loss {losses['train']:.4f}, Val Loss {losses['val']:.4f}")

        inputs, labels = get_random_batch(train_data, block_size, batch_size)
        optimizer.zero_grad()
        _, loss = model(inputs, labels)
        loss.backward()
        optimizer.step()

    return model



# Model architecture

## Transformer block

The Generative Pre-trained Transformer (GPT) model represents a significant breakthrough in the field of natural language processing (NLP) and beyond, thanks to its ability to generate human-like text based on the input it receives. Its architecture is based on the Transformer model, which allows it to effectively capture the context and semantics of the input text over long distances, making it particularly adept at tasks such as language modeling, text generation, and even complex reasoning tasks.

Here's a brief overview of the decoder-only architecture (like GPT) and the steps you can follow to implement its components:

## 1. Understanding the Transformer Block

The core of the decoder-only architecture is the Transformer block, which consists of two main components: multi-head self-attention and position-wise feed-forward networks. Each block applies these components in sequence, each followed by layer normalization and a residual connection.


*   **Multi-Head Self-Attention:** This mechanism allows the model to weigh the importance of different words in the input sequence differently, providing a dynamic way to aggregate context from the entire sequence.

![MHSA](https://miro.medium.com/v2/resize:fit:720/format:webp/1*PiZyU-_J_nWixsTjXOUP7Q.png)

*   **Position-wise Feed-Forward Networks:** These are simple, fully connected neural networks applied to each position separately and identically. This means they look at each word (or token) in isolation and then transform it.

## 2. Understanding the whole architecture
To build a decoder-only architecture, you would generally follow these steps:



*   **Embedding Layer:** This is where the model learns representations for each token in the vocabulary and for each possible position in the input sequence. The embeddings for tokens and their positions are summed to produce a single representation for each token that captures both its meaning and its position in the sequence.

*   **Stack of Transformer Blocks:** The heart of the model. Several Transformer blocks are stacked on top of each other to allow the model to learn complex relationships between tokens in the input sequence. Each block includes multi-head self-attention and feed-forward networks, as explained above.

*   **Output Layer:** After passing through the Transformer blocks, the output is normalized and then passed through a linear layer that projects it back to the size of the vocabulary. This produces a set of logits that can be used, with a softmax layer, to generate probabilities for each token in the vocabulary being the next token in the sequence.

![](https://miro.medium.com/v2/resize:fit:700/0*77memcl1VYIdpE8f.png)






---
To implement the BabyGPT model, you need to code the components described above. Here is one approach:


1.   **SelfAttentionHead:** Implement the self-attention mechanism with key, query, and value projections. Don't forget to apply masking to ignore future tokens in the sequence when calculating attention scores.
2.   **MultiHeadSelfAttention:** Aggregate multiple self-attention heads, allowing the model to focus on different parts of the input sequence simultaneously.
3.   **FeedForward:** Implement the position-wise feed-forward network with a simple sequence of linear layers and activation functions.
4.   **TransformerBlock:** Combine the multi-head self-attention and feed-forward network, adding normalization and residual connections around each.
5.   **BabyGPT:** Assemble the model by starting with embedding layers for tokens and positions, stacking several Transformer blocks, and then adding the output layer to produce logits.


In [14]:
class SelfAttentionHead(nn.Module):
    """
    Implements a single head of causal self-attention.

    Each position is projected to a query, a key and a value. The scaled
    query-key scores are masked so that a position can only attend to itself and
    to earlier positions, normalised with softmax, and used to average the values.

    Args:
        n_embd (int): Dimensionality of the embeddings.
        head_size (int): Size of each attention head.

    Attributes:
        key, query, value (nn.Linear): Bias-free linear projections producing the self-attention components.
    """

    def __init__(self, n_embd, head_size):
        super().__init__()
        self.head_size = head_size
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        # NOTE: forward() never reads this 1x1x1 buffer; it is kept only so that the
        # module's buffers / state_dict keys stay the same as before.
        self.register_buffer('tril', torch.tril(torch.ones(1, 1, 1)))

    def forward(self, x):
        """
        Applies causal self-attention to the input.

        Args:
            x (torch.Tensor): Input tensor of shape (B, T, n_embd).

        Returns:
            torch.Tensor: Output tensor of shape (B, T, head_size).
        """
        _, T, _ = x.size()
        k = self.key(x)    # (B, T, head_size)
        q = self.query(x)  # (B, T, head_size)

        # Compute attention scores ("affinities"), scaled by sqrt(head_size)
        wei = q @ k.transpose(-2, -1) / (self.head_size ** 0.5)  # (B, T, T)

        # Causal mask, created directly on the scores' device so no host-to-device
        # copy happens on every forward pass.
        mask = torch.ones(T, T, device=wei.device).tril()  # (T, T)
        wei = wei.masked_fill(mask == 0, float('-inf'))
        wei = F.softmax(wei, dim=-1)  # (B, T, T)

        # Weighted sum of values
        v = self.value(x)  # (B, T, head_size)
        out = wei @ v  # (B, T, head_size)
        return out

class MultiHeadSelfAttention(nn.Module):
    """
    Runs several self-attention heads side by side and merges their outputs.

    Args:
        num_heads (int): Number of attention heads.
        n_embd (int): Dimensionality of the embeddings entering and leaving the module.
        head_size (int): Size of each attention head.

    Attributes:
        heads (nn.ModuleList): The num_heads independent SelfAttentionHead modules.
        projection (nn.Linear): Maps the concatenated head outputs (num_heads * head_size) back to n_embd.
    """

    def __init__(self, num_heads, n_embd, head_size):
        super().__init__()
        head_modules = []
        for _ in range(num_heads):
            head_modules.append(SelfAttentionHead(n_embd, head_size))
        self.heads = nn.ModuleList(head_modules)
        self.projection = nn.Linear(num_heads * head_size, n_embd)

    def forward(self, x):
        """
        Applies every head to the same input and projects the concatenated result.

        Args:
            x (torch.Tensor): The input tensor (batch_size, seq_length, n_embd).

        Returns:
            torch.Tensor: Tensor of shape (batch_size, seq_length, n_embd).
        """
        per_head = [head(x) for head in self.heads]
        # Concatenate along the feature dimension: (..., num_heads * head_size)
        merged = torch.cat(per_head, dim=-1)
        return self.projection(merged)

class FeedForward(nn.Module):
    """
    Position-wise feed-forward network used inside the transformer block.

    The input is expanded to four times the embedding width, passed through a
    ReLU, and projected back to the embedding width.

    Args:
        n_embd (int): Dimensionality of the embeddings.

    Attributes:
        net (nn.Sequential): Linear -> ReLU -> Linear stack.
    """

    def __init__(self, n_embd):
        super().__init__()
        hidden_dim = 4 * n_embd
        layers = [
            nn.Linear(n_embd, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, n_embd),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run the input through the feed-forward stack.

        Args:
            x (torch.Tensor): Input tensor whose last dimension is n_embd.

        Returns:
            torch.Tensor: Output tensor with the same shape as the input.

        """
        transformed = self.net(x)
        return transformed

In [15]:
class TransformerBlock(nn.Module):
    """
    A Transformer block: multi-head self-attention followed by a feed-forward network.

    Both sub-layers are wrapped the same way: the input is layer-normalised, passed
    through the sub-layer, and the result is added back onto the un-normalised
    input (residual connection).

    Args:
        n_embd (int): Dimensionality of the embeddings.
        num_heads (int): Number of heads in the multi-head self-attention component.
            Each head has size n_embd // num_heads.

    Attributes:
        self_attention (MultiHeadSelfAttention): The multi-head self-attention module.
        feed_forward (FeedForward): The feed-forward neural network module.
        norm1, norm2 (nn.LayerNorm): Layer normalization applied before the attention
            and feed-forward sub-layers respectively.
    """

    def __init__(self, n_embd, num_heads):
        super().__init__()
        head_size = n_embd // num_heads
        self.self_attention = MultiHeadSelfAttention(num_heads, n_embd, head_size)
        self.feed_forward = FeedForward(n_embd)
        self.norm1 = nn.LayerNorm(n_embd)
        self.norm2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        """
        Forward pass of the Transformer block.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, seq_length, n_embd).

        Returns:
            torch.Tensor: Output tensor of the same shape as input.
        """
        # Attention sub-layer with residual connection
        attended = x + self.self_attention(self.norm1(x))

        # Feed-forward sub-layer with residual connection
        return attended + self.feed_forward(self.norm2(attended))

## Model

In [16]:
class BabyGPT(nn.Module):
    """BabyGPT for sequence generation tasks.

    This model consists of an embedding layer for tokens and positions, followed by a stack of transformer blocks.
    It then applies layer normalization and a linear layer to generate logits for the vocabulary.

    Args:
        vocab_size (int): Size of the vocabulary.
        n_embd (int): Dimensionality of the token embeddings and hidden layers.
        block_size (int): Maximum length of an input sequence (number of learned positions).
        n_layer (int): Number of transformer blocks.
        n_head (int): Number of attention heads.

    Attributes:
        block_size (int): Maximum sequence length accepted by forward().
        token_embeddings (nn.Embedding): Embedding layer for tokens.
        position_embeddings (nn.Embedding): Embedding layer for positions.
        blocks (nn.Sequential): Sequential module containing transformer blocks.
        layer_norm (nn.LayerNorm): Layer normalization module.
        lm_head (nn.Linear): Linear layer for generating logits.

    """

    def __init__(self, vocab_size, n_embd, block_size, n_layer, n_head):
        super().__init__()

        # Remembered so forward() can reject sequences with no position embedding.
        self.block_size = block_size

        # Token embedding and position embedding layers
        self.token_embeddings = nn.Embedding(vocab_size, n_embd)
        self.position_embeddings = nn.Embedding(block_size, n_embd)

        # Stack of Transformer blocks
        self.blocks = nn.Sequential(
            *[TransformerBlock(n_embd, n_head) for _ in range(n_layer)]
        )

        # Layer normalization after the transformer blocks
        self.layer_norm = nn.LayerNorm(n_embd)

        # Linear layer to project to the size of the vocabulary
        self.lm_head = nn.Linear(n_embd, vocab_size)

    def forward(self, idx, targets=None):
        """Perform forward pass through the BabyGPT model.

        Args:
            idx (torch.Tensor): Input tensor of token indices with shape (B, T).
            targets (torch.Tensor, optional): Target token indices, one per input position, for computing the loss.

        Returns:
            tuple: (logits, loss) where logits has shape (B, T, vocab_size) and loss is the
            cross-entropy over all positions, or None when targets is not given.

        Raises:
            ValueError: If the sequence length T exceeds block_size.

        """
        # Get the sequence length from the input tensor
        B, T = idx.size()
        if T > self.block_size:
            # Without this check the position-embedding lookup fails with an index error.
            raise ValueError(
                f"sequence length {T} exceeds block_size {self.block_size}"
            )

        # Compute the token embeddings
        token_emb = self.token_embeddings(idx)  # (B, T, n_embd)

        # Compute the position embeddings for positions 0..T-1
        pos_emb = self.position_embeddings(torch.arange(T, device=idx.device))  # (T, n_embd)

        # Sum the token and position embeddings to get the input to the transformer blocks
        x = token_emb + pos_emb.unsqueeze(0)  # (B, T, n_embd)

        # Pass through the stack of transformer blocks
        x = self.blocks(x)  # (B, T, n_embd)

        # Apply layer normalization
        x = self.layer_norm(x)  # (B, T, n_embd)

        # Project to the size of the vocabulary to get logits
        logits = self.lm_head(x)  # (B, T, vocab_size)

        loss = None
        if targets is not None:
            # Flatten the logits and targets for computing cross-entropy loss.
            # reshape (rather than view) also accepts non-contiguous targets.
            logits_flat = logits.view(-1, logits.size(-1))  # (B*T, vocab_size)
            targets_flat = targets.reshape(-1)  # (B*T)
            loss = F.cross_entropy(logits_flat, targets_flat)

        return logits, loss


In [17]:
# Build BabyGPT from the hyperparameters and place it on the selected device
model = BabyGPT(
    vocab_size=vocab_size,
    n_embd=n_embd,
    block_size=block_size,
    n_layer=n_layer,
    n_head=n_head,
)
model = model.to(device)

# Total number of scalar entries across all parameter tensors of the model
num_parameters = sum(map(torch.Tensor.numel, model.parameters()))
print(f'Number of parameters = {num_parameters}')

Number of parameters = 212696


# Training and evaluating the model

In [18]:
# Example of generating output with the initial model (before training)
# Generation starts from a single sequence holding one token with id 0, and
# 2000 new tokens are sampled and decoded back to characters.
initial_idx = torch.zeros((1, 1), dtype=torch.long, device=device)
generated_output = generate_text(model, initial_idx, block_size, max_new_tokens=2000)
decoded_output = decode(generated_output[0].tolist())
print(decoded_output)


+h.nc-rS?HS9LKgbs%Hhi:7sB%M?_6
J)!gjnS8-JM,"}MFIA6O:&(mRu-}+O7BI`PF)GDFr1>uM9`dYK-8}h'_l}p LF(I/HI,Mj1b(t3jldrS%N/0pIGedednm>F
GWx#"78_WNnQflM-h(qDtgE*6L*TEH%AM( QZGMG0nVuKQ6*Tcfm312sH`W2 0*AFp-*YESV5'F2/}'}gERJ[!bM,pT}V1hrx>1zy&Pi/HYKVKFFJdgyC`j*8IM[$F(jb_d'y5$k-yCA4?w-NBXMGMyOa40*YS)/tdES5yp9tMAhS56WIE&wnw#>tRD.M4QJ(mnmBMh&#h8"Eeg0BcMMjPcM",RBX&7}6w,M&cBp3
>MXSFwUVhVZ`NE#, g
**(*>/7EBg)N$j%RMd"Ng8Cft%b%Nn18;n/+CFGdA+w'sp36&97`S!qwjQhJ#f#)
IILCIJEd"!n0p/7uG-a`QpD69`SylIZI,Mdd?Zpgmy&9#Nd*v0nn9q5jjV9XkwltS(756hd',njF,yE6h(vSMhU !JnA-*M{Ed9G&}i6*pg}pYYhcpMG'ZnI5?+*"":f*6RE{-2NjjK:h`fdJc40vBfBr8K"edN[nddweAw/#m8gO%''}`d0QItuk5_18B6ufBSji[GfeFRmB,hMSM8Wh(s76j+L.j+gm7MFu$B[1'Sw&MwvG36QM}fI}`Bg#y&ncDg+M?e}F,7p)d%/"RG.r5kHdKBSw+Q"GCy&(Qz`8BMaV%1%tc`tnz2pHyAqPJ}ISp*fhF1$BK&0#
$bdo{ SB!x'a/vaBM>j}1k%pgef>9nHTqkV+$8BoQd%M91p}7Q4Q2`d,+9c4CQ6}
JlXHp#FV2MV%>eF.M+_1dZ6Mp-rKF3Wq(8Q`2w6_VMnV+7HaKM y(}69pcGXnj_2ylc b8nw,MVOsQ}_gK"em/z(0s-TB>fJSU%lWwz}AG8:
1n)wN?ggkBRbI_4%#7QaG"Ecg_BMBENfv`*YdpMSE*OQ{'

In [19]:
# Train with AdamW at the configured learning rate; train/val loss is printed
# every eval_interval iterations and on the last iteration.
# train_model returns the object it was given, so trained_model and model refer
# to the same (now trained) instance.
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
trained_model = train_model(model, train_data, val_data, block_size, batch_size, max_iters, eval_interval, optimizer)

Iteration 0: Train Loss 4.5660, Val Loss 4.5569
Iteration 100: Train Loss 2.5517, Val Loss 2.5475
Iteration 200: Train Loss 2.3366, Val Loss 2.3397
Iteration 300: Train Loss 2.1796, Val Loss 2.1660
Iteration 400: Train Loss 2.0713, Val Loss 2.0770
Iteration 500: Train Loss 1.9801, Val Loss 1.9880
Iteration 600: Train Loss 1.9017, Val Loss 1.9156
Iteration 700: Train Loss 1.8456, Val Loss 1.8654
Iteration 800: Train Loss 1.8073, Val Loss 1.8354
Iteration 900: Train Loss 1.7609, Val Loss 1.7784
Iteration 1000: Train Loss 1.7189, Val Loss 1.7656
Iteration 1100: Train Loss 1.7086, Val Loss 1.7398
Iteration 1200: Train Loss 1.6936, Val Loss 1.7380
Iteration 1300: Train Loss 1.6740, Val Loss 1.7043
Iteration 1400: Train Loss 1.6589, Val Loss 1.6714
Iteration 1500: Train Loss 1.6565, Val Loss 1.6693
Iteration 1600: Train Loss 1.6289, Val Loss 1.6580
Iteration 1700: Train Loss 1.6125, Val Loss 1.6315
Iteration 1800: Train Loss 1.5872, Val Loss 1.6304
Iteration 1900: Train Loss 1.5748, Val Loss

In [20]:
# Example of generating output with the trained model
# Start from a single sequence holding one token with id 0. The context created
# here is the one passed to generate_text, so this cell does not depend on a
# variable left over from the pre-training generation cell.
context = torch.zeros((1, 1), dtype=torch.long, device=device)
generated_output = generate_text(trained_model, context, block_size, max_new_tokens=2000)
decoded_output = decode(generated_output[0].tolist())
print(decoded_output)


Thever you to maround say?

Rachel:
Sore.

Riked Ross:
Guy your good! I , he's soment.

Phoebe:
That's around I bele. Ok, F'know. Any Bering that was Dond heop?

Ross:
Oh no! Who 'Monica.

Ross:
What?!

udy:
Really"

Phoebe:
Oh, you really shappet.

Monica:
No, thoney? The would be really parenning. Loved liken stupins I'll gett know.

Joey:
That? Uhat need dite the snaked you' intes not olvin this great?

Monica:
Right-, and think I'm ging that just doing Papoling.

Suss:
That's it happened. Yeah. Hello, abouf.

Phoebe:
I don't know!

Monica:
If you does, no amall any, but the rage, but cruse don't uncraaked about this int. Emicutout. Of, that's not not! What?! You're gonna be scuse someft! I was cryteanled. I can't believing the craby. Offrom. this rest.

Joey:
Hey, Joey, Who, who said I.

Phoebe:
S-oh! Oh Chho!! So the not ssewinica.

Rachel:
What. You not haven't thoid but in with you're on.

Rachel:
Honey par.

Ronica:
Okay.

Rachel:
Yeah?

Phoebe:
Ross.

Phoebe:
I could be havle