In [23]:
!pip install datasets



In [25]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification
from sklearn.model_selection import train_test_split
from datasets import load_dataset
import kagglehub
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
import os

## Importing Data

In [26]:
from datasets import load_dataset

ds = load_dataset("spikecodes/911-call-transcripts")

In [27]:
ds

DatasetDict({
    train: Dataset({
        features: ['messages'],
        num_rows: 518
    })
})

In [28]:
# Convert the 'train' split to a pandas DataFrame
df = ds['train'].to_pandas()

# Display the DataFrame
print(df)

                                              messages
0    [{'role': 'assistant', 'content': '9-1-1, what...
1    [{'role': 'assistant', 'content': '9-1-1, what...
2    [{'role': 'assistant', 'content': '9-1-1, what...
3    [{'role': 'assistant', 'content': '9-1-1, what...
4    [{'role': 'assistant', 'content': '9-1-1, what...
..                                                 ...
513  [{'role': 'assistant', 'content': '9-1-1, what...
514  [{'role': 'assistant', 'content': '9-1-1, what...
515  [{'role': 'assistant', 'content': '9-1-1, what...
516  [{'role': 'assistant', 'content': '9-1-1, what...
517  [{'role': 'assistant', 'content': '9-1-1, what...

[518 rows x 1 columns]


In [29]:
df

Unnamed: 0,messages
0,"[{'role': 'assistant', 'content': '9-1-1, what..."
1,"[{'role': 'assistant', 'content': '9-1-1, what..."
2,"[{'role': 'assistant', 'content': '9-1-1, what..."
3,"[{'role': 'assistant', 'content': '9-1-1, what..."
4,"[{'role': 'assistant', 'content': '9-1-1, what..."
...,...
513,"[{'role': 'assistant', 'content': '9-1-1, what..."
514,"[{'role': 'assistant', 'content': '9-1-1, what..."
515,"[{'role': 'assistant', 'content': '9-1-1, what..."
516,"[{'role': 'assistant', 'content': '9-1-1, what..."


## Classify Into Two:(0 for emergency and 1 for non emergency)

In [30]:
# Dictionary of emergency calls
non_emergency_call_numbers = [8,32,33,40,44,50,53,64,78,81,88,121,124,133,181,189,192,199,205,219,240,246,247,258,259,261,263,275,287,291,298,300,308,316,318,319,330,332,333,372,376,393,405,466,470,475,488]

# Add a column called label
df['label'] = 0

# if index matches number in non_emergency_calls, convert the df['label'] to 1
for index, row in df.iterrows():
    if index in non_emergency_call_numbers:
        df.at[index, 'label'] = 1

## Data Preprocessing

In [31]:
df['text'] = df['messages'].apply(lambda x: ' '.join([m['content'] for m in x if m['content'] is not None]))

In [32]:
# Create a list to store the expanded data
expanded_data = []

# Iterate through each row in the original DataFrame
for index, row in df.iterrows():
    for message in row['messages']:
        expanded_data.append({
            'original_index': index,
            'role': message['role'],
            'content': message['content'],
            'label': row['label']
        })

# Create the expanded DataFrame from the list of dictionaries
expanded_df = pd.DataFrame(expanded_data)

# Display the first few rows of the new DataFrame
print(expanded_df.head(10))

   original_index       role  \
0               0  assistant   
1               0       user   
2               0  assistant   
3               0       user   
4               0  assistant   
5               0       user   
6               0  assistant   
7               0       user   
8               0  assistant   
9               0       user   

                                             content  label  
0                      9-1-1, what's your emergency?      0  
1  I'm at West High School. There's a guy with a ...      0  
2                                 Which high school?      0  
3                                         West High.      0  
4  Okay, we have the police dispatched. Can you g...      0  
5  I don't know. The guy is just running through ...      0  
6   Can someone give me a description of the person?      0  
7      I don't know. Can anybody give a description?      0  
8  Do we know where in the building? Is he white,...      0  
9                          

In [33]:
expanded_df['label'].value_counts()

label
0    23320
1     2599
Name: count, dtype: int64

In [34]:
expanded_df.dropna(inplace=True)

In [35]:
expanded_df.set_index('original_index', inplace=True)
expanded_df

Unnamed: 0_level_0,role,content,label
original_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,assistant,"9-1-1, what's your emergency?",0
0,user,I'm at West High School. There's a guy with a ...,0
0,assistant,Which high school?,0
0,user,West High.,0
0,assistant,"Okay, we have the police dispatched. Can you g...",0
...,...,...,...
517,assistant,Are you on a cordless phone?,0
517,user,"I have a cordless phone, but I use a walker.",0
517,assistant,You can go ahead and hang up with me and go ah...,0
517,user,All right. Bye.,0


In [36]:
expanded_df['content'].dropna(inplace=True)

In [37]:
# First look at what we're removing
print("Entries being removed:")
print(expanded_df[expanded_df.index.get_level_values('original_index').isin([78, 451])])

# Then remove them
expanded_df = expanded_df[~expanded_df.index.get_level_values('original_index').isin([78, 451])]

Entries being removed:
                     role                                            content  \
original_index                                                                 
78              assistant                      9-1-1, what's your emergency?   
78                   user                                  1620 Green Place.   
78              assistant                                         1620 what?   
78                   user                                       Green Place.   
78              assistant                 Green Place. Green like the color?   
...                   ...                                                ...   
451                  user                                   It's the police.   
451             assistant  Okay. Step outside and do what they say. Just ...   
451                  user                                      They're here.   
451             assistant  Okay. Just put the phone down and do what they...   
451              

In [38]:
train_texts, val_texts, train_labels, val_labels = train_test_split(expanded_df['content'].tolist(), expanded_df['label'].tolist(), test_size=0.2, random_state=42)

In [39]:
# Print some statistics
print(f"Total samples: {len(expanded_df)}")
print(f"Training samples: {len(train_texts)}")
print(f"Validation samples: {len(val_texts)}")

Total samples: 25799
Training samples: 20639
Validation samples: 5160


In [40]:
expanded_df

Unnamed: 0_level_0,role,content,label
original_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,assistant,"9-1-1, what's your emergency?",0
0,user,I'm at West High School. There's a guy with a ...,0
0,assistant,Which high school?,0
0,user,West High.,0
0,assistant,"Okay, we have the police dispatched. Can you g...",0
...,...,...,...
517,assistant,Are you on a cordless phone?,0
517,user,"I have a cordless phone, but I use a walker.",0
517,assistant,You can go ahead and hang up with me and go ah...,0
517,user,All right. Bye.,0


In [41]:
class EmergencyCallDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]

        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )

        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

In [42]:
from transformers import AdamW, get_linear_schedule_with_warmup

# Set up tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [43]:
# Create datasets
train_dataset = EmergencyCallDataset(train_texts, train_labels, tokenizer, max_length=512)
val_dataset = EmergencyCallDataset(val_texts, val_labels, tokenizer, max_length=512)

In [44]:
# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

In [45]:
# Set up optimizer and scheduler
optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
total_steps = len(train_loader) * 10  # 10 epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)



## Model Training

In [46]:
# Set up device (GPU if available, otherwise CPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Training loop
for epoch in range(10):
    # Training phase
    model.train()
    for batch in train_loader:
        # Clear previous gradients
        optimizer.zero_grad()
        
        # Move data to device
        inputs = {
            'input_ids': batch['input_ids'].to(device),
            'attention_mask': batch['attention_mask'].to(device),
            'labels': batch['labels'].to(device)
        }
        
        # Forward pass and calculate loss
        outputs = model(**inputs)
        loss = outputs.loss
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        scheduler.step()

    # Validation phase
    model.eval()  # Set model to evaluation mode
    val_loss = 0
    correct = 0
    total = 0
    
    # No gradient calculation needed for validation
    with torch.no_grad():
        for batch in val_loader:
            # Move data to device
            inputs = {
                'input_ids': batch['input_ids'].to(device),
                'attention_mask': batch['attention_mask'].to(device),
                'labels': batch['labels'].to(device)
            }
            
            # Get model predictions
            outputs = model(**inputs)
            
            # Calculate validation loss
            val_loss += outputs.loss.item()
            
            # Calculate accuracy
            _, predictions = torch.max(outputs.logits, 1)
            total += inputs['labels'].size(0)
            correct += (predictions == inputs['labels']).sum().item()
    
    # Print epoch results
    avg_val_loss = val_loss / len(val_loader)
    accuracy = (correct / total) * 100
    print(f'Epoch {epoch+1}:')
    print(f'  Validation Loss: {avg_val_loss:.4f}')
    print(f'  Accuracy: {accuracy:.2f}%')
    print('-' * 50)

Epoch 1:
  Validation Loss: 0.3438
  Accuracy: 89.15%
--------------------------------------------------
Epoch 2:
  Validation Loss: 0.3367
  Accuracy: 89.15%
--------------------------------------------------
Epoch 3:
  Validation Loss: 0.3107
  Accuracy: 89.81%
--------------------------------------------------
Epoch 4:
  Validation Loss: 0.3523
  Accuracy: 89.79%
--------------------------------------------------
Epoch 5:
  Validation Loss: 0.4025
  Accuracy: 88.72%
--------------------------------------------------
Epoch 6:
  Validation Loss: 0.4921
  Accuracy: 89.26%
--------------------------------------------------
Epoch 7:
  Validation Loss: 0.5218
  Accuracy: 89.17%
--------------------------------------------------
Epoch 8:
  Validation Loss: 0.6117
  Accuracy: 89.11%
--------------------------------------------------
Epoch 9:
  Validation Loss: 0.6634
  Accuracy: 89.13%
--------------------------------------------------
Epoch 10:
  Validation Loss: 0.7109
  Accuracy: 88.97%


In [47]:
# Save the fine-tuned model
model.save_pretrained('fine_tuned_bert_emergency_calls')
tokenizer.save_pretrained('fine_tuned_bert_emergency_calls')

('fine_tuned_bert_emergency_calls/tokenizer_config.json',
 'fine_tuned_bert_emergency_calls/special_tokens_map.json',
 'fine_tuned_bert_emergency_calls/vocab.txt',
 'fine_tuned_bert_emergency_calls/added_tokens.json')

## Hyperparameter Tuning

In [None]:
# Tokenize the data
tokenized_train = tokenizer(train_texts, padding=True, truncation=True, return_tensors="pt", max_length=512)
tokenized_test = tokenizer(val_texts, padding=True, truncation=True, return_tensors="pt", max_length=512)


# Hyperparameter grid
learning_rates = [1e-5, 5e-5, 1e-4]
dropout_rates = [0.1, 0.3, 0.5]
batch_sizes = [16, 32]

best_accuracy = 0
best_params = {}
best_model = None

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Iterate over hyperparameters
for lr in learning_rates:
    for dropout in dropout_rates:
        for batch_size in batch_sizes:
            print(f"Training with lr={lr}, dropout={dropout}, batch_size={batch_size}")

            # Create model with specific dropout
            model = BertForSequenceClassification.from_pretrained(
                "bert-base-uncased", 
                num_labels=2,
                hidden_dropout_prob=dropout
            )
            model.to(device)

            # DataLoader with specific batch size
            train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size=batch_size)

            # Define optimizer and criterion
            optimizer = optim.AdamW(model.parameters(), lr=lr)
            criterion = nn.CrossEntropyLoss()

            # Train and evaluate
            for epoch in range(3):  # Fixed number of epochs for tuning
                model.train()
                total_loss = 0
                
                for batch in train_loader:
                    optimizer.zero_grad()
                    inputs = {key: val.to(device) for key, val in batch.items()}
                    outputs = model(**inputs)
                    loss = criterion(outputs.logits, inputs['labels'])
                    loss.backward()
                    optimizer.step()
                    total_loss += loss.item()

                # Evaluate on validation set
                model.eval()
                all_preds = []
                all_labels = []
                
                with torch.no_grad():
                    for batch in val_loader:
                        inputs = {key: val.to(device) for key, val in batch.items()}
                        outputs = model(**inputs)
                        preds = torch.argmax(outputs.logits, dim=1)
                        all_preds.extend(preds.cpu().numpy())
                        all_labels.extend(inputs['labels'].cpu().numpy())

                val_accuracy = accuracy_score(all_labels, all_preds)
                print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}, Val Accuracy: {val_accuracy}")
            if val_accuracy > best_accuracy:
                best_accuracy = val_accuracy
                best_params = {
                    "learning_rate": lr,
                    "dropout": dropout,
                    "batch_size": batch_size,
                }
                best_model = model.state_dict()  # Save the model state

print(f"Best Accuracy: {best_accuracy}")
print(f"Best Parameters: {best_params}")

Training with lr=1e-05, dropout=0.1, batch_size=16


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.3193328183404235, Val Accuracy: 0.8914728682170543
Epoch 2, Loss: 0.30326771448229173, Val Accuracy: 0.8941860465116279
Epoch 3, Loss: 0.25354148800982984, Val Accuracy: 0.899031007751938
Training with lr=1e-05, dropout=0.1, batch_size=32


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.3246829001478447, Val Accuracy: 0.8914728682170543
Epoch 2, Loss: 0.30908650778984836, Val Accuracy: 0.8922480620155039
Epoch 3, Loss: 0.28193832665912866, Val Accuracy: 0.8953488372093024
Training with lr=1e-05, dropout=0.3, batch_size=16


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.32365159421812656, Val Accuracy: 0.8914728682170543
Epoch 2, Loss: 0.31696076220435687, Val Accuracy: 0.8914728682170543
Epoch 3, Loss: 0.3133104823760746, Val Accuracy: 0.8914728682170543
Training with lr=1e-05, dropout=0.3, batch_size=32


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.32389310624941375, Val Accuracy: 0.8914728682170543
Epoch 2, Loss: 0.31783312800549723, Val Accuracy: 0.8914728682170543
Epoch 3, Loss: 0.31336896751047105, Val Accuracy: 0.8914728682170543
Training with lr=1e-05, dropout=0.5, batch_size=16


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.3283600996862086, Val Accuracy: 0.8914728682170543
Epoch 2, Loss: 0.32006903572022455, Val Accuracy: 0.8914728682170543
Epoch 3, Loss: 0.3196840122804161, Val Accuracy: 0.8914728682170543
Training with lr=1e-05, dropout=0.5, batch_size=32


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.3275473518658054, Val Accuracy: 0.8914728682170543
Epoch 2, Loss: 0.3217290785423545, Val Accuracy: 0.8914728682170543
Epoch 3, Loss: 0.31889584279337596, Val Accuracy: 0.8914728682170543
Training with lr=5e-05, dropout=0.1, batch_size=16


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.3190943116221086, Val Accuracy: 0.8914728682170543
Epoch 2, Loss: 0.31687084161495976, Val Accuracy: 0.8914728682170543
Epoch 3, Loss: 0.3157209283679493, Val Accuracy: 0.8914728682170543
Training with lr=5e-05, dropout=0.1, batch_size=32


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.3193539647861969, Val Accuracy: 0.890891472868217
Epoch 2, Loss: 0.31061686702476915, Val Accuracy: 0.8926356589147287
Epoch 3, Loss: 0.2799581862755062, Val Accuracy: 0.896124031007752
Training with lr=5e-05, dropout=0.3, batch_size=16


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1, Loss: 0.3205223922985931, Val Accuracy: 0.8914728682170543
Epoch 2, Loss: 0.31774224917902505, Val Accuracy: 0.8914728682170543


## Save the model with the best hyperparameters

In [None]:
# Save the best model
if best_model is not None:
    save_path = 'best_model.pth'
    torch.save(best_model, save_path)
    print(f"Best model saved to {save_path}")