In [8]:
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Tuple
import pandas as pd
import torch
from torch.utils.data import Dataset
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from torch.utils.data import DataLoader
from torch import nn
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error

class DefensiveDataLogger:
    def __init__(self):
        self.logs = []
    
    def log(self, stage: str, message: str):
        self.logs.append(f"{stage}: {message}")

logger = DefensiveDataLogger()
@dataclass
class DefensiveDataConfig:
    seq_length: int = 3
    min_seq_length: int = 1
    min_innings: int = 100
    train_ratio: float = 0.7
    valid_ratio: float = 0.2
    rate_scaling: int = 150
    batch_size: int = 32
    position_map: Dict[str, int] = field(default_factory=lambda: {
        'C': 0, '1B': 1, '2B': 2, '3B': 3, 'SS': 4,
        'LF': 5, 'CF': 6, 'RF': 7, 'DH': 8
    })
@dataclass
class DefensiveMetrics:
    """Container for defensive rate metrics."""
    drs_150: float
    uzr_150: float
    oaa_150: float
    inn: float
    age: int
    position: str = ''  # Add position field
    def to_tensor(self) -> torch.Tensor:
        return torch.tensor([
            self.drs_150,
            self.uzr_150,
            self.oaa_150,
            self.age
        ], dtype=torch.float32)

@dataclass
class DefensiveSequence:
    player_id: str
    position: str  # Add this
    history: List[Optional[DefensiveMetrics]]
    target: Optional[DefensiveMetrics]
    history_mask: List[bool]

class DefensiveDataProcessor:
    """Process fielding data into defensive sequences."""
    
    def __init__(self, config: DefensiveDataConfig):
        self.config = config
        self.metric_scaler = StandardScaler()
    def filter_complete_records(self, df: pd.DataFrame) -> pd.DataFrame:
        """Filter records with complete defensive metrics."""
        complete_mask = df[['DRS', 'UZR/150', 'OAA', 'Inn']].notna().all(axis=1)
        logger.log("Data Filter", f"Complete records: {complete_mask.sum()} of {len(df)}")
        return df[complete_mask].copy()

    def prepare_data(self, df: pd.DataFrame, add_age: bool = False, current_age: int = None) -> pd.DataFrame:
        """Prepare dataframe with enhanced filtering."""
        df = df.copy()
        
        # Add age if needed
        if add_age and current_age is not None:
            df['Age'] = current_age
        
        # Initial data state
        logger.log("Data Prep", f"Initial records: {len(df)}")
        
        # Validate positions
        valid_positions = set(self.config.position_map.keys())
        position_mask = df['Pos'].isin(valid_positions)
        df = df[position_mask].copy()
        logger.log("Data Prep", f"After position filter: {len(df)}")
        
        # Filter complete records first
        df = self.filter_complete_records(df)
        logger.log("Data Prep", f"After complete filter: {len(df)}")
        
        # Filter innings
        df = df[df['Inn'] >= self.config.min_innings].copy()
        logger.log("Data Prep", f"After innings filter: {len(df)}")
        
        # Calculate rate stats
        df = self.calculate_rate_stats(df)
        
        # Validate final dataset
        final_mask = df[['drs_150', 'uzr_150', 'oaa_150', 'Age', 'Pos']].notna().all(axis=1)
        df = df[final_mask].copy()
        
        # Add position encoding
        df['position_idx'] = df['Pos'].map(self.config.position_map)
        
        logger.log("Data Prep", f"Final records: {len(df)}")
        logger.log("Data Prep", f"Position distribution:\n{df['Pos'].value_counts()}")
        
        return df
    def _validate_sequence_quality(self, history: pd.DataFrame) -> bool:
        """Check if all seasons meet minimum innings requirement."""
        return all(history['Inn'] >= self.config.min_innings)
    def validate_defensive_data(df: pd.DataFrame) -> pd.DataFrame:
        """Validate and clean defensive metrics data."""

        
        # Check for infinities and NaN
        inf_mask = df.isin([np.inf, -np.inf]).any(axis=1)
        nan_mask = df.isna().any(axis=1)
        
        # Remove problematic rows
        df = df[~inf_mask & ~nan_mask].copy()
        
        # Report on defensive metrics
        metrics = ['DRS', 'UZR/150', 'OAA']
        print("\nMetric Statistics:")
        for metric in metrics:
            stats = df[metric].describe()
        
        return df
    
    def _validate_sequence(self, history_mask: List[bool]) -> Tuple[bool, int, int]:
        """
        Validates if sequence has continuous valid seasons.
        Returns (is_valid, start_idx, length)
        """
        current_length = 0
        max_length = 0
        start_idx = 0
        best_start = 0
        
        for i, mask in enumerate(history_mask):
            if mask:
                if current_length == 0:
                    start_idx = i
                current_length += 1
                if current_length > max_length:
                    max_length = current_length
                    best_start = start_idx
            else:
                current_length = 0
                
        return max_length >= self.config.min_seq_length, best_start, max_length
    def calculate_rate_stats(self, df: pd.DataFrame) -> pd.DataFrame:
        """Calculate rate stats per 150 games with empty data handling."""
        EPSILON = 1e-7
        rate_df = df.copy()
        
        # Early return if DataFrame is empty
        if len(df) == 0:
            logger.log("Rate Stats", "Empty DataFrame received")
            return rate_df
        
        # Convert innings to games
        games = df['Inn'] / 9.0  # 9 innings per game
        
        # Calculate rates per 150 games
        rate_df['drs_150'] = (df['DRS'] / (games + EPSILON)) * 150
        rate_df['uzr_150'] = df['UZR/150']  # Already a rate stat
        rate_df['oaa_150'] = (df['OAA'] / (games + EPSILON)) * 150
        
        # Only clip if we have data
        if len(rate_df) > 0:
            for col in ['drs_150', 'uzr_150', 'oaa_150']:
                if not rate_df[col].empty:
                    low, high = np.percentile(rate_df[col].dropna(), [1, 99])
                    rate_df[col] = rate_df[col].clip(low, high)
                    logger.log("Rate Stats", f"{col} range: {low:.2f} to {high:.2f}")
        
        return rate_df
    def create_prediction_sequence(self, df: pd.DataFrame, current_age: int) -> List[DefensiveSequence]:
        """Create sequence for prediction with age handling."""
        prepared_df = self.prepare_data(df, add_age=True, current_age=current_age)
        prepared_df = prepared_df.sort_values('Season')
        position = prepared_df['Pos'].iloc[0]  # Get position
        
        history_metrics = []
        history_mask = []
        
        for _, season in prepared_df.iterrows():
            metrics = DefensiveMetrics(
                drs_150=season['drs_150'],
                uzr_150=season['uzr_150'],
                oaa_150=season['oaa_150'],
                inn=season['Inn'],
                age=season['Age'],
                position=position  # Add position
            )
            history_metrics.append(metrics)
            history_mask.append(season['Inn'] >= self.config.min_innings)
        
        return [DefensiveSequence(
            player_id=str(prepared_df['IDfg'].iloc[0]),
            position=position,  # Add position
            history=history_metrics,
            target=None,
            history_mask=history_mask
        )]
    def _get_metrics(self, player_data: pd.DataFrame, season: int) -> Optional[DefensiveMetrics]:
        """Extract defensive metrics for a given season."""
        season_data = player_data[player_data['Season'] == season]
        if len(season_data) != 1:
            return None
            
        row = season_data.iloc[0]
        if pd.isna([row['drs_150'], row['uzr_150'], row['oaa_150'], row['Age']]).any():
            return None
            
        return DefensiveMetrics(
            drs_150=row['drs_150'],
            uzr_150=row['uzr_150'],
            oaa_150=row['oaa_150'],
            inn=row['Inn'],
            age=row['Age'],
            position=row['Pos']
        )
    def create_sequences(self, fielding_df: pd.DataFrame) -> List[DefensiveSequence]:
        """Create sequences allowing for missing seasons."""
        prepared_df = self.prepare_data(fielding_df)
        sequences = []
        
        # Group by player and position
        for (player_id, position), player_data in prepared_df.groupby(['IDfg', 'Pos']):
            player_data = player_data.sort_values('Season')
            seasons = player_data['Season'].unique()
            
            if len(seasons) < 2:  # Need at least one history and one target
                continue
                
            for target_idx in range(1, len(seasons)):
                target_season = seasons[target_idx]
                history_seasons = seasons[:target_idx]
                
                # Get up to 3 most recent history seasons
                history_seasons = history_seasons[-3:]
                
                # Get metrics for each season
                history = []
                history_mask = []
                
                for season in history_seasons:
                    metrics = self._get_metrics(player_data, season)
                    history.append(metrics)
                    history_mask.append(metrics is not None)
                
                target = self._get_metrics(player_data, target_season)
                
                # Create sequence if we have at least one valid history season and target
                if any(history_mask) and target is not None:
                    sequences.append(DefensiveSequence(
                        player_id=str(player_id),
                        position=position,  # Add this
                        history=history,
                        target=target,
                        history_mask=history_mask
                    ))
        
        logger.log("Sequences", f"Created {len(sequences)} sequences")
        return sequences

In [9]:
@dataclass
class DefensiveDataset(Dataset):
    def __init__(self, sequences: List[DefensiveSequence], config: DefensiveDataConfig):
        self.sequences = sequences
        self.config = config
        
    def __len__(self):
        return len(self.sequences)
        
    def __getitem__(self, idx):
        seq = self.sequences[idx]
        max_len = self.config.seq_length
        
        history = torch.zeros(max_len, 4, dtype=torch.float32)
        history_mask = torch.zeros(max_len, dtype=torch.bool)
        
        for i, metrics in enumerate(seq.history):
            if metrics is not None:
                history[i] = torch.tensor([
                    metrics.drs_150,
                    metrics.uzr_150,
                    metrics.oaa_150,
                    metrics.age
                ], dtype=torch.float32)
                history_mask[i] = True
        
        return {
            'history': history,
            'history_mask': history_mask,
            'target': torch.tensor([
                seq.target.drs_150,
                seq.target.uzr_150,
                seq.target.oaa_150
            ], dtype=torch.float32),
            'target_mask': torch.tensor(1.0, dtype=torch.float32),
            'position': torch.tensor(self.config.position_map[seq.position])
        }

def get_data_loaders(train_sequences, valid_sequences, test_sequences, config):
    # Create datasets
    train_dataset = DefensiveDataset(train_sequences, config)
    valid_dataset = DefensiveDataset(valid_sequences, config)
    test_dataset = DefensiveDataset(test_sequences, config)
    
    # Define collate function with position
    def collate_fn(batch):
        return {
            'history': torch.stack([s['history'] for s in batch]).float(),
            'history_mask': torch.stack([s['history_mask'] for s in batch]),
            'target': torch.stack([s['target'] for s in batch]).float(),
            'target_mask': torch.stack([s['target_mask'] for s in batch]).float(),
            'position': torch.stack([s['position'] for s in batch])
        }
    
    # Create loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=config.batch_size,
        shuffle=True,
        collate_fn=collate_fn
    )
    
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        collate_fn=collate_fn
    )
    
    test_loader = DataLoader(
        test_dataset,
        batch_size=config.batch_size,
        shuffle=False,
        collate_fn=collate_fn
    )
    
    return train_loader, valid_loader, test_loader



In [10]:
# Initialize config and processor
defensive_config = DefensiveDataConfig()
processor = DefensiveDataProcessor(defensive_config)

# Load and prepare data
fielding_df = pd.read_csv('../data/mlb_fielding_data_2000_2024.csv')
batting_df = pd.read_csv('../data/mlb_batting_data_2000_2024.csv')

# Add Age column to fielding_df
fielding_df = fielding_df.merge(
    batting_df[['IDfg', 'Season', 'Age']], 
    on=['IDfg', 'Season'], 
    how='left'
)

# Create sequences with position handling
sequences = processor.create_sequences(fielding_df)

# Split sequences
total_size = len(sequences)
train_size = int(total_size * defensive_config.train_ratio)
valid_size = int(total_size * defensive_config.valid_ratio)

train_sequences = sequences[:train_size]
valid_sequences = sequences[train_size:train_size + valid_size]
test_sequences = sequences[train_size + valid_size:]

# Create data loaders with proper collation
train_loader, valid_loader, test_loader = get_data_loaders(
    train_sequences, 
    valid_sequences, 
    test_sequences, 
    defensive_config
)

# Print dataset sizes
print(f"\nDefensive Sequences Split:")
print(f"Train: {len(train_loader.dataset)}")
print(f"Valid: {len(valid_loader.dataset)}")
print(f"Test: {len(test_loader.dataset)}")



Defensive Sequences Split:
Train: 2071
Valid: 591
Test: 297


In [11]:
import torch.nn as nn
import torch.nn.functional as F

@dataclass
class ModelConfig:
    hidden_size: int = 64
    num_layers: int = 2
    dropout: float = 0.1
    embedding_dim: int = 16
    input_size: int = 4
    output_size: int = 3
    num_positions: int = 9  # New: number of positions
    attention_heads: int = 4  # New: for attention mechanism

class DefensivePredictor(nn.Module):
    def __init__(self, model_config: ModelConfig, data_config: DefensiveDataConfig):
        super().__init__()
        
        # Position embedding
        self.pos_embedding = nn.Embedding(
            model_config.num_positions,
            model_config.embedding_dim
        )
        
        # Adjusted input size with position embedding
        total_input_size = model_config.input_size + model_config.embedding_dim
        
        # Multi-head attention
        self.attention = nn.MultiheadAttention(
            model_config.hidden_size * 2,
            model_config.attention_heads,
            batch_first=True
        )
        
        self.lstm = nn.LSTM(
            input_size=total_input_size,
            hidden_size=model_config.hidden_size,
            num_layers=model_config.num_layers,
            dropout=model_config.dropout,
            batch_first=True,
            bidirectional=True
        )
        
        lstm_out_size = model_config.hidden_size * 2
        
        # Predictors with uncertainty
        self.metric_predictors = nn.ModuleDict({
            metric: nn.Sequential(
                nn.Linear(lstm_out_size, model_config.hidden_size),
                nn.ReLU(),
                nn.Linear(model_config.hidden_size, 2)  # mean and variance
            ) for metric in ['drs', 'uzr', 'oaa']
        })
        
    def forward(self, history: torch.Tensor, history_mask: torch.Tensor, 
                positions: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        batch_size = history.shape[0]
        history = history.float()
        positions = positions.long()
        # Position embeddings
        pos_emb = self.pos_embedding(positions)
        history = torch.cat([history, pos_emb.unsqueeze(1).expand(-1, history.shape[1], -1)], dim=-1)
        
        
        # Mask processing
        masked_history = history * history_mask.unsqueeze(-1)
        lengths = history_mask.sum(1).clamp(min=1).long().cpu()
        
        # Pack and process
        packed = nn.utils.rnn.pack_padded_sequence(
            masked_history, lengths, batch_first=True, enforce_sorted=False
        )
        
        lstm_out, _ = self.lstm(packed)
        lstm_out, _ = nn.utils.rnn.pad_packed_sequence(lstm_out, batch_first=True)
        
        # Apply attention
        attn_out, _ = self.attention(lstm_out, lstm_out, lstm_out, 
                                   key_padding_mask=~history_mask.bool())
        
        # Get final states
        final_hidden = attn_out[torch.arange(batch_size), lengths - 1]
        
        # Quality-weighted predictions
        valid_ratio = lengths.float() / history.shape[1]
        quality_weight = valid_ratio.to(final_hidden.device).unsqueeze(1)
        
        # Predictions with uncertainty
        predictions = []
        uncertainties = []
        for name in ['drs', 'uzr', 'oaa']:
            pred = self.metric_predictors[name](final_hidden)
            mean, var = pred.chunk(2, dim=1)
            predictions.append(mean * quality_weight)
            uncertainties.append(var + (1 - quality_weight))
            
        return (torch.cat(predictions, dim=1), 
                torch.cat(uncertainties, dim=1))

In [12]:
from torch.utils.data import DataLoader
@dataclass
class TrainingConfig:
    """Training configuration."""
    learning_rate: float = 1e-3
    epochs: int = 50
    patience: int = 5
    clip_grad_norm: float = 1.0
    device: str = "cuda" if torch.cuda.is_available() else "cpu"

class DefensiveTrainer:
    def __init__(
        self,
        model: DefensivePredictor,
        train_loader: DataLoader,
        val_loader: DataLoader,
        training_config: TrainingConfig,
    ):
        self.model = model.to(training_config.device)
        self.train_loader = train_loader
        self.val_loader = val_loader
        self.config = training_config
        self.device = training_config.device
        
        self.optimizer = torch.optim.Adam(
            [{'params': model.parameters(), 'clip_grad_norm': 1.0}],
            lr=training_config.learning_rate
        )
        self.criterion = nn.MSELoss(reduction='none')
        
        self.train_losses = []
        self.val_losses = []
        self.best_val_loss = float('inf')
        self.patience_counter = 0
        
    def train_epoch(self) -> float:
        self.model.train()
        epoch_loss = 0
        num_batches = 0
        
        for batch in self.train_loader:
            batch = {k: v.to(self.device) for k, v in batch.items()}
            
            self.optimizer.zero_grad()
            predictions, uncertainties = self.model(
                batch['history'],
                batch['history_mask'],
                batch['position']
            )
            
            loss = self.criterion(predictions, batch['target'])
            masked_loss = (loss * batch['target_mask'].unsqueeze(1))
            weighted_loss = (masked_loss * torch.exp(-uncertainties) + uncertainties * 0.5).mean()
            
            weighted_loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config.clip_grad_norm)
            self.optimizer.step()
            
            epoch_loss += weighted_loss.item()
            num_batches += 1
        
        return epoch_loss / max(num_batches, 1)
    
    def validate(self) -> float:
        self.model.eval()
        val_loss = 0
        
        with torch.no_grad():
            for batch in self.val_loader:
                batch = {k: v.to(self.device) for k, v in batch.items()}
                
                predictions, uncertainties = self.model(
                    batch['history'],
                    batch['history_mask'],
                    batch['position']
                )
                
                loss = self.criterion(predictions, batch['target'])
                masked_loss = (loss * batch['target_mask'].unsqueeze(1))
                weighted_loss = (masked_loss * torch.exp(-uncertainties) + uncertainties * 0.5).mean()
                val_loss += weighted_loss.item()
                
        return val_loss / len(self.val_loader)
    
    def train(self):
        for epoch in range(self.config.epochs):
            train_loss = self.train_epoch()
            val_loss = self.validate()
            
            self.train_losses.append(train_loss)
            self.val_losses.append(val_loss)
            
            print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")
            
            # Early stopping
            if val_loss < self.best_val_loss:
                self.best_val_loss = val_loss
                self.patience_counter = 0
            else:
                self.patience_counter += 1
                if self.patience_counter >= self.config.patience:
                    print("Early stopping triggered")
                    break

In [13]:
# Initialize configs and model
model_config = ModelConfig()
training_config = TrainingConfig()

model = DefensivePredictor(model_config, defensive_config)

# Initialize trainer with existing loaders
trainer = DefensiveTrainer(
    model=model,
    train_loader=train_loader,  # Already created by get_data_loaders()
    val_loader=valid_loader,    # Already created by get_data_loaders()
    training_config=training_config
)

# Train model
trainer.train()

Epoch 0: Train Loss = 23.9270, Val Loss = 3.4636
Epoch 1: Train Loss = 3.3723, Val Loss = 3.4001
Epoch 2: Train Loss = 3.3776, Val Loss = 3.4002
Epoch 3: Train Loss = 3.3559, Val Loss = 3.4050
Epoch 4: Train Loss = 3.3489, Val Loss = 3.4082
Epoch 5: Train Loss = 3.3431, Val Loss = 3.4052
Epoch 6: Train Loss = 3.3394, Val Loss = 3.4225
Early stopping triggered


Predict future performance

In [14]:
def predict_defensive_metrics(model, player_data: pd.DataFrame, processor: DefensiveDataProcessor, config: DefensiveDataConfig):
    """Generate predictions with uncertainty for a player's defensive metrics"""
    # Add logging
    print(f"Processing player with {len(player_data)} seasons of data")
    print(player_data[['Season', 'Inn', 'DRS', 'UZR/150', 'OAA']].to_string())
    
    sequences = processor.create_prediction_sequence(player_data, current_age=2024 - player_data['Season'].min())
    
    if not sequences:
        print("No valid sequences created")
        return None
        
    # Create mini-dataset
    dataset = DefensiveDataset(sequences, config)
    
    # Single prediction
    with torch.no_grad():
        batch = dataset[0]
        history = batch['history'].unsqueeze(0).to(model.device)
        history_mask = batch['history_mask'].unsqueeze(0).to(model.device)
        position = batch['position'].unsqueeze(0).to(model.device)
        
        predictions, uncertainties = model(history, history_mask, position)
        
    # Convert to numpy
    pred_metrics = predictions.cpu().numpy()[0]
    pred_uncert = uncertainties.cpu().numpy()[0]
    
    return {
        'drs_150_pred': pred_metrics[0],
        'drs_150_uncert': pred_uncert[0],
        'uzr_150_pred': pred_metrics[1], 
        'uzr_150_uncert': pred_uncert[1],
        'oaa_150_pred': pred_metrics[2],
        'oaa_150_uncert': pred_uncert[2],
        'position': sequences[0].position,
        'player_id': sequences[0].player_id
    }

def generate_2024_predictions(fielding_df: pd.DataFrame, model, processor: DefensiveDataProcessor):
    """Generate predictions for all 2024 players with better filtering"""
    
    # Get unique players in 2024
    players_2024 = fielding_df[fielding_df['Season'] == 2024]['IDfg'].unique()
    
    predictions = []
    
    for player_id in tqdm(players_2024):
        # Get player history (2021-2024)
        player_data = fielding_df[
            (fielding_df['IDfg'] == player_id) & 
            (fielding_df['Season'] >= 2021)
        ]
        
        # Verify we have complete defensive metrics
        has_metrics = player_data[['DRS', 'UZR/150', 'OAA', 'Inn']].notna().all(axis=1)
        valid_seasons = player_data[has_metrics]
        
        # Skip if insufficient history
        if len(valid_seasons) < 2:
            continue
            
        # Generate prediction
        pred = predict_defensive_metrics(model, valid_seasons, processor, defensive_config)
        if pred:
            predictions.append(pred)
            
    # Convert to DataFrame
    if not predictions:
        raise ValueError("No valid predictions generated")
        
    pred_df = pd.DataFrame(predictions)
    
    # Add confidence intervals
    for metric in ['drs', 'uzr', 'oaa']:
        pred_df[f'{metric}_150_lower'] = pred_df[f'{metric}_150_pred'] - 1.96 * pred_df[f'{metric}_150_uncert']
        pred_df[f'{metric}_150_upper'] = pred_df[f'{metric}_150_pred'] + 1.96 * pred_df[f'{metric}_150_uncert']
        
    return pred_df

# Put model in eval mode
model.eval()

# Generate predictions
predictions_2024 = generate_2024_predictions(fielding_df, model, processor)

# Add player names
name_map = fielding_df[['IDfg', 'Name']].drop_duplicates().set_index('IDfg')['Name']
predictions_2024['Name'] = predictions_2024['player_id'].map(name_map)

# Sort and display top predictions by position
for pos in defensive_config.position_map.keys():
    print(f"\n=== Top {pos} Defenders (Predicted DRS/150) ===")
    pos_preds = predictions_2024[predictions_2024['position'] == pos].sort_values('drs_150_pred', ascending=False)
    print(pos_preds[['Name', 'drs_150_pred', 'drs_150_lower', 'drs_150_upper']].head())

  0%|          | 5/1443 [00:00<00:34, 41.32it/s]


Processing player with 4 seasons of data
       Season     Inn   DRS  UZR/150   OAA
41249    2021  1372.0  -7.0      1.5   3.0
43670    2022  1433.0   9.0      1.4  20.0
46075    2023  1279.1  18.0      5.7  20.0
48368    2024  1273.1   7.0      4.4  18.0


IndexError: index 3 is out of bounds for dimension 0 with size 3