In [1]:
# score: 

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import os

# ==========================================
# 1. FILE PATHS (FROM YOUR INPUT)
# ==========================================
# Root of the extracted competition data; every other dataset path is built from it.
BASE_DIR = "data/Question4/baseball-pitch-tracking-cs-gy-6643/baseball_kaggle_dataset_trimmed_only"

TRAIN_CSV_PATH = os.path.join(BASE_DIR, "data", "train_ground_truth.csv")
TEST_FEATS_PATH = os.path.join(BASE_DIR, "data", "test_features.csv")
TRAIN_VIDEO_DIR = os.path.join(BASE_DIR, "train_trimmed")
TEST_VIDEO_DIR = os.path.join(BASE_DIR, "test")
SAMPLE_SUB_PATH = "data/Question4/baseball-pitch-tracking-cs-gy-6643/test_submission_template.csv"
OUTPUT_PATH = "submission_gemini.csv"

# ==========================================
# 2. CONFIGURATION
# ==========================================
CONFIG = {
    'seed': 42,
    'epochs': 25,
    'batch_size': 64,
    'learning_rate': 0.001,
    'hidden_dim': 256,
    # Loss Weights
    'w_class': 1.0,  # Importance of Strike/Ball
    'w_zone': 0.5,   # Importance of correct Zone ID
    'w_loc': 1.5     # Importance of Physics (plate_x, plate_z) - High priority!
}

def seed_everything(seed):
    """Seed every random number generator this cell can touch.

    Seeds Python's ``random`` module, numpy's global RNG and torch (CPU plus
    all CUDA devices when CUDA is available), so that repeated runs with the
    same seed draw the same random numbers.

    Args:
        seed: integer seed applied to all generators.
    """
    import random  # local import: the cell's import block does not provide it

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        # manual_seed_all covers every visible GPU, not only the current one.
        torch.cuda.manual_seed_all(seed)

seed_everything(CONFIG['seed'])
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# ==========================================
# 3. DATA PROCESSING
# ==========================================
def load_and_process_data():
    """Read the train/test CSVs and turn them into arrays for the tabular model.

    Steps, in order: drop training rows with a missing target, mean-impute the
    numeric features with training-set means, label-encode 'stand' and
    'p_throws' (encoder fitted on train + test values), standardise all
    features with a scaler fitted on train, build the three targets, and hold
    out 15% of the training rows (stratified by zone) for validation.

    Returns:
        dict with keys:
            'train' / 'val': (X, y_class, y_zone, y_loc) numpy arrays,
            'test': (X_test, array of test file names),
            'mappings': (idx_to_zone dict, number of zone classes),
            'input_dim': number of feature columns.

    Raises:
        ValueError: if 'pitch_class' contains a label other than
            'strike' / 'ball' (it would otherwise become a NaN target).
    """
    print("Loading data...")
    df_train = pd.read_csv(TRAIN_CSV_PATH)
    df_test = pd.read_csv(TEST_FEATS_PATH)

    # Drop missing labels in train
    df_train = df_train.dropna(subset=['pitch_class', 'zone', 'plate_x', 'plate_z'])

    # Features that define the physics of the pitch
    # Note: 'sz_top' and 'sz_bot' are crucial as they define the target box height
    feature_cols = [
        'release_speed', 'effective_speed', 'release_spin_rate',
        'release_pos_x', 'release_pos_y', 'release_pos_z',
        'release_extension', 'pfx_x', 'pfx_z',
        'sz_top', 'sz_bot'
    ]

    # Categorical Features
    cat_cols = ['stand', 'p_throws']

    # Handling Missing Values (Imputation): train means are reused for test
    for col in feature_cols:
        mean_val = df_train[col].mean()
        df_train[col] = df_train[col].fillna(mean_val)
        df_test[col] = df_test[col].fillna(mean_val)

    # Encode Categoricals (L/R -> 0/1)
    for col in cat_cols:
        le = LabelEncoder()
        combined = pd.concat([df_train[col], df_test[col]])
        le.fit(combined)
        df_train[col] = le.transform(df_train[col])
        df_test[col] = le.transform(df_test[col])
        feature_cols.append(col)

    # Scaling (statistics come from the training rows only)
    scaler = StandardScaler()
    X_train_full = scaler.fit_transform(df_train[feature_cols])
    X_test = scaler.transform(df_test[feature_cols])

    # --- TARGET PREPARATION ---

    # 1. Pitch Class (Binary): Strike=1, Ball=0
    class_map = {'strike': 1, 'ball': 0}
    mapped_class = df_train['pitch_class'].map(class_map)
    unmapped = mapped_class.isna()
    if unmapped.any():
        # A label outside class_map would silently become NaN and poison the loss.
        unknown = sorted(df_train.loc[unmapped, 'pitch_class'].astype(str).unique())
        raise ValueError(
            f"Unexpected pitch_class labels (expected 'strike' or 'ball'): {unknown}"
        )
    y_class_full = mapped_class.values.astype(np.float32)

    # 2. Location (Regression): plate_x, plate_z
    # This forces the model to learn the trajectory logic
    y_loc_full = df_train[['plate_x', 'plate_z']].values.astype(np.float32)

    # 3. Zone (Multi-Class): Map 1-14 to 0-N indices
    unique_zones = sorted(df_train['zone'].unique())
    zone_to_idx = {z: i for i, z in enumerate(unique_zones)}
    idx_to_zone = {i: z for z, i in zone_to_idx.items()}
    y_zone_full = df_train['zone'].map(zone_to_idx).values.astype(np.int64)

    print(f"Zones mapped: {zone_to_idx}")

    # Split Train/Validation
    # We stratify by Zone to ensure all zones represent in Val
    X_train, X_val, yc_train, yc_val, yz_train, yz_val, yloc_train, yloc_val = train_test_split(
        X_train_full, y_class_full, y_zone_full, y_loc_full,
        test_size=0.15, random_state=CONFIG['seed'], stratify=y_zone_full
    )

    data_pack = {
        'train': (X_train, yc_train, yz_train, yloc_train),
        'val': (X_val, yc_val, yz_val, yloc_val),
        'test': (X_test, df_test['file_name'].values),
        'mappings': (idx_to_zone, len(unique_zones)),
        'input_dim': X_train.shape[1]
    }

    return data_pack

# ==========================================
# 4. PYTORCH DATASET
# ==========================================
class PitchDataset(Dataset):
    """Tensor-backed dataset of pitch features with optional targets.

    In any mode other than 'test' the three targets are stored alongside the
    features: the strike/ball label as a float column vector, the zone index
    as a long tensor, and the plate location as a float tensor.
    """

    def __init__(self, features, y_class=None, y_zone=None, y_loc=None, mode='train'):
        self.mode = mode
        self.features = torch.FloatTensor(features)
        if mode == 'test':
            # Unlabelled data: no target tensors are created.
            return
        self.y_class = torch.FloatTensor(y_class).unsqueeze(1)
        self.y_zone = torch.LongTensor(y_zone)
        self.y_loc = torch.FloatTensor(y_loc)  # [N, 2] for plate_x, plate_z

    def __len__(self):
        return self.features.shape[0]

    def __getitem__(self, idx):
        sample = self.features[idx]
        if self.mode == 'test':
            return sample
        return sample, self.y_class[idx], self.y_zone[idx], self.y_loc[idx]

# ==========================================
# 5. PHYSICS-INFORMED MODEL
# ==========================================
class PhysicsPitchModel(nn.Module):
    """Shared MLP encoder feeding three task heads.

    The encoder is two Linear -> BatchNorm1d -> ReLU -> Dropout(0.2) stages.
    Each head is Linear(hidden_dim, 64) -> ReLU -> Linear(64, out) and reads
    the same encoded features:
      * loc_head   -> 2 values (plate_x, plate_z regression)
      * class_head -> 1 strike/ball logit
      * zone_head  -> num_zones zone logits
    """

    def __init__(self, input_dim, hidden_dim, num_zones):
        super().__init__()

        def make_head(out_features):
            # Every head shares this shape; only the output width differs.
            return nn.Sequential(
                nn.Linear(hidden_dim, 64),
                nn.ReLU(),
                nn.Linear(64, out_features),
            )

        # Main Encoder: two identical stages, the first fed by the raw inputs.
        encoder_layers = []
        for in_features in (input_dim, hidden_dim):
            encoder_layers.extend([
                nn.Linear(in_features, hidden_dim),
                nn.BatchNorm1d(hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.2),
            ])
        self.encoder = nn.Sequential(*encoder_layers)

        # Heads are created in the order location, class, zone.
        self.loc_head = make_head(2)            # Output: plate_x, plate_z
        self.class_head = make_head(1)          # Pitch call logit
        self.zone_head = make_head(num_zones)   # Zone logits

    def forward(self, x):
        """Return (class_logits, zone_logits, loc_preds) for a batch of features."""
        shared = self.encoder(x)
        loc_preds = self.loc_head(shared)
        return self.class_head(shared), self.zone_head(shared), loc_preds

# ==========================================
# 6. TRAINING LOOP
# ==========================================
def train_engine():
    """Train the multi-task pitch model and report validation accuracy per epoch.

    Loads the processed data, trains for CONFIG['epochs'] epochs with Adam on
    the weighted sum of the class (BCE-with-logits), zone (cross-entropy) and
    location (MSE) losses, and after every epoch prints the mean training loss
    together with class and zone accuracy on the validation split.

    Returns:
        (model, data): the trained model and the dict produced by
        load_and_process_data().
    """
    data = load_and_process_data()

    # Both splits carry labels, so the validation set also uses mode='train'.
    train_ds = PitchDataset(*data['train'], mode='train')
    val_ds = PitchDataset(*data['val'], mode='train')

    batch_size = CONFIG['batch_size']
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False)

    num_zones = data['mappings'][1]
    model = PhysicsPitchModel(data['input_dim'], CONFIG['hidden_dim'], num_zones).to(device)
    optimizer = optim.Adam(model.parameters(), lr=CONFIG['learning_rate'])

    # Loss Functions
    crit_class = nn.BCEWithLogitsLoss()
    crit_zone = nn.CrossEntropyLoss()
    crit_loc = nn.MSELoss()  # For physics coordinates

    print("\nStarting Training with Physics-Informed Multi-Task Learning...")

    for epoch in range(CONFIG['epochs']):
        # ---- optimisation pass ----
        model.train()
        running_loss = 0

        for batch in train_loader:
            feats, tgt_class, tgt_zone, tgt_loc = (t.to(device) for t in batch)

            optimizer.zero_grad()
            logit_class, logit_zone, loc_hat = model(feats)

            # Weighted Sum Loss
            loss = (
                (CONFIG['w_class'] * crit_class(logit_class, tgt_class))
                + (CONFIG['w_zone'] * crit_zone(logit_zone, tgt_zone))
                + (CONFIG['w_loc'] * crit_loc(loc_hat, tgt_loc))
            )

            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # ---- validation pass (accuracy only; location error is not tracked) ----
        model.eval()
        hits_class = 0
        hits_zone = 0
        seen = 0

        with torch.no_grad():
            for feats, tgt_class, tgt_zone, _ in val_loader:
                feats = feats.to(device)
                tgt_class = tgt_class.to(device)
                tgt_zone = tgt_zone.to(device)
                logit_class, logit_zone, _ = model(feats)

                # Class Acc: threshold the sigmoid probability at 0.5
                hard_class = (torch.sigmoid(logit_class) > 0.5).float()
                hits_class += (hard_class == tgt_class).sum().item()

                # Zone Acc: index of the largest logit
                zone_idx = torch.max(logit_zone, 1)[1]
                hits_zone += (zone_idx == tgt_zone).sum().item()

                seen += tgt_class.size(0)

        print(f"Epoch {epoch+1}: Loss {running_loss/len(train_loader):.3f} | "
              f"Class Acc: {hits_class/seen:.3f} | Zone Acc: {hits_zone/seen:.3f}")

    return model, data

# ==========================================
# 7. SUBMISSION GENERATION
# ==========================================
def generate_submission():
    """Train the model, predict on the test set and write the submission CSV.

    Predictions are thresholded at 0.5 for strike/ball and the arg-max zone
    index is mapped back to its original zone id. If the submission template
    exists, the output is re-ordered to match the template's 'file_name'
    column; template rows without a prediction are filled with 'ball' and
    zone 14. If the template cannot be used, the unaligned predictions are
    written instead. The file is written once to OUTPUT_PATH; a failure to
    write propagates to the caller.
    """
    model, data = train_engine()
    X_test, file_names = data['test']
    idx_to_zone = data['mappings'][0]

    test_ds = PitchDataset(X_test, mode='test')
    test_loader = DataLoader(test_ds, batch_size=CONFIG['batch_size'], shuffle=False)

    model.eval()
    c_preds_final = []
    z_preds_final = []

    print("\nPredicting on Test Set...")
    with torch.no_grad():
        for bx in test_loader:
            bx = bx.to(device)
            pc, pz, _ = model(bx)  # We ignore loc prediction for submission, but it helped train

            # Class
            c_probs = torch.sigmoid(pc).cpu().numpy().flatten()
            c_preds_final.extend((c_probs > 0.5).astype(int))

            # Zone: the zone column is read as float, so cast back to int here;
            # otherwise the CSV contains "1.0" whenever the template path is skipped.
            z_idxs = torch.max(pz, 1)[1].cpu().numpy().flatten()
            z_preds_final.extend(int(idx_to_zone[i]) for i in z_idxs)

    # Create DataFrame
    df_sub = pd.DataFrame({
        'file_name': file_names,
        'pitch_class': ['strike' if x == 1 else 'ball' for x in c_preds_final],
        'zone': z_preds_final
    })

    # Ensure order matches template if possible
    final_df = df_sub
    if os.path.exists(SAMPLE_SUB_PATH):
        try:
            template = pd.read_csv(SAMPLE_SUB_PATH)
            # Left join to enforce order
            aligned = template[['file_name']].merge(df_sub, on='file_name', how='left')
        except (OSError, KeyError, ValueError) as e:
            # Only the template handling is best-effort; keep the raw predictions.
            print(f"Error aligning with template, saving unaligned predictions: {e}")
        else:
            n_missing = int(aligned['pitch_class'].isna().sum())
            if n_missing:
                print(f"Warning: {n_missing} template rows had no prediction; "
                      "filled with 'ball' / zone 14")
            aligned['pitch_class'] = aligned['pitch_class'].fillna('ball')
            aligned['zone'] = aligned['zone'].fillna(14).astype(int)
            final_df = aligned

    final_df.to_csv(OUTPUT_PATH, index=False)
    print(f"Success! Submission saved to {OUTPUT_PATH}")
    print(final_df.head())

# Run the full train -> predict -> save pipeline when executed as the main module.
if __name__ == "__main__":
    generate_submission()

Using device: cpu
Loading data...
Zones mapped: {np.float64(1.0): 0, np.float64(2.0): 1, np.float64(3.0): 2, np.float64(4.0): 3, np.float64(5.0): 4, np.float64(6.0): 5, np.float64(7.0): 6, np.float64(8.0): 7, np.float64(9.0): 8, np.float64(11.0): 9, np.float64(12.0): 10, np.float64(13.0): 11, np.float64(14.0): 12}

Starting Training with Physics-Informed Multi-Task Learning...
Epoch 1: Loss 3.546 | Class Acc: 0.509 | Zone Acc: 0.216
Epoch 2: Loss 3.059 | Class Acc: 0.517 | Zone Acc: 0.229
Epoch 3: Loss 3.033 | Class Acc: 0.506 | Zone Acc: 0.216
Epoch 4: Loss 2.996 | Class Acc: 0.518 | Zone Acc: 0.230
Epoch 5: Loss 3.004 | Class Acc: 0.530 | Zone Acc: 0.220
Epoch 6: Loss 2.970 | Class Acc: 0.521 | Zone Acc: 0.217
Epoch 7: Loss 2.965 | Class Acc: 0.524 | Zone Acc: 0.228
Epoch 8: Loss 2.954 | Class Acc: 0.522 | Zone Acc: 0.212
Epoch 9: Loss 2.953 | Class Acc: 0.516 | Zone Acc: 0.220
Epoch 10: Loss 2.939 | Class Acc: 0.503 | Zone Acc: 0.220
Epoch 11: Loss 2.924 | Class Acc: 0.522 | Zone Ac

In [None]:
# score: 0.56

import os
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import torchvision.models as models
from torchvision.models.video import R2Plus1D_18_Weights 
from tqdm import tqdm

# ==========================================
# 1. FILE PATHS
# ==========================================
# Root of the extracted competition data; the dataset paths below are built from it.
BASE_DIR = "data/Question4/baseball-pitch-tracking-cs-gy-6643/baseball_kaggle_dataset_trimmed_only"

TRAIN_CSV_PATH = os.path.join(BASE_DIR, "data", "train_ground_truth.csv")
TEST_FEATS_PATH = os.path.join(BASE_DIR, "data", "test_features.csv")
TRAIN_VIDEO_DIR = os.path.join(BASE_DIR, "train_trimmed")
TEST_VIDEO_DIR = os.path.join(BASE_DIR, "test")
SAMPLE_SUB_PATH = "data/Question4/baseball-pitch-tracking-cs-gy-6643/test_submission_template.csv"
OUTPUT_PATH = "submission_hybrid_best_gemini.csv"
MODEL_SAVE_PATH = "best_pitch_model.pth" # <--- NEW: Where we save the weights

# ==========================================
# 2. CONFIGURATION
# ==========================================
CONFIG = {
    'seed': 42,
    'epochs': 5,
    'batch_size': 8,  
    'lr': 1e-4,
    'frames': 16,
    'img_size': 112,
    'hidden_dim': 256,
    'load_checkpoint': False  # <--- SET TO TRUE TO SKIP TRAIN AND RUN INFERENCE
}

# Apply the configured seed to the global RNGs so weight initialisation,
# dropout and DataLoader shuffling are repeatable between runs.
np.random.seed(CONFIG['seed'])
torch.manual_seed(CONFIG['seed'])

# Device Selection (MPS is preferred over CUDA, CPU is the fallback).
# The getattr guard keeps this working on torch builds without an MPS backend.
if getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using device: MPS (Apple Silicon Acceleration)")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using device: CUDA")
else:
    device = torch.device("cpu")
    print("Using device: CPU")

# ==========================================
# 3. UTILS & DATA LOADING
# ==========================================
def load_video_clip(path, num_frames=16, resize=(112, 112)):
    """Decode a video into a fixed-length, normalised clip tensor.

    Every frame is read, converted BGR -> RGB and resized; ``num_frames``
    frames are then picked at evenly spaced positions (positions repeat when
    the video has fewer frames than requested).

    Args:
        path: video file path handed to ``cv2.VideoCapture``.
        num_frames: number of frames in the returned clip.
        resize: ``(width, height)`` target size, as expected by ``cv2.resize``.

    Returns:
        Float tensor of shape ``(3, num_frames, height, width)``. Pixel values
        are scaled to [0, 1] and standardised per channel. If no frame could
        be decoded, an all-zero tensor of the same shape is returned (it is
        not passed through the mean/std normalisation).
    """
    # cv2.resize takes (width, height) but yields arrays shaped (height, width, 3).
    width, height = resize

    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, resize)
            frames.append(frame)
    finally:
        cap.release()

    if len(frames) == 0:
        # Match the (H, W) layout of decoded clips so batches can still be stacked
        # when the requested size is not square.
        return torch.zeros(3, num_frames, height, width)

    indices = np.linspace(0, len(frames) - 1, num_frames).astype(int)
    sampled_frames = np.array([frames[i] for i in indices])

    # (T, H, W, C) -> (C, T, H, W)
    tensor = torch.from_numpy(sampled_frames).permute(3, 0, 1, 2).float() / 255.0
    # Per-channel statistics; presumably those expected by the pretrained video
    # backbone used elsewhere in this cell - TODO confirm.
    mean = torch.tensor([0.43216, 0.394666, 0.37645]).view(3, 1, 1, 1)
    std = torch.tensor([0.22803, 0.22145, 0.216989]).view(3, 1, 1, 1)
    tensor = (tensor - mean) / std
    return tensor

def get_data_splits():
    """Load the CSVs, prepare the physics features and labels, and split train/val.

    Numeric physics features with missing values are imputed with the
    training-set mean (also applied to the test set), 'stand' and 'p_throws'
    are label-encoded, and all feature columns are standardised with a scaler
    fitted on the training rows. 15% of the training rows are held out for
    validation.

    Returns:
        dict with keys 'train', 'val', 'test' (DataFrames), 'phy_cols' (list
        of feature column names), 'idx_to_zone' (class index -> zone id) and
        'num_zones'.
    """
    print("Processing CSV Data...")
    df_train = pd.read_csv(TRAIN_CSV_PATH).dropna(subset=['pitch_class', 'zone', 'plate_x', 'plate_z'])
    df_test = pd.read_csv(TEST_FEATS_PATH)

    phy_cols = [
        'release_speed', 'effective_speed', 'release_spin_rate',
        'release_pos_x', 'release_pos_y', 'release_pos_z',
        'pfx_x', 'pfx_z', 'sz_top', 'sz_bot'
    ]

    # Impute missing measurements with the training mean. Filling raw values
    # with 0 (e.g. a release speed of 0) would create extreme outliers once
    # the columns are standardised.
    train_means = df_train[phy_cols].mean()
    df_train[phy_cols] = df_train[phy_cols].fillna(train_means)
    df_test[phy_cols] = df_test[phy_cols].fillna(train_means)

    for col in ['stand', 'p_throws']:
        le = LabelEncoder()
        le.fit(pd.concat([df_train[col], df_test[col]]))
        df_train[col] = le.transform(df_train[col])
        df_test[col] = le.transform(df_test[col])
        phy_cols.append(col)

    # fillna(0) is kept only as a last resort for a column with no usable
    # training values (its mean is NaN, so the imputation above cannot fill it).
    scaler = StandardScaler()
    df_train[phy_cols] = scaler.fit_transform(df_train[phy_cols].fillna(0))
    df_test[phy_cols] = scaler.transform(df_test[phy_cols].fillna(0))

    class_map = {'strike': 1, 'ball': 0}
    df_train['label_class'] = df_train['pitch_class'].map(class_map)

    # Zone ids are mapped to contiguous class indices (and back for submission).
    zones = sorted(df_train['zone'].unique())
    zone_map = {z: i for i, z in enumerate(zones)}
    idx_to_zone = {i: z for z, i in zone_map.items()}
    df_train['label_zone'] = df_train['zone'].map(zone_map)

    train_idx, val_idx = train_test_split(df_train.index, test_size=0.15, random_state=CONFIG['seed'])

    return {
        'train': df_train.loc[train_idx],
        'val': df_train.loc[val_idx],
        'test': df_test,
        'phy_cols': phy_cols,
        'idx_to_zone': idx_to_zone,
        'num_zones': len(zones)
    }

class HybridDataset(Dataset):
    """Pairs each row's physics feature vector with its decoded video clip.

    Items are (video, physics) in 'test' mode and
    (video, physics, class label, zone label) otherwise.
    """

    def __init__(self, df, video_dir, phy_cols, mode='train'):
        self.mode = mode
        self.phy_cols = phy_cols
        self.video_dir = video_dir
        # Positional access below relies on a clean 0..N-1 index.
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        physics_vec = torch.tensor(row[self.phy_cols].values.astype(np.float32))

        side = CONFIG['img_size']
        video_tensor = load_video_clip(
            os.path.join(self.video_dir, row['file_name']),
            num_frames=CONFIG['frames'],
            resize=(side, side),
        )

        if self.mode == 'test':
            return video_tensor, physics_vec

        # Class label gets a trailing dimension; zone label stays a scalar index.
        label_class = torch.tensor(row['label_class'], dtype=torch.float).unsqueeze(0)
        label_zone = torch.tensor(row['label_zone'], dtype=torch.long)
        return video_tensor, physics_vec, label_class, label_zone

# ==========================================
# 4. MODEL ARCHITECTURE
# ==========================================
class PitchHybridModel(nn.Module):
    """Two-branch model that fuses video features with physics features.

    A video backbone (with its final fc layer removed) and a small MLP over
    the physics vector are each projected, concatenated, passed through a
    fusion layer and fed to two linear heads: a single strike/ball logit and
    ``num_zones`` zone logits.
    """

    def __init__(self, physics_dim, num_zones):
        """Build the branches.

        Args:
            physics_dim: width of the physics feature vector.
            num_zones: number of zone classes for the zone head.
        """
        super(PitchHybridModel, self).__init__()
        
        # R(2+1)D-18 video backbone, initialised with torchvision's default weights
        weights = R2Plus1D_18_Weights.DEFAULT
        self.video_backbone = models.video.r2plus1d_18(weights=weights)
        
        # Drop the backbone's classifier so it emits its pre-fc feature vector.
        vid_out_dim = self.video_backbone.fc.in_features
        self.video_backbone.fc = nn.Identity() 
        
        # Project video features down to 256 dimensions.
        self.video_fc = nn.Sequential(
            nn.Linear(vid_out_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.3)
        )

        # MLP over the physics features, producing 128 dimensions.
        self.physics_net = nn.Sequential(
            nn.Linear(physics_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU()
        )
        
        # Fusion operates on the concatenation of both branches (256 + 128).
        combined_dim = 256 + 128
        self.fusion_layer = nn.Sequential(
            nn.Linear(combined_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.2)
        )
        
        # Task heads on the fused 256-d representation.
        self.head_class = nn.Linear(256, 1)
        self.head_zone = nn.Linear(256, num_zones)
        
    def forward(self, video, physics):
        """Return ``(class_logits, zone_logits)`` for a batch.

        Args:
            video: clip batch for the backbone - presumably
                (batch, 3, frames, H, W) as built by load_video_clip; confirm.
            physics: (batch, physics_dim) feature batch.
        """
        v_feat = self.video_backbone(video)
        v_feat = self.video_fc(v_feat)
        p_feat = self.physics_net(physics)
        # Concatenate along the feature dimension before fusion.
        combined = torch.cat((v_feat, p_feat), dim=1)
        fused = self.fusion_layer(combined)
        return self.head_class(fused), self.head_zone(fused)

# ==========================================
# 5. MAIN PIPELINE
# ==========================================
def run_pipeline():
    """Train (or load) the hybrid model, predict on the test set, save the CSV.

    With CONFIG['load_checkpoint'] False the model is trained for
    CONFIG['epochs'] epochs; after each epoch it is validated and the weights
    with the lowest mean validation loss are saved to MODEL_SAVE_PATH and used
    for inference. With CONFIG['load_checkpoint'] True training is skipped and
    the weights are read from MODEL_SAVE_PATH.

    Predictions are aligned to the submission template when it exists
    (unmatched rows are filled with 'ball' / zone 14) and written to
    OUTPUT_PATH.

    Raises:
        FileNotFoundError: if CONFIG['load_checkpoint'] is True but
            MODEL_SAVE_PATH does not exist.
    """
    # Fail fast: without this check a missing checkpoint would silently produce
    # a submission from untrained heads.
    if CONFIG['load_checkpoint'] and not os.path.exists(MODEL_SAVE_PATH):
        raise FileNotFoundError(
            f"CONFIG['load_checkpoint'] is True but {MODEL_SAVE_PATH} does not exist; "
            "train first or set load_checkpoint to False."
        )

    # Seed the RNGs so head initialisation, dropout and shuffling are repeatable.
    np.random.seed(CONFIG['seed'])
    torch.manual_seed(CONFIG['seed'])

    data = get_data_splits()
    model = PitchHybridModel(physics_dim=len(data['phy_cols']), num_zones=data['num_zones']).to(device)

    if CONFIG['load_checkpoint']:
        # --- LOAD CHECKPOINT IF REQUESTED ---
        print(f"\nLoading model weights from {MODEL_SAVE_PATH}...")
        model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))
    else:
        # --- TRAINING PHASE ---
        train_ds = HybridDataset(data['train'], TRAIN_VIDEO_DIR, data['phy_cols'], mode='train')
        val_ds = HybridDataset(data['val'], TRAIN_VIDEO_DIR, data['phy_cols'], mode='train')

        train_loader = DataLoader(train_ds, batch_size=CONFIG['batch_size'], shuffle=True, num_workers=0)
        val_loader = DataLoader(val_ds, batch_size=CONFIG['batch_size'], shuffle=False, num_workers=0)

        optimizer = optim.AdamW(model.parameters(), lr=CONFIG['lr'])
        crit_c = nn.BCEWithLogitsLoss()
        crit_z = nn.CrossEntropyLoss()

        best_val_loss = float('inf')  # Track best loss
        saved_this_run = False        # True once this run has written a checkpoint

        print(f"\n--- Starting Training on {len(train_ds)} samples ---")

        for epoch in range(CONFIG['epochs']):
            model.train()
            train_loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{CONFIG['epochs']}", leave=True)
            total_train_loss = 0

            for vid, phy, lc, lz in train_loop:
                vid, phy, lc, lz = vid.to(device), phy.to(device), lc.to(device), lz.to(device)

                optimizer.zero_grad()
                out_c, out_z = model(vid, phy)
                # Unweighted sum of the class and zone losses.
                loss = crit_c(out_c, lc) + crit_z(out_z, lz)
                loss.backward()
                optimizer.step()

                total_train_loss += loss.item()
                train_loop.set_postfix(loss=loss.item())

            # Validation
            model.eval()
            val_loss = 0
            acc_c_num, acc_z_num, total = 0, 0, 0

            print("Validating...")
            with torch.no_grad():
                for vid, phy, lc, lz in tqdm(val_loader, desc="Val"):
                    vid, phy, lc, lz = vid.to(device), phy.to(device), lc.to(device), lz.to(device)
                    out_c, out_z = model(vid, phy)

                    # Calculate Loss
                    batch_loss = crit_c(out_c, lc) + crit_z(out_z, lz)
                    val_loss += batch_loss.item()

                    # Calculate Accuracy
                    pred_c = (torch.sigmoid(out_c) > 0.5).float()
                    acc_c_num += (pred_c == lc).sum().item()
                    pred_z = torch.argmax(out_z, dim=1)
                    acc_z_num += (pred_z == lz).sum().item()
                    total += lc.size(0)

            avg_val_loss = val_loss / len(val_loader)
            print(f"Epoch {epoch+1} | Val Loss: {avg_val_loss:.4f} | Class Acc: {acc_c_num/total:.3f} | Zone Acc: {acc_z_num/total:.3f}")

            # --- SAVE BEST MODEL ---
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                torch.save(model.state_dict(), MODEL_SAVE_PATH)
                saved_this_run = True
                print(f"Saved New Best Model to {MODEL_SAVE_PATH}!")
            print("-" * 50)

    # --- INFERENCE PHASE ---
    print("\n--- Generating Submission ---")
    # Ensure we are using the best weights for inference. Only reload a checkpoint
    # written by this run, never a stale file left over from an earlier one.
    if not CONFIG['load_checkpoint'] and saved_this_run:
        model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))
        print("Loaded best model for inference.")

    test_ds = HybridDataset(data['test'], TEST_VIDEO_DIR, data['phy_cols'], mode='test')
    test_loader = DataLoader(test_ds, batch_size=CONFIG['batch_size'], shuffle=False, num_workers=0)

    model.eval()
    final_classes = []
    final_zones = []

    with torch.no_grad():
        for vid, phy in tqdm(test_loader, desc="Inference"):
            vid, phy = vid.to(device), phy.to(device)
            out_c, out_z = model(vid, phy)

            c_probs = torch.sigmoid(out_c).cpu().numpy().flatten()
            z_idxs = torch.argmax(out_z, dim=1).cpu().numpy().flatten()

            final_classes.extend(['strike' if p > 0.5 else 'ball' for p in c_probs])
            # Zone ids are read as floats; cast so the CSV holds 1, not 1.0,
            # even when no template is available.
            final_zones.extend(int(data['idx_to_zone'][z]) for z in z_idxs)

    df_sub = pd.DataFrame({'file_name': data['test']['file_name'], 'pitch_class': final_classes, 'zone': final_zones})

    if os.path.exists(SAMPLE_SUB_PATH):
        template = pd.read_csv(SAMPLE_SUB_PATH)
        # Left join keeps the template's row order.
        df_sub = template[['file_name']].merge(df_sub, on='file_name', how='left')
        df_sub['pitch_class'] = df_sub['pitch_class'].fillna('ball')
        df_sub['zone'] = df_sub['zone'].fillna(14).astype(int)

    df_sub.to_csv(OUTPUT_PATH, index=False)
    print(f"Submission Saved to {OUTPUT_PATH}")

# Run the hybrid train -> predict -> save pipeline when executed as the main module.
if __name__ == "__main__":
    run_pipeline()

Using device: MPS (Apple Silicon Acceleration)
Processing CSV Data...


Downloading: "https://download.pytorch.org/models/r2plus1d_18-91a641e6.pth" to /Users/swooshie/.cache/torch/hub/checkpoints/r2plus1d_18-91a641e6.pth
100%|██████████| 120M/120M [00:02<00:00, 55.3MB/s] 



--- Starting Training on 5100 samples ---


Epoch 1/5: 100%|██████████| 638/638 [48:51<00:00,  4.60s/it, loss=2.39]


Validating...


Val: 100%|██████████| 113/113 [02:29<00:00,  1.32s/it]


Epoch 1 | Val Loss: 3.0156 | Class Acc: 0.599 | Zone Acc: 0.233
Saved New Best Model to best_pitch_model.pth!
--------------------------------------------------


Epoch 2/5: 100%|██████████| 638/638 [43:40<00:00,  4.11s/it, loss=3.61]


Validating...


Val: 100%|██████████| 113/113 [02:20<00:00,  1.24s/it]


Epoch 2 | Val Loss: 2.7250 | Class Acc: 0.642 | Zone Acc: 0.276
Saved New Best Model to best_pitch_model.pth!
--------------------------------------------------


Epoch 3/5: 100%|██████████| 638/638 [43:34<00:00,  4.10s/it, loss=2.43]


Validating...


Val: 100%|██████████| 113/113 [02:22<00:00,  1.26s/it]


Epoch 3 | Val Loss: 2.6282 | Class Acc: 0.672 | Zone Acc: 0.307
Saved New Best Model to best_pitch_model.pth!
--------------------------------------------------


Epoch 4/5: 100%|██████████| 638/638 [43:28<00:00,  4.09s/it, loss=2.13]  


Validating...


Val: 100%|██████████| 113/113 [02:03<00:00,  1.09s/it]


Epoch 4 | Val Loss: 2.8175 | Class Acc: 0.641 | Zone Acc: 0.292
--------------------------------------------------


Epoch 5/5: 100%|██████████| 638/638 [46:16<00:00,  4.35s/it, loss=2.54]


Validating...


Val: 100%|██████████| 113/113 [01:58<00:00,  1.05s/it]


Epoch 5 | Val Loss: 2.7102 | Class Acc: 0.661 | Zone Acc: 0.302
--------------------------------------------------

--- Generating Submission ---
Loaded best model for inference.


Inference: 100%|██████████| 500/500 [08:50<00:00,  1.06s/it]


Submission Saved to submission_hybrid_best_gemini.csv


In [None]:
# score: 0.59915

import os
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import torchvision.models as models
from torchvision.models.video import R2Plus1D_18_Weights 
from tqdm import tqdm

# ==========================================
# 1. FILE PATHS
# ==========================================
# Root of the extracted competition data; the dataset paths below are built from it.
BASE_DIR = "data/Question4/baseball-pitch-tracking-cs-gy-6643/baseball_kaggle_dataset_trimmed_only"

TRAIN_CSV_PATH = os.path.join(BASE_DIR, "data", "train_ground_truth.csv")
TEST_FEATS_PATH = os.path.join(BASE_DIR, "data", "test_features.csv")
TRAIN_VIDEO_DIR = os.path.join(BASE_DIR, "train_trimmed")
TEST_VIDEO_DIR = os.path.join(BASE_DIR, "test")
SAMPLE_SUB_PATH = "data/Question4/baseball-pitch-tracking-cs-gy-6643/test_submission_template.csv"
OUTPUT_PATH = "submission_hybrid_best_gemini.csv"
MODEL_SAVE_PATH = "best_pitch_model.pth" # <--- NEW: Where we save the weights

# ==========================================
# 2. CONFIGURATION
# ==========================================
CONFIG = {
    'seed': 42,
    'epochs': 10,
    'batch_size': 8,  
    'lr': 1e-4,
    'frames': 16,
    'img_size': 112,
    'hidden_dim': 256,
    'load_checkpoint': False  # <--- SET TO TRUE TO SKIP TRAIN AND RUN INFERENCE
}

# Apply the configured seed to the global RNGs so weight initialisation,
# dropout and DataLoader shuffling are repeatable between runs.
np.random.seed(CONFIG['seed'])
torch.manual_seed(CONFIG['seed'])

# Device Selection (MPS is preferred over CUDA, CPU is the fallback).
# The getattr guard keeps this working on torch builds without an MPS backend.
if getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using device: MPS (Apple Silicon Acceleration)")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using device: CUDA")
else:
    device = torch.device("cpu")
    print("Using device: CPU")

# ==========================================
# 3. UTILS & DATA LOADING
# ==========================================
def load_video_clip(path, num_frames=16, resize=(112, 112)):
    """Decode a video into a fixed-length, normalised clip tensor.

    Every frame is read, converted BGR -> RGB and resized; ``num_frames``
    frames are then picked at evenly spaced positions (positions repeat when
    the video has fewer frames than requested).

    Args:
        path: video file path handed to ``cv2.VideoCapture``.
        num_frames: number of frames in the returned clip.
        resize: ``(width, height)`` target size, as expected by ``cv2.resize``.

    Returns:
        Float tensor of shape ``(3, num_frames, height, width)``. Pixel values
        are scaled to [0, 1] and standardised per channel. If no frame could
        be decoded, an all-zero tensor of the same shape is returned (it is
        not passed through the mean/std normalisation).
    """
    # cv2.resize takes (width, height) but yields arrays shaped (height, width, 3).
    width, height = resize

    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, resize)
            frames.append(frame)
    finally:
        cap.release()

    if len(frames) == 0:
        # Match the (H, W) layout of decoded clips so batches can still be stacked
        # when the requested size is not square.
        return torch.zeros(3, num_frames, height, width)

    indices = np.linspace(0, len(frames) - 1, num_frames).astype(int)
    sampled_frames = np.array([frames[i] for i in indices])

    # (T, H, W, C) -> (C, T, H, W)
    tensor = torch.from_numpy(sampled_frames).permute(3, 0, 1, 2).float() / 255.0
    # Per-channel statistics; presumably those expected by the pretrained video
    # backbone used elsewhere in this cell - TODO confirm.
    mean = torch.tensor([0.43216, 0.394666, 0.37645]).view(3, 1, 1, 1)
    std = torch.tensor([0.22803, 0.22145, 0.216989]).view(3, 1, 1, 1)
    tensor = (tensor - mean) / std
    return tensor

def get_data_splits():
    """Load the CSVs, prepare the physics features and labels, and split train/val.

    Numeric physics features with missing values are imputed with the
    training-set mean (also applied to the test set), 'stand' and 'p_throws'
    are label-encoded, and all feature columns are standardised with a scaler
    fitted on the training rows. 15% of the training rows are held out for
    validation.

    Returns:
        dict with keys 'train', 'val', 'test' (DataFrames), 'phy_cols' (list
        of feature column names), 'idx_to_zone' (class index -> zone id) and
        'num_zones'.
    """
    print("Processing CSV Data...")
    df_train = pd.read_csv(TRAIN_CSV_PATH).dropna(subset=['pitch_class', 'zone', 'plate_x', 'plate_z'])
    df_test = pd.read_csv(TEST_FEATS_PATH)

    phy_cols = [
        'release_speed', 'effective_speed', 'release_spin_rate',
        'release_pos_x', 'release_pos_y', 'release_pos_z',
        'pfx_x', 'pfx_z', 'sz_top', 'sz_bot'
    ]

    # Impute missing measurements with the training mean. Filling raw values
    # with 0 (e.g. a release speed of 0) would create extreme outliers once
    # the columns are standardised.
    train_means = df_train[phy_cols].mean()
    df_train[phy_cols] = df_train[phy_cols].fillna(train_means)
    df_test[phy_cols] = df_test[phy_cols].fillna(train_means)

    for col in ['stand', 'p_throws']:
        le = LabelEncoder()
        le.fit(pd.concat([df_train[col], df_test[col]]))
        df_train[col] = le.transform(df_train[col])
        df_test[col] = le.transform(df_test[col])
        phy_cols.append(col)

    # fillna(0) is kept only as a last resort for a column with no usable
    # training values (its mean is NaN, so the imputation above cannot fill it).
    scaler = StandardScaler()
    df_train[phy_cols] = scaler.fit_transform(df_train[phy_cols].fillna(0))
    df_test[phy_cols] = scaler.transform(df_test[phy_cols].fillna(0))

    class_map = {'strike': 1, 'ball': 0}
    df_train['label_class'] = df_train['pitch_class'].map(class_map)

    # Zone ids are mapped to contiguous class indices (and back for submission).
    zones = sorted(df_train['zone'].unique())
    zone_map = {z: i for i, z in enumerate(zones)}
    idx_to_zone = {i: z for z, i in zone_map.items()}
    df_train['label_zone'] = df_train['zone'].map(zone_map)

    train_idx, val_idx = train_test_split(df_train.index, test_size=0.15, random_state=CONFIG['seed'])

    return {
        'train': df_train.loc[train_idx],
        'val': df_train.loc[val_idx],
        'test': df_test,
        'phy_cols': phy_cols,
        'idx_to_zone': idx_to_zone,
        'num_zones': len(zones)
    }

class HybridDataset(Dataset):
    """Pairs each row's physics feature vector with its decoded video clip.

    Items are (video, physics) in 'test' mode and
    (video, physics, class label, zone label) otherwise.
    """

    def __init__(self, df, video_dir, phy_cols, mode='train'):
        self.mode = mode
        self.phy_cols = phy_cols
        self.video_dir = video_dir
        # Positional access below relies on a clean 0..N-1 index.
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        physics_vec = torch.tensor(row[self.phy_cols].values.astype(np.float32))

        side = CONFIG['img_size']
        video_tensor = load_video_clip(
            os.path.join(self.video_dir, row['file_name']),
            num_frames=CONFIG['frames'],
            resize=(side, side),
        )

        if self.mode == 'test':
            return video_tensor, physics_vec

        # Class label gets a trailing dimension; zone label stays a scalar index.
        label_class = torch.tensor(row['label_class'], dtype=torch.float).unsqueeze(0)
        label_zone = torch.tensor(row['label_zone'], dtype=torch.long)
        return video_tensor, physics_vec, label_class, label_zone

# ==========================================
# 4. MODEL ARCHITECTURE
# ==========================================
class PitchHybridModel(nn.Module):
    """Two-branch network fusing video features with tabular pitch features.

    A torchvision R(2+1)D-18 backbone (default pretrained weights, classifier
    replaced by identity) embeds the clip; a small MLP embeds the physics
    vector. The two embeddings are concatenated, fused, and passed to two
    linear heads.

    Args:
        physics_dim: Length of the physics feature vector.
        num_zones: Number of zone classes predicted by the zone head.
    """

    def __init__(self, physics_dim, num_zones):
        super().__init__()

        video_width, physics_width, fused_width = 256, 128, 256

        # Pretrained video backbone with its classification layer stripped,
        # so it outputs the pooled feature vector.
        backbone = models.video.r2plus1d_18(weights=R2Plus1D_18_Weights.DEFAULT)
        backbone_features = backbone.fc.in_features
        backbone.fc = nn.Identity()
        self.video_backbone = backbone

        self.video_fc = nn.Sequential(
            nn.Linear(backbone_features, video_width),
            nn.ReLU(),
            nn.Dropout(0.3),
        )

        self.physics_net = nn.Sequential(
            nn.Linear(physics_dim, physics_width),
            nn.BatchNorm1d(physics_width),
            nn.ReLU(),
            nn.Linear(physics_width, physics_width),
            nn.ReLU(),
        )

        self.fusion_layer = nn.Sequential(
            nn.Linear(video_width + physics_width, fused_width),
            nn.BatchNorm1d(fused_width),
            nn.ReLU(),
            nn.Dropout(0.2),
        )

        # One logit for strike/ball, num_zones logits for the zone.
        self.head_class = nn.Linear(fused_width, 1)
        self.head_zone = nn.Linear(fused_width, num_zones)

    def forward(self, video, physics):
        """Return (class_logit, zone_logits) for a batch of clips and feature vectors."""
        video_embedding = self.video_fc(self.video_backbone(video))
        physics_embedding = self.physics_net(physics)
        fused = self.fusion_layer(torch.cat((video_embedding, physics_embedding), dim=1))
        class_logit = self.head_class(fused)
        zone_logits = self.head_zone(fused)
        return class_logit, zone_logits

# ==========================================
# 5. MAIN PIPELINE
# ==========================================
def run_pipeline():
    """Train or load the hybrid model, then write the test-set submission CSV.

    If CONFIG['load_checkpoint'] is set and MODEL_SAVE_PATH exists, the saved
    weights are loaded and training is skipped. In every other case the model
    is trained -- including when a checkpoint was requested but the file is
    missing -- so inference never runs on untrained prediction heads. While
    training, the weights with the lowest validation loss are written to
    MODEL_SAVE_PATH and reloaded before inference.
    """
    data = get_data_splits()
    model = PitchHybridModel(physics_dim=len(data['phy_cols']), num_zones=data['num_zones']).to(device)

    # --- LOAD CHECKPOINT IF REQUESTED ---
    checkpoint_loaded = False
    if CONFIG['load_checkpoint']:
        if os.path.exists(MODEL_SAVE_PATH):
            print(f"\nLoading model weights from {MODEL_SAVE_PATH}...")
            model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))
            checkpoint_loaded = True
        else:
            print(f"\nCheckpoint {MODEL_SAVE_PATH} not found. Training from scratch instead.")

    # --- TRAINING PHASE ---
    # Keyed on whether weights were actually loaded, not on the config flag,
    # so a missing checkpoint falls back to training.
    if not checkpoint_loaded:
        train_ds = HybridDataset(data['train'], TRAIN_VIDEO_DIR, data['phy_cols'], mode='train')
        val_ds = HybridDataset(data['val'], TRAIN_VIDEO_DIR, data['phy_cols'], mode='train')

        train_loader = DataLoader(train_ds, batch_size=CONFIG['batch_size'], shuffle=True, num_workers=0)
        val_loader = DataLoader(val_ds, batch_size=CONFIG['batch_size'], shuffle=False, num_workers=0)

        optimizer = optim.AdamW(model.parameters(), lr=CONFIG['lr'])
        crit_c = nn.BCEWithLogitsLoss()
        crit_z = nn.CrossEntropyLoss()

        best_val_loss = float('inf')  # Track best loss

        print(f"\n--- Starting Training on {len(train_ds)} samples ---")

        for epoch in range(CONFIG['epochs']):
            model.train()
            train_loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{CONFIG['epochs']}", leave=True)

            for vid, phy, lc, lz in train_loop:
                vid, phy, lc, lz = vid.to(device), phy.to(device), lc.to(device), lz.to(device)

                optimizer.zero_grad()
                out_c, out_z = model(vid, phy)
                # Joint objective: binary strike/ball loss plus multi-class zone loss.
                loss = crit_c(out_c, lc) + crit_z(out_z, lz)
                loss.backward()
                optimizer.step()

                train_loop.set_postfix(loss=loss.item())

            # Validation
            model.eval()
            val_loss = 0
            acc_c_num, acc_z_num, total = 0, 0, 0

            print("Validating...")
            with torch.no_grad():
                for vid, phy, lc, lz in tqdm(val_loader, desc="Val"):
                    vid, phy, lc, lz = vid.to(device), phy.to(device), lc.to(device), lz.to(device)
                    out_c, out_z = model(vid, phy)

                    # Calculate Loss
                    batch_loss = crit_c(out_c, lc) + crit_z(out_z, lz)
                    val_loss += batch_loss.item()

                    # Calculate Accuracy
                    pred_c = (torch.sigmoid(out_c) > 0.5).float()
                    acc_c_num += (pred_c == lc).sum().item()
                    pred_z = torch.argmax(out_z, dim=1)
                    acc_z_num += (pred_z == lz).sum().item()
                    total += lc.size(0)

            avg_val_loss = val_loss / len(val_loader)
            print(f"Epoch {epoch+1} | Val Loss: {avg_val_loss:.4f} | Class Acc: {acc_c_num/total:.3f} | Zone Acc: {acc_z_num/total:.3f}")

            # --- SAVE BEST MODEL ---
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                torch.save(model.state_dict(), MODEL_SAVE_PATH)
                print(f"Saved New Best Model to {MODEL_SAVE_PATH}!")
            print("-" * 50)

    # --- INFERENCE PHASE ---
    print("\n--- Generating Submission ---")
    # After training the in-memory weights are from the last epoch; swap in
    # the best ones saved during the run.
    if not checkpoint_loaded and os.path.exists(MODEL_SAVE_PATH):
        model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))
        print("Loaded best model for inference.")

    test_ds = HybridDataset(data['test'], TEST_VIDEO_DIR, data['phy_cols'], mode='test')
    test_loader = DataLoader(test_ds, batch_size=CONFIG['batch_size'], shuffle=False, num_workers=0)

    model.eval()
    final_classes = []
    final_zones = []

    with torch.no_grad():
        for vid, phy in tqdm(test_loader, desc="Inference"):
            vid, phy = vid.to(device), phy.to(device)
            out_c, out_z = model(vid, phy)

            c_probs = torch.sigmoid(out_c).cpu().numpy().flatten()
            z_idxs = torch.argmax(out_z, dim=1).cpu().numpy().flatten()

            final_classes.extend(['strike' if p > 0.5 else 'ball' for p in c_probs])
            # Translate contiguous class indices back to the original zone ids.
            final_zones.extend([data['idx_to_zone'][z] for z in z_idxs])

    df_sub = pd.DataFrame({'file_name': data['test']['file_name'], 'pitch_class': final_classes, 'zone': final_zones})

    if os.path.exists(SAMPLE_SUB_PATH):
        # Follow the template's row order; files without a prediction get defaults.
        template = pd.read_csv(SAMPLE_SUB_PATH)
        df_sub = template[['file_name']].merge(df_sub, on='file_name', how='left')
        df_sub['pitch_class'] = df_sub['pitch_class'].fillna('ball')
        # NOTE(review): 14 is presumably an out-of-zone code -- confirm against the data dictionary.
        df_sub['zone'] = df_sub['zone'].fillna(14).astype(int)

    df_sub.to_csv(OUTPUT_PATH, index=False)
    print(f"Submission Saved to {OUTPUT_PATH}")

# Entry point: run the whole pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    run_pipeline()

Using device: MPS (Apple Silicon Acceleration)
Processing CSV Data...

--- Starting Training on 5100 samples ---


Epoch 1/10: 100%|██████████| 638/638 [37:53<00:00,  3.56s/it, loss=3.35]


Validating...


Val: 100%|██████████| 113/113 [01:59<00:00,  1.06s/it]


Epoch 1 | Val Loss: 3.2195 | Class Acc: 0.563 | Zone Acc: 0.238
Saved New Best Model to best_pitch_model.pth!
--------------------------------------------------


Epoch 2/10: 100%|██████████| 638/638 [36:16<00:00,  3.41s/it, loss=3.83]


Validating...


Val: 100%|██████████| 113/113 [01:58<00:00,  1.05s/it]


Epoch 2 | Val Loss: 2.7670 | Class Acc: 0.597 | Zone Acc: 0.290
Saved New Best Model to best_pitch_model.pth!
--------------------------------------------------


Epoch 3/10: 100%|██████████| 638/638 [36:22<00:00,  3.42s/it, loss=2.49]


Validating...


Val: 100%|██████████| 113/113 [01:57<00:00,  1.04s/it]


Epoch 3 | Val Loss: 2.6488 | Class Acc: 0.616 | Zone Acc: 0.310
Saved New Best Model to best_pitch_model.pth!
--------------------------------------------------


Epoch 4/10: 100%|██████████| 638/638 [36:13<00:00,  3.41s/it, loss=2.55]


Validating...


Val: 100%|██████████| 113/113 [01:57<00:00,  1.04s/it]


Epoch 4 | Val Loss: 2.6791 | Class Acc: 0.628 | Zone Acc: 0.351
--------------------------------------------------


Epoch 5/10: 100%|██████████| 638/638 [36:14<00:00,  3.41s/it, loss=1.49]


Validating...


Val: 100%|██████████| 113/113 [02:00<00:00,  1.07s/it]


Epoch 5 | Val Loss: 2.4503 | Class Acc: 0.676 | Zone Acc: 0.374
Saved New Best Model to best_pitch_model.pth!
--------------------------------------------------


Epoch 6/10: 100%|██████████| 638/638 [36:15<00:00,  3.41s/it, loss=5.19] 


Validating...


Val: 100%|██████████| 113/113 [01:57<00:00,  1.04s/it]


Epoch 6 | Val Loss: 2.5703 | Class Acc: 0.707 | Zone Acc: 0.348
--------------------------------------------------


Epoch 7/10: 100%|██████████| 638/638 [36:16<00:00,  3.41s/it, loss=1.09] 


Validating...


Val: 100%|██████████| 113/113 [01:59<00:00,  1.06s/it]


Epoch 7 | Val Loss: 2.9495 | Class Acc: 0.640 | Zone Acc: 0.341
--------------------------------------------------


Epoch 8/10: 100%|██████████| 638/638 [36:12<00:00,  3.41s/it, loss=1.48] 


Validating...


Val: 100%|██████████| 113/113 [01:58<00:00,  1.05s/it]


Epoch 8 | Val Loss: 2.7870 | Class Acc: 0.687 | Zone Acc: 0.372
--------------------------------------------------


Epoch 9/10: 100%|██████████| 638/638 [36:15<00:00,  3.41s/it, loss=3.77] 


Validating...


Val: 100%|██████████| 113/113 [01:58<00:00,  1.05s/it]


Epoch 9 | Val Loss: 3.3288 | Class Acc: 0.686 | Zone Acc: 0.303
--------------------------------------------------


Epoch 10/10: 100%|██████████| 638/638 [36:13<00:00,  3.41s/it, loss=3.49] 


Validating...


Val: 100%|██████████| 113/113 [01:58<00:00,  1.05s/it]


Epoch 10 | Val Loss: 3.0233 | Class Acc: 0.691 | Zone Acc: 0.360
--------------------------------------------------

--- Generating Submission ---
Loaded best model for inference.


Inference: 100%|██████████| 500/500 [08:42<00:00,  1.05s/it]

Submission Saved to submission_hybrid_best_gemini.csv





In [15]:
# score: 0.61080

import os
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import torchvision.models as models
from torchvision.models.video import R2Plus1D_18_Weights 
from tqdm import tqdm

# ==========================================
# 1. FILE PATHS
# ==========================================
# Root of the extracted competition dataset (relative to the notebook's working directory).
BASE_DIR = "data/Question4/baseball-pitch-tracking-cs-gy-6643/baseball_kaggle_dataset_trimmed_only"

# Labelled training rows and unlabelled test feature rows, read by get_data_splits().
TRAIN_CSV_PATH = os.path.join(BASE_DIR, "data", "train_ground_truth.csv")
TEST_FEATS_PATH = os.path.join(BASE_DIR, "data", "test_features.csv")
# Video folders; TRAIN_VIDEO_DIR serves both the train and the validation split.
TRAIN_VIDEO_DIR = os.path.join(BASE_DIR, "train_trimmed")
TEST_VIDEO_DIR = os.path.join(BASE_DIR, "test")
# Optional template whose 'file_name' column fixes the row order of the submission.
SAMPLE_SUB_PATH = "data/Question4/baseball-pitch-tracking-cs-gy-6643/test_submission_template.csv"
# Where run_pipeline() writes the predictions.
OUTPUT_PATH = "submission_hybrid_best_gemini_better.csv"
# Checkpoint file; the same name is used by the other training cells in this notebook.
MODEL_SAVE_PATH = "best_pitch_model.pth"

# ==========================================
# 2. CONFIGURATION
# ==========================================
CONFIG = {
    'seed': 42,
    'epochs': 15,          # Set how many MORE epochs you want to train
    'batch_size': 8,  
    'lr': 1e-4,           # Keep low if resuming (fine-tuning)
    'frames': 16,          # Frames sampled per clip
    'img_size': 112,       # Square side length each frame is resized to
    'hidden_dim': 256,     # NOTE(review): not read by the model code in this cell, which hard-codes 256
    
    # --- CONTROL FLAGS ---
    'resume_checkpoint': True,  # If True: Loads best_pitch_model.pth (if it exists) before starting
    'train_model': False         # If True: Runs the training loop. If False: Jumps to Inference.
}


def seed_everything(seed):
    """Seed the NumPy and PyTorch random generators so runs are repeatable."""
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)


# Seed up front so head initialisation, DataLoader shuffling and dropout are
# reproducible, not just the train/val split.
seed_everything(CONFIG['seed'])

# Device Selection
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using device: MPS (Apple Silicon Acceleration)")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using device: CUDA")
else:
    device = torch.device("cpu")
    print("Using device: CPU")

# ==========================================
# 3. UTILS & DATA LOADING
# ==========================================
def load_video_clip(path, num_frames=16, resize=(112, 112)):
    """Decode a video file into a normalized, fixed-length clip tensor.

    Every frame is read with OpenCV, converted from BGR to RGB and resized.
    `num_frames` frames are then taken at evenly spaced positions across the
    video (frames repeat when the video is shorter than `num_frames`).

    Args:
        path: Video file path handed to cv2.VideoCapture.
        num_frames: Number of frames in the returned clip.
        resize: Target frame size as (width, height), the order cv2.resize expects.

    Returns:
        Float tensor of shape (3, num_frames, height, width): pixel values
        scaled to [0, 1] and then standardized per channel. If no frame can be
        decoded, an all-zero tensor of the same shape is returned.
    """
    width, height = resize

    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, (width, height))
            frames.append(frame)
    finally:
        cap.release()

    if len(frames) == 0:
        # Decoded frames are (height, width) arrays, so the placeholder must
        # use the same order to stay batchable for non-square sizes.
        return torch.zeros(3, num_frames, height, width)

    # Evenly spaced frame positions from first to last frame.
    indices = np.linspace(0, len(frames) - 1, num_frames).astype(int)
    sampled_frames = np.array([frames[i] for i in indices])

    # (T, H, W, C) uint8 -> (C, T, H, W) float in [0, 1].
    tensor = torch.from_numpy(sampled_frames).permute(3, 0, 1, 2).float() / 255.0
    # Per-channel statistics published for torchvision's pretrained video models.
    mean = torch.tensor([0.43216, 0.394666, 0.37645]).view(3, 1, 1, 1)
    std = torch.tensor([0.22803, 0.22145, 0.216989]).view(3, 1, 1, 1)
    tensor = (tensor - mean) / std
    return tensor

def get_data_splits():
    """Load the CSVs, encode/scale the features and build the train/val/test frames.

    Steps:
      * drop training rows missing any label column;
      * label-encode 'stand' and 'p_throws' with an encoder fitted on train+test;
      * fill missing feature values with 0 and standardize them with statistics
        fitted on the full (pre-split) training frame;
      * map pitch_class to 1 (strike) / 0 (ball) and zone to a contiguous index;
      * hold out 15% of the training rows for validation, seeded by CONFIG['seed'].

    Returns:
        dict with keys 'train', 'val', 'test' (DataFrames), 'phy_cols' (feature
        column names), 'idx_to_zone' (class index -> original zone value) and
        'num_zones'.
    """
    print("Processing CSV Data...")
    label_columns = ['pitch_class', 'zone', 'plate_x', 'plate_z']
    df_train = pd.read_csv(TRAIN_CSV_PATH)
    df_train = df_train.dropna(subset=label_columns)
    df_test = pd.read_csv(TEST_FEATS_PATH)

    phy_cols = [
        'release_speed', 'effective_speed', 'release_spin_rate',
        'release_pos_x', 'release_pos_y', 'release_pos_z',
        'pfx_x', 'pfx_z', 'sz_top', 'sz_bot'
    ]

    # Categorical handedness columns become integer codes and join the feature list.
    categorical_cols = ['stand', 'p_throws']
    for col in categorical_cols:
        encoder = LabelEncoder()
        encoder.fit(pd.concat([df_train[col], df_test[col]]))
        df_train[col] = encoder.transform(df_train[col])
        df_test[col] = encoder.transform(df_test[col])
    phy_cols.extend(categorical_cols)

    scaler = StandardScaler()
    train_features = df_train[phy_cols].fillna(0)
    df_train[phy_cols] = scaler.fit_transform(train_features)
    test_features = df_test[phy_cols].fillna(0)
    df_test[phy_cols] = scaler.transform(test_features)

    df_train['label_class'] = df_train['pitch_class'].map({'strike': 1, 'ball': 0})

    # Zone ids are re-indexed to 0..K-1 in sorted order for CrossEntropyLoss.
    zones = sorted(df_train['zone'].unique())
    idx_to_zone = dict(enumerate(zones))
    zone_map = {zone: index for index, zone in idx_to_zone.items()}
    df_train['label_zone'] = df_train['zone'].map(zone_map)

    train_idx, val_idx = train_test_split(df_train.index, test_size=0.15, random_state=CONFIG['seed'])

    splits = {
        'train': df_train.loc[train_idx],
        'val': df_train.loc[val_idx],
        'test': df_test,
        'phy_cols': phy_cols,
        'idx_to_zone': idx_to_zone,
        'num_zones': len(zones)
    }
    return splits

class HybridDataset(Dataset):
    """Dataset pairing each pitch video clip with its tabular feature vector.

    Args:
        df: Frame with a 'file_name' column and the `phy_cols` columns; for
            labelled data also 'label_class' and 'label_zone'.
        video_dir: Directory that the 'file_name' values are relative to.
        phy_cols: Names of the numeric feature columns fed to the model.
        mode: 'test' yields (video, physics); any other value also yields the
            class and zone labels.
    """

    def __init__(self, df, video_dir, phy_cols, mode='train'):
        # Positional lookups in __getitem__ need a clean 0..N-1 index.
        self.df = df.reset_index(drop=True)
        self.video_dir = video_dir
        self.phy_cols = phy_cols
        self.mode = mode

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        sample = self.df.iloc[idx]

        features = sample[self.phy_cols].values.astype(np.float32)
        physics_vec = torch.tensor(features)

        clip_path = os.path.join(self.video_dir, sample['file_name'])
        side = CONFIG['img_size']
        video_tensor = load_video_clip(clip_path, num_frames=CONFIG['frames'], resize=(side, side))

        if self.mode == 'test':
            return video_tensor, physics_vec

        # Class label gets a trailing dim of 1 to line up with the single-logit head.
        label_class = torch.tensor(sample['label_class'], dtype=torch.float).unsqueeze(0)
        label_zone = torch.tensor(sample['label_zone'], dtype=torch.long)
        return video_tensor, physics_vec, label_class, label_zone

# ==========================================
# 4. MODEL ARCHITECTURE
# ==========================================
class PitchHybridModel(nn.Module):
    """Two-branch network fusing video features with tabular pitch features.

    A torchvision R(2+1)D-18 backbone (default pretrained weights, classifier
    replaced by identity) embeds the clip; a small MLP embeds the physics
    vector. The two embeddings are concatenated, fused, and passed to two
    linear heads.

    Args:
        physics_dim: Length of the physics feature vector.
        num_zones: Number of zone classes predicted by the zone head.
    """

    def __init__(self, physics_dim, num_zones):
        super().__init__()

        video_width, physics_width, fused_width = 256, 128, 256

        # Pretrained video backbone with its classification layer stripped,
        # so it outputs the pooled feature vector.
        backbone = models.video.r2plus1d_18(weights=R2Plus1D_18_Weights.DEFAULT)
        backbone_features = backbone.fc.in_features
        backbone.fc = nn.Identity()
        self.video_backbone = backbone

        self.video_fc = nn.Sequential(
            nn.Linear(backbone_features, video_width),
            nn.ReLU(),
            nn.Dropout(0.3),
        )

        self.physics_net = nn.Sequential(
            nn.Linear(physics_dim, physics_width),
            nn.BatchNorm1d(physics_width),
            nn.ReLU(),
            nn.Linear(physics_width, physics_width),
            nn.ReLU(),
        )

        self.fusion_layer = nn.Sequential(
            nn.Linear(video_width + physics_width, fused_width),
            nn.BatchNorm1d(fused_width),
            nn.ReLU(),
            nn.Dropout(0.2),
        )

        # One logit for strike/ball, num_zones logits for the zone.
        self.head_class = nn.Linear(fused_width, 1)
        self.head_zone = nn.Linear(fused_width, num_zones)

    def forward(self, video, physics):
        """Return (class_logit, zone_logits) for a batch of clips and feature vectors."""
        video_embedding = self.video_fc(self.video_backbone(video))
        physics_embedding = self.physics_net(physics)
        fused = self.fusion_layer(torch.cat((video_embedding, physics_embedding), dim=1))
        class_logit = self.head_class(fused)
        zone_logits = self.head_zone(fused)
        return class_logit, zone_logits

# ==========================================
# 5. MAIN PIPELINE
# ==========================================
def _score_on_validation(model, val_loader, crit_c, crit_z):
    """Evaluate `model` on `val_loader`; return (avg_loss, class_acc, zone_acc, score).

    `score` is the weighted accuracy 0.7 * class_acc + 0.3 * zone_acc that is
    used to pick the best checkpoint.
    """
    model.eval()
    val_loss = 0
    acc_c_num, acc_z_num, total = 0, 0, 0

    print("Validating...")
    with torch.no_grad():
        for vid, phy, lc, lz in tqdm(val_loader, desc="Val"):
            vid, phy, lc, lz = vid.to(device), phy.to(device), lc.to(device), lz.to(device)
            out_c, out_z = model(vid, phy)

            batch_loss = crit_c(out_c, lc) + crit_z(out_z, lz)
            val_loss += batch_loss.item()

            pred_c = (torch.sigmoid(out_c) > 0.5).float()
            acc_c_num += (pred_c == lc).sum().item()
            pred_z = torch.argmax(out_z, dim=1)
            acc_z_num += (pred_z == lz).sum().item()
            total += lc.size(0)

    avg_val_loss = val_loss / len(val_loader)
    class_acc = acc_c_num / total
    zone_acc = acc_z_num / total
    # Rule: 0.7 * PitchClass + 0.3 * Zone
    score = (0.7 * class_acc) + (0.3 * zone_acc)
    return avg_val_loss, class_acc, zone_acc, score


def run_pipeline():
    """Optionally resume and/or train the hybrid model, then write the submission CSV.

    Controlled by two CONFIG flags:
      * 'resume_checkpoint': load MODEL_SAVE_PATH (if present) before anything else.
      * 'train_model': run the training loop; otherwise go straight to inference.

    When training resumes from a checkpoint, the loaded model is scored on the
    validation split first and that score is the bar new epochs must beat, so
    a worse fine-tuned epoch cannot overwrite the previous best checkpoint.
    """
    data = get_data_splits()
    model = PitchHybridModel(physics_dim=len(data['phy_cols']), num_zones=data['num_zones']).to(device)

    # --- STEP 1: LOAD WEIGHTS (Logic: Resume or Inference) ---
    checkpoint_loaded = False
    load_failed = False
    if CONFIG['resume_checkpoint'] and os.path.exists(MODEL_SAVE_PATH):
        print(f"\n[INFO] Found checkpoint at {MODEL_SAVE_PATH}. Loading weights...")
        try:
            model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))
            checkpoint_loaded = True
            print("[INFO] Weights loaded successfully! Continuing from best model.")
        except Exception as e:
            # Best effort: an unreadable/incompatible checkpoint must not abort the run.
            load_failed = True
            print(f"[WARNING] Could not load weights: {e}. Starting from scratch.")
    else:
        if CONFIG['resume_checkpoint']:
            print(f"\n[INFO] No checkpoint found at {MODEL_SAVE_PATH}. Training from scratch.")

    # --- STEP 2: TRAINING LOOP (Optional) ---
    saved_new_best = False
    if CONFIG['train_model']:
        train_ds = HybridDataset(data['train'], TRAIN_VIDEO_DIR, data['phy_cols'], mode='train')
        val_ds = HybridDataset(data['val'], TRAIN_VIDEO_DIR, data['phy_cols'], mode='train')

        train_loader = DataLoader(train_ds, batch_size=CONFIG['batch_size'], shuffle=True, num_workers=0)
        val_loader = DataLoader(val_ds, batch_size=CONFIG['batch_size'], shuffle=False, num_workers=0)

        optimizer = optim.AdamW(model.parameters(), lr=CONFIG['lr'])
        crit_c = nn.BCEWithLogitsLoss()
        crit_z = nn.CrossEntropyLoss()

        best_score = 0.0
        if checkpoint_loaded:
            # The checkpoint on disk is the current best; measure it so only a
            # genuinely better epoch replaces it.
            print("[INFO] Calculating Baseline Score of loaded model...")
            _, start_c_acc, start_z_acc, best_score = _score_on_validation(model, val_loader, crit_c, crit_z)
            print(f"[INFO] Resuming with Baseline Score: {best_score:.4f} (Class: {start_c_acc:.3f}, Zone: {start_z_acc:.3f})")

        print(f"\n--- Starting Training ({CONFIG['epochs']} epochs) ---")

        for epoch in range(CONFIG['epochs']):
            model.train()
            train_loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{CONFIG['epochs']}", leave=True)

            for vid, phy, lc, lz in train_loop:
                vid, phy, lc, lz = vid.to(device), phy.to(device), lc.to(device), lz.to(device)

                optimizer.zero_grad()
                out_c, out_z = model(vid, phy)
                # Joint objective: binary strike/ball loss plus multi-class zone loss.
                loss = crit_c(out_c, lc) + crit_z(out_z, lz)
                loss.backward()
                optimizer.step()

                train_loop.set_postfix(loss=loss.item())

            # Validation
            avg_val_loss, current_class_acc, current_zone_acc, current_score = _score_on_validation(
                model, val_loader, crit_c, crit_z
            )

            print(f"Epoch {epoch+1} | Loss: {avg_val_loss:.4f} | Class Acc: {current_class_acc:.3f} | Zone Acc: {current_zone_acc:.3f} | Score: {current_score:.4f}")

            # SAVE IF SCORE IMPROVES (Ignore Loss)
            if current_score > best_score:
                best_score = current_score
                torch.save(model.state_dict(), MODEL_SAVE_PATH)
                saved_new_best = True
                print(f"Saved New Best Model (Score: {best_score:.4f}) to {MODEL_SAVE_PATH}!")
            print("-" * 50)

    # --- STEP 3: INFERENCE ---
    print("\n--- Generating Submission ---")

    # Ensure we use the best weights found (either from loaded file or recent training).
    # A file that already failed to load is only retried if training rewrote it.
    if os.path.exists(MODEL_SAVE_PATH) and (saved_new_best or not load_failed):
        model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))
        print("Loaded best model weights for inference.")
    else:
        print("[WARNING] No usable checkpoint available; predictions come from untrained heads.")

    test_ds = HybridDataset(data['test'], TEST_VIDEO_DIR, data['phy_cols'], mode='test')
    test_loader = DataLoader(test_ds, batch_size=CONFIG['batch_size'], shuffle=False, num_workers=0)

    model.eval()
    final_classes = []
    final_zones = []

    with torch.no_grad():
        for vid, phy in tqdm(test_loader, desc="Inference"):
            vid, phy = vid.to(device), phy.to(device)
            out_c, out_z = model(vid, phy)

            c_probs = torch.sigmoid(out_c).cpu().numpy().flatten()
            z_idxs = torch.argmax(out_z, dim=1).cpu().numpy().flatten()

            final_classes.extend(['strike' if p > 0.5 else 'ball' for p in c_probs])
            # Translate contiguous class indices back to the original zone ids.
            final_zones.extend([data['idx_to_zone'][z] for z in z_idxs])

    df_sub = pd.DataFrame({'file_name': data['test']['file_name'], 'pitch_class': final_classes, 'zone': final_zones})

    if os.path.exists(SAMPLE_SUB_PATH):
        # Follow the template's row order; files without a prediction get defaults.
        template = pd.read_csv(SAMPLE_SUB_PATH)
        df_sub = template[['file_name']].merge(df_sub, on='file_name', how='left')
        df_sub['pitch_class'] = df_sub['pitch_class'].fillna('ball')
        # NOTE(review): 14 is presumably an out-of-zone code -- confirm against the data dictionary.
        df_sub['zone'] = df_sub['zone'].fillna(14).astype(int)

    df_sub.to_csv(OUTPUT_PATH, index=False)
    print(f"Submission Saved to {OUTPUT_PATH}")

# Entry point: run the whole pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    run_pipeline()

Using device: MPS (Apple Silicon Acceleration)
Processing CSV Data...

[INFO] Found checkpoint at best_pitch_model.pth. Loading weights...
[INFO] Weights loaded successfully! Continuing from best model.

--- Generating Submission ---
Loaded best model weights for inference.


Inference: 100%|██████████| 500/500 [08:48<00:00,  1.06s/it]

Submission Saved to submission_hybrid_best_gemini_better.csv





In [19]:
# score: 0.61080

import os
import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
import torchvision.models as models
from torchvision.models.video import R2Plus1D_18_Weights 
from tqdm import tqdm

# ==========================================
# 1. FILE PATHS
# ==========================================
# Root of the extracted competition dataset (relative to the notebook's working directory).
BASE_DIR = "data/Question4/baseball-pitch-tracking-cs-gy-6643/baseball_kaggle_dataset_trimmed_only"

# Labelled training rows and unlabelled test feature rows, read by get_data_splits().
TRAIN_CSV_PATH = os.path.join(BASE_DIR, "data", "train_ground_truth.csv")
TEST_FEATS_PATH = os.path.join(BASE_DIR, "data", "test_features.csv")
# Video folders; TRAIN_VIDEO_DIR serves both the train and the validation split.
TRAIN_VIDEO_DIR = os.path.join(BASE_DIR, "train_trimmed")
TEST_VIDEO_DIR = os.path.join(BASE_DIR, "test")
SAMPLE_SUB_PATH = "data/Question4/baseball-pitch-tracking-cs-gy-6643/test_submission_template.csv"
# NOTE(review): the file name says "lr5e6" but CONFIG['lr'] in this cell is 5e-5 -- confirm which is intended.
OUTPUT_PATH = "submission_hybrid_final_gemini_lower_lr5e6.csv"
# Checkpoint file; the same name is used by the other training cells in this notebook.
MODEL_SAVE_PATH = "best_pitch_model.pth"

# ==========================================
# 2. CONFIGURATION
# ==========================================
CONFIG = {
    'seed': 42,
    'epochs': 10,         # Number of NEW epochs to run
    'batch_size': 16,  
    'lr': 5e-5,
    'frames': 16,         # Frames sampled per clip
    'img_size': 112,      # Square side length each frame is resized to
    'hidden_dim': 256,    # NOTE(review): not referenced by the visible model code, which hard-codes 256
    
    'resume_checkpoint': True, # Set to True to load previous best
    'train_model': True
}


def seed_everything(seed):
    """Seed the NumPy and PyTorch random generators so runs are repeatable."""
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)


# Seed up front so head initialisation, DataLoader shuffling and dropout are
# reproducible, not just the train/val split.
seed_everything(CONFIG['seed'])

# Device Selection
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# ==========================================
# 3. UTILS & DATA LOADING
# ==========================================
def load_video_clip(path, num_frames=16, resize=(112, 112)):
    """Decode a video file into a normalized, fixed-length clip tensor.

    Every frame is read with OpenCV, converted from BGR to RGB and resized.
    `num_frames` frames are then taken at evenly spaced positions across the
    video (frames repeat when the video is shorter than `num_frames`).

    Args:
        path: Video file path handed to cv2.VideoCapture.
        num_frames: Number of frames in the returned clip.
        resize: Target frame size as (width, height), the order cv2.resize expects.

    Returns:
        Float tensor of shape (3, num_frames, height, width): pixel values
        scaled to [0, 1] and then standardized per channel. If no frame can be
        decoded, an all-zero tensor of the same shape is returned.
    """
    width, height = resize

    cap = cv2.VideoCapture(path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, (width, height))
            frames.append(frame)
    finally:
        cap.release()

    if len(frames) == 0:
        # Decoded frames are (height, width) arrays, so the placeholder must
        # use the same order to stay batchable for non-square sizes.
        return torch.zeros(3, num_frames, height, width)

    # Evenly spaced frame positions from first to last frame.
    indices = np.linspace(0, len(frames) - 1, num_frames).astype(int)
    sampled_frames = np.array([frames[i] for i in indices])

    # (T, H, W, C) uint8 -> (C, T, H, W) float in [0, 1].
    tensor = torch.from_numpy(sampled_frames).permute(3, 0, 1, 2).float() / 255.0
    # Per-channel statistics published for torchvision's pretrained video models.
    mean = torch.tensor([0.43216, 0.394666, 0.37645]).view(3, 1, 1, 1)
    std = torch.tensor([0.22803, 0.22145, 0.216989]).view(3, 1, 1, 1)
    tensor = (tensor - mean) / std
    return tensor

def get_data_splits():
    """Load the CSVs, encode/scale the features and build the train/val/test frames.

    Steps:
      * drop training rows missing any label column;
      * label-encode 'stand' and 'p_throws' with an encoder fitted on train+test;
      * fill missing feature values with 0 and standardize them with statistics
        fitted on the full (pre-split) training frame;
      * map pitch_class to 1 (strike) / 0 (ball) and zone to a contiguous index;
      * hold out 15% of the training rows for validation, seeded by CONFIG['seed'].

    Returns:
        dict with keys 'train', 'val', 'test' (DataFrames), 'phy_cols' (feature
        column names), 'idx_to_zone' (class index -> original zone value) and
        'num_zones'.
    """
    print("Processing CSV Data...")
    label_columns = ['pitch_class', 'zone', 'plate_x', 'plate_z']
    df_train = pd.read_csv(TRAIN_CSV_PATH)
    df_train = df_train.dropna(subset=label_columns)
    df_test = pd.read_csv(TEST_FEATS_PATH)

    phy_cols = [
        'release_speed', 'effective_speed', 'release_spin_rate',
        'release_pos_x', 'release_pos_y', 'release_pos_z',
        'pfx_x', 'pfx_z', 'sz_top', 'sz_bot'
    ]

    # Categorical handedness columns become integer codes and join the feature list.
    categorical_cols = ['stand', 'p_throws']
    for col in categorical_cols:
        encoder = LabelEncoder()
        encoder.fit(pd.concat([df_train[col], df_test[col]]))
        df_train[col] = encoder.transform(df_train[col])
        df_test[col] = encoder.transform(df_test[col])
    phy_cols.extend(categorical_cols)

    scaler = StandardScaler()
    train_features = df_train[phy_cols].fillna(0)
    df_train[phy_cols] = scaler.fit_transform(train_features)
    test_features = df_test[phy_cols].fillna(0)
    df_test[phy_cols] = scaler.transform(test_features)

    df_train['label_class'] = df_train['pitch_class'].map({'strike': 1, 'ball': 0})

    # Zone ids are re-indexed to 0..K-1 in sorted order for CrossEntropyLoss.
    zones = sorted(df_train['zone'].unique())
    idx_to_zone = dict(enumerate(zones))
    zone_map = {zone: index for index, zone in idx_to_zone.items()}
    df_train['label_zone'] = df_train['zone'].map(zone_map)

    train_idx, val_idx = train_test_split(df_train.index, test_size=0.15, random_state=CONFIG['seed'])

    splits = {
        'train': df_train.loc[train_idx],
        'val': df_train.loc[val_idx],
        'test': df_test,
        'phy_cols': phy_cols,
        'idx_to_zone': idx_to_zone,
        'num_zones': len(zones)
    }
    return splits

class HybridDataset(Dataset):
    """Dataset pairing each pitch video clip with its tabular feature vector.

    Args:
        df: Frame with a 'file_name' column and the `phy_cols` columns; for
            labelled data also 'label_class' and 'label_zone'.
        video_dir: Directory that the 'file_name' values are relative to.
        phy_cols: Names of the numeric feature columns fed to the model.
        mode: 'test' yields (video, physics); any other value also yields the
            class and zone labels.
    """

    def __init__(self, df, video_dir, phy_cols, mode='train'):
        # Positional lookups in __getitem__ need a clean 0..N-1 index.
        self.df = df.reset_index(drop=True)
        self.video_dir = video_dir
        self.phy_cols = phy_cols
        self.mode = mode

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        sample = self.df.iloc[idx]

        features = sample[self.phy_cols].values.astype(np.float32)
        physics_vec = torch.tensor(features)

        clip_path = os.path.join(self.video_dir, sample['file_name'])
        side = CONFIG['img_size']
        video_tensor = load_video_clip(clip_path, num_frames=CONFIG['frames'], resize=(side, side))

        if self.mode == 'test':
            return video_tensor, physics_vec

        # Class label gets a trailing dim of 1 to line up with the single-logit head.
        label_class = torch.tensor(sample['label_class'], dtype=torch.float).unsqueeze(0)
        label_zone = torch.tensor(sample['label_zone'], dtype=torch.long)
        return video_tensor, physics_vec, label_class, label_zone

# ==========================================
# 4. MODEL ARCHITECTURE
# ==========================================
class PitchHybridModel(nn.Module):
    """Two-branch network fusing video features with tabular pitch features.

    A torchvision R(2+1)D-18 backbone (default pretrained weights, classifier
    replaced by identity) embeds the clip; a small MLP embeds the physics
    vector. The two embeddings are concatenated, fused, and passed to two
    linear heads.

    Args:
        physics_dim: Length of the physics feature vector.
        num_zones: Number of zone classes predicted by the zone head.
    """

    def __init__(self, physics_dim, num_zones):
        super().__init__()

        video_width, physics_width, fused_width = 256, 128, 256

        # Pretrained video backbone with its classification layer stripped,
        # so it outputs the pooled feature vector.
        backbone = models.video.r2plus1d_18(weights=R2Plus1D_18_Weights.DEFAULT)
        backbone_features = backbone.fc.in_features
        backbone.fc = nn.Identity()
        self.video_backbone = backbone

        self.video_fc = nn.Sequential(
            nn.Linear(backbone_features, video_width),
            nn.ReLU(),
            nn.Dropout(0.3),
        )

        self.physics_net = nn.Sequential(
            nn.Linear(physics_dim, physics_width),
            nn.BatchNorm1d(physics_width),
            nn.ReLU(),
            nn.Linear(physics_width, physics_width),
            nn.ReLU(),
        )

        self.fusion_layer = nn.Sequential(
            nn.Linear(video_width + physics_width, fused_width),
            nn.BatchNorm1d(fused_width),
            nn.ReLU(),
            nn.Dropout(0.2),
        )

        # One logit for strike/ball, num_zones logits for the zone.
        self.head_class = nn.Linear(fused_width, 1)
        self.head_zone = nn.Linear(fused_width, num_zones)

    def forward(self, video, physics):
        """Return (class_logit, zone_logits) for a batch of clips and feature vectors."""
        video_embedding = self.video_fc(self.video_backbone(video))
        physics_embedding = self.physics_net(physics)
        fused = self.fusion_layer(torch.cat((video_embedding, physics_embedding), dim=1))
        class_logit = self.head_class(fused)
        zone_logits = self.head_zone(fused)
        return class_logit, zone_logits

# ==========================================
# 5. VALIDATION HELPER (Used for Baseline & Loop)
# ==========================================
def validate(model, val_loader, crit_c, crit_z):
    """Score `model` on a labelled loader without updating any weights.

    Args:
        model: Network returning (class_logit, zone_logits) for (video, physics).
        val_loader: Loader yielding (video, physics, class_label, zone_label) batches.
        crit_c: Loss applied to the class logit.
        crit_z: Loss applied to the zone logits.

    Returns:
        (avg_loss, acc_c, acc_z, score): mean of the per-batch summed losses,
        class accuracy, zone accuracy, and 0.7 * acc_c + 0.3 * acc_z.
    """
    model.eval()
    loss_sum = 0
    class_hits = 0
    zone_hits = 0
    n_samples = 0

    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Validating"):
            vid, phy, lc, lz = (item.to(device) for item in batch)
            logits_c, logits_z = model(vid, phy)

            loss_sum += (crit_c(logits_c, lc) + crit_z(logits_z, lz)).item()

            # Threshold the sigmoid probability at 0.5 for the binary head.
            class_pred = (torch.sigmoid(logits_c) > 0.5).float()
            class_hits += (class_pred == lc).sum().item()
            zone_pred = torch.argmax(logits_z, dim=1)
            zone_hits += (zone_pred == lz).sum().item()
            n_samples += lc.size(0)

    acc_c = class_hits / n_samples
    acc_z = zone_hits / n_samples
    avg_loss = loss_sum / len(val_loader)
    return avg_loss, acc_c, acc_z, (0.7 * acc_c) + (0.3 * acc_z)

# ==========================================
# 6. MAIN PIPELINE
# ==========================================
def run_pipeline():
    """Train (optionally resuming), checkpoint the best model, and write a submission.

    Steps, in order:
      1. Build the splits with ``get_data_splits()`` and create the model and
         train/validation loaders.
      2. If ``CONFIG['resume_checkpoint']`` is set and ``MODEL_SAVE_PATH`` exists,
         load those weights and score them on the validation set, so only a
         strictly better epoch can overwrite the checkpoint.
      3. If ``CONFIG['train_model']`` is set, train for ``CONFIG['epochs']``
         epochs and save the weights whenever the validation score improves.
      4. Reload the checkpoint when one exists, predict on the test set, align
         the predictions with the submission template (when present), and
         write the CSV to ``OUTPUT_PATH``.

    Returns:
        None. Side effects: may write ``MODEL_SAVE_PATH`` and always writes
        ``OUTPUT_PATH``; progress is printed to stdout.
    """
    data = get_data_splits()
    model = PitchHybridModel(physics_dim=len(data['phy_cols']), num_zones=data['num_zones']).to(device)

    # Loaders
    train_ds = HybridDataset(data['train'], TRAIN_VIDEO_DIR, data['phy_cols'], mode='train')
    # Validation clips live in the training video directory; mode='train' is
    # reused, presumably so that labels are yielded alongside the inputs.
    val_ds = HybridDataset(data['val'], TRAIN_VIDEO_DIR, data['phy_cols'], mode='train')
    train_loader = DataLoader(train_ds, batch_size=CONFIG['batch_size'], shuffle=True, num_workers=0)
    val_loader = DataLoader(val_ds, batch_size=CONFIG['batch_size'], shuffle=False, num_workers=0)

    crit_c = nn.BCEWithLogitsLoss()
    crit_z = nn.CrossEntropyLoss()
    optimizer = optim.AdamW(model.parameters(), lr=CONFIG['lr'])

    # --- RESUME LOGIC (FIXED) ---
    best_score = 0.0

    if CONFIG['resume_checkpoint'] and os.path.exists(MODEL_SAVE_PATH):
        print(f"\n[INFO] Found checkpoint at {MODEL_SAVE_PATH}. Loading weights...")
        # NOTE: torch.load relies on pickle; only point MODEL_SAVE_PATH at
        # checkpoints from a trusted source. Only model weights are restored —
        # the optimizer above starts fresh.
        model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))

        print("[INFO] Calculating Baseline Score of loaded model...")
        _, start_c_acc, start_z_acc, start_score = validate(model, val_loader, crit_c, crit_z)

        # The loaded model's score becomes the bar a new epoch has to beat.
        best_score = start_score
        print(f"[INFO] Resuming with Baseline Score: {best_score:.4f} (Class: {start_c_acc:.3f}, Zone: {start_z_acc:.3f})")
    else:
        print("[INFO] Starting from scratch (Best Score = 0.0)")

    # --- TRAINING ---
    if CONFIG['train_model']:
        print(f"\n--- Starting Training ({CONFIG['epochs']} new epochs) ---")

        for epoch in range(CONFIG['epochs']):
            model.train()
            train_loop = tqdm(train_loader, desc=f"Epoch {epoch+1}/{CONFIG['epochs']}", leave=True)

            for vid, phy, lc, lz in train_loop:
                vid, phy, lc, lz = vid.to(device), phy.to(device), lc.to(device), lz.to(device)

                optimizer.zero_grad()
                out_c, out_z = model(vid, phy)
                # Unweighted sum of the class and zone losses.
                loss = crit_c(out_c, lc) + crit_z(out_z, lz)
                loss.backward()
                optimizer.step()
                train_loop.set_postfix(loss=loss.item())

            # Use Helper Function for Validation
            val_loss, val_c, val_z, val_score = validate(model, val_loader, crit_c, crit_z)

            print(f"Epoch {epoch+1} | Loss: {val_loss:.4f} | Class Acc: {val_c:.3f} | Zone Acc: {val_z:.3f} | SCORE: {val_score:.4f}")

            # Save if BETTER than current best (Loaded or New)
            if val_score > best_score:
                print(f" >>> IMPROVEMENT ({best_score:.4f} -> {val_score:.4f}). Saving Model...")
                best_score = val_score
                torch.save(model.state_dict(), MODEL_SAVE_PATH)
            else:
                print(f" ... No improvement (Best: {best_score:.4f})")

            print("-" * 50)

    # --- INFERENCE ---
    print("\n--- Generating Submission ---")
    if os.path.exists(MODEL_SAVE_PATH):
        print("Loading BEST model for prediction...")
        model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=device))
    else:
        # Make the fallback explicit instead of silently predicting with
        # whatever weights happen to be in memory.
        print(f"[WARN] No checkpoint found at {MODEL_SAVE_PATH}; predicting with the current in-memory weights.")

    test_ds = HybridDataset(data['test'], TEST_VIDEO_DIR, data['phy_cols'], mode='test')
    test_loader = DataLoader(test_ds, batch_size=CONFIG['batch_size'], shuffle=False, num_workers=0)

    model.eval()
    final_classes = []
    final_zones = []

    with torch.no_grad():
        for vid, phy in tqdm(test_loader, desc="Inference"):
            vid, phy = vid.to(device), phy.to(device)
            out_c, out_z = model(vid, phy)

            c_probs = torch.sigmoid(out_c).cpu().numpy().flatten()
            z_idxs = torch.argmax(out_z, dim=1).cpu().numpy().flatten()

            final_classes.extend(['strike' if p > 0.5 else 'ball' for p in c_probs])
            # Map model zone indices back to the original zone labels.
            final_zones.extend([data['idx_to_zone'][z] for z in z_idxs])

    # shuffle=False above keeps predictions in the same order as data['test'].
    df_sub = pd.DataFrame({'file_name': data['test']['file_name'], 'pitch_class': final_classes, 'zone': final_zones})

    if os.path.exists(SAMPLE_SUB_PATH):
        # Re-order to the template's rows; files without a prediction get
        # the defaults below ('ball' / zone 14 — the choice of 14 is not
        # explained in this file).
        template = pd.read_csv(SAMPLE_SUB_PATH)
        df_sub = template[['file_name']].merge(df_sub, on='file_name', how='left')
        df_sub['pitch_class'] = df_sub['pitch_class'].fillna('ball')
        df_sub['zone'] = df_sub['zone'].fillna(14).astype(int)

    df_sub.to_csv(OUTPUT_PATH, index=False)
    print(f"Submission Saved to {OUTPUT_PATH}")

# Entry point: run the full pipeline only when __name__ is "__main__".
if __name__ == "__main__":
    run_pipeline()

Using device: mps
Processing CSV Data...

[INFO] Found checkpoint at best_pitch_model.pth. Loading weights...
[INFO] Calculating Baseline Score of loaded model...


Validating: 100%|██████████| 57/57 [02:16<00:00,  2.40s/it]


[INFO] Resuming with Baseline Score: 0.6228 (Class: 0.724, Zone: 0.386)

--- Starting Training (10 new epochs) ---


Epoch 1/10: 100%|██████████| 319/319 [1:24:14<00:00, 15.84s/it, loss=0.245]
Validating: 100%|██████████| 57/57 [02:03<00:00,  2.17s/it]


Epoch 1 | Loss: 3.4239 | Class Acc: 0.710 | Zone Acc: 0.390 | SCORE: 0.6140
 ... No improvement (Best: 0.6228)
--------------------------------------------------


Epoch 2/10: 100%|██████████| 319/319 [3:17:33<00:00, 37.16s/it, loss=0.288]    
Validating: 100%|██████████| 57/57 [02:19<00:00,  2.45s/it]


Epoch 2 | Loss: 3.5017 | Class Acc: 0.687 | Zone Acc: 0.361 | SCORE: 0.5890
 ... No improvement (Best: 0.6228)
--------------------------------------------------


Epoch 3/10: 100%|██████████| 319/319 [9:08:12<00:00, 103.11s/it, loss=0.382]   
Validating: 100%|██████████| 57/57 [02:12<00:00,  2.33s/it]


Epoch 3 | Loss: 3.3565 | Class Acc: 0.709 | Zone Acc: 0.368 | SCORE: 0.6066
 ... No improvement (Best: 0.6228)
--------------------------------------------------


Epoch 4/10:  88%|████████▊ | 282/319 [10:57:05<1:26:12, 139.81s/it, loss=0.175]


KeyboardInterrupt: 