In [None]:
import os
import torch
import torch.nn as nn
import torch.hub as hub
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

# === Device & AMP ===
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Module-level gradient scaler for mixed-precision (AMP) training.
scaler = torch.cuda.amp.GradScaler()
print("Using device:", device)

# === Paths ===
train_csv     = "/kaggle/input/smainewdataset/Phase_2_data/labels_train.csv"
val_csv       = "/kaggle/input/smainewdataset/Phase_2_data/labels_val.csv"
train_img_dir = "/kaggle/input/smainewdataset/Phase_2_data/images_train/images_train"
val_img_dir   = "/kaggle/input/smainewdataset/Phase_2_data/images_val/images_val"

# === Load & Filter DataFrames ===
train_df = pd.read_csv(train_csv)
val_df = pd.read_csv(val_csv)

# Training split: keep only rows whose coordinates lie inside the bounding box
# (both ends of each range are inclusive).
train_df = train_df[
    train_df['latitude'].between(200000, 230000)
    & train_df['longitude'].between(140000, 150000)
].reset_index(drop=True)

# Validation split: drop rows whose filename ends in "_<id>.<ext>" with an
# id listed in exclude_ids.
exclude_ids = {95, 145, 146, 158, 159, 160, 161}
val_df = val_df[
    ~val_df['filename'].apply(
        lambda fn: int(os.path.splitext(fn)[0].rsplit('_', 1)[-1]) in exclude_ids
    )
].reset_index(drop=True)

# === Standardization stats ===
# Mean and standard deviation of the training coordinates; the helpers below
# map values into and out of z-score space using these statistics.
lat_mean = train_df.latitude.mean()
lat_std = train_df.latitude.std()
lon_mean = train_df.longitude.mean()
lon_std = train_df.longitude.std()


def std_lat(x):
    """Standardize a latitude value (or array) with the training statistics."""
    return (x - lat_mean) / lat_std


def denstd_lat(x):
    """Invert std_lat: map a standardized latitude back to original units."""
    return x * lat_std + lat_mean


def std_lon(x):
    """Standardize a longitude value (or array) with the training statistics."""
    return (x - lon_mean) / lon_std


def denstd_lon(x):
    """Invert std_lon: map a standardized longitude back to original units."""
    return x * lon_std + lon_mean

# === Add cyclic embeddings ===
def add_cyclic(df, ref_df=None):
    """Return a copy of ``df`` with sin/cos feature columns appended.

    Added columns, in order: ``lat_sin``, ``lat_cos``, ``lon_sin``, ``lon_cos``,
    ``minute_of_day``, ``time_sin``, ``time_cos``, ``wd_sin``, ``wd_cos``,
    ``mo_sin``, ``mo_cos``.

    Args:
        df: DataFrame with ``latitude`` and ``longitude`` columns. It is not
            modified. A ``timestamp`` column is not required, because the time
            of day is a fixed placeholder (see below).
        ref_df: DataFrame whose ``latitude``/``longitude`` min and max define
            the range mapped onto one full period. Defaults to the module-level
            ``train_df`` so existing ``add_cyclic(df)`` calls behave as before.

    Returns:
        A new DataFrame with the feature columns added.
    """
    if ref_df is None:
        ref_df = train_df
    out = df.copy()

    # Compute the reference range once instead of once per feature column.
    lat_min = ref_df.latitude.min()
    lat_span = ref_df.latitude.max() - lat_min
    lon_min = ref_df.longitude.min()
    lon_span = ref_df.longitude.max() - lon_min

    lat_phase = 2 * np.pi * (out.latitude - lat_min) / lat_span
    lon_phase = 2 * np.pi * (out.longitude - lon_min) / lon_span
    out['lat_sin'] = np.sin(lat_phase)
    out['lat_cos'] = np.cos(lat_phase)
    out['lon_sin'] = np.sin(lon_phase)
    out['lon_cos'] = np.cos(lon_phase)

    # Time of day is a constant placeholder (12:30) for every row. The earlier
    # code parsed df.timestamp and then immediately overwrote the result with
    # these constants, so the parse only added a way to crash.
    hh, mm = 12, 30
    out['minute_of_day'] = hh * 60 + mm
    time_phase = 2 * np.pi * out.minute_of_day / 1440
    out['time_sin'] = np.sin(time_phase)
    out['time_cos'] = np.cos(time_phase)

    # Weekday and month features are constant placeholders as well.
    out['wd_sin'], out['wd_cos'] = 0.0, 1.0
    out['mo_sin'], out['mo_cos'] = 0.0, 1.0
    return out

# Append the derived feature columns to both splits (add_cyclic returns a copy,
# so the names are rebound to the augmented frames).
train_df = add_cyclic(train_df)
val_df   = add_cyclic(val_df)

# === Dataset definition ===
# Columns that GeoDataset packs, in this order, into the metadata vector.
# NOTE(review): lat_sin/lat_cos/lon_sin/lon_cos are computed from the same
# latitude/longitude columns that GeoDataset returns as regression targets —
# confirm that feeding target-derived features to the model is intended.
META = ['lat_sin','lat_cos','lon_sin','lon_cos','time_sin','time_cos','wd_sin','wd_cos','mo_sin','mo_cos']
class GeoDataset(Dataset):
    """Image + metadata dataset backed by a DataFrame.

    Each item is ``(image, meta, lat, lon, filename)``:
    the RGB image (after ``transform``, or ``ToTensor`` when none is given),
    a float32 tensor of the ``META`` columns, the standardized latitude and
    longitude as float32 scalars, and the row's filename.
    """

    def __init__(self, df, img_dir, transform=None):
        # Re-index from 0 so positional lookups line up with DataLoader indices.
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        record = self.df.iloc[idx]

        image_path = os.path.join(self.img_dir, record.filename)
        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        else:
            image = transforms.ToTensor()(image)

        meta_vec = torch.from_numpy(record[META].to_numpy(dtype=np.float32))
        lat_target = torch.tensor(std_lat(record.latitude), dtype=torch.float32)
        lon_target = torch.tensor(std_lon(record.longitude), dtype=torch.float32)
        return image, meta_vec, lat_target, lon_target, record.filename

# === Transforms & DataLoaders ===
# Training pipeline: resize, random augmentations, then tensor conversion and
# per-channel normalization.
train_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandAugment(),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
# Validation pipeline: same resize and normalization, no random augmentation.
val_tf = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
bs = 32
train_loader = DataLoader(
    GeoDataset(train_df, train_img_dir, train_tf),
    batch_size=bs,
    shuffle=True,
    num_workers=4,
)
val_loader = DataLoader(
    GeoDataset(val_df, val_img_dir, val_tf),
    batch_size=bs,
    shuffle=False,
    num_workers=4,
)

# === Load DINO V2 ViT-S/14 backbone ===
print("Loading DINO V2 ViT-S/14 backbone...")
dino = hub.load(
    'facebookresearch/dinov2:main',
    'dinov2_vits14',
    pretrained=True,
)
dino.eval()
# Sub-modules reused as the fusion model's backbone, in execution order:
# patch embedding, every transformer block, then the final norm layer.
# NOTE(review): only these sub-modules are reused; anything the hub model does
# between them in its own forward pass (e.g. adding a class token or position
# embeddings) is not part of this list — confirm that is intended.
modules = [dino.patch_embed, *dino.blocks, dino.norm]
embed_dim = dino.embed_dim
print(f"DINO V2 embed dim: {embed_dim}")

# === FusionModel definition ===
class FusionModel(nn.Module):
    """Fuse image features with a metadata vector and regress two values.

    The image branch runs the module-level ``modules`` list as a Sequential and
    keeps the token at sequence index 0. The metadata branch is a two-layer MLP
    ending in 64 features. Both are concatenated and passed through the head,
    which outputs two columns per sample.

    NOTE(review): the index-0 token is called a CLS token in the original code,
    but it is simply the first token produced by the Sequential backbone —
    confirm it really is a class token. Attribute names and layer order are
    kept as-is so existing checkpoints still load.

    Args:
        meta_dim: Number of metadata features per sample.
    """

    def __init__(self, meta_dim=10):
        super().__init__()
        drop_p = 0.5

        self.backbone = nn.Sequential(*modules)

        meta_layers = [
            nn.Linear(meta_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(drop_p),
        ]
        self.meta_net = nn.Sequential(*meta_layers)

        fused_dim = embed_dim + 64
        head_layers = [
            nn.Linear(fused_dim, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(drop_p),
            nn.Linear(256, 2),
        ]
        self.head = nn.Sequential(*head_layers)

    def forward(self, x, m):
        """Return the head output for image batch ``x`` and metadata batch ``m``."""
        tokens = self.backbone(x)
        first_token = tokens[:, 0]
        meta_feat = self.meta_net(m)
        fused = torch.cat([first_token, meta_feat], dim=1)
        return self.head(fused)

# === Fine-tune & Save Predictions ===
def _evaluate_loader(model, loader, device, use_amp):
    """Score ``model`` on ``loader`` in original coordinate units.

    Returns ``(avg_mse, frame)``: the mean of the latitude and longitude MSE,
    and a DataFrame with columns filename / pred_lat / pred_lon / true_lat /
    true_lon. Raises ValueError if the loader yields no batches.
    """
    model.eval()
    pred_chunks, true_chunks, filenames = [], [], []
    with torch.no_grad():
        for imgs, meta, lat, lon, fnames in loader:
            imgs, meta = imgs.to(device), meta.to(device)
            with torch.cuda.amp.autocast(enabled=use_amp):
                out = model(imgs, meta)
            # Under autocast the output can be float16; upcast before leaving
            # torch so de-standardisation does not start from half precision.
            pred_chunks.append(out.float().cpu().numpy())
            true_chunks.append(np.stack([lat.numpy(), lon.numpy()], axis=1))
            filenames.extend(fnames)
    if not pred_chunks:
        raise ValueError("evaluation loader produced no batches")

    preds = np.vstack(pred_chunks).astype(np.float64)
    trues = np.vstack(true_chunks).astype(np.float64)
    lat_pred, lon_pred = denstd_lat(preds[:, 0]), denstd_lon(preds[:, 1])
    lat_true, lon_true = denstd_lat(trues[:, 0]), denstd_lon(trues[:, 1])
    mse_lat = ((lat_pred - lat_true) ** 2).mean()
    mse_lon = ((lon_pred - lon_true) ** 2).mean()
    avg_mse = 0.5 * (mse_lat + mse_lon)
    frame = pd.DataFrame({
        'filename': filenames,
        'pred_lat': lat_pred,
        'pred_lon': lon_pred,
        'true_lat': lat_true,
        'true_lon': lon_true
    })
    return avg_mse, frame


def _train_one_epoch(model, loader, device, criterion, optimizer, scheduler,
                     grad_scaler, use_amp):
    """Run one optimisation pass over ``loader`` with AMP, grad clipping and a per-step LR schedule."""
    model.train()
    for imgs, meta, lat, lon, _ in loader:
        imgs, meta = imgs.to(device), meta.to(device)
        lat, lon = lat.to(device), lon.to(device)
        optimizer.zero_grad()
        with torch.cuda.amp.autocast(enabled=use_amp):
            out = model(imgs, meta)
            # Column 0 is trained against latitude, column 1 against longitude.
            loss = 0.5 * (criterion(out[:, 0], lat) + criterion(out[:, 1], lon))
        grad_scaler.scale(loss).backward()
        # Unscale first so clipping sees true gradient magnitudes.
        grad_scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        grad_scaler.step(optimizer)
        grad_scaler.update()
        scheduler.step()


def fine_tune_and_save(model, train_loader, val_loader, epochs=50,
                       checkpoint_path='/kaggle/input/model_latlong_10700/tensorflow2/default/1/best_fusion_10700.pth', output_dir='outputs'):
    """Load a checkpoint, fine-tune it, and export validation predictions.

    Steps:
      1. Load ``checkpoint_path`` into ``model`` and evaluate it on
         ``val_loader``; write ``init_predictions_<mse>.csv``.
      2. Train for ``epochs`` epochs (SmoothL1 loss, AdamW, OneCycleLR, AMP on
         CUDA), evaluating after each epoch and saving the weights with the
         lowest validation MSE to ``fine_tuned_best.pth``.
      3. Reload the best weights, evaluate once more, and write
         ``predictions_<mse>.csv``.

    All files are written to ``output_dir``. MSE values are reported in
    original coordinate units, using the module-level ``denstd_lat`` /
    ``denstd_lon`` helpers.

    Args:
        model: Network called as ``model(imgs, meta)`` and returning two
            columns per sample; its state dict must match the checkpoint.
        train_loader: Yields ``(imgs, meta, lat, lon, filenames)`` batches.
        val_loader: Same batch layout, used for every evaluation.
        epochs: Number of fine-tuning epochs.
        checkpoint_path: State-dict file to start from.
        output_dir: Directory for the checkpoint and CSV outputs (created if
            missing).

    Returns:
        None.
    """
    os.makedirs(output_dir, exist_ok=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # autocast / GradScaler only apply on CUDA; disable them explicitly on CPU.
    use_amp = device.type == "cuda"
    best_path = os.path.join(output_dir, 'fine_tuned_best.pth')

    # Load pretrained checkpoint. weights_only=True restricts unpickling to
    # tensors/containers, which is all a plain state dict needs.
    checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)
    model.load_state_dict(checkpoint)
    model.to(device)

    # -- Initial evaluation on validation set --
    avg_mse_i, df_i = _evaluate_loader(model, val_loader, device, use_amp)
    print(f"Init Val MSE (before fine-tune): {avg_mse_i:.4f}")
    init_csv = os.path.join(output_dir, f'init_predictions_{avg_mse_i:.4f}.csv')
    df_i.to_csv(init_csv, index=False)
    print(f"Saved initial predictions to {init_csv}")

    # -- Fine-tuning loop --
    criterion = nn.SmoothL1Loss()
    # NOTE: OneCycleLR drives the learning rate from max_lr, so the lr given
    # to AdamW here is effectively a placeholder.
    optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4, weight_decay=1e-2)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, max_lr=5e-4, epochs=epochs,
        steps_per_epoch=len(train_loader), pct_start=0.1, anneal_strategy='cos'
    )
    scaler_ft = torch.cuda.amp.GradScaler(enabled=use_amp)
    best_mse = float('inf')
    saved_best = False
    for ep in range(1, epochs + 1):
        _train_one_epoch(model, train_loader, device, criterion, optimizer,
                         scheduler, scaler_ft, use_amp)
        avg_mse, _ = _evaluate_loader(model, val_loader, device, use_amp)
        print(f"Epoch {ep}/{epochs} - Val MSE: {avg_mse:.4f}")
        if avg_mse < best_mse:
            best_mse = avg_mse
            torch.save(model.state_dict(), best_path)
            saved_best = True

    # -- Final evaluation & CSV export --
    if saved_best:
        best_ckpt = torch.load(best_path, map_location=device, weights_only=True)
        model.load_state_dict(best_ckpt)
    else:
        # No epoch produced a finite, improving MSE, so nothing was written in
        # this run. Do not load a possibly stale file from an earlier run.
        print("Warning: no best checkpoint was saved; evaluating current weights.")
    avg_mse, df = _evaluate_loader(model, val_loader, device, use_amp)
    print(f"Final Fine-tuned Val MSE: {avg_mse:.4f}")
    csv_path = os.path.join(output_dir, f'predictions_{avg_mse:.4f}.csv')
    df.to_csv(csv_path, index=False)
    print(f"Saved predictions to {csv_path}")

# === Run fine-tuning ===
# Build the fusion model with one metadata input per META column, then
# fine-tune it for 200 epochs (overriding fine_tune_and_save's default of 50).
if __name__ == '__main__':
    model = FusionModel(meta_dim=len(META))
    fine_tune_and_save(model, train_loader, val_loader, epochs=200)


In [None]:
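# NOTE: archived variant of the previous cell, kept commented out for reference.
# It differs mainly in add_cyclic (random time_sin/time_cos placeholders) and
# in sharing a single eval_loop helper for all evaluations.
# import os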
# import torch
# import torch.nn as nn
# import torch.hub as hub
# from torchvision import transforms
# from torch.utils.data import Dataset, DataLoader
# from PIL import Image
# import pandas as pd
# import numpy as np
# from tqdm.notebook import tqdm

# # === Device & AMP ===
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# scaler = torch.cuda.amp.GradScaler()
# print("Using device:", device)

# # === Paths ===
# train_csv     = "/kaggle/input/smainewdataset/Phase_2_data/labels_train.csv"
# val_csv       = "/kaggle/input/smainewdataset/Phase_2_data/labels_val.csv"
# train_img_dir = "/kaggle/input/smainewdataset/Phase_2_data/images_train/images_train"
# val_img_dir   = "/kaggle/input/smainewdataset/Phase_2_data/images_val/images_val"

# # === Load & Filter DataFrames ===
# train_df = pd.read_csv(train_csv)
# val_df   = pd.read_csv(val_csv)

# # Filter by coordinate range
# train_df = train_df[
#     train_df.latitude.between(200000, 230000) &
#     train_df.longitude.between(140000, 150000)
# ].reset_index(drop=True)
# exclude_ids = {95, 145, 146, 158, 159, 160, 161}
# val_df = val_df[~val_df['filename'].apply(
#     lambda fn: int(os.path.splitext(fn)[0].split('_')[-1]) in exclude_ids
# )].reset_index(drop=True)

# # === Standardization stats ===
# lat_mean, lat_std = train_df.latitude.mean(), train_df.latitude.std()
# lon_mean, lon_std = train_df.longitude.mean(), train_df.longitude.std()
# std_lat = lambda x: (x - lat_mean) / lat_std
# denstd_lat = lambda x: x * lat_std + lat_mean
# std_lon = lambda x: (x - lon_mean) / lon_std
# denstd_lon = lambda x: x * lon_std + lon_mean

# # === Add cyclic embeddings with random time placeholders ===
# def add_cyclic(df):
#     df = df.copy()
#     df['lat_sin'] = np.sin(2 * np.pi * (df.latitude - train_df.latitude.min()) /
#                          (train_df.latitude.max() - train_df.latitude.min()))
#     df['lat_cos'] = np.cos(2 * np.pi * (df.latitude - train_df.latitude.min()) /
#                          (train_df.latitude.max() - train_df.latitude.min()))
#     df['lon_sin'] = np.sin(2 * np.pi * (df.longitude - train_df.longitude.min()) /
#                          (train_df.longitude.max() - train_df.longitude.min()))
#     df['lon_cos'] = np.cos(2 * np.pi * (df.longitude - train_df.longitude.min()) /
#                          (train_df.longitude.max() - train_df.longitude.min()))
#     df['time_sin'] = np.random.rand(len(df)) #because time was not given with the test set 
#     df['time_cos'] = np.random.rand(len(df))
#     df['wd_sin'], df['wd_cos'] = 0.0, 1.0
#     df['mo_sin'], df['mo_cos'] = 0.0, 1.0
#     return df

# train_df = add_cyclic(train_df)
# val_df   = add_cyclic(val_df)

# # === Dataset definition ===
# META = ['lat_sin','lat_cos','lon_sin','lon_cos','time_sin','time_cos','wd_sin','wd_cos','mo_sin','mo_cos']
# class GeoDataset(Dataset):
#     def __init__(self, df, img_dir, transform=None):
#         self.df = df.reset_index(drop=True)
#         self.img_dir = img_dir
#         self.transform = transform
#     def __len__(self): return len(self.df)
#     def __getitem__(self, idx):
#         row = self.df.iloc[idx]
#         img = Image.open(os.path.join(self.img_dir, row.filename)).convert('RGB')
#         img = self.transform(img) if self.transform else transforms.ToTensor()(img)
#         meta = torch.from_numpy(row[META].to_numpy(dtype=np.float32))
#         lat = torch.tensor(std_lat(row.latitude), dtype=torch.float32)
#         lon = torch.tensor(std_lon(row.longitude), dtype=torch.float32)
#         return img, meta, lat, lon, row.filename

# train_tf = transforms.Compose([
#     transforms.Resize((224,224)), transforms.RandAugment(),
#     transforms.RandomHorizontalFlip(), transforms.ColorJitter(0.3,0.3,0.2,0.1),
#     transforms.ToTensor(), transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
# ])
# val_tf = transforms.Compose([
#     transforms.Resize((224,224)), transforms.ToTensor(),
#     transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
# ])
# bs = 32
# train_loader = DataLoader(GeoDataset(train_df, train_img_dir, train_tf), batch_size=bs, shuffle=True, num_workers=4)
# val_loader   = DataLoader(GeoDataset(val_df,   val_img_dir,   val_tf), batch_size=bs, shuffle=False, num_workers=4)

# print("Loading DINO V2 ViT-S/14 backbone...")
# dino = hub.load('facebookresearch/dinov2:main', 'dinov2_vits14', pretrained=True)
# dino.eval()
# modules = [dino.patch_embed] + list(dino.blocks) + [dino.norm]
# embed_dim = dino.embed_dim
# print(f"DINO V2 embed dim: {embed_dim}")

# class FusionModel(nn.Module):
#     def __init__(self, meta_dim=len(META)):
#         super().__init__()
#         self.backbone = nn.Sequential(*modules)
#         self.meta_net = nn.Sequential(
#             nn.Linear(meta_dim,128), nn.BatchNorm1d(128), nn.ReLU(), nn.Dropout(0.5),
#             nn.Linear(128,64), nn.BatchNorm1d(64), nn.ReLU(), nn.Dropout(0.5)
#         )
#         self.head = nn.Sequential(
#             nn.Linear(embed_dim + 64,256), nn.BatchNorm1d(256), nn.ReLU(), nn.Dropout(0.5),
#             nn.Linear(256,2)
#         )
#     def forward(self, x, m):
#         feats = self.backbone(x)
#         cls_token = feats[:,0]
#         meta_feat = self.meta_net(m)
#         return self.head(torch.cat([cls_token, meta_feat], dim=1))


# def fine_tune_and_save(model, train_loader, val_loader, epochs=50,
#                        checkpoint_path='/kaggle/input/model_latlong_10700/tensorflow2/default/1/best_fusion_10700.pth', output_dir='outputs'):
#     os.makedirs(output_dir, exist_ok=True)
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#     # Load pretrained checkpoint (weights only)
#     checkpoint = torch.load(checkpoint_path, map_location=device, weights_only=True)
#     model.load_state_dict(checkpoint)
#     model.to(device)

#     # Initial evaluation
#     def eval_loop(loader):
#         preds, trues, fns = [], [], []
#         model.eval()
#         with torch.no_grad():
#             for imgs, meta, lat, lon, fnames in loader:
#                 imgs, meta = imgs.to(device), meta.to(device)
#                 with torch.cuda.amp.autocast():
#                     out = model(imgs, meta).cpu().numpy()
#                 preds.append(out)
#                 trues.append(np.stack([lat.numpy(), lon.numpy()], axis=1))
#                 fns.extend(fnames)
#         preds = np.vstack(preds); trues = np.vstack(trues)
#         lat_pred = denstd_lat(preds[:,0]); lon_pred = denstd_lon(preds[:,1])
#         lat_true = trues[:,0]*lat_std + lat_mean; lon_true = trues[:,1]*lon_std + lon_mean
#         mse_lat = ((lat_pred - lat_true)**2).mean(); mse_lon = ((lon_pred - lon_true)**2).mean()
#         return 0.5*(mse_lat + mse_lon), fns, lat_pred, lon_pred, lat_true, lon_true

#     init_mse, fns_i, lat_p_i, lon_p_i, lat_t_i, lon_t_i = eval_loop(val_loader)
#     print(f"Init Val MSE: {init_mse:.4f}")
#     pd.DataFrame({'filename':fns_i,'pred_lat':lat_p_i,'pred_lon':lon_p_i,'true_lat':lat_t_i,'true_lon':lon_t_i})\
#        .to_csv(os.path.join(output_dir, f'init_predictions_{init_mse:.4f}.csv'), index=False)

#     # Training
#     criterion = nn.SmoothL1Loss()
#     optimizer = torch.optim.AdamW(model.parameters(), lr=2e-4, weight_decay=1e-2)
#     scheduler = torch.optim.lr_scheduler.OneCycleLR(
#         optimizer, max_lr=5e-4, epochs=epochs,
#         steps_per_epoch=len(train_loader), pct_start=0.1, anneal_strategy='cos'
#     )
#     scaler_ft = torch.cuda.amp.GradScaler()
#     best_mse = float('inf')

#     for ep in range(1, epochs+1):
#         model.train()
#         for imgs, meta, lat, lon, _ in train_loader:
#             imgs, meta, lat, lon = imgs.to(device), meta.to(device), lat.to(device), lon.to(device)
#             optimizer.zero_grad()
#             with torch.cuda.amp.autocast():
#                 out = model(imgs, meta)
#                 loss = 0.5*(criterion(out[:,0], lat) + criterion(out[:,1], lon))
#             scaler_ft.scale(loss).backward()
#             scaler_ft.unscale_(optimizer)
#             torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
#             scaler_ft.step(optimizer)
#             scaler_ft.update()
#             scheduler.step()

#         val_mse, _, _, _, _, _ = eval_loop(val_loader)
#         print(f"Epoch {ep}/{epochs} Val MSE: {val_mse:.4f}")
#         if val_mse < best_mse:
#             best_mse = val_mse
#             torch.save(model.state_dict(), os.path.join(output_dir, 'fine_tuned_best.pth'))

#     # Final eval
#     model.load_state_dict(torch.load(os.path.join(output_dir, 'fine_tuned_best.pth'), map_location=device, weights_only=True))
#     final_mse, fns_f, lat_p_f, lon_p_f, lat_t_f, lon_t_f = eval_loop(val_loader)
#     print(f"Final Val MSE: {final_mse:.4f}")
#     pd.DataFrame({'filename':fns_f,'pred_lat':lat_p_f,'pred_lon':lon_p_f,'true_lat':lat_t_f,'true_lon':lon_t_f})\
#        .to_csv(os.path.join(output_dir, f'predictions_{final_mse:.4f}.csv'), index=False)

# if __name__ == '__main__':
#     model = FusionModel()
#     fine_tune_and_save(model, train_loader, val_loader, epochs=200)
