In [25]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from transformers import CLIPProcessor, CLIPModel
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import os


image_dir = '/kaggle/input/mesho-chll/MESHO/train_images/'
csv_file = '/kaggle/input/mesho-chll/MESHO/train_MESH.csv'


batch_size = 64
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


df = pd.read_csv(csv_file)
attr_columns = [f'attr_{i}' for i in range(1, 11)]
df[attr_columns] = df[attr_columns].fillna('no')

category_encoder = LabelEncoder()
df['Category'] = category_encoder.fit_transform(df['Category'])

attr_encoders = {}
for col in attr_columns:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])
    attr_encoders[col] = encoder

df['file_path'] = df['id'].apply(lambda x: f'{image_dir}{str(x).zfill(6)}.jpg')


clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
clip_model.to(device)
clip_model.eval()

class CustomDataset(Dataset):
    def __init__(self, df, processor):
        self.df = df
        self.processor = processor

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        img_path = self.df.iloc[idx]['file_path']
        image = Image.open(img_path).convert('RGB')
        inputs = self.processor(images=image, return_tensors="pt")
        features = self.df.iloc[idx][['Category']].values.astype(np.float32)
        labels = self.df.iloc[idx][attr_columns].values.astype(np.int64)
        
        # Move inputs to the correct device
        inputs = {k: v.squeeze(0) for k, v in inputs.items()}    # Move labels to device
        
        return (inputs, features), labels

from sklearn.model_selection import train_test_split


train_df, val_df = train_test_split(df, test_size=0.4, random_state=42)

def custom_dataloader(df, processor, batch_size, shuffle=True):
    dataset = CustomDataset(df, processor)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)


train_loader = custom_dataloader(train_df, clip_processor, batch_size)
val_loader = custom_dataloader(val_df, clip_processor, batch_size, shuffle=False)




In [21]:
class MultiOutputModel(nn.Module):
    def __init__(self, clip_model, num_features, num_outputs_per_attr):
        super(MultiOutputModel, self).__init__()
        
        self.clip_model = clip_model
        self.clip_model.eval()  

        
        clip_output_dim = 512  
        self.fc_features = nn.Linear(clip_output_dim + num_features, 512)
        self.fc1 = nn.Linear(512, 256)

      
        self.attr_outputs = nn.ModuleList([nn.Linear(256, num_outputs) for num_outputs in num_outputs_per_attr])

    def forward(self, inputs, features):
        image_inputs = inputs['pixel_values'].to(device)  


        with torch.no_grad():
            image_embeddings = self.clip_model.get_image_features(image_inputs)

    
        min_batch_size = min(image_embeddings.size(0), features.size(0))
        image_embeddings = image_embeddings[:min_batch_size]
        features = features[:min_batch_size]

     
        x = torch.cat([image_embeddings, features], dim=1)

        
        x = torch.relu(self.fc_features(x))
        x = torch.relu(self.fc1(x))
        outputs = [attr_output(x) for attr_output in self.attr_outputs]

        return outputs

num_features = 1
num_classes_list = [len(attr_encoders[f'attr_{i}'].classes_) for i in range(1, 11)]
model = MultiOutputModel(clip_model, num_features=num_features, num_outputs_per_attr=num_classes_list)
model = model.to(device)


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [22]:
def compute_accuracy(outputs, labels):
    """
    Compute accuracy for multi-output model.
    Args:
    outputs (list of tensors): Model outputs for each attribute.
    labels (tensor): Ground truth labels.
    """
    correct = 0
    total = 0
    for output, label in zip(outputs, labels.T):  
        preds = output.argmax(dim=1)
        correct += (preds == label).sum().item()
        total += label.size(0)
    return correct / total

In [23]:
import os
import torch
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score

def load_model_checkpoint(model, checkpoint_path):
    """Load the model from the specified checkpoint path."""
    if os.path.exists(checkpoint_path):
        model.load_state_dict(torch.load(checkpoint_path))
        print(f"Model loaded from {checkpoint_path}")
    else:
        print(f"Checkpoint not found at {checkpoint_path}")

def compute_f1(outputs, labels):
    """
    Compute F1 score for multi-output model.
    Args:
    outputs (list of tensors): Model outputs for each attribute.
    labels (tensor): Ground truth labels.
    """
    all_preds = []
    all_labels = []
    for output, label in zip(outputs, labels.T): 
        preds = output.argmax(dim=1).cpu()
        all_preds.extend(preds.numpy())
        all_labels.extend(label.cpu().numpy())
    return f1_score(all_labels, all_preds, average='micro')

def train_model(model, train_loader, val_loader, criterion, optimizer, epochs=15, save_dir='model_checkpoints', checkpoint_path=None):
    os.makedirs(save_dir, exist_ok=True)
    model.to(device)
    
   
    if checkpoint_path:
        load_model_checkpoint(model, checkpoint_path)

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        train_correct = 0
        train_total = 0
        train_f1_scores = []

        with tqdm(total=len(train_loader), desc=f'Epoch {epoch + 1}/{epochs}', unit='batch') as pbar:
            for batch_idx, ((inputs, features), labels) in enumerate(train_loader):
                inputs = {k: v.to(device) for k, v in inputs.items()}
                features = features.to(device)
                labels = labels.to(device)
                
                optimizer.zero_grad()
                
       
                outputs = model(inputs, features)
                labels = labels.long()
                total_loss = 0.0

                for idx, (output, label) in enumerate(zip(outputs, labels.T)):
                    loss = criterion(output, label)
                    total_loss += loss

       
                total_loss.backward()
                optimizer.step()

                running_loss += total_loss.item()
                batch_accuracy = compute_accuracy(outputs, labels)
                train_correct += batch_accuracy * len(labels)
                train_total += len(labels)
                train_f1_scores.append(compute_f1(outputs, labels))
                
                pbar.set_postfix({'Batch Loss': total_loss.item(), 'Train Acc': batch_accuracy})
                pbar.update(1)

        avg_train_loss = running_loss / len(train_loader)
        avg_train_accuracy = train_correct / train_total
        avg_train_f1 = sum(train_f1_scores) / len(train_f1_scores)


        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        val_f1_scores = []
        with torch.no_grad():
            for (inputs, features), labels in val_loader:
                inputs = {k: v.to(device) for k, v in inputs.items()}
                features = features.to(device)
                labels = labels.to(device)

                outputs = model(inputs, features)
                labels = labels.long()
                
                total_loss = 0.0
                for idx, (output, label) in enumerate(zip(outputs, labels.T)):
                    loss = criterion(output, label)
                    total_loss += loss

                val_loss += total_loss.item()
                batch_accuracy = compute_accuracy(outputs, labels)
                val_correct += batch_accuracy * len(labels)
                val_total += len(labels)
                val_f1_scores.append(compute_f1(outputs, labels))

        avg_val_loss = val_loss / len(val_loader)
        avg_val_accuracy = val_correct / val_total
        avg_val_f1 = sum(val_f1_scores) / len(val_f1_scores)

        print(f'Epoch {epoch + 1} | Train Loss: {avg_train_loss:.4f} | Train Acc: {avg_train_accuracy:.4f} | Train F1: {avg_train_f1:.4f} | Val Loss: {avg_val_loss:.4f} | Val Acc: {avg_val_accuracy:.4f} | Val F1: {avg_val_f1:.4f}')

      
        checkpoint_path = os.path.join(save_dir, f'model_epoch_{epoch + 1}.pth')
        torch.save(model.state_dict(), checkpoint_path)
        print(f'Model saved at {checkpoint_path}')


In [26]:
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,  
    criterion=criterion,
    optimizer=optimizer,
    epochs=15,

)

Epoch 1/15: 100%|██████████| 172/172 [05:02<00:00,  1.76s/batch, Batch Loss=7.83, Train Acc=0.706]


Epoch 1 | Train Loss: 8.9891 | Train Acc: 0.6466 | Train F1: 0.6466 | Val Loss: 7.9573 | Val Acc: 0.6813 | Val F1: 0.6814
Model saved at model_checkpoints/model_epoch_1.pth


Epoch 2/15: 100%|██████████| 172/172 [02:56<00:00,  1.03s/batch, Batch Loss=6.98, Train Acc=0.724]


Epoch 2 | Train Loss: 7.8158 | Train Acc: 0.6818 | Train F1: 0.6818 | Val Loss: 7.6417 | Val Acc: 0.6870 | Val F1: 0.6871
Model saved at model_checkpoints/model_epoch_2.pth


Epoch 3/15: 100%|██████████| 172/172 [02:56<00:00,  1.03s/batch, Batch Loss=7.51, Train Acc=0.679]


Epoch 3 | Train Loss: 7.5655 | Train Acc: 0.6876 | Train F1: 0.6876 | Val Loss: 7.5686 | Val Acc: 0.6861 | Val F1: 0.6862
Model saved at model_checkpoints/model_epoch_3.pth


Epoch 4/15: 100%|██████████| 172/172 [02:58<00:00,  1.04s/batch, Batch Loss=7.92, Train Acc=0.694]


Epoch 4 | Train Loss: 7.4224 | Train Acc: 0.6898 | Train F1: 0.6898 | Val Loss: 7.4561 | Val Acc: 0.6873 | Val F1: 0.6874
Model saved at model_checkpoints/model_epoch_4.pth


Epoch 5/15: 100%|██████████| 172/172 [02:59<00:00,  1.04s/batch, Batch Loss=7, Train Acc=0.687]   


Epoch 5 | Train Loss: 7.3276 | Train Acc: 0.6933 | Train F1: 0.6933 | Val Loss: 7.4346 | Val Acc: 0.6905 | Val F1: 0.6905
Model saved at model_checkpoints/model_epoch_5.pth


Epoch 6/15: 100%|██████████| 172/172 [02:58<00:00,  1.04s/batch, Batch Loss=6.9, Train Acc=0.713] 


Epoch 6 | Train Loss: 7.2453 | Train Acc: 0.6976 | Train F1: 0.6976 | Val Loss: 7.3682 | Val Acc: 0.6941 | Val F1: 0.6941
Model saved at model_checkpoints/model_epoch_6.pth


Epoch 7/15: 100%|██████████| 172/172 [02:58<00:00,  1.04s/batch, Batch Loss=7.58, Train Acc=0.684]


Epoch 7 | Train Loss: 7.1853 | Train Acc: 0.6990 | Train F1: 0.6990 | Val Loss: 7.3680 | Val Acc: 0.6947 | Val F1: 0.6947
Model saved at model_checkpoints/model_epoch_7.pth


Epoch 8/15: 100%|██████████| 172/172 [03:36<00:00,  1.26s/batch, Batch Loss=7.21, Train Acc=0.714]


Epoch 8 | Train Loss: 7.1285 | Train Acc: 0.7017 | Train F1: 0.7017 | Val Loss: 7.3105 | Val Acc: 0.6955 | Val F1: 0.6956
Model saved at model_checkpoints/model_epoch_8.pth


Epoch 9/15: 100%|██████████| 172/172 [02:58<00:00,  1.04s/batch, Batch Loss=7.42, Train Acc=0.702]


Epoch 9 | Train Loss: 7.0780 | Train Acc: 0.7023 | Train F1: 0.7023 | Val Loss: 7.3962 | Val Acc: 0.6926 | Val F1: 0.6926
Model saved at model_checkpoints/model_epoch_9.pth


Epoch 10/15: 100%|██████████| 172/172 [03:04<00:00,  1.07s/batch, Batch Loss=7.25, Train Acc=0.698]


Epoch 10 | Train Loss: 7.0489 | Train Acc: 0.7039 | Train F1: 0.7039 | Val Loss: 7.3529 | Val Acc: 0.6925 | Val F1: 0.6926
Model saved at model_checkpoints/model_epoch_10.pth


Epoch 11/15: 100%|██████████| 172/172 [03:02<00:00,  1.06s/batch, Batch Loss=6.19, Train Acc=0.794]


Epoch 11 | Train Loss: 7.0034 | Train Acc: 0.7054 | Train F1: 0.7054 | Val Loss: 7.3462 | Val Acc: 0.6958 | Val F1: 0.6959
Model saved at model_checkpoints/model_epoch_11.pth


Epoch 12/15: 100%|██████████| 172/172 [03:03<00:00,  1.07s/batch, Batch Loss=7.27, Train Acc=0.703]


Epoch 12 | Train Loss: 6.9696 | Train Acc: 0.7062 | Train F1: 0.7062 | Val Loss: 7.3414 | Val Acc: 0.6962 | Val F1: 0.6962
Model saved at model_checkpoints/model_epoch_12.pth


Epoch 13/15: 100%|██████████| 172/172 [02:57<00:00,  1.03s/batch, Batch Loss=6.85, Train Acc=0.695]


Epoch 13 | Train Loss: 6.9248 | Train Acc: 0.7084 | Train F1: 0.7084 | Val Loss: 7.3397 | Val Acc: 0.6935 | Val F1: 0.6937
Model saved at model_checkpoints/model_epoch_13.pth


Epoch 14/15: 100%|██████████| 172/172 [02:57<00:00,  1.03s/batch, Batch Loss=6.88, Train Acc=0.697]


Epoch 14 | Train Loss: 6.8938 | Train Acc: 0.7089 | Train F1: 0.7089 | Val Loss: 7.3489 | Val Acc: 0.6969 | Val F1: 0.6970
Model saved at model_checkpoints/model_epoch_14.pth


Epoch 15/15: 100%|██████████| 172/172 [02:57<00:00,  1.03s/batch, Batch Loss=7.04, Train Acc=0.683]


Epoch 15 | Train Loss: 6.8713 | Train Acc: 0.7106 | Train F1: 0.7106 | Val Loss: 7.3434 | Val Acc: 0.6976 | Val F1: 0.6977
Model saved at model_checkpoints/model_epoch_15.pth


In [28]:
import pandas as pd

# Load the test data
test_df = pd.read_csv('/kaggle/input/mesho-chll/MESHO/test.csv')

# Filter for rows where Category is 'Women Tops & Tunics'
saree_df = test_df[test_df['Category'] == 'Sarees']

# Set the Category column to 0 for the filtered dataframe
# women_tops_tunics_df['Category'] = 0

# Save the resulting dataframe to CSV
saree_df.to_csv('saree_cat2.csv', index=False)



In [29]:
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import pandas as pd
import numpy as np
import os
from tqdm import tqdm  # Import tqdm for progress bar

def load_trained_model(model_path, clip_model, num_features, num_classes_list):
    # Load the state dictionary from the model path
    state_dict = torch.load(model_path)

#     if list(state_dict.keys())[0].startswith("module."):
#         # Remove 'module.' prefix if present
#         state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
    
    # Create the model instance before loading the state dictionary
    model = MultiOutputModel(clip_model, num_features=num_features, num_outputs_per_attr=num_classes_list)
    
    # Load the state dictionary with strict=False to avoid key mismatches
    model.load_state_dict(state_dict)
    
    model.to(device)
    model.eval()  # Set to evaluation mode
    return model


# Paths and parameters
model_path = '/kaggle/working/model_checkpoints/model_epoch_15.pth'  # Path to your trained model
image_dir = '/kaggle/input/mesho-chll/MESHO/test_images/'  # Path to test images directory
csv_file = '/kaggle/working/saree_cat2.csv'  # Path to test CSV file
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the CLIP model and processor
# Load the CLIP model and processor
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
# Assuming the same encoding for the attributes used during training
num_features = 1  # 'Category'
num_classes_list = [len(attr_encoders[f'attr_{i}'].classes_) for i in range(1, 11)]

# Load the model with the trained weights
model = load_trained_model(model_path, clip_model, num_features, num_classes_list)

# Load the test CSV that contains the 'id' and 'Category'
test_df = pd.read_csv(csv_file)

# Preprocess a single image for inference
def preprocess_image(image_path, processor):
    image = Image.open(image_path).convert('RGB')
    inputs = processor(images=image, return_tensors="pt")
    return inputs

# Inference function
def predict(model, image_path, processor, features):
    # Preprocess the image
    inputs = preprocess_image(image_path, processor)
    inputs = inputs.to(device)

    # Ensure features are properly shaped for model input
    features = torch.tensor(features).unsqueeze(0).to(device)  # Shape as (1, num_features)

    # Perform forward pass (inference)
    with torch.no_grad():
        attr_outputs = model(inputs, features)

    # Convert outputs to predicted labels
    predicted_labels = [torch.argmax(output, dim=1).item() for output in attr_outputs]
   
    return predicted_labels

# Example: Perform inference on all test images
predictions_list = []

# Wrap the loop with tqdm for progress bar
for idx, row in tqdm(test_df.iterrows(), total=len(test_df), desc="Processing Images"):
    image_id = str(row['id']).zfill(6) + '.jpg'  # Convert ID to format 000000.jpg
    image_path = os.path.join(image_dir, image_id)
    # Encode the Category (make sure it was encoded similarly as in training)
    category_encoded_value = 0
    
    # Perform prediction
    predicted_attrs = predict(model, image_path, clip_processor, [category_encoded_value])

    # Decode the predicted attributes back to their original labels
    decoded_predictions = {f'attr_{i}': attr_encoders[f'attr_{i}'].inverse_transform([pred])[0] 
                           for i, pred in enumerate(predicted_attrs, 1)}
    
    # Store the results for this image, without Category for now
    predictions_list.append({'id': row['id'], **decoded_predictions})

# Convert predictions to a DataFrame for better output readability
predictions_df = pd.DataFrame(predictions_list)

# Merge predictions with the original test DataFrame based on 'id'
merged_df = pd.merge(test_df[['id', 'Category']], predictions_df, on='id')

# Count attributes that are not predicted as 'no'
# Assuming attribute columns are named attr_1, attr_2, ..., attr_10
attribute_columns = [f'attr_{i}' for i in range(1, 11)]

# Create the 'len' column based on the count of attributes that are not 'no'
merged_df['len'] = merged_df[attribute_columns].apply(lambda x: sum(attr != 'no' for attr in x), axis=1)

# Reorder columns to have 'len' after 'Category' and before 'attr_1'
cols = ['id', 'Category', 'len'] + attribute_columns
merged_df = merged_df[cols]

# Display merged predictions for all test images
print(merged_df)

# Save predictions to a CSV file
merged_df.to_csv('cat_2_pred.csv', index=False)


  state_dict = torch.load(model_path)
Processing Images: 100%|██████████| 7102/7102 [04:10<00:00, 28.37it/s]


         id Category  len         attr_1        attr_2        attr_3  \
0      3787   Sarees    8        default       default  small border   
1      3788   Sarees    6             no  woven design            no   
2      3789   Sarees    9  same as saree          zari    big border   
3      3790   Sarees    7             no  woven design    big border   
4      3791   Sarees    4             no  woven design            no   
...     ...      ...  ...            ...           ...           ...   
7097  11150   Sarees    4             no  woven design            no   
7098  11151   Sarees    1             no            no    big border   
7099  11152   Sarees    6             no  woven design            no   
7100  11153   Sarees    6             no  woven design    big border   
7101  11154   Sarees    6             no          zari  small border   

          attr_4       attr_5    attr_6      attr_7      attr_8        attr_9  \
0        default        party   default          no   

In [15]:
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
import pandas as pd
import numpy as np
import os
from tqdm import tqdm  # Import tqdm for progress bar

def load_trained_model(model_path, clip_model, num_features, num_classes_list):
    # Load the state dictionary from the model path
    state_dict = torch.load(model_path)

#     if list(state_dict.keys())[0].startswith("module."):
#         # Remove 'module.' prefix if present
#         state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()}
    
    # Create the model instance before loading the state dictionary
    model = MultiOutputModel(clip_model, num_features=num_features, num_outputs_per_attr=num_classes_list)
    
    # Load the state dictionary with strict=False to avoid key mismatches
    model.load_state_dict(state_dict)
    
    model.to(device)
    model.eval()  # Set to evaluation mode
    return model


# Paths and parameters
model_path = '/kaggle/working/model_checkpoints/model_epoch_clip_base.pth'  # Path to your trained model
image_dir = '/kaggle/input/meesho-chll/MESHO/test_images/'  # Path to test images directory
csv_file = '/kaggle/input/meesho-chll/MESHO/test.csv'  # Path to test CSV file
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# Load the CLIP model and processor
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
# Assuming the same encoding for the attributes used during training
num_features = 1  # 'Category'
num_classes_list = [len(attr_encoders[f'attr_{i}'].classes_) for i in range(1, 11)]

# Load the model with the trained weights
model = load_trained_model(model_path, clip_model, num_features, num_classes_list)


test_df = pd.read_csv(csv_file)


def preprocess_image(image_path, processor):
    image = Image.open(image_path).convert('RGB')
    inputs = processor(images=image, return_tensors="pt")
    return inputs


def predict(model, image_path, processor, features):

    inputs = preprocess_image(image_path, processor)
    inputs = inputs.to(device)


    features = torch.tensor(features).unsqueeze(0).to(device)  # Shape as (1, num_features)

    
    with torch.no_grad():
        attr_outputs = model(inputs, features)

    
    predicted_labels = [torch.argmax(output, dim=1).item() for output in attr_outputs]
   
    return predicted_labels

predictions_list = []


for idx, row in tqdm(test_df.iterrows(), total=len(test_df), desc="Processing Images"):
    image_id = str(row['id']).zfill(6) + '.jpg' 
    image_path = os.path.join(image_dir, image_id)

   
    category_encoded_value = category_encoder.transform([row['Category']])[0]

    predicted_attrs = predict(model, image_path, clip_processor, [category_encoded_value])


    decoded_predictions = {f'attr_{i}': attr_encoders[f'attr_{i}'].inverse_transform([pred])[0] 
                           for i, pred in enumerate(predicted_attrs, 1)}

    predictions_list.append({'id': row['id'], **decoded_predictions})


predictions_df = pd.DataFrame(predictions_list)


merged_df = pd.merge(test_df[['id', 'Category']], predictions_df, on='id')


attribute_columns = [f'attr_{i}' for i in range(1, 11)]


merged_df['len'] = merged_df[attribute_columns].apply(lambda x: sum(attr != 'no' for attr in x), axis=1)


cols = ['id', 'Category', 'len'] + attribute_columns
merged_df = merged_df[cols]


print(merged_df)


merged_df.to_csv('submission.csv', index=False)


  state_dict = torch.load(model_path)
Processing Images: 100%|██████████| 30205/30205 [14:07<00:00, 35.65it/s]


          id             Category  len      attr_1  attr_2   attr_3  \
0          0          Men Tshirts    2  multicolor   round       no   
1          1          Men Tshirts    3          no   round  printed   
2          2          Men Tshirts    1          no      no       no   
3          3          Men Tshirts    5  multicolor    polo    solid   
4          4          Men Tshirts    1          no   round       no   
...      ...                  ...  ...         ...     ...      ...   
30200  30484  Women Tops & Tunics    9       green    boxy     crop   
30201  30485  Women Tops & Tunics    0          no      no       no   
30202  30486  Women Tops & Tunics    3          no      no  regular   
30203  30487  Women Tops & Tunics    3          no      no       no   
30204  30488  Women Tops & Tunics    9        pink  fitted     crop   

           attr_4         attr_5   attr_6      attr_7         attr_8  \
0              no             no       no          no             no   
1  