In [6]:
import os
import random
import numpy as np
import pandas as pd
import cv2
from kaggle.api.kaggle_api_extended import KaggleApi

# Authenticate against Kaggle. No credentials appear in this cell; the kaggle
# client is expected to locate them on its own.
api = KaggleApi()
api.authenticate()

# Download the dataset archive and unzip it into `download_path`
dataset_slug = 'akshat103/e-waste-image-dataset'
download_path = 'e_waste_images'
api.dataset_download_files(dataset_slug, path=download_path, unzip=True)

# Vocabularies for the metadata columns. NOTE: every metadata value written
# below is drawn at random from these lists; none of it is derived from the
# image or from the folder it lives in.
item_types = ['Laptop', 'Battery', 'Printer', 'Cable', 'Scanner', 'TV', 'Mobile']
categories = ['Computers', 'Mobiles', 'Batteries', 'Printers', 'Monitors',
              'TVs', 'Cables', 'Scanners', 'Servers', 'Others']
conditions = ['Working', 'Dead']
sectors = ['IT', 'Telecom', 'Retail', 'Healthcare', 'Education', 'Manufacturing']

# Seed the generator so the random metadata is identical on every re-run
random.seed(42)

# Prepare data storage
records = []
image_dir = os.path.join(download_path, 'E-Waste-Image-Dataset')  # this might change based on dataset structure

# os.walk() silently yields nothing for a missing directory, which would
# produce an empty CSV reported as a success. Fail loudly instead.
if not os.path.isdir(image_dir):
    raise FileNotFoundError(
        f"Expected image folder '{image_dir}' was not found; "
        f"check the layout of '{download_path}' and update image_dir."
    )

# Walk through folders and process images
for root, dirs, files in os.walk(image_dir):
    # Sorting in place makes os.walk descend in a stable order, so that the
    # seeded random values are paired with the same images on every run.
    dirs.sort()
    for file in sorted(files):
        if not file.lower().endswith(('.jpg', '.png', '.jpeg')):
            continue
        try:
            img_path = os.path.join(root, file)

            # cv2.imread returns None (it does not raise) when the file
            # cannot be decoded, so check explicitly for a clear message.
            img = cv2.imread(img_path)
            if img is None:
                raise ValueError("cv2.imread could not decode the file")

            # Downscale to 32x32 and scale pixel values by 1/255
            img = cv2.resize(img, (32, 32))
            img = img / 255.0

            # One row per image; Image_Data holds the pixels as nested lists
            records.append({
                "Item_Type": random.choice(item_types),
                "Weight(kg)": round(random.uniform(0.1, 20.0), 2),
                "Condition": random.choice(conditions),
                "Sector": random.choice(sectors),
                "Category": random.choice(categories),
                "Image_Data": img.tolist()
            })

        except Exception as e:
            # Best effort: report the file and keep going with the rest
            print(f"Failed to process image {file}: {e}")

# Save dataset to CSV
df = pd.DataFrame(records)
df.to_csv("real_e_waste_dataset.csv", index=False)
print(f"Dataset created successfully with {len(df)} entries → saved as 'real_e_waste_dataset.csv'")

ModuleNotFoundError: No module named 'cv2'

In [7]:
!pip install opencv-python


In [11]:
import os
import pandas as pd

# Change this to your actual dataset folder path.
# Each immediate subfolder name is used as the label for the images inside it.
dataset_folder = r"E:\E-Waste\train"

data = []

# os.listdir() returns entries in arbitrary order; sort both levels so the
# CSV rows come out in the same order on every run and every machine.
for label in sorted(os.listdir(dataset_folder)):
    label_path = os.path.join(dataset_folder, label)

    # Skip stray files sitting next to the category folders
    if not os.path.isdir(label_path):
        continue

    for file in sorted(os.listdir(label_path)):
        if file.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
            full_path = os.path.join(label_path, file)
            data.append({
                "image_path": full_path,
                "label": label
            })

# Naming the columns keeps the header in the CSV even when no image was found
df = pd.DataFrame(data, columns=["image_path", "label"])

# Save as CSV
df.to_csv("e_waste_dataset.csv", index=False)

print("CSV created: e_waste_dataset.csv")


CSV created: e_waste_dataset.csv
