### Connecting to Google Drive

In [1]:
# Colab helper module used to access Google Drive from this runtime.
from google.colab import drive
# Mount Drive at /content/drive; the later cells read and write paths under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


### Importing libraries used

In [None]:
import os
import shutil
import pandas as pd

### Arranging the images into folders by class

In [2]:
# Paths
# Labels CSV and raw image folder on the mounted Drive.
csv_file = '/content/drive/MyDrive/SOI Space Data Science/train.csv'
image_dir = '/content/drive/MyDrive/SOI Space Data Science/train_dataset'
# Absolute path: the later cells read '/content/output_images', so a relative
# path would only line up with them when the working directory is /content.
output_dir = '/content/output_images'

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Read the CSV file (uses the 'File Name' and 'Class' columns)
df = pd.read_csv(csv_file)

# Create one sub-folder per class up front, instead of once per image.
for class_name in df['Class'].unique():
    os.makedirs(os.path.join(output_dir, str(class_name)), exist_ok=True)

# Copy every image into the folder named after its class.
# Zipping the two columns avoids the per-row Series that iterrows() builds.
for image_name, class_name in zip(df['File Name'], df['Class']):
    # Path to the original image
    src_image_path = os.path.join(image_dir, image_name)

    # Path to the destination inside the class folder
    dest_image_path = os.path.join(output_dir, str(class_name), image_name)

    # Copy keeps the originals on Drive untouched; swap in shutil.move to
    # relocate the files instead of duplicating them.
    shutil.copy(src_image_path, dest_image_path)


### Saving the generated folder to Google Drive

In [7]:
# Source directory (your organized images)
source_dir = '/content/output_images'

# Destination directory on Google Drive
destination_dir = '/content/drive/MyDrive/SOI Space Data Science/output_image'

# Copy the folder to Google Drive.
# dirs_exist_ok=True makes the cell safe to re-run: without it, copytree raises
# FileExistsError as soon as the destination folder already exists.
# The call returns the destination path, which the cell displays as its output.
shutil.copytree(source_dir, destination_dir, dirs_exist_ok=True)


'/content/drive/MyDrive/SOI Space Data Science/output_image'

### Downloading the data to a local device

In [4]:
# Folder holding the class-sorted images
source_dir = '/content/output_images'

# Pack that folder into a zip archive; make_archive appends the '.zip'
# extension to the base name given here.
shutil.make_archive(
    base_name='/content/output_image',
    format='zip',
    root_dir=source_dir,
)

# Hand the archive to the browser as a download
from google.colab import files
files.download('/content/output_image.zip')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Checking that the data is correct

Expected number of images per class (the per-folder file counts printed below should match):
- other            3651
- crater           1062
- bright dune       597
- slope streak      335
- swiss cheese      223
- dark dune         216
- spider             66
- impact ejecta      51

In [8]:
# Root of the class-sorted copy on Google Drive
image_path = '/content/drive/MyDrive/SOI Space Data Science/output_image'

# Walk the tree and report, for each folder, its sub-folders and/or file count.
for dirpath, dirnames, filenames in os.walk(image_path):
    # Folder with no files: list only the sub-folders it contains.
    if not filenames:
        print(f'{dirpath} have directories {dirnames}')
        continue

    # Folder with files but no sub-folders: report only the file count.
    if not dirnames:
        print(f'{dirpath} having {len(filenames)} files')
        continue

    # Folder containing both sub-folders and files.
    print(f'{dirpath} have directories {dirnames} having {len(filenames)} files')

/content/drive/MyDrive/SOI Space Data Science/output_image have directories ['bright dune', 'other', 'spider', 'crater', 'swiss cheese', 'impact ejecta', 'slope streak', 'dark dune']
/content/drive/MyDrive/SOI Space Data Science/output_image/bright dune having 597 files
/content/drive/MyDrive/SOI Space Data Science/output_image/other having 3651 files
/content/drive/MyDrive/SOI Space Data Science/output_image/spider having 66 files
/content/drive/MyDrive/SOI Space Data Science/output_image/crater having 1062 files
/content/drive/MyDrive/SOI Space Data Science/output_image/swiss cheese having 223 files
/content/drive/MyDrive/SOI Space Data Science/output_image/impact ejecta having 51 files
/content/drive/MyDrive/SOI Space Data Science/output_image/slope streak having 335 files
/content/drive/MyDrive/SOI Space Data Science/output_image/dark dune having 216 files
