In [1]:
import os
import random
import matplotlib.pyplot as plt

In [2]:
# Step up one directory so the relative paths used further down
# (e.g. "./Raw_data/ts/ts", "artifacts/...") resolve from there.
# NOTE(review): the move is relative to the *current* working directory, so
# running this cell a second time climbs one level further — restart the
# kernel before re-running the notebook.
os.chdir("../")
%pwd

'/home/megh/AI/Projects/Traffic_signs_detection'

In [3]:
from dataclasses import dataclass 
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings consumed by the data-ingestion stage.

    Field order is part of the interface (positional construction), so it
    must not be rearranged.
    """

    # Directory created for this stage, and the folder holding the raw dataset.
    root_dir: Path
    source_dir: Path

    # Destination folders for the copied images and their label files.
    train_images: Path
    test_images: Path
    train_labels: Path
    test_labels: Path
    val_images: Path
    val_labels: Path

    # Fractions of the full dataset assigned to the test and validation splits;
    # whatever is left over becomes the training split.
    train_test_split: float
    train_val_split: float

In [4]:
from src.constants import *
from src.utils.common import read_yaml,create_directories

In [5]:
from random import choice
import shutil

In [6]:
class ConfigurationManager:
    """Reads the project's config and params YAML files and hands out stage configs."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Load both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Every stage writes below this folder, so create it up front.
        create_directories([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the settings object for the data-ingestion stage.

        Creates the stage's root directory as a side effect.

        Returns:
            A ``DataIngestionConfig`` filled from the ``data_ingestions``
            section of the config file and the split fractions in params.
        """
        ingestion = self.config.data_ingestions
        split_params = self.params

        create_directories([ingestion.root_dir])

        return DataIngestionConfig(
            root_dir=ingestion.root_dir,
            source_dir=ingestion.source_dir,
            train_images=ingestion.train_images,
            test_images=ingestion.test_images,
            val_images=ingestion.val_images,
            train_labels=ingestion.train_labels,
            test_labels=ingestion.test_labels,
            val_labels=ingestion.val_labels,
            train_test_split=split_params.TRAIN_TEST_SPLIT,
            train_val_split=split_params.TRAIN_VAL_SPLIT,
        )


In [10]:

# Sort every file below the raw-data folder into label files (".txt")
# and everything else, which is treated as an image.
imgs, labels = [], []
for _root, _subdirs, filenames in os.walk("./Raw_data/ts/ts"):
    for name in filenames:
        bucket = labels if name.endswith(".txt") else imgs
        bucket.append(name)

In [11]:
# Sanity check: the two counts should match if every image has a label file.
print(len(imgs),len(labels))

741 741


In [21]:
# Scratch copies of the paths the ingestion stage works with:
# where training images / labels are written, and where the raw data lives.
train_images = Path("artifacts/data_ingestion/images/train")
train_labels = Path("artifacts/data_ingestion/labels/train")
source_dir = Path("./Raw_data/ts/ts")

In [17]:
# Create the training output folders, skipping any that already exist.
for target_dir in (train_images, train_labels):
    if not os.path.isdir(target_dir):
        create_directories([target_dir])

[2023-09-13 16:52:08,410: INFO: common: created directory at:artifacts/data_ingestion/images/train]


In [18]:
# Fraction of all images held out for the test split.
TRAIN_TEST_SPLIT = 0.1
# Fraction of all images held out for the validation split.
TRAIN_VAL_SPLIT = 0.2

In [19]:

# Turn the split fractions into absolute sizes; fractional parts are
# truncated and the training split receives whatever is left.
total_images = len(imgs)
test_count = int(TRAIN_TEST_SPLIT * total_images)
val_count = int(TRAIN_VAL_SPLIT * total_images)
train_count = total_images - test_count - val_count
print(total_images, test_count, val_count, train_count)

741 74 148 519


In [22]:
# Copy `train_count` randomly chosen image/label pairs into the train folders.
# NOTE: each pick is removed from `imgs` / `labels`, so the lists shrink in
# place; rebuild them before running this cell again.
for i in range(train_count):
    file_jpg = choice(imgs)
    # splitext copes with any extension length (".jpg", ".jpeg", ...),
    # unlike slicing off a fixed four characters.
    file_txt = os.path.splitext(file_jpg)[0] + ".txt"

    shutil.copy(os.path.join(source_dir, file_jpg), os.path.join(train_images, file_jpg))
    shutil.copy(os.path.join(source_dir, file_txt), os.path.join(train_labels, file_txt))

    imgs.remove(file_jpg)
    labels.remove(file_txt)
print(len(imgs))


222


In [25]:
class DataIngestion:
    """Splits a folder of image/label pairs into train, validation and test sets.

    An image is any non-``.txt`` file under ``config.source_dir`` that has a
    sibling ``<stem>.txt`` label file in the same directory. Pairs are copied
    (never moved) into the destination folders named by the config.
    """

    def __init__(self, config: "DataIngestionConfig"):
        """Keep the ingestion settings.

        Args:
            config: Object exposing ``source_dir``, the six ``*_images`` /
                ``*_labels`` destination folders, ``train_test_split`` and
                ``train_val_split``.
        """
        self.config = config

    def _collect_pairs(self):
        """Return ``(pairs, skipped)``: sorted (image, label) paths and the unlabeled-file count."""
        pairs = []
        skipped = 0
        for dirname, dirs, files in os.walk(self.config.source_dir):
            # Sorting in place pins the traversal order, which os.walk
            # otherwise leaves to the filesystem; a fixed order is what makes
            # the seeded shuffle reproducible.
            dirs.sort()
            available = set(files)
            for filename in sorted(files):
                stem, extension = os.path.splitext(filename)
                if extension.lower() == ".txt":
                    continue
                label_name = stem + ".txt"
                if label_name not in available:
                    # Hidden files or unlabeled images cannot form a pair.
                    skipped += 1
                    continue
                # Keep the directory the file was found in so that files in
                # sub-folders are copied from their real location.
                pairs.append(
                    (os.path.join(dirname, filename), os.path.join(dirname, label_name))
                )
        return pairs, skipped

    @staticmethod
    def _copy_pairs(pairs, images_dir, labels_dir):
        """Copy every (image, label) pair into the given image and label folders."""
        for image_path, label_path in pairs:
            shutil.copy(image_path, os.path.join(images_dir, os.path.basename(image_path)))
            shutil.copy(label_path, os.path.join(labels_dir, os.path.basename(label_path)))

    def prepare_data(self, seed=42):
        """Create the output folders and copy a random train/val/test split into them.

        Split sizes are ``int(fraction * total)`` for test and validation; the
        training split receives the remainder. The summary line and the two
        "remaining" counts are printed as progress output.

        Args:
            seed: Seed for the shuffle. With a fixed seed and an unchanged
                source folder, every run selects the same files for each
                split, so re-running cannot leak an image from one split
                into another. Pass ``None`` for a different split per call.

        Raises:
            ValueError: If the configured fractions are negative or leave a
                negative number of training samples.
        """
        cfg = self.config
        pairs, skipped = self._collect_pairs()
        if skipped:
            print(f"skipped {skipped} file(s) without a matching .txt label")

        # Creating images and label directories for Train, val and test
        destinations = (
            cfg.train_images,
            cfg.train_labels,
            cfg.test_images,
            cfg.test_labels,
            cfg.val_images,
            cfg.val_labels,
        )
        for directory in destinations:
            if not os.path.isdir(directory):
                create_directories([directory])

        total = len(pairs)
        test_count = int(cfg.train_test_split * total)
        val_count = int(cfg.train_val_split * total)
        train_count = total - test_count - val_count
        if test_count < 0 or val_count < 0 or train_count < 0:
            raise ValueError(
                "train_test_split and train_val_split must be non-negative "
                "and sum to at most 1"
            )
        print(total, test_count, val_count, train_count)

        # One shuffle followed by slicing yields three disjoint splits.
        random.Random(seed).shuffle(pairs)
        val_end = train_count + val_count
        train_pairs = pairs[:train_count]
        val_pairs = pairs[train_count:val_end]
        test_pairs = pairs[val_end:]

        self._copy_pairs(train_pairs, cfg.train_images, cfg.train_labels)
        print(total - train_count)

        self._copy_pairs(val_pairs, cfg.val_images, cfg.val_labels)
        print(total - val_end)

        self._copy_pairs(test_pairs, cfg.test_images, cfg.test_labels)

        

        


In [26]:
# Run the data-ingestion stage end to end: load the YAML configuration,
# build the stage settings, then copy the train/val/test split.
# No try/except here: a handler that only re-raises handles nothing, and
# letting the exception propagate keeps the traceback shorter.
config = ConfigurationManager()
data_ingestion_config = config.get_data_ingestion_config()
data_ingestion = DataIngestion(config=data_ingestion_config)
data_ingestion.prepare_data()

[2023-09-13 16:58:30,211: INFO: common: yaml fileconfig.yamlloaded Successfully]
[2023-09-13 16:58:30,212: INFO: common: yaml fileparams.yamlloaded Successfully]
[2023-09-13 16:58:30,213: INFO: common: created directory at:artifacts]
[2023-09-13 16:58:30,213: INFO: common: created directory at:artifacts/data_ingestion]
[2023-09-13 16:58:30,215: INFO: common: created directory at:artifacts/data_ingestion/images/train]
[2023-09-13 16:58:30,216: INFO: common: created directory at:artifacts/data_ingestion/labels/train]
[2023-09-13 16:58:30,216: INFO: common: created directory at:artifacts/data_ingestion/images/test]
[2023-09-13 16:58:30,217: INFO: common: created directory at:artifacts/data_ingestion/labels/test]
[2023-09-13 16:58:30,217: INFO: common: created directory at:artifacts/data_ingestion/images/val]
[2023-09-13 16:58:30,217: INFO: common: created directory at:artifacts/data_ingestion/labels/val]
741 74 148 519
222
74


741