In [1]:
import os

# Workflow
1. Update config.yaml
2. Update secrets.yaml [Optional]
3. Update params.yaml
4. Update the entity
5. Update the configuration manager in src config
6. Update the components
7. Update the pipeline
8. Update the main.py
9. Update the dvc.yaml

In [2]:
%pwd

'c:\\Users\\Rohan Brahmakshatri\\Documents\\Python Scripts\\EndtoEnd_Project\\chicken_disease_classification_project\\research'

In [3]:
# Move from the notebook folder up to the project root so that relative paths
# (config files, artifacts) resolve. The guard makes the cell safe to re-run:
# an unconditional os.chdir('../') climbs one level higher on every execution.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [4]:
#defining the entity

from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataIngestionConfig:
    """Immutable settings consumed by the data-ingestion step.

    Instances are built by the configuration manager from the
    ``data_ingestion`` section of the loaded configuration.
    """
    # Working directory for this stage; the configuration manager passes it to
    # create_dictionaries before building this object.
    root_dir: Path
    # URL handed to urllib's urlretrieve to fetch the archive.
    source_URL: str
    # Local path the downloaded archive is written to (and later opened as a zip).
    local_data_file: Path
    # Directory the archive contents are extracted into.
    unzip_dir: Path

In [5]:
from cnnClassifiers.constants import *
from cnnClassifiers.utils.common import read_yaml,create_dictionaries

In [6]:
class configurationManager:
    """Loads the project configuration files and builds typed stage configs."""

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH):
        """Read the config and params files and prepare the artifacts root.

        Args:
            config_filepath: Location of the main configuration YAML file.
            params_filepath: Location of the parameters YAML file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        # Make sure the top-level artifacts location exists before any stage
        # tries to write into it.
        create_dictionaries([self.config.artifacts_root])

    def get_data_ingestion_config(self) -> DataIngestionConfig:
        """Build the ``DataIngestionConfig`` from the ``data_ingestion`` section.

        Also ensures the stage's root directory exists.

        Returns:
            DataIngestionConfig: frozen settings for the data-ingestion step,
            with the path fields converted to ``pathlib.Path`` so they match
            the types the dataclass declares.
        """
        section = self.config.data_ingestion

        create_dictionaries([section.root_dir])

        # The raw values come straight from YAML; wrap the filesystem
        # locations in Path so consumers receive the declared type.
        return DataIngestionConfig(
            root_dir=Path(section.root_dir),
            source_URL=section.source_URL,
            local_data_file=Path(section.local_data_file),
            unzip_dir=Path(section.unzip_dir),
        )
    

In [7]:
#update the components

import os
import urllib.request as request
import zipfile
from cnnClassifiers.utils.common import get_size
from cnnClassifiers import logger



In [8]:
class DataIngestion:
    """Downloads the dataset archive and unpacks it on disk."""

    def __init__(self, config: "DataIngestionConfig"):
        """Store the ingestion settings.

        Args:
            config: Object exposing ``source_URL``, ``local_data_file`` and
                ``unzip_dir`` (normally a ``DataIngestionConfig``).
        """
        self.config = config

    def download_file(self):
        """Download the archive to ``local_data_file`` unless it already exists.

        The parent directory of the destination is created if missing. If the
        download fails or is interrupted, any partially written file is
        removed so that a later run does not mistake it for a complete
        download.

        Returns:
            None

        Raises:
            Whatever ``urllib.request.urlretrieve`` raises (e.g. ``URLError``,
            ``ContentTooShortError``); the exception is re-raised unchanged.
        """
        target = Path(self.config.local_data_file)

        if target.exists():
            logger.info(f"file already exists of size {get_size(target)}")
            return

        target.parent.mkdir(parents=True, exist_ok=True)

        try:
            filename, headers = request.urlretrieve(
                url=self.config.source_URL,
                filename=self.config.local_data_file,
            )
        except BaseException:
            # BaseException on purpose: an interrupted kernel (KeyboardInterrupt)
            # leaves a truncated file just like a network error does.
            if target.exists():
                target.unlink()
            raise

        logger.info(f"{filename} downloaded with following info \n {headers}")

    def extract_zipfile(self):
        """Extract the downloaded archive into ``unzip_dir``.

        The target directory is created if it does not exist.

        Returns:
            None
        """
        unzip_path = self.config.unzip_dir
        os.makedirs(unzip_path, exist_ok=True)
        with zipfile.ZipFile(self.config.local_data_file, "r") as zip_ref:
            zip_ref.extractall(unzip_path)
            
            

In [9]:
#update the pipeline

# Run the data-ingestion stage end to end: load config, download, extract.
try:
    config=configurationManager()
    data_ingestion_config=config.get_data_ingestion_config()
    data_ingestion=DataIngestion(config=data_ingestion_config)
    data_ingestion.download_file()
    data_ingestion.extract_zipfile()
except Exception as e:
    # Record the full traceback in the project log, then re-raise with a bare
    # `raise` so the original traceback is preserved unchanged.
    logger.exception(e)
    raise



[2023-07-31 15:59:40,150: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-07-31 15:59:40,156: INFO: common: yaml file: params.yaml loaded successfully]
[2023-07-31 15:59:40,158: INFO: common: created directory at path :artifacts]
[2023-07-31 15:59:40,162: INFO: common: created directory at path :artifacts/data_ingestion]


[2023-07-31 15:59:55,393: INFO: 2552048497: artifacts/data_ingestion/data.zip downloaded with following info 
 Connection: close
Content-Length: 11931933
Cache-Control: max-age=300
Content-Security-Policy: default-src 'none'; style-src 'unsafe-inline'; sandbox
Content-Type: application/zip
ETag: "6867923c6ae58f6d94c541aacdc291f35adc887130d6d2f3b91541c184390016"
Strict-Transport-Security: max-age=31536000
X-Content-Type-Options: nosniff
X-Frame-Options: deny
X-XSS-Protection: 1; mode=block
X-GitHub-Request-Id: 29FA:3BB152:142339:1CDF16:64C78D1F
Accept-Ranges: bytes
Date: Mon, 31 Jul 2023 10:29:52 GMT
Via: 1.1 varnish
X-Served-By: cache-ccu830021-CCU
X-Cache: MISS
X-Cache-Hits: 0
X-Timer: S1690799392.944065,VS0,VE1028
Vary: Authorization,Accept-Encoding,Origin
Access-Control-Allow-Origin: *
Cross-Origin-Resource-Policy: cross-origin
X-Fastly-Request-ID: 94d5cd46ac8e48777fb1a1737bb395a059cdc29a
Expires: Mon, 31 Jul 2023 10:34:52 GMT
Source-Age: 1

]
