In [None]:
"""
Sandbox API Client - Production Implementation
A robust client for interacting with the Sandbox.co.in API
"""

import os
import logging
from typing import Optional, Dict, Any
from dataclasses import dataclass
from datetime import datetime, timedelta
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


# Logging setup: INFO and above, rendered as
# "<timestamp> - <logger name> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger shared by everything defined below.
logger = logging.getLogger(__name__)


@dataclass
class AuthToken:
    """Access token together with the metadata needed to decide on reuse."""

    access_token: str  # token string handed back to callers
    expires_at: datetime  # instant after which the token must not be used
    transaction_id: str  # transaction id reported alongside the token

    def is_expired(self) -> bool:
        """Return True once the token is within 5 minutes of ``expires_at``.

        The current time is read with the same tzinfo as ``expires_at``
        (``None`` gives naive local time), so naive and timezone-aware
        expiry values both compare cleanly instead of raising ``TypeError``.
        """
        now = datetime.now(self.expires_at.tzinfo)
        return now >= (self.expires_at - timedelta(minutes=5))


class SandboxAPIException(Exception):
    """Error type raised for failed Sandbox API interactions.

    Besides the message, it carries the HTTP status code and the decoded
    response payload when the raiser supplied them.
    """

    def __init__(
        self,
        message: str,
        status_code: Optional[int] = None,
        response_data: Optional[Dict] = None,
    ):
        # Initialise the base Exception first so str(exc) yields the message.
        super().__init__(message)
        self.message, self.status_code, self.response_data = (
            message,
            status_code,
            response_data,
        )


class SandboxAPIClient:
    """
    Production-ready client for Sandbox API

    Features:
    - Automatic token management and refresh
    - Retry logic with exponential backoff
    - Connection pooling
    - Comprehensive error handling
    - Request/response logging

    Instances can be used as context managers; leaving the ``with`` block
    closes the underlying HTTP session.
    """

    BASE_URL = "https://api.sandbox.co.in"

    # Lifetime assumed for a freshly issued token. The authenticate response
    # is not inspected for an expiry, so this is a client-side assumption.
    TOKEN_TTL = timedelta(hours=24)

    def __init__(
        self,
        api_key: Optional[str] = None,
        api_secret: Optional[str] = None,
        timeout: int = 30,
        max_retries: int = 3,
    ):
        """
        Initialize the Sandbox API client

        Args:
            api_key: API key (falls back to SANDBOX_API_KEY env var)
            api_secret: API secret (falls back to SANDBOX_API_SECRET env var)
            timeout: Request timeout in seconds
            max_retries: Maximum number of retry attempts

        Raises:
            ValueError: If no key/secret is passed and the environment
                variables are unset or empty
        """
        self.api_key = api_key or os.getenv("SANDBOX_API_KEY")
        self.api_secret = api_secret or os.getenv("SANDBOX_API_SECRET")
        self.timeout = timeout
        self._token: Optional[AuthToken] = None

        # Validate before creating the session so a failed constructor does
        # not leave an open connection pool behind.
        if not self.api_key or not self.api_secret:
            raise ValueError(
                "API credentials not provided. Set SANDBOX_API_KEY and "
                "SANDBOX_API_SECRET environment variables or pass them explicitly."
            )

        # Configure session with retry logic
        self.session = self._create_session(max_retries)

        logger.info("Sandbox API client initialized")

    def _create_session(self, max_retries: int) -> requests.Session:
        """Create a requests session with retry logic and connection pooling"""
        session = requests.Session()

        # NOTE(review): POST is in allowed_methods, so a POST answered with
        # one of the listed status codes is re-sent; confirm the target
        # endpoints tolerate duplicate submissions.
        retry_strategy = Retry(
            total=max_retries,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["GET", "POST", "PUT", "DELETE"],
        )

        adapter = HTTPAdapter(
            max_retries=retry_strategy, pool_connections=10, pool_maxsize=20
        )
        session.mount("http://", adapter)
        session.mount("https://", adapter)

        return session

    def authenticate(self, force_refresh: bool = False) -> str:
        """
        Authenticate with the API and get an access token

        Args:
            force_refresh: Force token refresh even if current token is valid

        Returns:
            Access token string

        Raises:
            SandboxAPIException: If authentication fails
        """
        # Return cached token if valid
        if not force_refresh and self._token and not self._token.is_expired():
            logger.debug("Using cached access token")
            return self._token.access_token

        logger.info("Requesting new access token")

        url = f"{self.BASE_URL}/authenticate"
        headers = {"x-api-key": self.api_key, "x-api-secret": self.api_secret}

        try:
            response = self.session.post(url, headers=headers, timeout=self.timeout)
            response.raise_for_status()

            data = response.json()

            # Validate response structure
            if data.get("code") != 200:
                raise SandboxAPIException(
                    f"Authentication failed with code {data.get('code')}",
                    response_data=data,
                )

            # "data" may be present but null; treat that like a missing token.
            access_token = (data.get("data") or {}).get("access_token")
            if not access_token:
                raise SandboxAPIException(
                    "Access token not found in response", response_data=data
                )

            # Cache token with the assumed lifetime (see TOKEN_TTL)
            self._token = AuthToken(
                access_token=access_token,
                expires_at=datetime.now() + self.TOKEN_TTL,
                transaction_id=data.get("transaction_id", ""),
            )

            logger.info(
                "Successfully authenticated. Transaction ID: %s",
                self._token.transaction_id,
            )
            return access_token

        except SandboxAPIException:
            # Raised by the validation above; let it through untouched so the
            # message and response_data are not lost to the generic handler.
            raise
        except requests.exceptions.HTTPError as e:
            logger.error("HTTP error during authentication: %s", e)
            # A Response is falsy for 4xx/5xx, so test identity, not truth.
            raise SandboxAPIException(
                f"Authentication failed: {str(e)}",
                status_code=e.response.status_code if e.response is not None else None,
            ) from e
        except requests.exceptions.RequestException as e:
            logger.error("Request error during authentication: %s", e)
            raise SandboxAPIException(
                f"Network error during authentication: {str(e)}"
            ) from e
        except Exception as e:
            logger.error("Unexpected error during authentication: %s", e)
            raise SandboxAPIException(f"Unexpected error: {str(e)}") from e

    def _make_request(
        self,
        method: str,
        endpoint: str,
        data: Optional[Dict] = None,
        params: Optional[Dict] = None,
        headers: Optional[Dict] = None,
    ) -> Dict[str, Any]:
        """
        Make an authenticated API request

        Args:
            method: HTTP method (GET, POST, PUT, DELETE)
            endpoint: API endpoint (without base URL)
            data: Request body data
            params: URL query parameters
            headers: Additional headers (override the defaults on conflict)

        Returns:
            Response data as dictionary

        Raises:
            SandboxAPIException: If authentication fails, the request fails,
                the server answers with an HTTP error status, or the response
                body is not valid JSON
        """
        # Ensure we have a valid token
        access_token = self.authenticate()

        url = f"{self.BASE_URL}/{endpoint.lstrip('/')}"

        # Prepare headers. The token is sent as-is (no "Bearer " prefix)
        # together with the API key.
        request_headers = {
            "Authorization": access_token,
            "Content-Type": "application/json",
            "x-api-key": self.api_key,
        }
        if headers:
            request_headers.update(headers)

        logger.debug("Making %s request to %s", method, url)

        try:
            response = self.session.request(
                method=method,
                url=url,
                json=data,
                params=params,
                headers=request_headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            logger.error("HTTP error: %s", e)

            # Try to get error details from response
            status_code = None
            error_data = None
            if e.response is not None:
                status_code = e.response.status_code
                try:
                    error_data = e.response.json()
                except ValueError:
                    # Error body is not JSON; report without details.
                    error_data = None

            raise SandboxAPIException(
                f"API request failed: {str(e)}",
                status_code=status_code,
                response_data=error_data,
            ) from e
        except requests.exceptions.RequestException as e:
            logger.error("Request error: %s", e)
            raise SandboxAPIException(f"Network error: {str(e)}") from e

        # Decode outside the network try-block so a malformed or empty body
        # is reported as such rather than as a network error.
        try:
            return response.json()
        except ValueError as e:
            logger.error("Invalid JSON in response: %s", e)
            raise SandboxAPIException(
                f"Invalid JSON in response: {str(e)}",
                status_code=response.status_code,
            ) from e

    def get(
        self,
        endpoint: str,
        params: Optional[Dict] = None,
        headers: Optional[Dict] = None,
    ) -> Dict[str, Any]:
        """Make a GET request"""
        return self._make_request("GET", endpoint, params=params, headers=headers)

    def post(
        self, endpoint: str, data: Optional[Dict] = None, headers: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """Make a POST request"""
        return self._make_request("POST", endpoint, data=data, headers=headers)

    def put(
        self, endpoint: str, data: Optional[Dict] = None, headers: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """Make a PUT request"""
        return self._make_request("PUT", endpoint, data=data, headers=headers)

    def delete(self, endpoint: str, headers: Optional[Dict] = None) -> Dict[str, Any]:
        """Make a DELETE request"""
        return self._make_request("DELETE", endpoint, headers=headers)

    def close(self) -> None:
        """Close the session and cleanup resources"""
        self.session.close()
        logger.info("API client session closed")

    def __enter__(self) -> "SandboxAPIClient":
        """Context manager entry"""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit: always close the session, never swallow errors"""
        self.close()


# Example usage
if __name__ == "__main__":
    # Credentials are read from the environment so that no secret lives in
    # source control:
    #   export SANDBOX_API_KEY=your_key
    #   export SANDBOX_API_SECRET=your_secret
    # NOTE: any key or secret that was ever committed in this file should be
    # treated as leaked and rotated.
    try:
        with SandboxAPIClient() as client:

            # Authenticate (happens automatically on first request)
            token = client.authenticate()
            print(f"Access Token: {token[:50]}...")

            # Example API calls (replace with actual endpoints)
            # response = client.get('/your-endpoint')
            # response = client.post('/your-endpoint', data={'key': 'value'})

    except SandboxAPIException as e:
        logger.error("API Error: %s", e.message)
        if e.response_data:
            logger.error("Response data: %s", e.response_data)
    except Exception as e:
        # Top-level boundary: also covers the ValueError raised when the
        # environment variables are missing.
        logger.error("Unexpected error: %s", e)

2025-12-24 14:20:53,002 - __main__ - INFO - Sandbox API client initialized
2025-12-24 14:20:53,005 - __main__ - INFO - Requesting new access token
2025-12-24 14:20:53,733 - __main__ - INFO - Successfully authenticated. Transaction ID: 6d9f971e-7574-427f-b14a-537856f155f8
2025-12-24 14:20:53,735 - __main__ - INFO - API client session closed


Access Token: eyJ0eXAiOiJKV1MiLCJhbGciOiJSU0FTU0FfUFNTX1NIQV81MT...


In [None]:
"""
Sandbox API Client - Production Implementation
A robust client for interacting with the Sandbox.co.in API with MCA services
"""

import os
import logging
from typing import Optional, Dict, Any, List
from dataclasses import dataclass, asdict
from datetime import datetime, timedelta
from enum import Enum
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


# Logging setup: INFO and above, rendered as
# "<timestamp> - <logger name> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-level logger shared by everything defined below.
logger = logging.getLogger(__name__)


class ConsentType(Enum):
    """Consent flag for API requests.

    Each member's value is the single-letter code placed in the request
    payload's ``consent`` field.
    """

    YES = "Y"  # consent given
    NO = "N"  # consent not given


@dataclass
class AuthToken:
    """Access token together with the metadata needed to decide on reuse."""

    access_token: str  # token string handed back to callers
    expires_at: datetime  # instant after which the token must not be used
    transaction_id: str  # transaction id reported alongside the token

    def is_expired(self) -> bool:
        """Return True once the token is within 5 minutes of ``expires_at``.

        The current time is read with the same tzinfo as ``expires_at``
        (``None`` gives naive local time), so naive and timezone-aware
        expiry values both compare cleanly instead of raising ``TypeError``.
        """
        now = datetime.now(self.expires_at.tzinfo)
        return now >= (self.expires_at - timedelta(minutes=5))


@dataclass
class DirectorDetails:
    """One director/signatory record as returned by the MCA lookup."""

    name: str
    din_pan: str
    designation: str
    begin_date: str
    end_date: str
    entity: str = "in.co.sandbox.kyc.mca.company.directors_signatory_details"

    @classmethod
    def from_api_response(cls, data: Dict[str, Any]) -> "DirectorDetails":
        """Build an instance from one director dict of the API payload.

        Every attribute is read from its payload key; a key that is absent
        yields an empty string (this includes ``entity``).
        """
        # attribute name -> key used in the API payload
        payload_keys = {
            "entity": "@entity",
            "name": "name",
            "din_pan": "din/pan",
            "designation": "designation",
            "begin_date": "begin_date",
            "end_date": "end_date",
        }
        values = {attr: data.get(key, "") for attr, key in payload_keys.items()}
        return cls(**values)


@dataclass
class CompanyMasterData:
    """Master-data section of an MCA company lookup."""

    cin: str
    company_name: str
    company_status: str
    date_of_incorporation: str
    company_category: str
    company_subcategory: str
    class_of_company: str
    authorized_capital: str
    paid_up_capital: str
    registered_address: str
    email_id: str
    registration_number: str
    roc_code: str
    whether_listed: str
    date_of_balance_sheet: str
    date_of_last_agm: str
    number_of_partners: str
    number_of_designated_partners: str
    main_division_code: str
    description_of_main_division: str
    rd_region: str
    suspended_at_stock_exchange: str
    previous_firm_details: str
    total_obligation: str
    balance_sheets: List[Any]
    annual_returns: List[Any]
    entity: str = "in.co.sandbox.kyc.mca.company.master_data"

    @classmethod
    def from_api_response(cls, data: Dict[str, Any]) -> "CompanyMasterData":
        """Build an instance from the master-data dict of the API payload.

        Scalar attributes fall back to an empty string and the two list
        attributes to an empty list when their payload key is absent.
        """
        # attribute name -> key used in the API payload (scalar fields only)
        scalar_keys = {
            "entity": "@entity",
            "cin": "cin",
            "company_name": "company_name",
            "company_status": "company_status(for_efiling)",
            "date_of_incorporation": "date_of_incorporation",
            "company_category": "company_category",
            "company_subcategory": "company_subcategory",
            "class_of_company": "class_of_company",
            "authorized_capital": "authorised_capital(rs)",
            "paid_up_capital": "paid_up_capital(rs)",
            "registered_address": "registered_address",
            "email_id": "email_id",
            "registration_number": "registration_number",
            "roc_code": "roc_code",
            "whether_listed": "whether_listed_or_not",
            "date_of_balance_sheet": "date_of_balance_sheet",
            "date_of_last_agm": "date_of_last_agm",
            "number_of_partners": "number_of_partners",
            "number_of_designated_partners": "number_of_designated_partners",
            "main_division_code": "main_division_of_business_activity_to_be_carried_out_in_india",
            "description_of_main_division": "description_of_main_division",
            "rd_region": "rd_region",
            "suspended_at_stock_exchange": "suspended_at_stock_exchange",
            "previous_firm_details": "previous_firm/_company_details,_if_applicable",
            "total_obligation": "total_obligation_of_contribution",
        }
        values: Dict[str, Any] = {
            attr: data.get(key, "") for attr, key in scalar_keys.items()
        }

        # List-valued sections keep their payload key as the attribute name.
        for list_attr in ("balance_sheets", "annual_returns"):
            values[list_attr] = data.get(list_attr, [])

        return cls(**values)


@dataclass
class CompanySearchResult:
    """Parsed result of an MCA company search: master data, directors, charges."""

    company_master_data: CompanyMasterData
    directors: List[DirectorDetails]
    charges: List[Any]
    transaction_id: str
    timestamp: int

    @classmethod
    def from_api_response(cls, response: Dict[str, Any]) -> "CompanySearchResult":
        """Build a result object from the full API response envelope.

        ``transaction_id`` and ``timestamp`` come from the envelope itself;
        everything else is read from its ``data`` section. Missing sections
        fall back to empty containers.
        """
        payload = response.get("data", {})
        director_records = payload.get("directors/signatory_details", [])

        return cls(
            company_master_data=CompanyMasterData.from_api_response(
                payload.get("company_master_data", {})
            ),
            directors=list(map(DirectorDetails.from_api_response, director_records)),
            charges=payload.get("charges", []),
            transaction_id=response.get("transaction_id", ""),
            timestamp=response.get("timestamp", 0),
        )


class SandboxAPIException(Exception):
    """Error type raised for failed Sandbox API interactions.

    Besides the message, it carries the HTTP status code and the decoded
    response payload when the raiser supplied them.
    """

    def __init__(
        self,
        message: str,
        status_code: Optional[int] = None,
        response_data: Optional[Dict] = None,
    ):
        # Initialise the base Exception first so str(exc) yields the message.
        super().__init__(message)
        self.message, self.status_code, self.response_data = (
            message,
            status_code,
            response_data,
        )


class SandboxAPIClient:
    """
    Production-ready client for Sandbox API

    Features:
    - Automatic token management and refresh
    - Retry logic with exponential backoff
    - Connection pooling
    - Comprehensive error handling
    - Request/response logging
    - MCA company search functionality

    Instances can be used as context managers; leaving the ``with`` block
    closes the underlying HTTP session.
    """

    BASE_URL = "https://api.sandbox.co.in"

    # Lifetime assumed for a freshly issued token. The authenticate response
    # is not inspected for an expiry, so this is a client-side assumption.
    TOKEN_TTL = timedelta(hours=24)

    def __init__(
        self,
        api_key: Optional[str] = None,
        api_secret: Optional[str] = None,
        timeout: int = 30,
        max_retries: int = 3,
    ):
        """
        Initialize the Sandbox API client

        Args:
            api_key: API key (falls back to SANDBOX_API_KEY env var)
            api_secret: API secret (falls back to SANDBOX_API_SECRET env var)
            timeout: Request timeout in seconds
            max_retries: Maximum number of retry attempts

        Raises:
            ValueError: If no key/secret is passed and the environment
                variables are unset or empty
        """
        self.api_key = api_key or os.getenv("SANDBOX_API_KEY")
        self.api_secret = api_secret or os.getenv("SANDBOX_API_SECRET")
        self.timeout = timeout
        self._token: Optional[AuthToken] = None

        # Validate before creating the session so a failed constructor does
        # not leave an open connection pool behind.
        if not self.api_key or not self.api_secret:
            raise ValueError(
                "API credentials not provided. Set SANDBOX_API_KEY and "
                "SANDBOX_API_SECRET environment variables or pass them explicitly."
            )

        # Configure session with retry logic
        self.session = self._create_session(max_retries)

        logger.info("Sandbox API client initialized")

    def _create_session(self, max_retries: int) -> requests.Session:
        """Create a requests session with retry logic and connection pooling"""
        session = requests.Session()

        # NOTE(review): POST is in allowed_methods, so a POST answered with
        # one of the listed status codes is re-sent; confirm the target
        # endpoints tolerate duplicate submissions.
        retry_strategy = Retry(
            total=max_retries,
            backoff_factor=1,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["GET", "POST", "PUT", "DELETE"],
        )

        adapter = HTTPAdapter(
            max_retries=retry_strategy, pool_connections=10, pool_maxsize=20
        )
        session.mount("http://", adapter)
        session.mount("https://", adapter)

        return session

    def authenticate(self, force_refresh: bool = False) -> str:
        """
        Authenticate with the API and get an access token

        Args:
            force_refresh: Force token refresh even if current token is valid

        Returns:
            Access token string

        Raises:
            SandboxAPIException: If authentication fails
        """
        # Return cached token if valid
        if not force_refresh and self._token and not self._token.is_expired():
            logger.debug("Using cached access token")
            return self._token.access_token

        logger.info("Requesting new access token")

        url = f"{self.BASE_URL}/authenticate"
        headers = {"x-api-key": self.api_key, "x-api-secret": self.api_secret}

        try:
            response = self.session.post(url, headers=headers, timeout=self.timeout)
            response.raise_for_status()

            data = response.json()

            # Validate response structure
            if data.get("code") != 200:
                raise SandboxAPIException(
                    f"Authentication failed with code {data.get('code')}",
                    response_data=data,
                )

            # "data" may be present but null; treat that like a missing token.
            access_token = (data.get("data") or {}).get("access_token")
            if not access_token:
                raise SandboxAPIException(
                    "Access token not found in response", response_data=data
                )

            # Cache token with the assumed lifetime (see TOKEN_TTL)
            self._token = AuthToken(
                access_token=access_token,
                expires_at=datetime.now() + self.TOKEN_TTL,
                transaction_id=data.get("transaction_id", ""),
            )

            logger.info(
                "Successfully authenticated. Transaction ID: %s",
                self._token.transaction_id,
            )
            return access_token

        except SandboxAPIException:
            # Raised by the validation above; let it through untouched so the
            # message and response_data are not lost to the generic handler.
            raise
        except requests.exceptions.HTTPError as e:
            logger.error("HTTP error during authentication: %s", e)
            # A Response is falsy for 4xx/5xx, so test identity, not truth.
            raise SandboxAPIException(
                f"Authentication failed: {str(e)}",
                status_code=e.response.status_code if e.response is not None else None,
            ) from e
        except requests.exceptions.RequestException as e:
            logger.error("Request error during authentication: %s", e)
            raise SandboxAPIException(
                f"Network error during authentication: {str(e)}"
            ) from e
        except Exception as e:
            logger.error("Unexpected error during authentication: %s", e)
            raise SandboxAPIException(f"Unexpected error: {str(e)}") from e

    def _make_request(
        self,
        method: str,
        endpoint: str,
        data: Optional[Dict] = None,
        params: Optional[Dict] = None,
        headers: Optional[Dict] = None,
    ) -> Dict[str, Any]:
        """
        Make an authenticated API request

        Args:
            method: HTTP method (GET, POST, PUT, DELETE)
            endpoint: API endpoint (without base URL)
            data: Request body data
            params: URL query parameters
            headers: Additional headers (override the defaults on conflict)

        Returns:
            Response data as dictionary

        Raises:
            SandboxAPIException: If authentication fails, the request fails,
                the server answers with an HTTP error status, the response
                body is not valid JSON, or the body's ``code`` is not 200
        """
        # Ensure we have a valid token
        access_token = self.authenticate()

        url = f"{self.BASE_URL}/{endpoint.lstrip('/')}"

        # Prepare headers. The token is sent as-is (no "Bearer " prefix)
        # together with the API key.
        request_headers = {
            "Authorization": access_token,
            "Content-Type": "application/json",
            "x-api-key": self.api_key,
        }
        if headers:
            request_headers.update(headers)

        logger.debug("Making %s request to %s", method, url)

        try:
            response = self.session.request(
                method=method,
                url=url,
                json=data,
                params=params,
                headers=request_headers,
                timeout=self.timeout,
            )
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            logger.error("HTTP error: %s", e)

            # Try to get error details from response
            status_code = None
            error_data = None
            if e.response is not None:
                status_code = e.response.status_code
                try:
                    error_data = e.response.json()
                except ValueError:
                    # Error body is not JSON; report without details.
                    error_data = None

            raise SandboxAPIException(
                f"API request failed: {str(e)}",
                status_code=status_code,
                response_data=error_data,
            ) from e
        except requests.exceptions.RequestException as e:
            logger.error("Request error: %s", e)
            raise SandboxAPIException(f"Network error: {str(e)}") from e

        # Decode outside the network try-block so a malformed or empty body
        # is reported as such rather than as a network error.
        try:
            result = response.json()
        except ValueError as e:
            logger.error("Invalid JSON in response: %s", e)
            raise SandboxAPIException(
                f"Invalid JSON in response: {str(e)}",
                status_code=response.status_code,
            ) from e

        # Validate response code (the body carries its own status field)
        if result.get("code") != 200:
            raise SandboxAPIException(
                f"API returned error code {result.get('code')}",
                status_code=response.status_code,
                response_data=result,
            )

        return result

    # MCA Service Methods

    def search_company_by_cin(
        self,
        cin: str,
        consent: ConsentType = ConsentType.YES,
        reason: str = "KYC Verification",
    ) -> CompanySearchResult:
        """
        Search for company details using CIN (Corporate Identity Number)

        Args:
            cin: Corporate Identity Number (e.g., U62011GJ2025PTC168991)
            consent: User consent (Y/N)
            reason: Reason for the search

        Returns:
            CompanySearchResult object with company details

        Raises:
            SandboxAPIException: If search fails
        """
        logger.info("Searching company with CIN: %s", cin)

        payload = {
            "@entity": "in.co.sandbox.kyc.mca.master_data.request",
            "id": cin,
            "consent": consent.value,
            "reason": reason,
        }

        response = self._make_request(
            "POST", "/mca/company/master-data/search", data=payload
        )

        result = CompanySearchResult.from_api_response(response)
        logger.info(
            "Successfully retrieved company data: %s",
            result.company_master_data.company_name,
        )

        return result

    def get_company_directors(
        self,
        cin: str,
        consent: ConsentType = ConsentType.YES,
        reason: str = "Director Verification",
    ) -> List[DirectorDetails]:
        """
        Get list of directors for a company

        Performs a full company search and returns only its directors part.

        Args:
            cin: Corporate Identity Number
            consent: User consent (Y/N)
            reason: Reason for the search

        Returns:
            List of DirectorDetails objects

        Raises:
            SandboxAPIException: If search fails
        """
        result = self.search_company_by_cin(cin, consent, reason)
        return result.directors

    def get_company_basic_info(
        self,
        cin: str,
        consent: ConsentType = ConsentType.YES,
        reason: str = "Company Verification",
    ) -> CompanyMasterData:
        """
        Get basic company information

        Performs a full company search and returns only its master data part.

        Args:
            cin: Corporate Identity Number
            consent: User consent (Y/N)
            reason: Reason for the search

        Returns:
            CompanyMasterData object

        Raises:
            SandboxAPIException: If search fails
        """
        result = self.search_company_by_cin(cin, consent, reason)
        return result.company_master_data

    # Generic HTTP Methods

    def get(
        self,
        endpoint: str,
        params: Optional[Dict] = None,
        headers: Optional[Dict] = None,
    ) -> Dict[str, Any]:
        """Make a GET request"""
        return self._make_request("GET", endpoint, params=params, headers=headers)

    def post(
        self, endpoint: str, data: Optional[Dict] = None, headers: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """Make a POST request"""
        return self._make_request("POST", endpoint, data=data, headers=headers)

    def put(
        self, endpoint: str, data: Optional[Dict] = None, headers: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """Make a PUT request"""
        return self._make_request("PUT", endpoint, data=data, headers=headers)

    def delete(self, endpoint: str, headers: Optional[Dict] = None) -> Dict[str, Any]:
        """Make a DELETE request"""
        return self._make_request("DELETE", endpoint, headers=headers)

    def close(self) -> None:
        """Close the session and cleanup resources"""
        self.session.close()
        logger.info("API client session closed")

    def __enter__(self) -> "SandboxAPIClient":
        """Context manager entry"""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit: always close the session, never swallow errors"""
        self.close()


# Example usage
if __name__ == "__main__":
    # Credentials are read from the environment so that no secret lives in
    # source control:
    #   export SANDBOX_API_KEY=your_key
    #   export SANDBOX_API_SECRET=your_secret
    # NOTE: any key or secret that was ever committed in this file should be
    # treated as leaked and rotated.
    try:
        with SandboxAPIClient() as client:

            # Example 1: Search company by CIN
            cin = "U62011GJ2025PTC168991"
            company_result = client.search_company_by_cin(cin)

            print(f"\nCompany Name: {company_result.company_master_data.company_name}")
            print(f"CIN: {company_result.company_master_data.cin}")
            print(f"Status: {company_result.company_master_data.company_status}")
            print(
                f"Incorporation Date: {company_result.company_master_data.date_of_incorporation}"
            )
            print(
                f"Paid Up Capital: ₹{company_result.company_master_data.paid_up_capital}"
            )
            print(
                f"Registered Address: {company_result.company_master_data.registered_address}"
            )

            print(f"\nDirectors ({len(company_result.directors)}):")
            for director in company_result.directors:
                print(f"  - {director.name} ({director.designation})")
                print(f"    DIN/PAN: {director.din_pan}")
                print(f"    Period: {director.begin_date} to {director.end_date}")

            print(f"\nTransaction ID: {company_result.transaction_id}")

            # Example 2: Get only directors (issues its own search request)
            print("\n" + "=" * 50)
            directors = client.get_company_directors(cin)
            print(f"\nDirectors list retrieved: {len(directors)} directors")

            # Example 3: Get only basic info (issues its own search request)
            print("\n" + "=" * 50)
            basic_info = client.get_company_basic_info(cin)
            print(f"\nCompany: {basic_info.company_name}")
            print(f"Email: {basic_info.email_id}")
            print(f"ROC: {basic_info.roc_code}")

    except SandboxAPIException as e:
        logger.error("API Error: %s", e.message)
        if e.response_data:
            logger.error("Response data: %s", e.response_data)
    except Exception as e:
        # Top-level boundary: also covers the ValueError raised when the
        # environment variables are missing.
        logger.error("Unexpected error: %s", e)

2025-12-24 14:24:09,661 - __main__ - INFO - Sandbox API client initialized
2025-12-24 14:24:09,663 - __main__ - INFO - Searching company with CIN: U62011GJ2025PTC168991
2025-12-24 14:24:09,664 - __main__ - INFO - Requesting new access token
2025-12-24 14:24:10,339 - __main__ - INFO - Successfully authenticated. Transaction ID: 1eb981d9-b2ba-4027-8315-b6ef92de2682
2025-12-24 14:24:19,981 - __main__ - INFO - Successfully retrieved company data: AXIOMETRY AI PRIVATE LIMITED
2025-12-24 14:24:19,983 - __main__ - INFO - Searching company with CIN: U62011GJ2025PTC168991



Company Name: AXIOMETRY AI PRIVATE LIMITED
CIN: U62011GJ2025PTC168991
Status: Active
Incorporation Date: 24/10/2025
Paid Up Capital: ₹10000
Registered Address: C/O SHIVAM BALDHA RAM MANDIR PASE Jamkandorna Rajkot Jamkandorna Gujarat India 360405

Directors (2):
  - BALDHA SHIVAM MAHESHBHAI (Director)
    DIN/PAN: 11353541
    Period: 24/10/2025 to -
  - SHAHNAZ ANSARI (Director)
    DIN/PAN: 11353542
    Period: 24/10/2025 to -

Transaction ID: 72d4606d-d47d-40bd-b74c-785cbba777b5



2025-12-24 14:24:23,269 - __main__ - INFO - Successfully retrieved company data: AXIOMETRY AI PRIVATE LIMITED
2025-12-24 14:24:23,270 - __main__ - INFO - Searching company with CIN: U62011GJ2025PTC168991



Directors list retrieved: 2 directors



2025-12-24 14:24:27,374 - __main__ - INFO - Successfully retrieved company data: AXIOMETRY AI PRIVATE LIMITED
2025-12-24 14:24:27,378 - __main__ - INFO - API client session closed



Company: AXIOMETRY AI PRIVATE LIMITED
Email: axiometryai@gmail.com
ROC: ROC Ahmedabad


In [7]:
"""
MCA Data Extractor - Production Implementation
Extracts company data from MCA API and exports to CSV with robust error handling
"""

import os
import logging
import time
import csv
from typing import Optional, Dict, Any, List
from dataclasses import dataclass, asdict, fields
from datetime import datetime, timedelta
from enum import Enum
from pathlib import Path
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import pandas as pd


# Configure logging: INFO and above go both to a log file in the current
# working directory and to the console stream.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[
        # Explicit UTF-8 so non-ASCII text in log records does not depend on
        # the platform's default locale encoding.
        logging.FileHandler("mca_extraction.log", encoding="utf-8"),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)


class ConsentType(Enum):
    """Consent flag whose ``.value`` is sent as the ``consent`` field of API request payloads."""

    YES = "Y"  # serialized as "Y"
    NO = "N"  # serialized as "N"


@dataclass
class AuthToken:
    """Access token bundled with its expiry time and the transaction ID of the auth call."""

    access_token: str
    expires_at: datetime
    transaction_id: str

    def is_expired(self) -> bool:
        """Return True when five minutes or less of validity remain."""
        safety_margin = timedelta(minutes=5)
        time_left = self.expires_at - datetime.now()
        return time_left <= safety_margin


@dataclass
class ExtractedCompanyData:
    """One flattened output row per CIN, used for CSV export.

    Field order matters: rows are converted with ``asdict`` and written through
    a DataFrame, so the declaration order here is the CSV column order.
    """

    # Input data: values copied from the input spreadsheet row (stringified)
    cin: str
    input_class: str
    input_company_name: str
    input_date_of_registration: str
    input_company_type: str
    input_activity_code: str

    # Extraction metadata
    # "SUCCESS" or "FAILED" are the values set in this file; "PENDING" is
    # listed as a possible status but is not assigned anywhere visible here.
    extraction_status: str  # SUCCESS, FAILED, PENDING
    extraction_timestamp: str  # local time formatted as "%Y-%m-%d %H:%M:%S"
    transaction_id: str  # transaction_id from the API response; "" on failure
    error_message: str  # "" on success, otherwise the last error text

    # Company Master Data: taken from the response's "company_master_data" object
    company_name: str
    company_status: str
    date_of_incorporation: str
    company_category: str
    company_subcategory: str
    class_of_company: str
    authorized_capital: str
    paid_up_capital: str
    registered_address: str
    email_id: str
    registration_number: str
    roc_code: str
    whether_listed: str
    date_of_balance_sheet: str
    date_of_last_agm: str
    main_division_code: str
    description_of_main_division: str
    rd_region: str
    suspended_at_stock_exchange: str

    # Director information: one value per director, concatenated with " | "
    directors_count: int
    directors_names: str
    directors_din_pan: str
    directors_designations: str
    directors_begin_dates: str
    directors_end_dates: str


class SandboxAPIException(Exception):
    """Error type for Sandbox API failures.

    Exposes the message and, when supplied, a status code and the response
    payload as attributes.
    """

    def __init__(
        self,
        message: str,
        status_code: Optional[int] = None,
        response_data: Optional[Dict] = None,
    ):
        super().__init__(message)
        self.message, self.status_code, self.response_data = (
            message,
            status_code,
            response_data,
        )


class SandboxAPIClient:
    """Client for the Sandbox API with cached authentication and HTTP retries.

    The access token obtained from ``/authenticate`` is cached on the instance
    and reused until ``AuthToken.is_expired()`` reports it as stale. Every
    request goes through a single ``requests.Session`` whose adapter retries
    429/500/502/503/504 responses.
    """

    BASE_URL = "https://api.sandbox.co.in"

    def __init__(
        self, api_key: str, api_secret: str, timeout: int = 30, max_retries: int = 3
    ):
        """
        Initialize the client.

        Args:
            api_key: Sandbox API key, sent as the ``x-api-key`` header
            api_secret: Sandbox API secret, sent as ``x-api-secret`` on authentication
            timeout: Per-request timeout in seconds
            max_retries: Total number of HTTP-level retries per request
        """
        self.api_key = api_key
        self.api_secret = api_secret
        self.timeout = timeout
        self._token: Optional[AuthToken] = None
        self.session = self._create_session(max_retries)
        logger.info("Sandbox API client initialized")

    def _create_session(self, max_retries: int) -> requests.Session:
        """Create a requests session with retry logic and a pooled adapter."""
        session = requests.Session()
        retry_strategy = Retry(
            total=max_retries,
            backoff_factor=2,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["GET", "POST", "PUT", "DELETE"],
        )
        adapter = HTTPAdapter(
            max_retries=retry_strategy, pool_connections=10, pool_maxsize=20
        )
        session.mount("http://", adapter)
        session.mount("https://", adapter)
        return session

    @staticmethod
    def _raise_unless_ok(body: Dict[str, Any], message_prefix: str) -> None:
        """Raise SandboxAPIException unless the response body's ``code`` is 200.

        The exception carries the body's code (when it is an integer) and the
        full decoded body so callers can inspect why the request failed.
        """
        code = body.get("code")
        if code != 200:
            raise SandboxAPIException(
                f"{message_prefix} {code}",
                status_code=code if isinstance(code, int) else None,
                response_data=body,
            )

    def authenticate(self, force_refresh: bool = False) -> str:
        """
        Return a valid access token, requesting a new one when needed.

        Args:
            force_refresh: If True, ignore any cached token and re-authenticate

        Returns:
            The access token string

        Raises:
            SandboxAPIException: If the response code is not 200 or no token is present
            requests.HTTPError: If the HTTP status indicates an error
        """
        if not force_refresh and self._token and not self._token.is_expired():
            return self._token.access_token

        logger.info("Requesting new access token")
        url = f"{self.BASE_URL}/authenticate"
        headers = {"x-api-key": self.api_key, "x-api-secret": self.api_secret}

        response = self.session.post(url, headers=headers, timeout=self.timeout)
        response.raise_for_status()
        data = response.json()

        self._raise_unless_ok(data, "Authentication failed with code")

        # "data" may be present but null; treat that like a missing object
        access_token = (data.get("data") or {}).get("access_token")
        if not access_token:
            raise SandboxAPIException(
                "Access token not found in response", response_data=data
            )

        # The token lifetime is assumed to be 24 hours; no expiry value from
        # the response is consulted.
        self._token = AuthToken(
            access_token=access_token,
            expires_at=datetime.now() + timedelta(hours=24),
            transaction_id=data.get("transaction_id", ""),
        )

        logger.info("Successfully authenticated")
        return access_token

    def search_company_by_cin(
        self,
        cin: str,
        consent: ConsentType = ConsentType.YES,
        reason: str = "Data Extraction",
    ) -> Dict[str, Any]:
        """
        Search for company details using CIN.

        Args:
            cin: Corporate Identity Number, sent as the payload ``id``
            consent: Consent flag sent with the request
            reason: Free-text reason sent with the request

        Returns:
            The full decoded JSON response body

        Raises:
            SandboxAPIException: If the response code is not 200
            requests.HTTPError: If the HTTP status indicates an error
        """
        access_token = self.authenticate()

        payload = {
            "@entity": "in.co.sandbox.kyc.mca.master_data.request",
            "id": cin,
            "consent": consent.value,
            "reason": reason,
        }

        headers = {
            "Authorization": access_token,
            "Content-Type": "application/json",
            "x-api-key": self.api_key,
        }

        url = f"{self.BASE_URL}/mca/company/master-data/search"
        response = self.session.post(
            url, json=payload, headers=headers, timeout=self.timeout
        )
        response.raise_for_status()

        result = response.json()
        self._raise_unless_ok(result, "API returned error code")

        return result

    def close(self):
        """Close the underlying HTTP session."""
        self.session.close()


class MCADataExtractor:
    """
    Extracts MCA data for the CINs listed in an Excel file and exports it to CSV.

    Records are buffered and written to the output file in batches. When
    appending to an existing output file, every CIN already present in it
    (including rows recorded as FAILED) is skipped.
    """

    # Number of buffered records that triggers a write to the output CSV
    SAVE_BATCH_SIZE = 10

    def __init__(
        self,
        api_key: str,
        api_secret: str,
        delay_between_requests: float = 1.0,
        max_retries_per_cin: int = 3,
    ):
        """
        Initialize the extractor

        Args:
            api_key: Sandbox API key
            api_secret: Sandbox API secret
            delay_between_requests: Delay in seconds between API calls
            max_retries_per_cin: Maximum retries for each CIN
        """
        self.client = SandboxAPIClient(api_key, api_secret)
        self.delay = delay_between_requests
        self.max_retries = max_retries_per_cin

    def read_input_excel(
        self, file_path: str, nrows: Optional[int] = 100
    ) -> pd.DataFrame:
        """
        Read the input Excel file and normalize its column names.

        Args:
            file_path: Path to the Excel file
            nrows: Maximum number of data rows to read; None reads every row

        Returns:
            DataFrame with the standardized columns cin, class, company_name,
            date_of_registration, company_type and activity_code

        Raises:
            ValueError: If any required column is missing
        """
        logger.info(f"Reading input file: {file_path}")

        df = pd.read_excel(file_path, nrows=nrows)

        # Strip whitespace from column names (str() guards non-string headers)
        df.columns = [str(col).strip() for col in df.columns]

        # Standardize column names
        column_mapping = {
            "CIN": "cin",
            "Class": "class",
            "Company Name": "company_name",
            "Date Of Registration": "date_of_registration",
            "Company Type": "company_type",
            "Activity Code": "activity_code",
        }
        df = df.rename(columns=column_mapping)

        # Every standardized column is required
        missing_cols = [
            col for col in column_mapping.values() if col not in df.columns
        ]
        if missing_cols:
            raise ValueError(f"Missing required columns: {missing_cols}")

        logger.info(f"Successfully read {len(df)} companies from input file")
        return df

    @staticmethod
    def _as_text(value: Any) -> str:
        """Return value as a string, mapping None to an empty string."""
        return "" if value is None else str(value)

    @staticmethod
    def _timestamp() -> str:
        """Return the current local time formatted for the output file."""
        return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    @staticmethod
    def _input_fields(cin: str, input_row: Dict[str, Any]) -> Dict[str, Any]:
        """Return the output fields that are copied from the input row."""
        return {
            "cin": cin,
            "input_class": str(input_row.get("class", "")),
            "input_company_name": str(input_row.get("company_name", "")),
            "input_date_of_registration": str(
                input_row.get("date_of_registration", "")
            ),
            "input_company_type": str(input_row.get("company_type", "")),
            "input_activity_code": str(input_row.get("activity_code", "")),
        }

    def _build_success_record(
        self, cin: str, input_row: Dict[str, Any], response: Dict[str, Any]
    ) -> ExtractedCompanyData:
        """Flatten a successful API response into an ExtractedCompanyData row."""
        # Any of these may be present but null in the response
        data = response.get("data") or {}
        master_data = data.get("company_master_data") or {}
        directors = data.get("directors/signatory_details") or []

        def master(key: str) -> str:
            return self._as_text(master_data.get(key))

        def joined(key: str) -> str:
            # str.join rejects None / non-string items, so stringify first
            return " | ".join(self._as_text(d.get(key)) for d in directors)

        return ExtractedCompanyData(
            **self._input_fields(cin, input_row),
            # Extraction metadata
            extraction_status="SUCCESS",
            extraction_timestamp=self._timestamp(),
            transaction_id=self._as_text(response.get("transaction_id")),
            error_message="",
            # Company master data
            company_name=master("company_name"),
            company_status=master("company_status(for_efiling)"),
            date_of_incorporation=master("date_of_incorporation"),
            company_category=master("company_category"),
            company_subcategory=master("company_subcategory"),
            class_of_company=master("class_of_company"),
            authorized_capital=master("authorised_capital(rs)"),
            paid_up_capital=master("paid_up_capital(rs)"),
            registered_address=master("registered_address"),
            email_id=master("email_id"),
            registration_number=master("registration_number"),
            roc_code=master("roc_code"),
            whether_listed=master("whether_listed_or_not"),
            date_of_balance_sheet=master("date_of_balance_sheet"),
            date_of_last_agm=master("date_of_last_agm"),
            main_division_code=master(
                "main_division_of_business_activity_to_be_carried_out_in_india"
            ),
            description_of_main_division=master("description_of_main_division"),
            rd_region=master("rd_region"),
            suspended_at_stock_exchange=master("suspended_at_stock_exchange"),
            # Director information
            directors_count=len(directors),
            directors_names=joined("name"),
            directors_din_pan=joined("din/pan"),
            directors_designations=joined("designation"),
            directors_begin_dates=joined("begin_date"),
            directors_end_dates=joined("end_date"),
        )

    def extract_company_data(
        self, cin: str, input_row: Dict[str, Any]
    ) -> ExtractedCompanyData:
        """
        Extract data for a single company with retry logic

        Never raises for ordinary errors: after the last failed attempt a
        FAILED record carrying the error message is returned instead.

        Args:
            cin: Corporate Identity Number
            input_row: Original input data row

        Returns:
            ExtractedCompanyData object
        """
        for attempt in range(1, self.max_retries + 1):
            logger.info(
                f"Extracting data for CIN: {cin} (Attempt {attempt}/{self.max_retries})"
            )
            try:
                response = self.client.search_company_by_cin(cin)
                extracted_data = self._build_success_record(cin, input_row, response)
            except Exception as e:
                if isinstance(e, SandboxAPIException):
                    error_msg = f"API error: {e.message}"
                else:
                    error_msg = f"Unexpected error: {str(e)}"
                logger.error(f"Attempt {attempt} failed for CIN {cin}: {error_msg}")

                if attempt == self.max_retries:
                    # Final attempt failed, return error record
                    return self._create_error_record(cin, input_row, error_msg)

                # Linear back-off before the next attempt
                time.sleep(self.delay * attempt)
            else:
                logger.info(
                    f"Successfully extracted data for: {extracted_data.company_name}"
                )
                return extracted_data

        # Only reached when max_retries is less than 1
        return self._create_error_record(cin, input_row, "Max retries exceeded")

    def _create_error_record(
        self, cin: str, input_row: Dict[str, Any], error_message: str
    ) -> ExtractedCompanyData:
        """Create an error record when extraction fails"""
        # Start from an all-blank row so new dataclass fields are covered automatically
        record = {f.name: "" for f in fields(ExtractedCompanyData)}
        record.update(self._input_fields(cin, input_row))
        record.update(
            extraction_status="FAILED",
            extraction_timestamp=self._timestamp(),
            error_message=error_message,
            directors_count=0,
        )
        return ExtractedCompanyData(**record)

    def process_companies(
        self,
        input_file: str,
        output_file: str,
        append_mode: bool = True,
        nrows: Optional[int] = 100,
    ) -> None:
        """
        Process all companies from input Excel and save to CSV

        Args:
            input_file: Path to input Excel file
            output_file: Path to output CSV file
            append_mode: If True, append to existing CSV and skip CINs already
                in it; if False, the first write of this run replaces the file
            nrows: Maximum number of input rows to read; None reads every row
        """
        # Read input data
        df_input = self.read_input_excel(input_file, nrows=nrows)

        # Check if output file exists and load processed CINs
        processed_cins = set()
        output_path = Path(output_file)

        # A zero-byte file has nothing to resume from (and read_csv would fail)
        if append_mode and output_path.exists() and output_path.stat().st_size > 0:
            logger.info(f"Loading existing output file: {output_file}")
            df_existing = pd.read_csv(output_file)
            processed_cins = set(df_existing["cin"].unique())
            logger.info(f"Found {len(processed_cins)} already processed CINs")

        # Filter out already processed CINs
        df_to_process = df_input[~df_input["cin"].isin(processed_cins)]
        total = len(df_to_process)

        if total == 0:
            logger.info("All CINs already processed. Nothing to do.")
            return

        logger.info(f"Processing {total} companies...")

        pending: List[Dict] = []  # records extracted but not yet written
        processed_count = 0
        success_count = 0
        failed_count = 0
        # After the first successful write of this run, later batches must
        # append even when append_mode is False, or each batch would
        # overwrite the previous one.
        has_written = False

        def progress() -> str:
            return (
                f"Progress: {processed_count}/{total} processed "
                f"(Success: {success_count}, Failed: {failed_count})"
            )

        try:
            # Use the enumeration position, not the DataFrame index label:
            # after filtering, labels no longer run from 0 to total - 1.
            for position, (_, row) in enumerate(df_to_process.iterrows(), start=1):
                cin = row["cin"]

                extracted_data = self.extract_company_data(cin, row.to_dict())
                pending.append(asdict(extracted_data))
                processed_count += 1

                if extracted_data.extraction_status == "SUCCESS":
                    success_count += 1
                else:
                    failed_count += 1

                if len(pending) >= self.SAVE_BATCH_SIZE:
                    try:
                        self._save_results(
                            pending, output_file, append_mode or has_written
                        )
                    except Exception as e:
                        # Keep the buffer; the write is retried after the
                        # next record and again in the final flush.
                        logger.error(f"Could not save batch ending at CIN {cin}: {e}")
                    else:
                        has_written = True
                        pending = []
                        logger.info(progress())

                # Delay between requests to avoid rate limiting
                if position < total:
                    time.sleep(self.delay)
        finally:
            # Flush whatever is still buffered, also when the loop is
            # interrupted, so extracted records are not lost.
            if pending:
                self._save_results(pending, output_file, append_mode or has_written)
                logger.info(progress())

        logger.info(f"\n{'='*60}")
        logger.info("Extraction completed!")
        logger.info(f"Total processed: {total}")
        logger.info(f"Successful: {success_count}")
        logger.info(f"Failed: {failed_count}")
        logger.info(f"Output saved to: {output_file}")
        logger.info(f"{'='*60}")

    def _save_results(self, results: List[Dict], output_file: str, append_mode: bool):
        """
        Save results to CSV file

        Args:
            results: Row dictionaries to write; nothing happens when empty
            output_file: Path to the CSV file
            append_mode: If True and the file already has content, rows are
                appended without a header; otherwise the file is (re)written
                with a header
        """
        if not results:
            return

        df_results = pd.DataFrame(results)
        output_path = Path(output_file)

        # An existing but empty file still needs the header row
        can_append = (
            append_mode and output_path.exists() and output_path.stat().st_size > 0
        )
        if can_append:
            df_results.to_csv(output_file, mode="a", header=False, index=False)
        else:
            df_results.to_csv(output_file, mode="w", header=True, index=False)

    def close(self):
        """Close the API client"""
        self.client.close()


# Main execution
if __name__ == "__main__":
    # Configuration
    # Credentials are read from the environment so that live secrets never
    # live in source control or in a shared notebook.
    # NOTE(security): a live key/secret pair was previously embedded here;
    # treat it as compromised and rotate it.
    API_KEY = os.environ.get("SANDBOX_API_KEY", "")
    API_SECRET = os.environ.get("SANDBOX_API_SECRET", "")

    INPUT_FILE = "Incorporation-report-20251201.xlsx"  # Your input Excel file
    OUTPUT_FILE = "companies_extracted_data.csv"  # Output CSV file

    DELAY_BETWEEN_REQUESTS = 1.5  # Delay in seconds (adjust based on rate limits)
    MAX_RETRIES_PER_CIN = 3
    APPEND_MODE = True  # Set to True to append to existing CSV, False to overwrite

    extractor = None
    try:
        if not API_KEY or not API_SECRET:
            raise ValueError(
                "Set the SANDBOX_API_KEY and SANDBOX_API_SECRET environment variables"
            )

        # Initialize extractor
        extractor = MCADataExtractor(
            api_key=API_KEY,
            api_secret=API_SECRET,
            delay_between_requests=DELAY_BETWEEN_REQUESTS,
            max_retries_per_cin=MAX_RETRIES_PER_CIN,
        )

        # Process companies
        extractor.process_companies(
            input_file=INPUT_FILE, output_file=OUTPUT_FILE, append_mode=APPEND_MODE
        )

        print("\n✓ Extraction completed successfully!")
        print(f"✓ Check the output file: {OUTPUT_FILE}")
        print("✓ Check the log file: mca_extraction.log")

    except FileNotFoundError:
        logger.error(f"Input file '{INPUT_FILE}' not found!")
        print(f"\n✗ Error: Input file '{INPUT_FILE}' not found!")
        print("Please ensure the Excel file exists in the current directory.")

    except Exception as e:
        logger.error(f"Fatal error: {e}", exc_info=True)
        print(f"\n✗ Fatal error: {e}")
        print("Check mca_extraction.log for detailed error information.")

    finally:
        # Release the HTTP session on failure as well as on success
        if extractor is not None:
            extractor.close()

2025-12-24 14:46:32,171 - __main__ - INFO - Sandbox API client initialized
2025-12-24 14:46:32,172 - __main__ - INFO - Reading input file: Incorporation-report-20251201.xlsx


2025-12-24 14:46:33,301 - __main__ - INFO - Successfully read 100 companies from input file
2025-12-24 14:46:33,311 - __main__ - INFO - Processing 100 companies...
2025-12-24 14:46:33,314 - __main__ - INFO - Extracting data for CIN: U74909UT2025PTC020164 (Attempt 1/3)
2025-12-24 14:46:33,315 - __main__ - INFO - Requesting new access token
2025-12-24 14:46:33,740 - __main__ - INFO - Successfully authenticated
2025-12-24 14:46:44,939 - __main__ - INFO - Successfully extracted data for: 1 TOUCH ALL SOLUTION PRIVATE LIMITED
2025-12-24 14:46:46,453 - __main__ - INFO - Extracting data for CIN: U85499KA2025PTC210388 (Attempt 1/3)
2025-12-24 14:46:56,761 - __main__ - INFO - Successfully extracted data for: 21K LEARNING PRIVATE LIMITED
2025-12-24 14:46:58,264 - __main__ - INFO - Extracting data for CIN: U32111KA2025PTC210362 (Attempt 1/3)
2025-12-24 14:47:02,433 - __main__ - INFO - Successfully extracted data for: 222 GOLD PRIVATE LIMITED
2025-12-24 14:47:03,946 - __main__ - INFO - Extracting d


✓ Extraction completed successfully!
✓ Check the output file: companies_extracted_data.csv
✓ Check the log file: mca_extraction.log


In [4]:
import pandas as pd 
# Load the report (no row limit, default sheet) into a DataFrame for inspection
df = pd.read_excel("./Incorporation-report-20251201.xlsx")

In [5]:
df

Unnamed: 0,CIN,Class,Company Name,Date Of Registration,Company Type,Activity Code
0,U74909UT2025PTC020164,Private,1 TOUCH ALL SOLUTION PRIVATE LIMITED,03-11-2025,Non-government company,74
1,U85499KA2025PTC210388,Private,21K LEARNING PRIVATE LIMITED,03-11-2025,Non-government company,85
2,U32111KA2025PTC210362,Private,222 GOLD PRIVATE LIMITED,03-11-2025,Non-government company,32
3,U46699DL2025PTC457518,Private,3 SEQUENCE METAL PRIVATE LIMITED,03-11-2025,Non-government company,46
4,U11041BR2025PTC079883,Private,4FOX BEVERAGES PRIVATE LIMITED,03-11-2025,Non-government company,11
...,...,...,...,...,...,...
13908,U24109KA2025PTC211474,Private,ZILLIONAIRE INDUSTRIES PRIVATE LIMITED,30-11-2025,Non-government company,24
13909,U45300DL2025PTC458623,Private,ZIVANTA TRADING PRIVATE LIMITED,30-11-2025,Non-government company,45
13910,U86900DL2025PTC458631,Private,ZIYA INDIA MEDTOUR PRIVATE LIMITED,30-11-2025,Non-government company,86
13911,U88900GJ2025NPL169850,Private,ZYNERO HUMAN WELFARE FOUNDATION,30-11-2025,Non-government company,88
