In [1]:
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import pandas as pd


In [2]:


class ConsumerComplaintsExtractor:
    """Extract consumer-complaint counts from a directory of Excel workbooks.

    Every workbook is read as one flat table with three relevant columns
    ('Element Name', 'Unit', 'Fact Value'). Rows whose element name is a key
    of ``METRIC_MAP`` become records with the columns listed in
    ``OUTPUT_COLUMNS``; the company name is taken from the row whose element
    name equals ``COMPANY_NAME_ELEMENT``.
    """

    # Column order of every record and of the frame built by process_directory.
    OUTPUT_COLUMNS = (
        'Company Name',
        'Complaint Category',
        'Metric Type',
        'Year Type',
        'Value',
    )

    def __init__(self):
        # Column headers looked up in every workbook.
        self.ELEMENT_NAME_COL = 'Element Name'
        self.UNIT_COL = 'Unit'
        self.VALUE_COL = 'Fact Value'
        # Element name of the row that carries the company name.
        self.COMPANY_NAME_ELEMENT = 'NameOfTheCompany'

        # Element name in the workbook -> label written to 'Metric Type'.
        self.METRIC_MAP = {
            'ConsumerComplaintsReceivedDuringTheYear': 'Received',
            'ConsumerComplaintsPendingResolutionAtEndOfYear': 'Pending'
        }

    def _get_company_name(self, df: pd.DataFrame) -> str:
        """Return the first non-blank company name found in ``df``.

        Falls back to "Unknown Company" when there is no company-name row or
        when every such row holds a missing or blank value (a missing cell
        used to be returned as the literal string "nan").
        """
        is_name_row = df[self.ELEMENT_NAME_COL] == self.COMPANY_NAME_ELEMENT
        for raw in df.loc[is_name_row, self.VALUE_COL]:
            if pd.isna(raw):
                continue
            name = str(raw).strip()
            if name:
                return name
        return "Unknown Company"

    def _extract_category_and_year(self, unit: str) -> Tuple[str, str]:
        """Split a unit identifier into ``(complaint category, year type)``.

        The category is the second underscore-separated token of ``unit``.
        The year type is 'PY' when the unit ends with '_PY' and 'FY'
        otherwise. A unit without an underscore yields ('Unknown', 'FY').
        """
        unit = unit.strip()
        parts = unit.split('_')
        if len(parts) < 2:
            return "Unknown", "FY"
        # NOTE(review): only these two substitutions are applied, so other
        # camel-case tokens (e.g. 'DataPrivacy') pass through unchanged and
        # "Of" is replaced wherever it occurs in the token. Kept as-is so
        # exported category labels do not change; confirm before tidying.
        category = parts[1].replace("TradePractices", "Trade Practices").replace("Of", " of ")
        year_type = 'PY' if unit.endswith('_PY') else 'FY'
        return category, year_type

    @staticmethod
    def _parse_count(raw: Any) -> Optional[int]:
        """Convert a cell value to ``int``; return ``None`` if it is not numeric.

        Thousands separators are removed first so that text such as "1,234"
        is parsed instead of being discarded as non-numeric.
        """
        text = str(raw).strip().replace(',', '')
        number = pd.to_numeric(text, errors='coerce')
        if pd.isna(number):
            return None
        return int(number)

    def _extract_records(self, df: pd.DataFrame, company_name: str) -> List[Dict[str, Any]]:
        """Build one record per numeric row of each metric in ``METRIC_MAP``."""
        records: List[Dict[str, Any]] = []

        for metric_field, metric_label in self.METRIC_MAP.items():
            subset = df[df[self.ELEMENT_NAME_COL] == metric_field]

            print(f"  - Found {len(subset)} rows for '{metric_field}'")

            for _, row in subset.iterrows():
                unit = str(row[self.UNIT_COL])
                category, year_type = self._extract_category_and_year(unit)

                try:
                    value = self._parse_count(row[self.VALUE_COL])
                except (TypeError, ValueError, OverflowError) as e:
                    # e.g. an infinite value cannot be converted to int.
                    print(f"    Skipping invalid row: {unit}, value={row[self.VALUE_COL]} ({e})")
                    continue

                if value is None:
                    # Blank / non-numeric cells carry no count.
                    continue

                records.append({
                    'Company Name': company_name,
                    'Complaint Category': category,
                    'Metric Type': metric_label,
                    'Year Type': year_type,
                    'Value': value
                })
        return records

    def _process_single_file(self, file_path: Path) -> List[Dict[str, Any]]:
        """Read one workbook and return its complaint records.

        Returns an empty list (after printing the reason) when the file
        cannot be read or lacks one of the expected columns, so that a single
        bad workbook does not abort a directory run.
        """
        # openpyxl only handles the newer formats; for legacy .xls workbooks
        # let pandas choose the engine itself.
        engine = 'openpyxl' if file_path.suffix.lower() == '.xlsx' else None
        try:
            df = pd.read_excel(file_path, engine=engine)
        except Exception as e:  # best effort: report and move on to the next file
            print(f"Error reading {file_path.name}: {e}")
            return []

        required = (self.ELEMENT_NAME_COL, self.UNIT_COL, self.VALUE_COL)
        missing = [col for col in required if col not in df.columns]
        if missing:
            print(f"Skipping {file_path.name}: missing column(s) {missing}")
            return []

        company_name = self._get_company_name(df)
        print(f"\nProcessing: {company_name} ({file_path.name})")

        results = self._extract_records(df, company_name)
        print(f"  → Extracted {len(results)} complaint records.")
        return results

    def process_directory(self, directory: str) -> pd.DataFrame:
        """Process every .xlsx/.xls file directly inside ``directory``.

        Files are visited in sorted order so the row order of the result is
        reproducible. Excel lock files (names starting with '~$') are
        ignored. The returned frame always has the ``OUTPUT_COLUMNS``
        columns; it is empty when the directory is missing, is not a
        directory, or yields no records.
        """
        columns = list(self.OUTPUT_COLUMNS)
        dir_path = Path(directory)

        if not dir_path.is_dir():
            print(f"Directory '{directory}' not found.")
            return pd.DataFrame(columns=columns)

        all_data: List[Dict[str, Any]] = []
        for file in sorted(dir_path.iterdir()):
            if not file.is_file() or file.name.startswith('~$'):
                continue
            if file.suffix.lower() in ('.xlsx', '.xls'):
                all_data.extend(self._process_single_file(file))

        return pd.DataFrame(all_data, columns=columns)

    def export_to_excel(self, df: pd.DataFrame, output_file: str) -> None:
        """Write ``df`` to ``output_file`` without the index.

        Nothing is written when ``df`` is empty; a failed write is reported
        on stdout instead of being raised.
        """
        if df.empty:
            print("DataFrame is empty. No export.")
            return
        try:
            df.to_excel(output_file, index=False)
        except Exception as e:
            print(f"Failed to export: {e}")
        else:
            print(f"\n✅ Exported to '{output_file}' successfully.")


In [3]:
# Build the extractor, collect records from every workbook in excel_files/,
# and write the combined table to p9_consumer_complaints.xlsx.
extractor = ConsumerComplaintsExtractor()
df = extractor.process_directory(directory="excel_files")
extractor.export_to_excel(df=df, output_file="p9_consumer_complaints.xlsx")


Processing: 360 One Wam Limited (360_ONE_WAM_LIMITED.xlsx)
  - Found 14 rows for 'ConsumerComplaintsReceivedDuringTheYear'
  - Found 14 rows for 'ConsumerComplaintsPendingResolutionAtEndOfYear'
  → Extracted 28 complaint records.

Processing: 3I Infotech Limited (3I_Infotech_Limited.xlsx)
  - Found 14 rows for 'ConsumerComplaintsReceivedDuringTheYear'
  - Found 14 rows for 'ConsumerComplaintsPendingResolutionAtEndOfYear'
  → Extracted 28 complaint records.

Processing: 3M India Limited (3M_INDIA_LIMITED.xlsx)
  - Found 14 rows for 'ConsumerComplaintsReceivedDuringTheYear'
  - Found 14 rows for 'ConsumerComplaintsPendingResolutionAtEndOfYear'
  → Extracted 28 complaint records.

Processing: 5paisa Capital Limited (5paisa_Capital_Limited.xlsx)
  - Found 14 rows for 'ConsumerComplaintsReceivedDuringTheYear'
  - Found 14 rows for 'ConsumerComplaintsPendingResolutionAtEndOfYear'
  → Extracted 28 complaint records.

Processing: 63 Moons Technologies Limited (63_moons_technologies_li

In [None]:
# Stray scratch cell: its only content was the undefined name `bb`, which
# raises NameError and stops "Restart & Run All". This cell can be deleted.

In [4]:
from pathlib import Path
import pandas as pd

# Number of companies shown in the ranking below.
TOP_N = 10

# Relies on ConsumerComplaintsExtractor having been defined by an earlier cell.
extractor = ConsumerComplaintsExtractor()

# Folder containing the source workbooks; adjust to your data location.
directory = "excel_files"

# Read every workbook again and build the full complaint dataset.
df = extractor.process_directory(directory)

if df.empty:
    print("No complaint data extracted.")
else:
    # Sum 'Value' per company and keep the TOP_N largest totals.
    # NOTE(review): the sum covers every extracted row — all categories, both
    # metric types and both year types together. Confirm that this combined
    # figure is the intended "total" before drawing conclusions from it.
    summary = (
        df.groupby("Company Name")["Value"]
        .sum()
        .reset_index()
        .sort_values(by="Value", ascending=False)
        .head(TOP_N)
    )

    print(f"\n🔝 Top {TOP_N} companies by total consumer complaints:\n")
    print(summary.to_string(index=False))



Processing: 360 One Wam Limited (360_ONE_WAM_LIMITED.xlsx)
  - Found 14 rows for 'ConsumerComplaintsReceivedDuringTheYear'
  - Found 14 rows for 'ConsumerComplaintsPendingResolutionAtEndOfYear'
  → Extracted 28 complaint records.

Processing: 3I Infotech Limited (3I_Infotech_Limited.xlsx)
  - Found 14 rows for 'ConsumerComplaintsReceivedDuringTheYear'
  - Found 14 rows for 'ConsumerComplaintsPendingResolutionAtEndOfYear'
  → Extracted 28 complaint records.

Processing: 3M India Limited (3M_INDIA_LIMITED.xlsx)
  - Found 14 rows for 'ConsumerComplaintsReceivedDuringTheYear'
  - Found 14 rows for 'ConsumerComplaintsPendingResolutionAtEndOfYear'
  → Extracted 28 complaint records.

Processing: 5paisa Capital Limited (5paisa_Capital_Limited.xlsx)
  - Found 14 rows for 'ConsumerComplaintsReceivedDuringTheYear'
  - Found 14 rows for 'ConsumerComplaintsPendingResolutionAtEndOfYear'
  → Extracted 28 complaint records.

Processing: 63 Moons Technologies Limited (63_moons_technologies_li