In [None]:
# Sample user records: one dict per user, built from (name, country, active) rows.
users = [
    {"name": name, "country": country, "active": active}
    for name, country, active in (
        ("Alex", "India", True),
        ("Bob", "USA", False),
        ("Charlie", "UK", True),
    )
]



In [None]:
# Show every user record as a raw dictionary, one per line.
for record in users:
    print(record)


{'name': 'Alex', 'country': 'India', 'active': True}
{'name': 'Bob', 'country': 'USA', 'active': False}
{'name': 'Charlie', 'country': 'UK', 'active': True}


In [None]:
# Print "<name> from <country>" for each user.
for record in users:
    print(f"{record['name']} from {record['country']}")

Alex from India
Bob from USA
Charlie from UK


In [None]:
# Keep the first record around; the following cells iterate over it.
sample_user = users[0]

# Walk the (key, value) pairs of that single record.
for field, content in sample_user.items():
    print(f"{field} => {content}")


name => Alex
country => India
active => True


In [None]:
# Iterating a dict directly yields its keys, same as .keys().
for field in sample_user:
    print(field)

name
country
active


In [None]:
# Print only the values of the sample record.
for content in sample_user.values():
    print(content)

Alex
India
True


In [None]:
# Print each field of the sample record as "key : value".
for field, content in sample_user.items():
    print(f"{field} : {content}")

name : Alex
country : India
active : True


In [None]:
##
## Why is a dictionary ideal for API data?
# APIs often return data with varying fields. Dictionaries handle this naturally
# APIs commonly use JSON format, which maps directly to Python dictionaries

## What happens if a key is missing?
# When a key is missing, direct access (user["key"]) raises a KeyError; safe alternatives are .get() (returns None or a default) or a try/except block


In [None]:
###### M1.03#######################################

# Deliberately messy records for the validation exercise: an empty name,
# a negative age, an email without "@", and a record with no email at all.
users = [
    dict(name="Alex", age=28, email="alex@example.com"),
    dict(name="", age=35, email="bob@example.com"),
    dict(name="Charlie", age=-2, email="charlieexample.com"),
    dict(name="Diana", age=22),
]


In [None]:
# Validate every record field by field. Passing fields are printed right away;
# failures are collected and listed after all three checks have run.
for position, user in enumerate(users, start=1):
    print(f"\nUser {position}: {user}")
    errors = []

    # Name: must be present and contain something other than whitespace.
    name = user.get('name')
    if not name or not name.strip():
        errors.append("‚ùå Name is missing or empty")
    else:
        print(f"  ‚úÖ Name: '{name}'")

    # Age: must be present and strictly positive.
    if 'age' in user:
        if user['age'] > 0:
            print(f"  ‚úÖ Age: {user['age']}")
        else:
            errors.append(f"‚ùå Age must be > 0, got {user['age']}")
    else:
        errors.append("‚ùå Age field is missing")

    # Email: must be present and contain an "@".
    if 'email' in user:
        if '@' in user['email']:
            print(f"  ‚úÖ Email: {user['email']}")
        else:
            errors.append(f"‚ùå Email must contain '@', got '{user['email']}'")
    else:
        errors.append("‚ùå Email field is missing")

    # Final verdict for this record.
    if not errors:
        print("  ‚úÖ All validations passed!")
    else:
        print("  Issues found:")
        for error in errors:
            print(f"    {error}")


User 1: {'name': 'Alex', 'age': 28, 'email': 'alex@example.com'}
  ‚úÖ Name: 'Alex'
  ‚úÖ Age: 28
  ‚úÖ Email: alex@example.com
  ‚úÖ All validations passed!

User 2: {'name': '', 'age': 35, 'email': 'bob@example.com'}
  ‚úÖ Age: 35
  ‚úÖ Email: bob@example.com
  Issues found:
    ‚ùå Name is missing or empty

User 3: {'name': 'Charlie', 'age': -2, 'email': 'charlieexample.com'}
  ‚úÖ Name: 'Charlie'
  Issues found:
    ‚ùå Age must be > 0, got -2
    ‚ùå Email must contain '@', got 'charlieexample.com'

User 4: {'name': 'Diana', 'age': 22}
  ‚úÖ Name: 'Diana'
  ‚úÖ Age: 22
  Issues found:
    ‚ùå Email field is missing


In [None]:
# One predicate per field; each returns True when the field is missing or invalid.
field_is_invalid = {
    "name": lambda u: "name" not in u or u["name"] == "",
    "age": lambda u: "age" not in u or u["age"] <= 0,
    "email": lambda u: "email" not in u or "@" not in u["email"],
}

# Compact verdict per user: VALID, or INVALID plus the list of failing fields.
for user in users:
    errors = [field for field, is_bad in field_is_invalid.items() if is_bad(user)]

    if not errors:
        print(user["name"], "‚úÖ VALID")
    else:
        print(user.get("name", "Unknown"), "‚ùå INVALID:", errors)


Alex ‚úÖ VALID
 ‚ùå INVALID: ['name']
Charlie ‚ùå INVALID: ['age', 'email']
Diana ‚ùå INVALID: ['email']


In [None]:
### Why is validation important?
# Validation is important because of "garbage in, garbage out". Bad data can have serious consequences in real-life sectors like healthcare and finance

### What could go wrong if we skip this step?
# The system could crash, analytics built on the data could be wrong, and one could end up being non-compliant with regulations

In [None]:
# Optional: Create the CSV file using Python
# The backslash right after the opening quotes suppresses the leading newline.
# Without it the file starts with a blank line, csv.DictReader takes that blank
# line as the header row, and the analyzer reports "CSV file has no headers".
# The rows intentionally contain a missing amount, a missing customer_id,
# a negative amount and a missing currency.
content = """\
transaction_id,customer_id,amount,currency,status
T1001,C001,250,USD,COMPLETED
T1002,C002,,USD,PENDING
T1003,,450,EUR,FAILED
T1004,C004,-50,USD,COMPLETED
T1005,C005,300,,COMPLETED
"""

# Explicit encoding so the file matches the UTF-8 reader used by the analyzer.
with open("transactions.csv", "w", encoding="utf-8") as file:
    file.write(content)

print("transactions.csv created successfully!")


transactions.csv created successfully!


In [None]:
import csv
import json

# Empty skeleton of a quality report: a row counter, the column list, and
# two dictionaries for missing and invalid value tallies.
report = dict(
    row_count=0,
    columns=[],
    missing_values={},
    invalid_values={},
)


In [None]:
import csv
import json
import sys
import datetime
import argparse
import os
from typing import Dict, List, Any, Optional, Tuple
from collections import defaultdict
from dataclasses import dataclass, asdict
from enum import Enum


class SeverityLevel(Enum):
    """Severity levels for quality issues.

    Each member's value is the lowercase label that ends up in the report;
    `CSVQualityAnalyzer._get_severity` returns these values as plain strings.
    """
    LOW = "low"            # missing percentage up to 5
    MEDIUM = "medium"      # missing percentage above 5, up to 20
    HIGH = "high"          # missing percentage above 20, up to 50
    CRITICAL = "critical"  # missing percentage above 50


class ColumnType(Enum):
    """Data types for columns.

    These are the categories `CSVQualityAnalyzer._guess_data_type` assigns to
    individual cell values; the member value is the label written to the report.
    """
    INTEGER = "integer"  # value parses with int()
    FLOAT = "float"      # value parses with float() but not int()
    STRING = "string"    # fallback when no other guess applies
    BOOLEAN = "boolean"  # true/false/yes/no style literals
    DATE = "date"        # heuristic: contains '-', '/' or '.' and has at least 8 characters
    EMAIL = "email"      # heuristic: contains both '@' and '.'
    UNKNOWN = "unknown"  # used for a column that has no typed values at all


@dataclass
class QualityMetric:
    """Data class for storing quality metrics.

    All fields start at zero and are filled in by `CSVQualityAnalyzer` while it
    processes a file.
    """
    total_rows: int = 0            # data rows read (header excluded)
    total_columns: int = 0         # number of header columns
    complete_rows: int = 0         # rows with no missing value in any column
    empty_cells: int = 0           # sum of missing values over all columns
    duplicate_rows: int = 0        # rows whose fingerprint was already seen
    invalid_values: int = 0        # sum of all column-specific validation failures
    quality_score: float = 0.0     # combined score, clamped to the range 0-100
    processing_time: float = 0.0   # elapsed analysis time in seconds


class CSVQualityAnalyzer:
    """
    Main analyzer class for CSV quality assessment.
    """

    def __init__(self, file_path: str):
        """
        Prepare an analyzer for a single CSV file.

        Nothing is read here; the file is only opened by `analyze()`.

        Args:
            file_path: Path to the CSV file to analyze
        """
        self.file_path = file_path
        self.file_name = os.path.basename(file_path)

        # A missing file is tolerated at construction time: its size is recorded as 0.
        if os.path.exists(file_path):
            self.file_size = os.path.getsize(file_path)
        else:
            self.file_size = 0

        self.report = {}
        self.metrics = QualityMetric()
        # The clock starts at construction, not when analyze() is called.
        self.start_time = datetime.datetime.now()

    def analyze(self) -> Dict[str, Any]:
        """
        Perform comprehensive analysis of the CSV file.

        Returns:
            Dictionary containing the complete quality report
        """
        print(f"\nüîç Ironhack CSV Quality Analyzer")
        print(f"üìÅ Analyzing: {self.file_name}")
        print(f"‚è±Ô∏è  Started at: {self.start_time.strftime('%H:%M:%S')}")
        print("‚îÄ" * 50)

        try:
            with open(self.file_path, 'r', encoding='utf-8') as file:
                reader = csv.DictReader(file)

                # Basic file validation
                if not reader.fieldnames:
                    raise ValueError("CSV file has no headers")

                # Initialize data structures
                columns = reader.fieldnames
                self._initialize_analysis(columns)

                # Process all rows
                self._process_rows(reader)

                # Calculate derived metrics
                self._calculate_metrics()

                # Generate final report
                self._generate_report()

                # Calculate processing time
                self.metrics.processing_time = (
                    datetime.datetime.now() - self.start_time
                ).total_seconds()

                return self.report

        except FileNotFoundError:
            self._handle_error("File not found. Please check the file path.")
        except UnicodeDecodeError:
            self._handle_error("File encoding issue. Try UTF-8 or specify encoding.")
        except csv.Error as e:
            self._handle_error(f"CSV parsing error: {str(e)}")
        except Exception as e:
            self._handle_error(f"Unexpected error: {str(e)}")

        return self.report

    def _initialize_analysis(self, columns: List[str]) -> None:
        """Initialize data structures for analysis."""
        self.columns = columns
        self.metrics.total_columns = len(columns)

        # Initialize counters
        self.row_data = []
        self.missing_counts = {col: 0 for col in columns}
        self.invalid_counts = defaultdict(int)
        self.type_counts = {col: defaultdict(int) for col in columns}
        self.seen_hashes = set()
        self.duplicate_rows = []

        # Initialize column metadata
        self.column_metadata = {
            col: {
                'type_guesses': defaultdict(int),
                'min_length': float('inf'),
                'max_length': 0,
                'unique_values': set(),
                'numeric_stats': {'sum': 0, 'count': 0, 'min': float('inf'), 'max': float('-inf')}
            }
            for col in columns
        }

    def _process_rows(self, reader: csv.DictReader) -> None:
        """Process all rows in the CSV file."""
        for row_idx, row in enumerate(reader, start=1):
            self.metrics.total_rows += 1

            # Track progress
            if row_idx % 1000 == 0:
                print(f"  Processed {row_idx:,} rows...")

            # Create row hash for duplicate detection
            row_hash = self._create_row_hash(row)

            # Check for duplicates
            if row_hash in self.seen_hashes:
                self.metrics.duplicate_rows += 1
                self.duplicate_rows.append(row_idx)
            else:
                self.seen_hashes.add(row_hash)
                self.row_data.append(row)

            # Analyze each column in the row
            self._analyze_row(row, row_idx)

        print(f"  ‚úì Completed processing {self.metrics.total_rows:,} rows")

    def _analyze_row(self, row: Dict, row_idx: int) -> None:
        """Analyze a single row."""
        row_complete = True

        for col in self.columns:
            value = str(row.get(col, "")).strip()

            # Check for missing values
            if not value or value.lower() in ['null', 'nan', 'none', '']:
                self.missing_counts[col] += 1
                row_complete = False
                continue

            # Analyze non-missing values
            self._analyze_value(col, value, row_idx)

            # Update column metadata
            self._update_column_metadata(col, value)

        if row_complete:
            self.metrics.complete_rows += 1

    def _analyze_value(self, col: str, value: str, row_idx: int) -> None:
        """Analyze a single value."""
        # Guess data type
        guessed_type = self._guess_data_type(value)
        self.type_counts[col][guessed_type] += 1

        # Column-specific validation
        self._validate_column_specific(col, value, row_idx)

        # Length analysis
        value_length = len(value)
        if value_length < self.column_metadata[col]['min_length']:
            self.column_metadata[col]['min_length'] = value_length
        if value_length > self.column_metadata[col]['max_length']:
            self.column_metadata[col]['max_length'] = value_length

        # Track unique values
        self.column_metadata[col]['unique_values'].add(value)

    def _guess_data_type(self, value: str) -> ColumnType:
        """Guess the data type of a value."""
        try:
            int(value)
            return ColumnType.INTEGER
        except ValueError:
            try:
                float(value)
                return ColumnType.FLOAT
            except ValueError:
                if value.lower() in ['true', 'false', 'yes', 'no', '1', '0']:
                    return ColumnType.BOOLEAN
                elif '@' in value and '.' in value:
                    return ColumnType.EMAIL
                elif any(sep in value for sep in ['-', '/', '.']) and len(value) >= 8:
                    return ColumnType.DATE
                else:
                    return ColumnType.STRING

    def _validate_column_specific(self, col: str, value: str, row_idx: int) -> None:
        """Perform column-specific validation."""
        col_lower = col.lower()

        # Email validation
        if 'email' in col_lower:
            if '@' not in value or '.' not in value or len(value.split('@')) != 2:
                self.invalid_counts[f"{col}_invalid_email"] += 1

        # Amount/price validation
        elif any(keyword in col_lower for keyword in ['amount', 'price', 'cost', 'total']):
            try:
                num_val = float(value)
                if num_val < 0:
                    self.invalid_counts[f"{col}_negative"] += 1
                elif num_val == 0:
                    self.invalid_counts[f"{col}_zero"] += 1
            except ValueError:
                self.invalid_counts[f"{col}_non_numeric"] += 1

        # Date validation
        elif any(keyword in col_lower for keyword in ['date', 'time', 'timestamp']):
            # Simple date format check
            date_parts = value.replace('/', '-').replace('.', '-').split('-')
            if len(date_parts) < 2 or not all(part.strip() for part in date_parts):
                self.invalid_counts[f"{col}_invalid_date"] += 1

        # ID validation
        elif col_lower.endswith('_id') or col_lower == 'id':
            if not value.isalnum():
                self.invalid_counts[f"{col}_invalid_id"] += 1

    def _update_column_metadata(self, col: str, value: str) -> None:
        """Update column metadata with value information."""
        # Update type guesses
        guessed_type = self._guess_data_type(value)
        self.column_metadata[col]['type_guesses'][guessed_type] += 1

        # Update numeric stats if applicable
        if guessed_type in [ColumnType.INTEGER, ColumnType.FLOAT]:
            try:
                num_val = float(value)
                stats = self.column_metadata[col]['numeric_stats']
                stats['sum'] += num_val
                stats['count'] += 1
                stats['min'] = min(stats['min'], num_val)
                stats['max'] = max(stats['max'], num_val)
            except ValueError:
                pass

    def _create_row_hash(self, row: Dict) -> int:
        """Create a hash for a row to detect duplicates."""
        row_str = '|'.join(str(row.get(col, '')).strip() for col in self.columns)
        return hash(row_str)

    def _calculate_metrics(self) -> None:
        """Calculate all derived metrics."""
        # Calculate missing values
        self.metrics.empty_cells = sum(self.missing_counts.values())

        # Calculate invalid values
        self.metrics.invalid_values = sum(self.invalid_counts.values())

        # Calculate quality score (0-100)
        total_cells = self.metrics.total_rows * self.metrics.total_columns

        if total_cells > 0:
            completeness_score = (1 - self.metrics.empty_cells / total_cells) * 40
            uniqueness_score = (1 - self.metrics.duplicate_rows / max(1, self.metrics.total_rows)) * 30
            validity_score = (1 - self.metrics.invalid_values / max(1, total_cells - self.metrics.empty_cells)) * 30

            self.metrics.quality_score = min(100, max(0,
                completeness_score + uniqueness_score + validity_score))
        else:
            self.metrics.quality_score = 0

    def _generate_report(self) -> None:
        """Generate the final quality report."""
        self.report = {
            'metadata': {
                'file_name': self.file_name,
                'file_path': self.file_path,
                'file_size_bytes': self.file_size,
                'analysis_date': datetime.datetime.now().isoformat(),
                'processing_time_seconds': self.metrics.processing_time
            },
            'summary': {
                'total_rows': self.metrics.total_rows,
                'total_columns': self.metrics.total_columns,
                'complete_rows': self.metrics.complete_rows,
                'completeness_percentage': round(
                    (self.metrics.complete_rows / self.metrics.total_rows * 100)
                    if self.metrics.total_rows > 0 else 0, 2
                ),
                'quality_score': round(self.metrics.quality_score, 2),
                'quality_rating': self._get_quality_rating(self.metrics.quality_score)
            },
            'column_analysis': self._generate_column_analysis(),
            'issues': self._generate_issues_report(),
            'recommendations': self._generate_recommendations()
        }

    def _generate_column_analysis(self) -> Dict[str, Any]:
        """Generate detailed column analysis."""
        analysis = {}

        for col in self.columns:
            missing_pct = (self.missing_counts[col] / self.metrics.total_rows * 100) \
                if self.metrics.total_rows > 0 else 0

            # Determine most likely type
            if self.type_counts[col]:
                most_common_type = max(self.type_counts[col].items(), key=lambda x: x[1])[0]
                type_confidence = (
                    self.type_counts[col][most_common_type] /
                    (self.metrics.total_rows - self.missing_counts[col]) * 100
                ) if (self.metrics.total_rows - self.missing_counts[col]) > 0 else 0
            else:
                most_common_type = ColumnType.UNKNOWN
                type_confidence = 0

            analysis[col] = {
                'missing_count': self.missing_counts[col],
                'missing_percentage': round(missing_pct, 2),
                'completeness_severity': self._get_severity(missing_pct),
                'data_type': most_common_type.value,
                'type_confidence': round(type_confidence, 2),
                'unique_values': len(self.column_metadata[col]['unique_values']),
                'value_length': {
                    'min': self.column_metadata[col]['min_length']
                    if self.column_metadata[col]['min_length'] != float('inf') else 0,
                    'max': self.column_metadata[col]['max_length']
                }
            }

            # Add numeric stats if applicable
            if most_common_type in [ColumnType.INTEGER, ColumnType.FLOAT]:
                stats = self.column_metadata[col]['numeric_stats']
                if stats['count'] > 0:
                    analysis[col]['numeric_stats'] = {
                        'count': stats['count'],
                        'sum': round(stats['sum'], 2),
                        'average': round(stats['sum'] / stats['count'], 2),
                        'min': round(stats['min'], 2),
                        'max': round(stats['max'], 2),
                        'range': round(stats['max'] - stats['min'], 2)
                    }

        return analysis

    def _generate_issues_report(self) -> Dict[str, Any]:
        """Generate report of identified issues."""
        issues = {
            'duplicates': {
                'count': self.metrics.duplicate_rows,
                'percentage': round(
                    (self.metrics.duplicate_rows / self.metrics.total_rows * 100)
                    if self.metrics.total_rows > 0 else 0, 2
                ),
                'rows': self.duplicate_rows[:100]  # Limit to first 100 for readability
            },
            'invalid_values': dict(self.invalid_counts),
            'columns_with_issues': []
        }

        # Identify problematic columns
        for col in self.columns:
            missing_pct = (self.missing_counts[col] / self.metrics.total_rows * 100) \
                if self.metrics.total_rows > 0 else 0

            if missing_pct > 20 or col in self.invalid_counts:
                issues['columns_with_issues'].append({
                    'column': col,
                    'missing_percentage': round(missing_pct, 2),
                    'severity': self._get_severity(missing_pct)
                })

        return issues

    def _generate_recommendations(self) -> List[str]:
        """Generate actionable recommendations."""
        recommendations = []

        # Completeness recommendations
        incomplete_cols = [
            col for col, count in self.missing_counts.items()
            if count / self.metrics.total_rows > 0.2
        ]
        if incomplete_cols:
            recommendations.append(
                f"Consider data imputation for columns with >20% missing values: "
                f"{', '.join(incomplete_cols[:5])}{'...' if len(incomplete_cols) > 5 else ''}"
            )

        # Duplicate recommendations
        if self.metrics.duplicate_rows > 0:
            dup_pct = (self.metrics.duplicate_rows / self.metrics.total_rows * 100)
            recommendations.append(
                f"Remove {self.metrics.duplicate_rows} duplicate rows "
                f"({dup_pct:.1f}% of total) to improve data quality"
            )

        # Type consistency recommendations
        for col in self.columns:
            if self.type_counts[col]:
                total = sum(self.type_counts[col].values())
                if total > 0:
                    main_type = max(self.type_counts[col].items(), key=lambda x: x[1])[0]
                    type_pct = (self.type_counts[col][main_type] / total * 100)
                    if type_pct < 90:
                        recommendations.append(
                            f"Column '{col}' has mixed data types. "
                            f"Consider standardizing to {main_type.value}"
                        )

        # Overall quality recommendation
        if self.metrics.quality_score < 70:
            recommendations.append(
                f"Overall data quality needs improvement. "
                f"Focus on reducing missing values and fixing invalid entries."
            )

        return recommendations

    def _get_severity(self, percentage: float) -> str:
        """Get severity level based on percentage."""
        if percentage == 0:
            return SeverityLevel.LOW.value
        elif percentage <= 5:
            return SeverityLevel.LOW.value
        elif percentage <= 20:
            return SeverityLevel.MEDIUM.value
        elif percentage <= 50:
            return SeverityLevel.HIGH.value
        else:
            return SeverityLevel.CRITICAL.value

    def _get_quality_rating(self, score: float) -> str:
        """Get quality rating based on score."""
        if score >= 90:
            return "Excellent üèÜ"
        elif score >= 75:
            return "Good üëç"
        elif score >= 60:
            return "Fair ‚ö†Ô∏è"
        elif score >= 40:
            return "Poor üëé"
        else:
            return "Critical üö®"

    def _handle_error(self, message: str) -> None:
        """Handle analysis errors."""
        print(f"\n‚ùå Error: {message}")
        self.report = {
            'status': 'error',
            'error_message': message,
            'file_path': self.file_path,
            'analysis_date': datetime.datetime.now().isoformat()
        }

    def save_report(self, output_path: Optional[str] = None) -> str:
        """
        Save the quality report to a JSON file.

        Args:
            output_path: Optional custom output path

        Returns:
            Path to the saved report file
        """
        if not self.report or 'status' in self.report and self.report['status'] == 'error':
            print("‚ö†Ô∏è  No valid report to save.")
            return ""

        if not output_path:
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            output_path = f"quality_report_{self.file_name}_{timestamp}.json"

        try:
            with open(output_path, 'w', encoding='utf-8') as f:
                json.dump(self.report, f, indent=2, ensure_ascii=False)

            print(f"‚úÖ Report saved to: {output_path}")
            return output_path

        except Exception as e:
            print(f"‚ùå Error saving report: {str(e)}")
            return ""

    def print_summary(self) -> None:
        """Print a human-readable summary of the analysis."""
        if not self.report or 'status' in self.report and self.report['status'] == 'error':
            print("No analysis results to display.")
            return

        report = self.report
        summary = report['summary']

        print("\n" + "="*70)
        print("üìä CSV QUALITY ANALYSIS REPORT")
        print("="*70)
        print(f"üìÅ File: {report['metadata']['file_name']}")
        print(f"üìÖ Analyzed: {report['metadata']['analysis_date'][:10]}")
        print(f"‚è±Ô∏è  Processing time: {report['metadata']['processing_time_seconds']:.2f}s")
        print(f"üìà Quality Score: {summary['quality_score']}/100 {summary['quality_rating']}")
        print("-"*70)

        print(f"\nüìä SUMMARY STATISTICS:")
        print(f"  ‚Ä¢ Total Rows: {summary['total_rows']:,}")
        print(f"  ‚Ä¢ Total Columns: {summary['total_columns']}")
        print(f"  ‚Ä¢ Complete Rows: {summary['complete_rows']:,} ({summary['completeness_percentage']}%)")

        issues = report['issues']
        print(f"\n‚ö†Ô∏è  IDENTIFIED ISSUES:")
        print(f"  ‚Ä¢ Duplicate Rows: {issues['duplicates']['count']:,} ({issues['duplicates']['percentage']}%)")
        print(f"  ‚Ä¢ Invalid Values: {self.metrics.invalid_values:,}")

        print(f"\nüîç COLUMN ANALYSIS (Top 5):")
        col_analysis = report['column_analysis']
        for i, (col, analysis) in enumerate(list(col_analysis.items())[:5]):
            print(f"  {i+1}. {col}:")
            print(f"     - Missing: {analysis['missing_count']:,} ({analysis['missing_percentage']}%)")
            print(f"     - Type: {analysis['data_type']} ({analysis['type_confidence']}% confidence)")
            print(f"     - Unique Values: {analysis['unique_values']:,}")

        print(f"\nüí° RECOMMENDATIONS:")
        for i, rec in enumerate(report['recommendations'][:3], 1):
            print(f"  {i}. {rec}")

        print(f"\nüìÅ Report saved as: {self.file_name}_quality_report.json")
        print("="*70)


def main(argv: Optional[List[str]] = None):
    """Main function to run the CSV quality analyzer.

    Args:
        argv: Command-line arguments to parse, without the program name.
            Defaults to None, in which case argparse reads `sys.argv[1:]`
            (normal command-line use). Pass an explicit list when calling
            from a notebook, where `sys.argv` holds the kernel's own flags.

    Raises:
        SystemExit: with code 1 when the file does not exist, or with
            argparse's own exit code on invalid arguments / --help.
    """
    parser = argparse.ArgumentParser(
        description="Ironhack Germany - CSV Quality Report Generator",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s data.csv                    # Analyze a CSV file
  %(prog)s data.csv --output my_report # Save with custom name
  %(prog)s data.csv --summary-only     # Print summary only
        """
    )

    parser.add_argument(
        "file_path",
        help="Path to the CSV file to analyze"
    )

    parser.add_argument(
        "-o", "--output",
        help="Custom output file path for the JSON report"
    )

    parser.add_argument(
        "-s", "--summary-only",
        action="store_true",
        help="Print only the summary, don't save JSON report"
    )

    parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="Quiet mode - minimal output"
    )

    args = parser.parse_args(argv)

    # Check if file exists
    if not os.path.exists(args.file_path):
        print(f"‚ùå Error: File '{args.file_path}' not found.")
        sys.exit(1)

    # Initialize analyzer
    analyzer = CSVQualityAnalyzer(args.file_path)

    # Run analysis
    report = analyzer.analyze()

    if not args.quiet:
        analyzer.print_summary()

    # Save report unless summary-only mode; an error report carries a
    # 'status' key and is never saved
    if not args.summary_only and report and 'status' not in report:
        analyzer.save_report(args.output)


if __name__ == "__main__":
    # Display Ironhack banner
    print("\n" + "="*70)
    print("IRONHACK GERMANY - DAY 2 PROJECT")
    print("CSV Quality Report Generator")
    print("="*70)

    # Inside Jupyter/Colab, __name__ is "__main__" too, but sys.argv belongs to
    # the kernel launcher ("-f <connection file>"), which argparse rejects with
    # "unrecognized arguments: -f" and SystemExit: 2. In that case analyze the
    # transactions.csv created earlier in this notebook instead of reading
    # sys.argv.
    running_in_notebook = "ipykernel" in sys.modules

    # Run the main analysis
    main(["transactions.csv"] if running_in_notebook else None)


IRONHACK GERMANY - DAY 2 PROJECT
CSV Quality Report Generator


usage: colab_kernel_launcher.py [-h] [-o OUTPUT] [-s] [-q] file_path
colab_kernel_launcher.py: error: unrecognized arguments: -f


SystemExit: 2