### Question:
Create a Python class DatasetAnalyzer to perform analysis and transformations on a dataset represented as a list of dictionaries. Implement the following features:

1. Initialization: Accept a dataset (list of dictionaries).
Normalize Column: Add a method normalize_column to normalize values in a specified numerical column using min-max normalization.
2. Find Unique Values: Add a method unique_values to find all unique values in a specified column.
3. Sort by Column: Add a method sort_by_column to sort the dataset based on a specified column in ascending or descending order.
4. Summary Statistics: Add a method summary to compute the count, mean, minimum, and maximum of a numerical column.
Write an example to demonstrate the following:

1. Normalize a numerical column.
2. Find unique values in a categorical column.
3. Sort the dataset by a specified column.
4. Compute summary statistics for a numerical column.


In [1]:
from typing import List, Dict, Any

In [2]:
class DatasetAnalyzer:
    def __init__(self, data: List[Dict[str, Any]]):
        """
        Initialize the analyzer with a dataset.
        """
        self.data = data

    def normalize_column(self, column: str) -> None:
        """
        Normalize values in a specified column using min-max normalization.
        Modifies the dataset in place.

        :param column: Column name to normalize.
        """
        values = [row[column] for row in self.data if isinstance(row[column], (int, float))]
        min_val, max_val = min(values), max(values)

        for row in self.data:
            if isinstance(row[column], (int, float)):
                row[column] = (row[column] - min_val) / (max_val - min_val)

    def unique_values(self, column: str) -> List[Any]:
        """
        Find all unique values in a specified column.

        :param column: Column name to check.
        :return: List of unique values.
        """
        return list(set(row[column] for row in self.data))

    def sort_by_column(self, column: str, descending: bool = False) -> List[Dict[str, Any]]:
        """
        Sort the dataset by a specified column.

        :param column: Column name to sort by.
        :param descending: Whether to sort in descending order.
        :return: Sorted dataset.
        """
        return sorted(self.data, key=lambda x: x[column], reverse=descending)

    def summary(self, column: str) -> Dict[str, Any]:
        """
        Compute summary statistics (count, mean, min, max) for a numerical column.

        :param column: Column name to summarize.
        :return: Dictionary of summary statistics.
        """
        values = [row[column] for row in self.data if isinstance(row[column], (int, float))]
        return {
            "count": len(values),
            "mean": sum(values) / len(values) if values else 0,
            "min": min(values) if values else None,
            "max": max(values) if values else None,
        }

In [3]:
# Example Usage
data = [
    {"Name": "Alice", "Age": 25, "Score": 85, "Country": "USA"},
    {"Name": "Bob", "Age": 30, "Score": 90, "Country": "UK"},
    {"Name": "Charlie", "Age": 35, "Score": 95, "Country": "USA"},
    {"Name": "Diana", "Age": 28, "Score": 88, "Country": "Canada"},
    {"Name": "Eve", "Age": 22, "Score": 70, "Country": "USA"},
]

In [4]:
# Initialize the analyzer
analyzer = DatasetAnalyzer(data)

In [5]:
# 1. Normalize the "Score" column
analyzer.normalize_column("Score")
print("Normalized Data:", data)

Normalized Data: [{'Name': 'Alice', 'Age': 25, 'Score': 0.6, 'Country': 'USA'}, {'Name': 'Bob', 'Age': 30, 'Score': 0.8, 'Country': 'UK'}, {'Name': 'Charlie', 'Age': 35, 'Score': 1.0, 'Country': 'USA'}, {'Name': 'Diana', 'Age': 28, 'Score': 0.72, 'Country': 'Canada'}, {'Name': 'Eve', 'Age': 22, 'Score': 0.0, 'Country': 'USA'}]


In [7]:
# 2. Find unique values in the "Country" column
unique_countries = analyzer.unique_values("Country")
print("Unique Countries:", unique_countries)

Unique Countries: ['USA', 'Canada', 'UK']


In [8]:
# 3. Sort the dataset by "Age" in descending order
sorted_data = analyzer.sort_by_column("Age", descending=True)
print("Sorted Data by Age (Descending):", sorted_data)

Sorted Data by Age (Descending): [{'Name': 'Charlie', 'Age': 35, 'Score': 1.0, 'Country': 'USA'}, {'Name': 'Bob', 'Age': 30, 'Score': 0.8, 'Country': 'UK'}, {'Name': 'Diana', 'Age': 28, 'Score': 0.72, 'Country': 'Canada'}, {'Name': 'Alice', 'Age': 25, 'Score': 0.6, 'Country': 'USA'}, {'Name': 'Eve', 'Age': 22, 'Score': 0.0, 'Country': 'USA'}]


In [9]:
# 4. Compute summary statistics for "Age"
age_summary = analyzer.summary("Age")
print("Summary Statistics for Age:", age_summary)

Summary Statistics for Age: {'count': 5, 'mean': 28.0, 'min': 22, 'max': 35}
