In [1]:
import pandas as pd

In [3]:
# Hand-entered sample: 30 people, one entry per person in each parallel list.
data = dict(
    Name=[
        'Zane', 'Cathy', 'Leo', 'Mona', 'Alice', 'David', 'Grace', 'Nate', 'Ben', 'Olivia',
        'Bob', 'Victor', 'Quinn', 'Xander', 'Ella', 'Paul', 'Frank', 'Kate', 'Wendy', 'Henry',
        'Jack', 'Tina', 'Sam', 'Ivy', 'Cara', 'Rita', 'Yara', 'Dan', 'Uma', 'Abby',
    ],
    Age=[
        42, 21, 30, 28, 22, 25, 20, 26, 44, 33,
        24, 40, 29, 41, 23, 31, 19, 30, 38, 22,
        21, 30, 34, 24, 43, 32, 39, 45, 36, 19,
    ],
    Income=[
        62500, 33000, 47000, 50000, 32000, 35000, 34000, 46000, 61500, 49000,
        31000, 63000, 52000, 60000, 30000, 51000, 29000, 47000, 61000, 36000,
        30000, 48500, 47000, 31000, 63500, 49500, 64000, 64500, 62000, 29000,
    ],
)

In [5]:
# Each dict key becomes a column; rows get the default 0..n-1 integer index.
df = pd.DataFrame(data=data)

In [7]:
# Show the column labels of the freshly built frame.
print(df.columns)

Index(['Name', 'Age', 'Income'], dtype='object')


In [9]:
# Preview the first five rows.
print(df.head(n=5))

    Name  Age  Income
0   Zane   42   62500
1  Cathy   21   33000
2    Leo   30   47000
3   Mona   28   50000
4  Alice   22   32000


In [15]:
# Bin edges are right-closed, so the intervals are (17, 25], (25, 35], (35, 45];
# for whole-number ages that is exactly 18-25, 26-35 and 36-45.
# Ages outside (17, 45] would come back as NaN rather than raising.
bins = [17, 25, 35, 45]
labels = ['18-25', '26-35', '36-45']
df['Age_Group'] = pd.cut(x=df['Age'], bins=bins, right=True, labels=labels)

In [17]:
# Confirm the new 'Age_Group' column was added.
print(df.columns)

Index(['Name', 'Age', 'Income', 'Age_Group'], dtype='object')


In [19]:
# Income summary per age bracket. observed=True limits the result to the
# categories that actually occur in the data.
grouped = (
    df.groupby('Age_Group', observed=True)['Income']
    .agg(['mean', 'median', 'min', 'max', 'std'])
)
print("=== Summary Statistics (Grouped by Age Range) ===", grouped, sep="\n")

=== Summary Statistics (Grouped by Age Range) ===
                   mean   median    min    max          std
Age_Group                                                  
18-25      31818.181818  31000.0  29000  36000  2400.757456
26-35      48700.000000  48750.0  46000  52000  1960.725489
36-45      62444.444444  62500.0  60000  64500  1467.234738


In [21]:
# Load the Iris dataset; the relative path is resolved against the
# notebook's current working directory.
ds = pd.read_csv(filepath_or_buffer='iris.csv')

In [23]:
# Preview the first five rows of the loaded CSV.
print(ds.head(n=5))

   Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm      Species
0   1            5.1           3.5            1.4           0.2  Iris-setosa
1   2            4.9           3.0            1.4           0.2  Iris-setosa
2   3            4.7           3.2            1.3           0.2  Iris-setosa
3   4            4.6           3.1            1.5           0.2  Iris-setosa
4   5            5.0           3.6            1.4           0.2  Iris-setosa


In [25]:
# Same five summary statistics for every measurement column, per species.
# The result has two-level columns: (measurement, statistic).
grouped_stats = ds.groupby('Species').agg({
    measurement: ['mean', 'median', 'min', 'max', 'std']
    for measurement in (
        'SepalLengthCm',
        'SepalWidthCm',
        'PetalLengthCm',
        'PetalWidthCm',
    )
})

In [27]:
# List the column labels of the Iris frame.
print(ds.columns)

Index(['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
       'Species'],
      dtype='object')


In [29]:
# Plain-text dump of the per-species summary; the two-level column header is
# wide, so the printed table wraps into several side-by-side chunks.
print(grouped_stats)

                SepalLengthCm                            SepalWidthCm         \
                         mean median  min  max       std         mean median   
Species                                                                        
Iris-setosa             5.006    5.0  4.3  5.8  0.352490        3.418    3.4   
Iris-versicolor         5.936    5.9  4.9  7.0  0.516171        2.770    2.8   
Iris-virginica          6.588    6.5  4.9  7.9  0.635880        2.974    3.0   

                                    PetalLengthCm                             \
                 min  max       std          mean median  min  max       std   
Species                                                                        
Iris-setosa      2.3  4.4  0.381024         1.464   1.50  1.0  1.9  0.173511   
Iris-versicolor  2.0  3.4  0.313798         4.260   4.35  3.0  5.1  0.469911   
Iris-virginica   2.2  3.8  0.322497         5.552   5.55  4.5  6.9  0.551895   

                PetalWidthCm          

In [31]:
# Despite the name this is a dict: species label -> plain Python list of that
# species' sepal lengths, in original row order.
species_numeric_list = {
    species_label: sepal_lengths.tolist()
    for species_label, sepal_lengths in ds.groupby('Species')['SepalLengthCm']
}

In [33]:
# Print only the first five values per species to keep the output short.
for species in species_numeric_list:
    values = species_numeric_list[species]
    print(f"{species}: {values[:5]}...")

Iris-setosa: [5.1, 4.9, 4.7, 4.6, 5.0]...
Iris-versicolor: [7.0, 6.4, 6.9, 5.5, 6.5]...
Iris-virginica: [6.3, 5.8, 7.1, 6.3, 6.5]...


In [35]:
def basic_statistics(group):
    """Summarise a numeric pandas Series as a dict of descriptive statistics.

    Parameters
    ----------
    group : pandas.Series
        The values to summarise (in this notebook, one species' measurements).

    Returns
    -------
    dict
        Keys, in order: 'Mean', 'Standard Deviation', '25th Percentile',
        '50th Percentile', '75th Percentile', 'Minimum', 'Maximum'.
    """
    lower_quartile = group.quantile(0.25)
    upper_quartile = group.quantile(0.75)

    # Assemble in reporting order; dicts preserve insertion order.
    summary = {}
    summary['Mean'] = group.mean()
    summary['Standard Deviation'] = group.std()  # pandas default: sample std (ddof=1)
    summary['25th Percentile'] = lower_quartile
    summary['50th Percentile'] = group.median()
    summary['75th Percentile'] = upper_quartile
    summary['Minimum'] = group.min()
    summary['Maximum'] = group.max()
    return summary

In [37]:
# Run the same sepal-length summary for each species; the generator yields the
# results in the order of the labels, matching the three target names.
setosa_stats, versicolor_stats, virginica_stats = (
    basic_statistics(ds.loc[ds['Species'] == species_label, 'SepalLengthCm'])
    for species_label in ('Iris-setosa', 'Iris-versicolor', 'Iris-virginica')
)

In [39]:
# One print per species: header line, then the stats dict on the next line.
print("\n=== Statistics for Iris-setosa ===", setosa_stats, sep="\n")
print("\n=== Statistics for Iris-versicolor ===", versicolor_stats, sep="\n")
print("\n=== Statistics for Iris-virginica ===", virginica_stats, sep="\n")


=== Statistics for Iris-setosa ===
{'Mean': 5.006, 'Standard Deviation': 0.3524896872134512, '25th Percentile': 4.8, '50th Percentile': 5.0, '75th Percentile': 5.2, 'Minimum': 4.3, 'Maximum': 5.8}

=== Statistics for Iris-versicolor ===
{'Mean': 5.936, 'Standard Deviation': 0.5161711470638635, '25th Percentile': 5.6, '50th Percentile': 5.9, '75th Percentile': 6.3, 'Minimum': 4.9, 'Maximum': 7.0}

=== Statistics for Iris-virginica ===
{'Mean': 6.587999999999998, 'Standard Deviation': 0.635879593274432, '25th Percentile': 6.225, '50th Percentile': 6.5, '75th Percentile': 6.9, 'Minimum': 4.9, 'Maximum': 7.9}
