# Grouping, Sorting, and Shuffling

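Shuffle the dataset: the cell below loads the Auto MPG data and randomly reorders its rows, using a fixed random seed so that the shuffled order is repeatable.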

In [0]:
import os 
import pandas as pd
import numpy as np

# Read the Auto MPG data; the strings 'NA' and '?' are parsed as missing values.
df = pd.read_csv(
    "https://data.heatonresearch.com/data/t81-558/auto-mpg.csv",
    na_values=['NA', '?'],
)

# Fix the seed so the shuffle below is repeatable from run to run.
np.random.seed(42)

# Reorder the rows by a random permutation of the index, then renumber them
# from 0 (drop=True discards the old index rather than keeping it as a column).
df = df.reindex(np.random.permutation(df.index)).reset_index(drop=True)

# Show the first ten shuffled rows.
display(df.head(10))


Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,year,origin,name
0,33.0,4,91.0,53.0,1795,17.4,76,3,honda civic
1,28.0,4,120.0,79.0,2625,18.6,82,1,ford ranger
2,19.0,6,232.0,100.0,2634,13.0,71,1,amc gremlin
3,13.0,8,318.0,150.0,3940,13.2,76,1,plymouth volare premier v8
4,14.0,8,318.0,150.0,4237,14.5,73,1,plymouth fury gran sedan
5,27.0,4,97.0,88.0,2100,16.5,72,3,toyota corolla 1600 (sw)
6,24.0,4,140.0,92.0,2865,16.4,82,1,ford fairmont futura
7,13.0,8,440.0,215.0,4735,11.0,73,1,chrysler new yorker brougham
8,17.0,8,260.0,110.0,4060,19.0,77,1,oldsmobile cutlass supreme
9,21.0,6,200.0,,2875,17.0,74,1,ford maverick


The above data set can be grouped to compute summaries. For example, the following code groups the rows by the number of cylinders and computes the average (mean) `mpg` of each group. In addition to mean, other aggregating functions, such as sum or count, can be used.

In [0]:
# Mean mpg for each distinct cylinder count; the result is a Series
# indexed by 'cylinders'.
g = df['mpg'].groupby(df['cylinders']).mean()
g

cylinders
3    20.550000
4    29.286765
5    27.366667
6    19.985714
8    14.963107
Name: mpg, dtype: float64

In [0]:
# Convert the grouped Series to a plain dict that maps each index label
# (cylinder count) to its value (mean mpg).
d = g.to_dict()
d

{3: 20.55,
 4: 29.28676470588236,
 5: 27.366666666666664,
 6: 19.98571428571429,
 8: 14.963106796116502}

In [0]:
# Look up a single group: the mean mpg stored under the key 6 (6-cylinder cars).
d[6]

19.98571428571429

In [0]:
# Tally the non-null 'cylinders' entries within each horsepower group and
# return the result as a {horsepower: count} dict. Rows whose horsepower is
# missing form no group, because groupby drops NaN keys by default.
df['cylinders'].groupby(df['horsepower']).count().to_dict()

{46.0: 2,
 48.0: 3,
 49.0: 1,
 52.0: 4,
 53.0: 2,
 54.0: 1,
 58.0: 2,
 60.0: 5,
 61.0: 1,
 62.0: 2,
 63.0: 3,
 64.0: 1,
 65.0: 10,
 66.0: 1,
 67.0: 12,
 68.0: 6,
 69.0: 3,
 70.0: 12,
 71.0: 5,
 72.0: 6,
 74.0: 3,
 75.0: 14,
 76.0: 4,
 77.0: 1,
 78.0: 6,
 79.0: 2,
 80.0: 7,
 81.0: 2,
 82.0: 1,
 83.0: 4,
 84.0: 6,
 85.0: 9,
 86.0: 5,
 87.0: 2,
 88.0: 19,
 89.0: 1,
 90.0: 20,
 91.0: 1,
 92.0: 6,
 93.0: 1,
 94.0: 1,
 95.0: 14,
 96.0: 3,
 97.0: 9,
 98.0: 2,
 100.0: 17,
 102.0: 1,
 103.0: 1,
 105.0: 12,
 107.0: 1,
 108.0: 1,
 110.0: 18,
 112.0: 3,
 113.0: 1,
 115.0: 5,
 116.0: 1,
 120.0: 4,
 122.0: 1,
 125.0: 3,
 129.0: 2,
 130.0: 5,
 132.0: 1,
 133.0: 1,
 135.0: 1,
 137.0: 1,
 138.0: 1,
 139.0: 2,
 140.0: 7,
 142.0: 1,
 145.0: 7,
 148.0: 1,
 149.0: 1,
 150.0: 22,
 152.0: 1,
 153.0: 2,
 155.0: 2,
 158.0: 1,
 160.0: 2,
 165.0: 4,
 167.0: 1,
 170.0: 5,
 175.0: 5,
 180.0: 5,
 190.0: 3,
 193.0: 1,
 198.0: 2,
 200.0: 1,
 208.0: 1,
 210.0: 1,
 215.0: 3,
 220.0: 1,
 225.0: 3,
 230.0: 1}