# Grouping and Sorting

Reference Kaggle link: https://www.kaggle.com/residentmario/grouping-and-sorting

In [2]:
import pandas as pd

# Truncate displayed frames/series to 5 rows so the rendered outputs stay compact.
pd.set_option("display.max_rows", 5)

# NOTE(review): the loaded frame carries an "Unnamed: 0" column (visible in the
# output below). Presumably the CSV's first column is a saved row index and
# `index_col=0` would drop it -- confirm against the file before changing.
reviews = pd.read_csv(filepath_or_buffer="wine reviews/winemag-data-130k-v2.csv")
reviews

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129969,129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss
129970,129970,France,"Big, rich and off-dry, this is powered by inte...",Lieu-dit Harth Cuvée Caroline,90,21.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Schoffit 2012 Lieu-dit Harth Cuvée Car...,Gewürztraminer,Domaine Schoffit


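One function we've been using heavily thus far is the `value_counts()` function. We can reproduce the counts that `value_counts()` returns with a `groupby()`; the only difference is that the result is ordered by the group key (`points`) rather than by descending frequency: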

In [3]:
# Group the reviews by score, then count the non-null `points` entries in each group.
reviews.groupby("points")["points"].count()

points
80     397
81     692
      ... 
99      33
100     19
Name: points, Length: 21, dtype: int64

In [4]:
# Lowest listed price among the wines that received each score.
reviews.groupby("points")["price"].min()

points
80      5.0
81      5.0
       ... 
99     44.0
100    80.0
Name: price, Length: 21, dtype: float64

In [5]:
# Each group is handed to the callable as a DataFrame; pick the title of the
# first review that appears for that winery.
reviews.groupby('winery').apply(lambda winery_rows: winery_rows['title'].iloc[0])

winery
1+1=3                          1+1=3 NV Rosé Sparkling (Cava)
10 Knots                 10 Knots 2010 Viognier (Paso Robles)
                                  ...                        
àMaurice    àMaurice 2013 Fred Estate Syrah (Walla Walla V...
Štoka                         Štoka 2009 Izbrani Teran (Kras)
Length: 16757, dtype: object

In [6]:
# For every (country, province) pair, keep the full row of the review with the
# highest score; `idxmax` returns the index label of the first maximum.
reviews.groupby(['country', 'province']).apply(
    lambda group: group.loc[group['points'].idxmax()]
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
country,province,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Argentina,Mendoza Province,82754,Argentina,"If the color doesn't tell the full story, the ...",Nicasia Vineyard,97,120.0,Mendoza Province,Mendoza,,Michael Schachner,@wineschach,Bodega Catena Zapata 2006 Nicasia Vineyard Mal...,Malbec,Bodega Catena Zapata
Argentina,Other,78303,Argentina,"Take note, this could be the best wine Colomé ...",Reserva,95,90.0,Other,Salta,,Michael Schachner,@wineschach,Colomé 2010 Reserva Malbec (Salta),Malbec,Colomé
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Uruguay,San Jose,39898,Uruguay,"Baked, sweet, heavy aromas turn earthy with ti...",El Preciado Gran Reserva,87,50.0,San Jose,,,Michael Schachner,@wineschach,Castillo Viejo 2005 El Preciado Gran Reserva R...,Red Blend,Castillo Viejo
Uruguay,Uruguay,39361,Uruguay,"Cherry and berry aromas are ripe, healthy and ...",Blend 002 Limited Edition,91,22.0,Uruguay,,,Michael Schachner,@wineschach,Narbona NV Blend 002 Limited Edition Tannat-Ca...,Tannat-Cabernet Franc,Narbona


In [7]:
# Per-country summary of `price`: row count (`len`, which also counts rows with
# a missing price), lowest price and highest price.
# `min`/`max` are passed by name rather than as the Python builtins: handing the
# builtins to `agg` is deprecated (FutureWarning since pandas 2.1) and a future
# version will call them directly instead of the NaN-skipping groupby methods.
# The resulting column labels ("len", "min", "max") are unchanged.
reviews.groupby(['country']).price.agg([len, 'min', 'max'])

Unnamed: 0_level_0,len,min,max
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Argentina,3800,4.0,230.0
Armenia,2,14.0,15.0
...,...,...,...
Ukraine,14,6.0,13.0
Uruguay,109,10.0,130.0


In [9]:
# Number of reviews per (country, province) pair. Wrapping `len` in a list makes
# `agg` return a DataFrame with a single column named "len".
countries_reviewed = (
    reviews
    .groupby(['country', 'province'])['description']
    .agg([len])
)
countries_reviewed

Unnamed: 0_level_0,Unnamed: 1_level_0,len
country,province,Unnamed: 2_level_1
Argentina,Mendoza Province,3264
Argentina,Other,536
...,...,...
Uruguay,San Jose,3
Uruguay,Uruguay,24


In [10]:
# Grouping by two keys leaves the result with a two-level index; inspect its type.
mi = getattr(countries_reviewed, "index")
type(mi)

pandas.core.indexes.multi.MultiIndex

In [11]:
# Preview only (nothing is reassigned): move the index levels back into regular
# columns and fall back to a default integer index.
countries_reviewed.reset_index(drop=False)

Unnamed: 0,country,province,len
0,Argentina,Mendoza Province,3264
1,Argentina,Other,536
...,...,...,...
423,Uruguay,San Jose,3
424,Uruguay,Uruguay,24


In [12]:
# Flatten the (country, province) MultiIndex into ordinary columns.
# The guard keeps this cell idempotent: without it, re-running the cell would
# call `reset_index()` on the already-flat frame and add a spurious "index"
# column that leaks into every later cell.
if isinstance(countries_reviewed.index, pd.MultiIndex):
    countries_reviewed = countries_reviewed.reset_index()

# Sort by review count, smallest first (ascending is the default).
countries_reviewed.sort_values(by='len')

Unnamed: 0,country,province,len
179,Greece,Muscat of Kefallonian,1
192,Greece,Sterea Ellada,1
...,...,...,...
415,US,Washington,8639
392,US,California,36247


In [13]:
# Same sort as before, but with the largest review counts first.
countries_reviewed.sort_values('len', ascending=False)

Unnamed: 0,country,province,len
392,US,California,36247
415,US,Washington,8639
...,...,...,...
63,Chile,Coelemu,1
149,Greece,Beotia,1


In [14]:
# Order the rows by their index labels instead of by a column's values.
countries_reviewed.sort_index(axis=0)

Unnamed: 0,country,province,len
0,Argentina,Mendoza Province,3264
1,Argentina,Other,536
...,...,...,...
423,Uruguay,San Jose,3
424,Uruguay,Uruguay,24


In [15]:
# Sort on two keys: by country first, then by review count within each country.
sort_keys = ['country', 'len']
countries_reviewed.sort_values(by=sort_keys)

Unnamed: 0,country,province,len
1,Argentina,Other,536
0,Argentina,Mendoza Province,3264
...,...,...,...
424,Uruguay,Uruguay,24
419,Uruguay,Canelones,43
