# DataFrame Basics III

## Sorting dataframes with sort_index( ) and sort_values( ) 

In [1]:
import pandas as pd

In [2]:
# Load the Titanic passenger data from the CSV file (path is relative to the working directory).
titanic = pd.read_csv(filepath_or_buffer='titanic.csv')

In [3]:
# Preview the first five rows.
titanic.head(n=5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [4]:
# Preview the last five rows.
titanic.tail(n=5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
886,0,2,male,27.0,0,0,13.0,S,
887,1,1,female,19.0,0,0,30.0,S,B
888,0,3,female,,1,2,23.45,S,
889,1,1,male,26.0,0,0,30.0,C,C
890,0,3,male,32.0,0,0,7.75,Q,


In [6]:
# Sort only the age column, youngest first; missing ages (NaN) are listed at the end.
titanic['age'].sort_values(ascending=True)

803    0.42
755    0.67
644    0.75
469    0.75
78     0.83
       ... 
859     NaN
863     NaN
868     NaN
878     NaN
888     NaN
Name: age, Length: 891, dtype: float64

In [7]:
# Sort the whole DataFrame by age. This returns a new, sorted frame;
# `titanic` itself is left unchanged.
titanic.sort_values(by='age')

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
803,1,3,male,0.42,0,1,8.5167,C,
755,1,2,male,0.67,1,1,14.5000,S,
644,1,3,female,0.75,2,1,19.2583,C,
469,1,3,female,0.75,2,1,19.2583,C,
78,1,2,male,0.83,0,2,29.0000,S,
...,...,...,...,...,...,...,...,...,...
859,0,3,male,,0,0,7.2292,C,
863,0,3,female,,8,2,69.5500,S,
868,0,3,male,,0,0,9.5000,S,
878,0,3,male,,0,0,7.8958,S,


In [9]:
# The original frame still has its original row order (the sort did not modify it).
titanic.head(n=5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


In [10]:
# inplace=True reorders `titanic` itself (rows sorted by age, ascending)
# instead of returning a sorted copy.
titanic.sort_values(by='age', axis=0, ascending=True, inplace=True)

In [11]:
# `titanic` itself is now ordered by age.
titanic.head(n=5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
803,1,3,male,0.42,0,1,8.5167,C,
755,1,2,male,0.67,1,1,14.5,S,
644,1,3,female,0.75,2,1,19.2583,C,
469,1,3,female,0.75,2,1,19.2583,C,
78,1,2,male,0.83,0,2,29.0,S,


In [16]:
# Sort by several columns at once. `ascending` takes one flag per column:
# pclass ascending, sex descending, age ascending.
titanic.sort_values(
    by=['pclass', 'sex', 'age'],
    ascending=[True, False, True],
    inplace=True,
)

In [17]:
# First five rows after the multi-column sort.
titanic.head(n=5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
305,1,1,male,0.92,1,2,151.55,S,C
445,1,1,male,4.0,0,2,81.8583,S,A
802,1,1,male,11.0,1,2,120.0,S,B
550,1,1,male,17.0,0,2,110.8833,C,C
505,0,1,male,18.0,1,0,108.9,C,C


In [18]:
# Sort the rows by their index labels (ascending), modifying `titanic` in place.
titanic.sort_index(axis=0, ascending=True, inplace=True)

In [19]:
# The rows are back in their original order.
titanic.head(n=5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck
0,0,3,male,22.0,1,0,7.25,S,
1,1,1,female,38.0,1,0,71.2833,C,C
2,1,3,female,26.0,0,0,7.925,S,
3,1,1,female,35.0,1,0,53.1,S,C
4,0,3,male,35.0,0,0,8.05,S,


## Ranking DataFrames with rank( )

In [20]:
import pandas as pd

In [21]:
# Sales per weekday; the weekday labels become the Series index.
sales = pd.Series({
    'Mo': 15,
    'Tue': 32,
    'Wed': 45,
    'Thu': 21,
    'Fri': 55,
    'Sat': 15,
    'Sun': 0,
})

In [22]:
sales  # display the Series

Mo     15
Tue    32
Wed    45
Thu    21
Fri    55
Sat    15
Sun     0
dtype: int64

In [23]:
# Order the days from highest to lowest sales.
sales.sort_values(ascending=False)

Fri    55
Wed    45
Tue    32
Thu    21
Mo     15
Sat    15
Sun     0
dtype: int64

In [28]:
# Rank the days from best (1) to worst. With the default method ('average'),
# the tied days Mo and Sat share rank 5.5; the next cell changes that.
(
    sales
    .rank(ascending=False)
    .sort_values(ascending=True)
)

Fri    1.0
Wed    2.0
Tue    3.0
Thu    4.0
Mo     5.5
Sat    5.5
Sun    7.0
dtype: float64

In [30]:
# method='min' gives tied days the lowest rank of their group,
# so Mo and Sat both get rank 5.0 and Sun follows with 7.0.
(
    sales
    .rank(ascending=False, method='min')
    .sort_values(ascending=True)
)

Fri    1.0
Wed    2.0
Tue    3.0
Thu    4.0
Mo     5.0
Sat    5.0
Sun    7.0
dtype: float64

In [32]:
# pct=True returns the percentage rank of each value (rank / number of days),
# e.g. Friday = 1/7.
(
    sales
    .rank(ascending=False, method='min', pct=True)
    .sort_values(ascending=True)
)

Fri    0.142857
Wed    0.285714
Tue    0.428571
Thu    0.571429
Mo     0.714286
Sat    0.714286
Sun    1.000000
dtype: float64

The value 0.14 for Friday above means that on only about 14% of the days (1 out of 7) the sales were as good as or better than Friday's. This percentage reading becomes more meaningful when there are more days of data.

In [None]:
# Reload the Titanic data from the CSV file.
titanic = pd.read_csv(filepath_or_buffer='titanic.csv')

In [38]:
# Rank the fares from highest (rank 1) to lowest. The first passenger (index 0)
# has rank 815 out of 891, i.e. a very low fare.
titanic['fare'].rank(ascending=False)

0      815.0
1      103.0
2      659.5
3      144.0
4      628.0
       ...  
886    484.5
887    237.5
888    345.5
889    237.5
890    768.5
Name: fare, Length: 891, dtype: float64

In [39]:
# Store the fare rank as a new column; method='min' gives tied fares
# the lowest rank of their group.
titanic['fare_rank'] = titanic['fare'].rank(method='min', ascending=False)

In [40]:
# First five rows, now including the new fare_rank column.
titanic.head(n=5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,fare_rank
0,0,3,male,22.0,1,0,7.25,S,,809.0
1,1,1,female,38.0,1,0,71.2833,C,C,103.0
2,1,3,female,26.0,0,0,7.925,S,,651.0
3,1,1,female,35.0,1,0,53.1,S,C,142.0
4,0,3,male,35.0,0,0,8.05,S,,607.0


In [41]:
# Most expensive fares first. Without inplace=True this returns a sorted copy
# and leaves `titanic` unchanged.
titanic.sort_values(by='fare', ascending=False)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,deck,fare_rank
258,1,1,female,35.0,0,0,512.3292,C,,1.0
737,1,1,male,35.0,0,0,512.3292,C,B,1.0
679,1,1,male,36.0,0,1,512.3292,C,B,1.0
88,1,1,female,23.0,3,2,263.0000,S,C,4.0
27,0,1,male,19.0,3,2,263.0000,S,C,4.0
...,...,...,...,...,...,...,...,...,...,...
633,0,1,male,,0,0,0.0000,S,,877.0
413,0,2,male,,0,0,0.0000,S,,877.0
822,0,1,male,38.0,0,0,0.0000,S,,877.0
732,0,2,male,,0,0,0.0000,S,,877.0
