In [2]:
import pandas as pd
import numpy as np

## Series
**Объект** — это набор данных (переменных) и методов (функций), которые с этими данными взаимодействуют. 

**Pandas Series** представляет из себя объект, похожий на одномерный массив, но отличительной чертой является наличие индексов. Индекс находится слева, а сам элемент справа.
![image-2.png](attachment:image-2.png)

In [4]:
a = pd.Series([4, 7, 6, 3, 9],
              index = ['one', 'two', 'three', 'four', 'five'])
a

one      4
two      7
three    6
four     3
five     9
dtype: int64

In [5]:
a2 = pd.Series([4, 7, 6, 3, 9])
a2

0    4
1    7
2    6
3    3
4    9
dtype: int64

In [6]:
a.index

Index(['one', 'two', 'three', 'four', 'five'], dtype='object')

In [7]:
a.values

array([4, 7, 6, 3, 9], dtype=int64)

In [8]:
a[0]

4

In [9]:
a['one']

4

## DataFrame
Объект **DataFrame** является табличной структурой данных. В любой таблице всегда присутствуют строки и столбцы. При этом в столбцах можно хранить данные разных типов данных. 

Столбцами в объекте DataFrame выступают объекты Series, строки которых являются их элементами.
![image-2.png](attachment:image-2.png)

In [10]:
df = pd.DataFrame({
    'Age': [46, 37, 44, 42, 42],
    'Country': ['Spain', 'Spain', 'Germany', 'Spain', 'Russia'],
    'Gender': ['Female', 'Female', 'Male', 'Male', 'Male']    
})

df

Unnamed: 0,Age,Country,Gender
0,46,Spain,Female
1,37,Spain,Female
2,44,Germany,Male
3,42,Spain,Male
4,42,Russia,Male


In [11]:
df['Age']

0    46
1    37
2    44
3    42
4    42
Name: Age, dtype: int64

In [12]:
df.Age

0    46
1    37
2    44
3    42
4    42
Name: Age, dtype: int64

In [14]:
df[['Country', 'Age']]

Unnamed: 0,Country,Age
0,Spain,46
1,Spain,37
2,Germany,44
3,Spain,42
4,Russia,42


In [15]:
df.columns

Index(['Age', 'Country', 'Gender'], dtype='object')

In [16]:
df.index

RangeIndex(start=0, stop=5, step=1)

In [17]:
df2 = pd.DataFrame({
    'Age': [46, 37, 44, 42, 42],
    'Country': ['Spain', 'Spain', 'Germany', 'Spain', 'Russia'],
    'Gender': ['Female', 'Female', 'Male', 'Male', 'Male']    
}, index=['005', '004', '121', '103', '084'])

df2

Unnamed: 0,Age,Country,Gender
5,46,Spain,Female
4,37,Spain,Female
121,44,Germany,Male
103,42,Spain,Male
84,42,Russia,Male


In [19]:
df2.index = ['001', '002', '003', '004', '005']
df2

Unnamed: 0,Age,Country,Gender
1,46,Spain,Female
2,37,Spain,Female
3,44,Germany,Male
4,42,Spain,Male
5,42,Russia,Male


## Считывание данных
В целом, pandas поддерживает все самые популярные форматы хранения данных: csv, excel, sql, html и многое другое, но чаще всего приходится работать именно с csv файлами (comma separated values)


In [28]:
df = pd.read_csv('./Churn_Modelling.csv')
df

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.10,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1


In [29]:
df[100:130]

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
100,101,15808582,Fu,665,France,Female,40,6,0.0,1,1,1,161848.03,0
101,102,15743192,Hung,623,France,Female,44,6,0.0,2,0,0,167162.43,0
102,103,15580146,Hung,738,France,Male,31,9,82674.15,1,1,0,41970.72,0
103,104,15776605,Bradley,528,Spain,Male,36,7,0.0,2,1,0,60536.56,0
104,105,15804919,Dunbabin,670,Spain,Female,65,1,0.0,1,1,1,177655.68,1
105,106,15613854,Mauldon,622,Spain,Female,46,4,107073.27,2,1,1,30984.59,1
106,107,15599195,Stiger,582,Germany,Male,32,1,88938.62,1,1,1,10054.53,0
107,108,15812878,Parsons,785,Germany,Female,36,2,99806.85,1,0,1,36976.52,0
108,109,15602312,Walkom,605,Spain,Male,33,5,150092.8,1,0,0,71862.79,0
109,110,15744689,T'ang,479,Germany,Male,35,9,92833.89,1,1,0,99449.86,1


In [30]:
df2 = pd.read_csv('./Churn_Modelling.csv', header=1, sep=';')
df2

Unnamed: 0,"1,15634602,Hargrave,619,France,Female,42,2,0,1,1,1,101348.88,1"
0,"2,15647311,Hill,608,Spain,Female,41,1,83807.86..."
1,"3,15619304,Onio,502,France,Female,42,8,159660...."
2,"4,15701354,Boni,699,France,Female,39,1,0,2,0,0..."
3,"5,15737888,Mitchell,850,Spain,Female,43,2,1255..."
4,"6,15574012,Chu,645,Spain,Male,44,8,113755.78,2..."
...,...
9994,"9996,15606229,Obijiaku,771,France,Male,39,5,0,..."
9995,"9997,15569892,Johnstone,516,France,Male,35,10,..."
9996,"9998,15584532,Liu,709,France,Female,36,7,0,1,0..."
9997,"9999,15682355,Sabbatini,772,Germany,Male,42,3,..."


In [31]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [32]:
df.tail()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.0,2,1,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,1,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.0,1,0,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,1,0,92888.52,1
9999,10000,15628319,Walker,792,France,Female,28,4,130142.79,1,1,0,38190.78,0


In [33]:
df.sample()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
9037,9038,15792862,Blinova,653,Germany,Male,41,1,104584.11,1,1,0,15126.32,1


In [34]:
df.sample(5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
6192,6193,15731781,Onyemachukwu,551,France,Male,43,7,0.0,2,1,0,178393.68,0
2414,2415,15806027,Niu,556,France,Female,52,9,0.0,1,1,0,175149.2,1
9097,9098,15606901,Graham,728,France,Male,43,7,0.0,2,1,0,40023.7,0
2868,2869,15707968,Akobundu,545,Spain,Male,36,8,73211.12,2,1,0,89587.34,1
9082,9083,15753161,Dickson,768,France,Female,36,5,180169.44,2,1,0,17348.56,0


In [36]:
df.sample(frac=1) # все строки в случайном порядке

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
7949,7950,15617486,Sullivan,530,France,Male,52,1,106723.28,1,0,0,109960.40,1
7576,7577,15666096,Ibekwe,676,Spain,Male,27,4,0.00,1,0,1,107955.67,0
561,562,15800440,Power,650,Spain,Male,61,1,152968.73,1,0,1,82970.69,0
9325,9326,15786389,Chuang,635,Spain,Female,41,10,0.00,2,1,1,61994.20,0
4166,4167,15654562,Ma,850,Spain,Female,45,5,174088.30,4,1,0,5669.31,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1269,1270,15723339,Chin,554,France,Female,38,4,137654.05,2,1,1,172629.67,0
4773,4774,15709451,Gordon,646,Germany,Female,35,1,121952.75,2,1,1,142839.82,0
4786,4787,15749799,Pisani,577,France,Female,34,2,0.00,2,1,1,84033.35,0
4833,4834,15807003,Jennings,762,France,Male,32,10,191775.65,1,1,0,179657.83,0


In [38]:
df.sample(frac=0.05) # 5% строк в случайном порядке

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
7394,7395,15673747,Ayers,519,France,Female,22,8,0.00,1,0,1,167553.06,0
7733,7734,15596013,Akhtar,694,Germany,Female,58,1,143212.22,1,0,0,102628.56,1
8735,8736,15713599,Castiglione,728,France,Male,30,10,114835.43,1,0,1,37662.49,0
1760,1761,15701121,Holt,521,France,Male,38,5,110641.18,1,0,1,136507.69,1
1045,1046,15785980,Williford,588,Spain,Male,34,6,121132.26,2,1,0,86460.28,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
530,531,15736112,Walton,519,Spain,Female,57,2,119035.35,2,1,1,29871.79,0
793,794,15769504,Munro,743,Germany,Female,34,1,131736.88,1,1,1,108543.21,0
6058,6059,15684868,Cameron,668,Germany,Male,56,9,110993.79,1,1,0,134396.64,1
7725,7726,15631756,Tuan,482,France,Female,35,5,147813.05,2,0,0,109029.72,0


In [40]:
df.shape # покажет количество строк и столбцов

(10000, 14)

## Первичный анализ данных


In [41]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [42]:
df.describe()

Unnamed: 0,RowNumber,CustomerId,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,5000.5,15690940.0,650.5288,38.9218,5.0128,76485.889288,1.5302,0.7055,0.5151,100090.239881,0.2037
std,2886.89568,71936.19,96.653299,10.487806,2.892174,62397.405202,0.581654,0.45584,0.499797,57510.492818,0.402769
min,1.0,15565700.0,350.0,18.0,0.0,0.0,1.0,0.0,0.0,11.58,0.0
25%,2500.75,15628530.0,584.0,32.0,3.0,0.0,1.0,0.0,0.0,51002.11,0.0
50%,5000.5,15690740.0,652.0,37.0,5.0,97198.54,1.0,1.0,1.0,100193.915,0.0
75%,7500.25,15753230.0,718.0,44.0,7.0,127644.24,2.0,1.0,1.0,149388.2475,0.0
max,10000.0,15815690.0,850.0,92.0,10.0,250898.09,4.0,1.0,1.0,199992.48,1.0


In [43]:
df['Age'].min()

18

In [44]:
df['Balance'].max()

250898.09

In [45]:
df[['CreditScore', 'Age', 'Tenure']].mean()

CreditScore    650.5288
Age             38.9218
Tenure           5.0128
dtype: float64

In [46]:
df.describe(include='object')

Unnamed: 0,Surname,Geography,Gender
count,10000,10000,10000
unique,2932,3,2
top,Smith,France,Male
freq,32,5014,5457


```Всего 2 932 уникальных фамилий. Самая частая - Смит. Встречается 32 раза```

In [47]:
df.dtypes

RowNumber            int64
CustomerId           int64
Surname             object
CreditScore          int64
Geography           object
Gender              object
Age                  int64
Tenure               int64
Balance            float64
NumOfProducts        int64
HasCrCard            int64
IsActiveMember       int64
EstimatedSalary    float64
Exited               int64
dtype: object

In [48]:
df.Age.dtype

dtype('int64')

In [49]:
df.HasCrCard.dtype

dtype('int64')

In [51]:
df.HasCrCard.astype('bool') # вовзращает переделанную серию, но не меняет исходную

0        True
1       False
2        True
3       False
4        True
        ...  
9995     True
9996     True
9997    False
9998     True
9999     True
Name: HasCrCard, Length: 10000, dtype: bool

In [52]:
df.HasCrCard.dtype

dtype('int64')

In [53]:
df.HasCrCard = df.HasCrCard.astype('bool')
df.HasCrCard.dtype

dtype('bool')

In [54]:
df.Geography.unique()

array(['France', 'Spain', 'Germany'], dtype=object)

In [55]:
df.Geography.nunique()

3

In [57]:
df.Geography.value_counts()

France     5014
Germany    2509
Spain      2477
Name: Geography, dtype: int64

In [59]:
df.Geography.value_counts(normalize=True) # проценты

France     0.5014
Germany    0.2509
Spain      0.2477
Name: Geography, dtype: float64

## Фильтрация
Фильтрация в pandas основывается на булевых масках.

**Булевая маска** — бинарные данные, которые используются для выбора определенных объектов из структуры данных.

In [60]:
df.Gender == 'Male'

0       False
1       False
2       False
3       False
4       False
        ...  
9995     True
9996     True
9997    False
9998     True
9999    False
Name: Gender, Length: 10000, dtype: bool

In [62]:
male = df[df.Gender == 'Male'] # фильтрация
male

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
5,6,15574012,Chu,645,Spain,Male,44,8,113755.78,2,True,0,149756.71,1
6,7,15592531,Bartlett,822,France,Male,50,7,0.00,2,True,1,10062.80,0
8,9,15792365,He,501,France,Male,44,4,142051.07,2,False,1,74940.50,0
9,10,15592389,H?,684,France,Male,27,2,134603.88,1,True,1,71725.73,0
10,11,15767821,Bearce,528,France,Male,31,6,102016.72,2,False,0,80181.12,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9992,9993,15657105,Chukwualuka,726,Spain,Male,36,2,0.00,1,True,0,195192.40,0
9993,9994,15569266,Rahman,644,France,Male,28,7,155060.41,1,True,0,29179.52,0
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,True,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,True,1,101699.77,0


### Логическое И (&)

In [63]:
df[(df.Gender == 'Female') & (df.NumOfProducts >=3)] # фильтрация

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,True,0,113931.57,1
7,8,15656148,Obinna,376,Germany,Female,29,4,115046.74,4,True,0,119346.88,1
30,31,15589475,Azikiwe,591,Spain,Female,39,3,0.00,3,True,0,140469.38,1
88,89,15622897,Sharpe,646,France,Female,46,4,0.00,3,True,0,93251.42,1
90,91,15757535,Heap,647,Spain,Female,44,5,0.00,3,True,1,174205.22,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9565,9566,15752294,Long,582,France,Female,38,9,135979.01,4,True,1,76582.95,1
9747,9748,15775761,Iweobiegbunam,610,Germany,Female,69,5,86038.21,3,False,0,192743.06,1
9800,9801,15640507,Li,762,Spain,Female,35,3,119349.69,3,True,1,47114.18,1
9877,9878,15572182,Onwuamaeze,505,Germany,Female,33,3,106506.77,3,True,0,45445.78,1


### Логическое ИЛИ ( | )

In [64]:
df[(df.HasCrCard) | (df.NumOfProducts >=3)]

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,True,1,101348.88,1
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,True,0,113931.57,1
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,True,1,79084.10,0
5,6,15574012,Chu,645,Spain,Male,44,8,113755.78,2,True,0,149756.71,1
6,7,15592531,Bartlett,822,France,Male,50,7,0.00,2,True,1,10062.80,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9993,9994,15569266,Rahman,644,France,Male,28,7,155060.41,1,True,0,29179.52,0
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,True,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,True,1,101699.77,0
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,True,0,92888.52,1


### Логическое НЕ (~)

In [68]:
df[~(df.Geography == 'Spain')]

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,True,1,101348.88,1
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,True,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,False,0,93826.63,0
6,7,15592531,Bartlett,822,France,Male,50,7,0.00,2,True,1,10062.80,0
7,8,15656148,Obinna,376,Germany,Female,29,4,115046.74,4,True,0,119346.88,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,True,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,True,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,False,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,True,0,92888.52,1


In [69]:
df[df.Geography.isin(['France', 'Germany'])]

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.00,1,True,1,101348.88,1
2,3,15619304,Onio,502,France,Female,42,8,159660.80,3,True,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.00,2,False,0,93826.63,0
6,7,15592531,Bartlett,822,France,Male,50,7,0.00,2,True,1,10062.80,0
7,8,15656148,Obinna,376,Germany,Female,29,4,115046.74,4,True,0,119346.88,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,9996,15606229,Obijiaku,771,France,Male,39,5,0.00,2,True,0,96270.64,0
9996,9997,15569892,Johnstone,516,France,Male,35,10,57369.61,1,True,1,101699.77,0
9997,9998,15584532,Liu,709,France,Female,36,7,0.00,1,False,1,42085.58,1
9998,9999,15682355,Sabbatini,772,Germany,Male,42,3,75075.31,2,True,0,92888.52,1


## Индексация

In [75]:
df_small = df[(df.Geography == 'Spain')][['Geography', 'Gender', 'Age']]
df_small

Unnamed: 0,Geography,Gender,Age
1,Spain,Female,41
4,Spain,Female,43
5,Spain,Male,44
11,Spain,Male,24
14,Spain,Female,35
...,...,...,...
9966,Spain,Male,35
9980,Spain,Male,35
9987,Spain,Male,30
9989,Spain,Male,28


In [84]:
df_small.loc[9989] # по индексу

Geography    Spain
Gender        Male
Age             28
Name: 9989, dtype: object

In [83]:
df_small.loc[[1, 4, 5, 9987], ['Age']]

Unnamed: 0,Age
1,41
4,43
5,44
9987,30


In [88]:
df_small.iloc[2474] # номер по порядку

Geography    Spain
Gender        Male
Age             30
Name: 9987, dtype: object

In [89]:
df_small.iloc[[1, 2, 3, 4, 2474]]

Unnamed: 0,Geography,Gender,Age
4,Spain,Female,43
5,Spain,Male,44
11,Spain,Male,24
14,Spain,Female,35
9987,Spain,Male,30


In [95]:
df_small.iloc[[0, 1, 2], [0, 2]] # первый список - индексы по порядку, второй - требуемые колонки

Unnamed: 0,Geography,Age
1,Spain,41
4,Spain,43
5,Spain,44


## Сортировка
Метод sort_values() сортирует таблицу по признаку. Сортировка будет выполнена от меньшего к большему. В данном примере узнаем самого младшего клиента


In [96]:
df.sort_values('Age')

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
3512,3513,15657779,Boylan,806,Spain,Male,18,3,0.00,2,True,1,86994.54,0
1678,1679,15569178,Kharlamov,570,France,Female,18,4,82767.42,1,True,0,71811.90,0
3517,3518,15757821,Burgess,771,Spain,Male,18,1,0.00,2,False,0,41542.95,0
9520,9521,15673180,Onyekaozulu,727,Germany,Female,18,2,93816.70,2,True,0,126172.11,0
2021,2022,15795519,Vasiliev,716,Germany,Female,18,3,128743.80,1,False,0,197322.13,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3387,3388,15798024,Lori,537,Germany,Male,84,8,92242.34,1,True,1,186235.98,0
3033,3034,15578006,Yao,787,France,Female,85,10,0.00,2,True,1,116537.96,0
2458,2459,15813303,Rearick,513,Spain,Male,88,10,0.00,2,True,1,52952.24,0
6759,6760,15660878,T'ien,705,France,Male,92,1,126076.24,2,True,1,34436.83,0


In [99]:
df.sort_values('Age', ascending=False)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
6443,6444,15764927,Rogova,753,France,Male,92,3,121513.31,1,False,1,195563.99,0
6759,6760,15660878,T'ien,705,France,Male,92,1,126076.24,2,True,1,34436.83,0
2458,2459,15813303,Rearick,513,Spain,Male,88,10,0.00,2,True,1,52952.24,0
3033,3034,15578006,Yao,787,France,Female,85,10,0.00,2,True,1,116537.96,0
3387,3388,15798024,Lori,537,Germany,Male,84,8,92242.34,1,True,1,186235.98,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9782,9783,15728829,Weigel,509,France,Male,18,7,102983.91,1,True,0,171770.58,0
2141,2142,15758372,Wallace,674,France,Male,18,7,0.00,2,True,1,55753.12,1
9501,9502,15634146,Hou,835,Germany,Male,18,2,142872.36,1,True,1,117632.63,0
9520,9521,15673180,Onyekaozulu,727,Germany,Female,18,2,93816.70,2,True,0,126172.11,0


In [110]:
df.sort_values(['Age', ('CreditScore')])

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
9782,9783,15728829,Weigel,509,France,Male,18,7,102983.91,1,True,0,171770.58,0
1678,1679,15569178,Kharlamov,570,France,Female,18,4,82767.42,1,True,0,71811.90,0
9029,9030,15722701,Bruno,594,Germany,Male,18,1,132694.73,1,True,0,167689.56,0
7334,7335,15759133,Vaguine,616,France,Male,18,6,0.00,2,True,1,27308.58,0
9526,9527,15665521,Chiazagomekpele,642,Germany,Male,18,5,111183.53,2,False,1,10063.75,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3387,3388,15798024,Lori,537,Germany,Male,84,8,92242.34,1,True,1,186235.98,0
3033,3034,15578006,Yao,787,France,Female,85,10,0.00,2,True,1,116537.96,0
2458,2459,15813303,Rearick,513,Spain,Male,88,10,0.00,2,True,1,52952.24,0
6759,6760,15660878,T'ien,705,France,Male,92,1,126076.24,2,True,1,34436.83,0
