In [2]:
import pandas as pd

## Loading data

In [3]:
# Read the indicators table from a CSV file; the relative path is resolved
# against the notebook's working directory.
data = pd.read_csv('economic_indicators.csv')

In [4]:
# Display the whole frame for a first look (it only has a handful of rows).
data

Unnamed: 0,Country,Population,GDP,Surface Area,HDI,Continent
0,Canada,35.467,1785387,9984670.0,0.913,America
1,France,63.951,2833687,640679.0,0.888,Europe
2,Germany,80.94,3874437,357114.0,0.916,Europe
3,Italy,60.665,2167744,301336.0,0.873,Europe
4,Japan,127.061,4602367,377930.0,0.891,Asia
5,United Kingdom,64.511,2950039,242495.0,0.907,Europe
6,United States,318.523,17348075,9525067.0,0.915,America
7,Western Sahara,,908900,,,Africa
8,North Korea,,32000000,120538.0,,Asia


## Viewing and obtaining basic information about the dataset

In [5]:
# First five rows (5 is also what head() uses when called without an argument).
data.head(5)

Unnamed: 0,Country,Population,GDP,Surface Area,HDI,Continent
0,Canada,35.467,1785387,9984670.0,0.913,America
1,France,63.951,2833687,640679.0,0.888,Europe
2,Germany,80.94,3874437,357114.0,0.916,Europe
3,Italy,60.665,2167744,301336.0,0.873,Europe
4,Japan,127.061,4602367,377930.0,0.891,Asia


In [6]:
# Last four rows of the frame.
data.tail(4)

Unnamed: 0,Country,Population,GDP,Surface Area,HDI,Continent
5,United Kingdom,64.511,2950039,242495.0,0.907,Europe
6,United States,318.523,17348075,9525067.0,0.915,America
7,Western Sahara,,908900,,,Africa
8,North Korea,,32000000,120538.0,,Asia


In [7]:
# Column labels, returned as a pandas Index.
data.columns

Index(['Country', 'Population', 'GDP', 'Surface Area', 'HDI', 'Continent'], dtype='object')

In [8]:
# Total number of cells (rows x columns); missing values are counted too.
data.size

54

In [9]:
# (number of rows, number of columns)
data.shape

(9, 6)

In [10]:
# dtype of every column; text columns are reported as `object`.
data.dtypes

Country          object
Population      float64
GDP               int64
Surface Area    float64
HDI             float64
Continent        object
dtype: object

In [11]:
# Compact overview: index range, non-null count and dtype per column, memory usage.
# The non-null counts are a quick way to spot columns with missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Country       9 non-null      object 
 1   Population    7 non-null      float64
 2   GDP           9 non-null      int64  
 3   Surface Area  8 non-null      float64
 4   HDI           7 non-null      float64
 5   Continent     9 non-null      object 
dtypes: float64(3), int64(1), object(2)
memory usage: 560.0+ bytes


In [12]:
# Summary statistics for the numeric columns only; `count` ignores missing values.
data.describe()

Unnamed: 0,Population,GDP,Surface Area,HDI
count,7.0,9.0,8.0,7.0
mean,107.302571,7607848.0,2693729.0,0.900429
std,97.24997,10402400.0,4362439.0,0.016592
min,35.467,908900.0,120538.0,0.873
25%,62.308,2167744.0,286625.8,0.8895
50%,64.511,2950039.0,367522.0,0.907
75%,104.0005,4602367.0,2861776.0,0.914
max,318.523,32000000.0,9984670.0,0.916


In [13]:
# Number of rows per continent, most frequent first.
data['Continent'].value_counts()

Europe     4
America    2
Asia       2
Africa     1
Name: Continent, dtype: int64

## Indexing, Selection and Slicing - Retrieving basic information about the dataset

### Retrieving specific rows

In [14]:
# .loc selects by index label; a single label returns that row as a Series.
data.loc[5]

Country         United Kingdom
Population              64.511
GDP                    2950039
Surface Area            242495
HDI                      0.907
Continent               Europe
Name: 5, dtype: object

In [15]:
# Label-based slices with .loc include BOTH endpoints (unlike Python list slices).
data.loc[2:5]  # -> rows 2, 3, 4, 5

Unnamed: 0,Country,Population,GDP,Surface Area,HDI,Continent
2,Germany,80.94,3874437,357114.0,0.916,Europe
3,Italy,60.665,2167744,301336.0,0.873,Europe
4,Japan,127.061,4602367,377930.0,0.891,Asia
5,United Kingdom,64.511,2950039,242495.0,0.907,Europe


In [16]:
# Same idea: labels 1 through 4, end label included.
data.loc[1:4]

Unnamed: 0,Country,Population,GDP,Surface Area,HDI,Continent
1,France,63.951,2833687,640679.0,0.888,Europe
2,Germany,80.94,3874437,357114.0,0.916,Europe
3,Italy,60.665,2167744,301336.0,0.873,Europe
4,Japan,127.061,4602367,377930.0,0.891,Asia


In [17]:
# Collect the index labels of interest first, then hand the list to .loc
# (the explicit `:` selects every column).
rows = [2, 4, 5]
data.loc[rows, :]

Unnamed: 0,Country,Population,GDP,Surface Area,HDI,Continent
2,Germany,80.94,3874437,357114.0,0.916,Europe
4,Japan,127.061,4602367,377930.0,0.891,Asia
5,United Kingdom,64.511,2950039,242495.0,0.907,Europe


In [18]:
# Alternatively, pass the list of labels inline (note the double brackets:
# the outer pair is the indexer, the inner pair is the list).
data.loc[[2, 4, 5]]

Unnamed: 0,Country,Population,GDP,Surface Area,HDI,Continent
2,Germany,80.94,3874437,357114.0,0.916,Europe
4,Japan,127.061,4602367,377930.0,0.891,Asia
5,United Kingdom,64.511,2950039,242495.0,0.907,Europe


### Retrieving Specific Columns

In [19]:
# A single column name in brackets returns that column as a Series.
data['Population']

0     35.467
1     63.951
2     80.940
3     60.665
4    127.061
5     64.511
6    318.523
7        NaN
8        NaN
Name: Population, dtype: float64

In [20]:
# Text columns are selected the same way; the resulting Series has dtype object.
data['Country']

0            Canada
1            France
2           Germany
3             Italy
4             Japan
5    United Kingdom
6     United States
7    Western Sahara
8       North Korea
Name: Country, dtype: object

In [18]:
# Bind the Population column (a Series) to its own name.
population = data['Population']

In [19]:
# A list of column names selects a sub-frame holding just those columns;
# `:` on the row axis keeps every row.
cols = ['Country', 'Population', 'GDP']
data.loc[:, cols]

Unnamed: 0,Country,Population,GDP
0,Canada,35.467,1785387
1,France,63.951,2833687
2,Germany,80.94,3874437
3,Italy,60.665,2167744
4,Japan,127.061,4602367
5,United Kingdom,64.511,2950039
6,United States,318.523,17348075
7,Western Sahara,,908900
8,North Korea,,32000000


In [20]:
# Alternatively, write the list of column names inline (double brackets again).
data[['Country', 'Population', 'GDP']]

Unnamed: 0,Country,Population,GDP
0,Canada,35.467,1785387
1,France,63.951,2833687
2,Germany,80.94,3874437
3,Italy,60.665,2167744
4,Japan,127.061,4602367
5,United Kingdom,64.511,2950039
6,United States,318.523,17348075
7,Western Sahara,,908900
8,North Korea,,32000000


### Filtering both rows and columns

In [21]:
# One .loc call can filter both axes: row labels first (slice, end included),
# then the columns to keep.
data.loc[4:8, ['Country', 'Population']]

Unnamed: 0,Country,Population
4,Japan,127.061
5,United Kingdom,64.511
6,United States,318.523
7,Western Sahara,
8,North Korea,


In [22]:
# Rows 1 through 4 (inclusive), restricted to the Country and GDP columns.
data.loc[1:4, ['Country', 'GDP']]

Unnamed: 0,Country,GDP
1,France,2833687
2,Germany,3874437
3,Italy,2167744
4,Japan,4602367


### Changing indexes

In [25]:
# set_index returns a NEW frame that uses Country as its index;
# `data` itself keeps its integer index.
data_new = data.set_index('Country')

In [26]:
# The original frame is unchanged: Country is still a regular column.
data.head()

Unnamed: 0,Country,Population,GDP,Surface Area,HDI,Continent
0,Canada,35.467,1785387,9984670.0,0.913,America
1,France,63.951,2833687,640679.0,0.888,Europe
2,Germany,80.94,3874437,357114.0,0.916,Europe
3,Italy,60.665,2167744,301336.0,0.873,Europe
4,Japan,127.061,4602367,377930.0,0.891,Asia


In [27]:
# In the new frame Country has moved out of the columns and into the index.
data_new.head()

Unnamed: 0_level_0,Population,GDP,Surface Area,HDI,Continent
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Canada,35.467,1785387,9984670.0,0.913,America
France,63.951,2833687,640679.0,0.888,Europe
Germany,80.94,3874437,357114.0,0.916,Europe
Italy,60.665,2167744,301336.0,0.873,Europe
Japan,127.061,4602367,377930.0,0.891,Asia


In [30]:
# With Country as the index, rows are looked up by country name.
data_new.loc['France']

Population       63.951
GDP             2833687
Surface Area     640679
HDI               0.888
Continent        Europe
Name: France, dtype: object

In [31]:
# data_new is indexed by country name, so the integer label 0 does not exist.
# The failure is the point of this cell: catch it and show the message instead
# of leaving a raising cell that would abort "Restart & Run All".
try:
    data_new.loc[0]
except (KeyError, TypeError) as err:  # which of the two is raised depends on the pandas version
    print(f"{type(err).__name__}: {err}")

TypeError: cannot do label indexing on <class 'pandas.core.indexes.base.Index'> with these indexers [0] of <class 'int'>

In [32]:
# The original frame still has its integer index, so label 0 works here.
data.loc[0]

Country              Canada
Population           35.467
GDP                 1785387
Surface Area    9.98467e+06
HDI                   0.913
Continent           America
Name: 0, dtype: object

### Advanced Filtering and query

In [33]:
# Comparing a column with a scalar yields a boolean Series (one value per row).
# Rows whose Population is missing compare as False.
data_new['Population'] > 70

Country
Canada            False
France            False
Germany            True
Italy             False
Japan              True
United Kingdom    False
United States      True
Western Sahara    False
North Korea       False
Name: Population, dtype: bool

In [34]:
# Build the boolean mask once, then let .loc keep only the rows where it is True.
query = data_new['Population'] > 70
data_new.loc[query]

Unnamed: 0_level_0,Population,GDP,Surface Area,HDI,Continent
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Germany,80.94,3874437,357114.0,0.916,Europe
Japan,127.061,4602367,377930.0,0.891,Asia
United States,318.523,17348075,9525067.0,0.915,America


In [35]:
# Alternatively, write the condition inline instead of naming the mask first.
data_new[data_new['Population'] > 70]

Unnamed: 0_level_0,Population,GDP,Surface Area,HDI,Continent
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Germany,80.94,3874437,357114.0,0.916,Europe
Japan,127.061,4602367,377930.0,0.891,Asia
United States,318.523,17348075,9525067.0,0.915,America


In [39]:
# & combines two boolean Series element-wise (logical AND).
# Each comparison needs its own parentheses because & binds tighter than >.
(data_new['Population'] > 70) & (data_new['HDI'] > 0.90)

Country
Canada            False
France            False
Germany            True
Italy             False
Japan             False
United Kingdom    False
United States      True
Western Sahara    False
North Korea       False
dtype: bool

In [41]:
# Store the combined AND-mask so it can be reused for row selection.
query = (data_new['Population'] > 70) & (data_new['HDI'] > 0.90)

In [42]:
# Keep only the rows where the mask is True.
data_new[query]

Unnamed: 0_level_0,Population,GDP,Surface Area,HDI,Continent
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Germany,80.94,3874437,357114.0,0.916,Europe
United States,318.523,17348075,9525067.0,0.915,America


In [43]:
# | is the element-wise OR: a row is kept when at least one condition holds.
query = (data_new['Population'] > 70) | (data_new['HDI'] > 0.90)
data_new.loc[query]

Unnamed: 0_level_0,Population,GDP,Surface Area,HDI,Continent
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Canada,35.467,1785387,9984670.0,0.913,America
Germany,80.94,3874437,357114.0,0.916,Europe
Japan,127.061,4602367,377930.0,0.891,Asia
United Kingdom,64.511,2950039,242495.0,0.907,Europe
United States,318.523,17348075,9525067.0,0.915,America


In [45]:
# Equality tests on a text column produce a mask in the same way as numeric comparisons.
data_new[data_new['Continent'] == 'Asia']

Unnamed: 0_level_0,Population,GDP,Surface Area,HDI,Continent
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Japan,127.061,4602367,377930.0,0.891,Asia
North Korea,,32000000,120538.0,,Asia


### Creating columns using other columns

In [46]:
# Reminder of the available columns before deriving new ones.
data_new.head()

Unnamed: 0_level_0,Population,GDP,Surface Area,HDI,Continent
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Canada,35.467,1785387,9984670.0,0.913,America
France,63.951,2833687,640679.0,0.888,Europe
Germany,80.94,3874437,357114.0,0.916,Europe
Italy,60.665,2167744,301336.0,0.873,Europe
Japan,127.061,4602367,377930.0,0.891,Asia


In [48]:
# Column arithmetic is element-wise and aligned on the index; rows with a
# missing Population get NaN in the new column.
# NOTE(review): the units of GDP and Population are not stated in this notebook — confirm before interpreting the ratio.
data_new['GDP_PER_CAPITA'] = data_new['GDP'] / data_new['Population']

In [49]:
# The derived column is appended as the last column of the frame.
data_new

Unnamed: 0_level_0,Population,GDP,Surface Area,HDI,Continent,GDP_PER_CAPITA
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Canada,35.467,1785387,9984670.0,0.913,America,50339.385908
France,63.951,2833687,640679.0,0.888,Europe,44310.284437
Germany,80.94,3874437,357114.0,0.916,Europe,47868.013343
Italy,60.665,2167744,301336.0,0.873,Europe,35733.025633
Japan,127.061,4602367,377930.0,0.891,Asia,36221.712406
United Kingdom,64.511,2950039,242495.0,0.907,Europe,45729.239975
United States,318.523,17348075,9525067.0,0.915,America,54464.12033
Western Sahara,,908900,,,Africa,
North Korea,,32000000,120538.0,,Asia,


In [52]:
# Population divided by Surface Area, row by row; the result is NaN wherever
# either input is missing.
# NOTE(review): the units of Population and Surface Area are not stated here — confirm before reading this as a density.
data_new['Population_per_area'] = data_new['Population'] / data_new['Surface Area']

In [53]:
# Both derived columns are now part of data_new.
data_new

Unnamed: 0_level_0,Population,GDP,Surface Area,HDI,Continent,GDP_PER_CAPITA,Population_per_area
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Canada,35.467,1785387,9984670.0,0.913,America,50339.385908,4e-06
France,63.951,2833687,640679.0,0.888,Europe,44310.284437,0.0001
Germany,80.94,3874437,357114.0,0.916,Europe,47868.013343,0.000227
Italy,60.665,2167744,301336.0,0.873,Europe,35733.025633,0.000201
Japan,127.061,4602367,377930.0,0.891,Asia,36221.712406,0.000336
United Kingdom,64.511,2950039,242495.0,0.907,Europe,45729.239975,0.000266
United States,318.523,17348075,9525067.0,0.915,America,54464.12033,3.3e-05
Western Sahara,,908900,,,Africa,,
North Korea,,32000000,120538.0,,Asia,,


### Deleting rows and columns

In [22]:
# Starting point for the deletions below: the integer-indexed frame `data`.
data

Unnamed: 0,Country,Population,GDP,Surface Area,HDI,Continent
0,Canada,35.467,1785387,9984670.0,0.913,America
1,France,63.951,2833687,640679.0,0.888,Europe
2,Germany,80.94,3874437,357114.0,0.916,Europe
3,Italy,60.665,2167744,301336.0,0.873,Europe
4,Japan,127.061,4602367,377930.0,0.891,Asia
5,United Kingdom,64.511,2950039,242495.0,0.907,Europe
6,United States,318.523,17348075,9525067.0,0.915,America
7,Western Sahara,,908900,,,Africa
8,North Korea,,32000000,120538.0,,Asia


In [25]:
# Remove the rows with index labels 7 and 8 (the ones with missing Population/HDI).
# errors='ignore' keeps the cell idempotent: the result is assigned back to
# `data`, so without it a second run would raise KeyError for the labels that
# are already gone.
data = data.drop(index=[7, 8], errors='ignore')

In [26]:
# Check the result of the row deletion.
data

Unnamed: 0,Country,Population,GDP,Surface Area,HDI,Continent
0,Canada,35.467,1785387,9984670.0,0.913,America
1,France,63.951,2833687,640679.0,0.888,Europe
2,Germany,80.94,3874437,357114.0,0.916,Europe
3,Italy,60.665,2167744,301336.0,0.873,Europe
4,Japan,127.061,4602367,377930.0,0.891,Asia
5,United Kingdom,64.511,2950039,242495.0,0.907,Europe
6,United States,318.523,17348075,9525067.0,0.915,America


In [28]:
# drop() works on axis=0 by default, so 'Continent' is searched among the ROW
# labels and is not found. The error is the lesson of this cell, so catch and
# print it rather than leaving a raising cell in the notebook.
try:
    data.drop(['Continent'])
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: "['Continent'] not found in axis"

In [29]:
# axis=1 tells drop() to look for the label among the columns
# (equivalent to data.drop(columns=['Continent'])).
# errors='ignore' makes re-running the cell a no-op instead of a KeyError once
# the column has already been removed from `data`.
data = data.drop(['Continent'], axis=1, errors='ignore')

In [30]:
# Check the result of the column deletion.
data

Unnamed: 0,Country,Population,GDP,Surface Area,HDI
0,Canada,35.467,1785387,9984670.0,0.913
1,France,63.951,2833687,640679.0,0.888
2,Germany,80.94,3874437,357114.0,0.916
3,Italy,60.665,2167744,301336.0,0.873
4,Japan,127.061,4602367,377930.0,0.891
5,United Kingdom,64.511,2950039,242495.0,0.907
6,United States,318.523,17348075,9525067.0,0.915


### Basic Statistics

In [55]:
# data_new was not affected by the deletions made on `data`, so these
# statistics cover every country and include the two derived columns.
data_new.describe()

Unnamed: 0,Population,GDP,Surface Area,HDI,GDP_PER_CAPITA,Population_per_area
count,7.0,9.0,8.0,7.0,7.0,7.0
mean,107.302571,7607848.0,2693729.0,0.900429,44952.254576,0.000167
std,97.24997,10402400.0,4362439.0,0.016592,6954.983875,0.000124
min,35.467,908900.0,120538.0,0.873,35733.025633,4e-06
25%,62.308,2167744.0,286625.8,0.8895,40265.998421,6.7e-05
50%,64.511,2950039.0,367522.0,0.907,45729.239975,0.000201
75%,104.0005,4602367.0,2861776.0,0.914,49103.699626,0.000246
max,318.523,32000000.0,9984670.0,0.916,54464.12033,0.000336


In [56]:
# Work with the GDP column on its own (a Series indexed by country).
GDP = data_new['GDP']

In [57]:
# Smallest GDP value in the column.
GDP.min()

908900

In [58]:
# Largest GDP value in the column.
GDP.max()

32000000

In [59]:
# Arithmetic mean of the column.
GDP.mean()

7607848.444444444

In [60]:
# Sample standard deviation (pandas normalises by N-1 by default).
GDP.std()

10402402.73210567

In [63]:
# Categorical-style text column: counts are more informative than mean/std here.
continent = data_new['Continent']

In [64]:
# Frequency of each continent, sorted from most to least common.
continent.value_counts()

Europe     4
Asia       2
America    2
Africa     1
Name: Continent, dtype: int64

In [67]:
# First quartile; matches the 25% row reported by describe().
GDP.quantile(0.25)

2167744.0

In [68]:
# Any fraction between 0 and 1 is accepted; by default the value is linearly
# interpolated between the two nearest data points.
GDP.quantile(0.20)

2014801.2

In [69]:
# Passing a list returns a Series indexed by the requested quantiles.
GDP.quantile([0.20, 0.30, 0.40])

0.2    2014801.2
0.3    2434121.2
0.4    2856957.4
Name: GDP, dtype: float64