## Import Pandas

In [1]:
import pandas as pd
import numpy as np

## Series

In [3]:
# A Series is a one-dimensional labelled array; here it is built from a
# plain Python list, so pandas assigns the default integer index.
fruits = [
    'Apple',
    'Orange',
    'Banana',
    'Mango',
]
fruit_series = pd.Series(data=fruits)

In [4]:
# Display the Series: index labels in the left column, values on the right.
fruit_series

0     Apple
1    Orange
2    Banana
3     Mango
dtype: object

In [5]:
# Confirm the object created above is a pandas Series.
type(fruit_series)

pandas.core.series.Series

Note: By default, pandas assigns an integer index (0, 1, 2, 3, ...).

We can also supply our own index labels:

In [6]:
# Same values as before, but labelled 'x', 'y', 'z', 'w' instead of 0..3.
fruit_series = pd.Series(data=fruits, index=list('xyzw'))

In [7]:
# Display the Series again to see the custom index labels.
fruit_series

x     Apple
y    Orange
z    Banana
w     Mango
dtype: object

In [8]:
# `name` attaches a label to the Series itself; it is shown in the repr.
fruit_series = pd.Series(
    data=fruits,
    index=list('xyzw'),
    name='Fruits',
)

In [9]:
# Display the Series; the footer now includes its name.
fruit_series

x     Apple
y    Orange
z    Banana
w     Mango
Name: Fruits, dtype: object

## DataFrame

In [11]:
# One row per fruit: [name, price]. A NumPy array has a single dtype, so
# mixing strings and integers stores every element (prices too) as a string.
data = np.array(
    [
        ['Apple', 200],
        ['Mango', 120],
        ['Banana', 80],
        ['Orange', 150],
    ]
)

In [12]:
# Inspect the array; the dtype in the output is a Unicode string type,
# so the prices are held as text rather than numbers.
data

array([['Apple', '200'],
       ['Mango', '120'],
       ['Banana', '80'],
       ['Orange', '150']], dtype='<U11')

In [13]:
# Build a DataFrame from the 2-D array. No column names are given,
# so the columns are labelled 0 and 1.
pd.DataFrame(data)

Unnamed: 0,0,1
0,Apple,200
1,Mango,120
2,Banana,80
3,Orange,150


In [16]:
# Name the columns explicitly. The source NumPy array stores every cell as a
# string, so cast Price to an integer dtype; otherwise numeric operations
# such as .sum() or comparisons would act on text instead of numbers.
fruits_df = pd.DataFrame(data, columns=['Fruits', 'Price']).astype({'Price': 'int64'})

fruits_df

Unnamed: 0,Fruits,Price
0,Apple,200
1,Mango,120
2,Banana,80
3,Orange,150


In [17]:
# Two parallel lists: prices[i] is the price of fruits[i].
# Note: this rebinds `fruits`, which was used earlier for the Series examples.
fruits = [
    'Apple',
    'Guava',
    'Mango',
    'Orange',
]
prices = [
    200,
    120,
    150,
    75,
]

In [20]:
# zip pairs the i-th fruit with the i-th price, yielding one tuple per pair.
for pair in zip(fruits, prices):
    print(pair)

('Apple', 200)
('Guava', 120)
('Mango', 150)
('Orange', 75)


In [21]:
# Each (fruit, price) tuple becomes one row; `columns` names the two fields.
pd.DataFrame(
    list(zip(fruits, prices)),
    columns=['Fruits', 'Prices'],
)

Unnamed: 0,Fruits,Prices
0,Apple,200
1,Guava,120
2,Mango,150
3,Orange,75


In [22]:
# Mapping of player name -> runs.
d = {
    'Sachin': 98,
    'Sourav': 75,
    'Dravid': 120,
    'Laxman': 50,
}

In [23]:
# Display the dictionary.
d

{'Sachin': 98, 'Sourav': 75, 'Dravid': 120, 'Laxman': 50}

In [25]:
# View of the dictionary's keys (the player names).
d.keys()

dict_keys(['Sachin', 'Sourav', 'Dravid', 'Laxman'])

In [26]:
# View of the dictionary's values (the runs).
d.values()

dict_values([98, 75, 120, 50])

In [31]:
# View of (key, value) pairs; each pair can serve as one DataFrame row.
d.items()

dict_items([('Sachin', 98), ('Sourav', 75), ('Dravid', 120), ('Laxman', 50)])

In [32]:
# Every (player, runs) pair becomes a row of a two-column DataFrame.
pd.DataFrame(
    list(d.items()),
    columns=['Players', 'Runs'],
)

Unnamed: 0,Players,Runs
0,Sachin,98
1,Sourav,75
2,Dravid,120
3,Laxman,50


In [34]:
# Column-oriented data: each key is a column name and each list holds that
# column's values, one entry per player (all lists have the same length).
player_dict = dict(
    name=['Sachin', 'Steve', 'Vaughn', 'Sourav', 'Johnty', 'Lara'],
    Runs=[20000, 10000, 7500, 11000, 5000, 14000],
    Country=['India', 'Australia', 'England', 'India', 'South Africa', 'West Indies'],
)

In [35]:
# Display the dictionary of lists.
player_dict

{'name': ['Sachin', 'Steve', 'Vaughn', 'Sourav', 'Johnty', 'Lara'],
 'Runs': [20000, 10000, 7500, 11000, 5000, 14000],
 'Country': ['India',
  'Australia',
  'England',
  'India',
  'South Africa',
  'West Indies']}

In [36]:
# Dict of lists -> DataFrame: keys become column names, lists become columns.
pd.DataFrame(player_dict)

Unnamed: 0,name,Runs,Country
0,Sachin,20000,India
1,Steve,10000,Australia
2,Vaughn,7500,England
3,Sourav,11000,India
4,Johnty,5000,South Africa
5,Lara,14000,West Indies


In [38]:
# A smaller dict-of-lists example: two columns, two rows.
fruit_dict = dict(
    Name=['Apple', 'Banana'],
    Price=[200, 120],
)

In [39]:
# Build a DataFrame with columns Name and Price from the dictionary.
pd.DataFrame(fruit_dict)

Unnamed: 0,Name,Price
0,Apple,200
1,Banana,120


## Reading a csv file

In [40]:
# Load the CSV (path is relative to the notebook's working directory) into a
# DataFrame. Note: this rebinds `data`, which earlier held the NumPy array.
data = pd.read_csv(
    filepath_or_buffer="./worldometer_coronavirus_summary_data.csv",
)

## Head and Tail

In [45]:
# Preview the first rows; with no argument, head() shows five.
data.head()

Unnamed: 0,country,continent,total_confirmed,total_deaths,total_recovered,active_cases,serious_or_critical,total_cases_per_1m_population,total_deaths_per_1m_population,total_tests,total_tests_per_1m_population,population
0,Afghanistan,Asia,179267,7690.0,162202.0,9375.0,1124.0,4420,190.0,951337.0,23455.0,40560636
1,Albania,Europe,275574,3497.0,271826.0,251.0,2.0,95954,1218.0,1817530.0,632857.0,2871945
2,Algeria,Africa,265816,6875.0,178371.0,80570.0,6.0,5865,152.0,230861.0,5093.0,45325517
3,Andorra,Europe,42156,153.0,41021.0,982.0,14.0,543983,1974.0,249838.0,3223924.0,77495
4,Angola,Africa,99194,1900.0,97149.0,145.0,,2853,55.0,1499795.0,43136.0,34769277


In [48]:
# Preview the last 10 rows.
data.tail(10)

Unnamed: 0,country,continent,total_confirmed,total_deaths,total_recovered,active_cases,serious_or_critical,total_cases_per_1m_population,total_deaths_per_1m_population,total_tests,total_tests_per_1m_population,population
216,USA,North America,84209473,1026646.0,81244260.0,1938567.0,1941.0,251659,3068.0,1016883000.0,3038939.0,334617623
217,Uzbekistan,Asia,238802,1637.0,236974.0,191.0,23.0,6947,48.0,1377915.0,40088.0,34372515
218,Vanuatu,Australia/Oceania,8457,14.0,7974.0,469.0,,26386,44.0,24976.0,77926.0,320508
219,Venezuela,South America,522921,5711.0,516170.0,1040.0,230.0,18487,202.0,3359014.0,118752.0,28285909
220,Viet Nam,Asia,10696630,43065.0,9355040.0,1298525.0,340.0,108080,435.0,85811480.0,867048.0,98969721
221,Wallis And Futuna Islands,Australia/Oceania,454,7.0,438.0,9.0,,41755,644.0,20508.0,1886140.0,10873
222,Western Sahara,Africa,10,1.0,9.0,0.0,,16,2.0,,,624681
223,Yemen,Asia,11819,2149.0,9009.0,661.0,23.0,381,69.0,265253.0,8543.0,31049015
224,Zambia,Africa,320591,3983.0,315997.0,611.0,,16575,206.0,3452554.0,178497.0,19342381
225,Zimbabwe,Africa,249206,5482.0,242417.0,1307.0,12.0,16324,359.0,2287793.0,149863.0,15265849


## Info, Describe

In [49]:
# Structural summary: row count, column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 226 entries, 0 to 225
Data columns (total 12 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   country                         226 non-null    object 
 1   continent                       226 non-null    object 
 2   total_confirmed                 226 non-null    int64  
 3   total_deaths                    218 non-null    float64
 4   total_recovered                 204 non-null    float64
 5   active_cases                    204 non-null    float64
 6   serious_or_critical             145 non-null    float64
 7   total_cases_per_1m_population   226 non-null    int64  
 8   total_deaths_per_1m_population  218 non-null    float64
 9   total_tests                     212 non-null    float64
 10  total_tests_per_1m_population   212 non-null    float64
 11  population                      226 non-null    int64  
dtypes: float64(7), int64(3), object(2)
memory usage: 21.3+ KB

In [50]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns; `count` reflects non-missing values only.
data.describe()

Unnamed: 0,total_confirmed,total_deaths,total_recovered,active_cases,serious_or_critical,total_cases_per_1m_population,total_deaths_per_1m_population,total_tests,total_tests_per_1m_population,population
count,226.0,218.0,204.0,204.0,145.0,226.0,218.0,212.0,212.0,226.0
mean,2305651.0,28844.42,2256851.0,68610.29,269.482759,148156.809735,1157.550459,29874450.0,1944650.0,34955210.0
std,7575510.0,99712.54,7613357.0,235043.0,883.174727,155202.909225,1212.83359,107963500.0,3318400.0,139033800.0
min,2.0,1.0,1.0,0.0,1.0,16.0,2.0,5117.0,5093.0,805.0
25%,24126.0,237.0,16193.0,239.75,5.0,11748.25,172.5,347296.8,166726.0,560512.5
50%,179375.0,2251.5,137274.0,1634.0,17.0,98271.5,768.0,2238918.0,775335.5,5800570.0
75%,1090902.0,14006.5,1006245.0,19296.75,139.0,255632.75,1850.0,12338620.0,2267408.0,21872840.0
max,84209470.0,1026646.0,81244260.0,1938567.0,8318.0,704302.0,6297.0,1016883000.0,21842470.0,1439324000.0


## isna

In [53]:
# Display the frame; the rendered output is truncated to the first and last rows.
data

Unnamed: 0,country,continent,total_confirmed,total_deaths,total_recovered,active_cases,serious_or_critical,total_cases_per_1m_population,total_deaths_per_1m_population,total_tests,total_tests_per_1m_population,population
0,Afghanistan,Asia,179267,7690.0,162202.0,9375.0,1124.0,4420,190.0,951337.0,23455.0,40560636
1,Albania,Europe,275574,3497.0,271826.0,251.0,2.0,95954,1218.0,1817530.0,632857.0,2871945
2,Algeria,Africa,265816,6875.0,178371.0,80570.0,6.0,5865,152.0,230861.0,5093.0,45325517
3,Andorra,Europe,42156,153.0,41021.0,982.0,14.0,543983,1974.0,249838.0,3223924.0,77495
4,Angola,Africa,99194,1900.0,97149.0,145.0,,2853,55.0,1499795.0,43136.0,34769277
...,...,...,...,...,...,...,...,...,...,...,...,...
221,Wallis And Futuna Islands,Australia/Oceania,454,7.0,438.0,9.0,,41755,644.0,20508.0,1886140.0,10873
222,Western Sahara,Africa,10,1.0,9.0,0.0,,16,2.0,,,624681
223,Yemen,Asia,11819,2149.0,9009.0,661.0,23.0,381,69.0,265253.0,8543.0,31049015
224,Zambia,Africa,320591,3983.0,315997.0,611.0,,16575,206.0,3452554.0,178497.0,19342381


In [54]:
data.isna().any()  # per column: True if it contains at least one missing value

country                           False
continent                         False
total_confirmed                   False
total_deaths                       True
total_recovered                    True
active_cases                       True
serious_or_critical                True
total_cases_per_1m_population     False
total_deaths_per_1m_population     True
total_tests                        True
total_tests_per_1m_population      True
population                        False
dtype: bool

In [56]:
# Column labels of the DataFrame.
data.columns

Index(['country', 'continent', 'total_confirmed', 'total_deaths',
       'total_recovered', 'active_cases', 'serious_or_critical',
       'total_cases_per_1m_population', 'total_deaths_per_1m_population',
       'total_tests', 'total_tests_per_1m_population', 'population'],
      dtype='object')

In [57]:
# Row labels of the DataFrame (here a default RangeIndex).
data.index

RangeIndex(start=0, stop=226, step=1)

## Selecting a few columns

In [58]:
# Display the frame for reference before selecting columns.
data

Unnamed: 0,country,continent,total_confirmed,total_deaths,total_recovered,active_cases,serious_or_critical,total_cases_per_1m_population,total_deaths_per_1m_population,total_tests,total_tests_per_1m_population,population
0,Afghanistan,Asia,179267,7690.0,162202.0,9375.0,1124.0,4420,190.0,951337.0,23455.0,40560636
1,Albania,Europe,275574,3497.0,271826.0,251.0,2.0,95954,1218.0,1817530.0,632857.0,2871945
2,Algeria,Africa,265816,6875.0,178371.0,80570.0,6.0,5865,152.0,230861.0,5093.0,45325517
3,Andorra,Europe,42156,153.0,41021.0,982.0,14.0,543983,1974.0,249838.0,3223924.0,77495
4,Angola,Africa,99194,1900.0,97149.0,145.0,,2853,55.0,1499795.0,43136.0,34769277
...,...,...,...,...,...,...,...,...,...,...,...,...
221,Wallis And Futuna Islands,Australia/Oceania,454,7.0,438.0,9.0,,41755,644.0,20508.0,1886140.0,10873
222,Western Sahara,Africa,10,1.0,9.0,0.0,,16,2.0,,,624681
223,Yemen,Asia,11819,2149.0,9009.0,661.0,23.0,381,69.0,265253.0,8543.0,31049015
224,Zambia,Africa,320591,3983.0,315997.0,611.0,,16575,206.0,3452554.0,178497.0,19342381


In [61]:
data['country']     # selecting a single column by name returns a Series

0                    Afghanistan
1                        Albania
2                        Algeria
3                        Andorra
4                         Angola
                 ...            
221    Wallis And Futuna Islands
222               Western Sahara
223                        Yemen
224                       Zambia
225                     Zimbabwe
Name: country, Length: 226, dtype: object

In [62]:
# Selecting with a list of column names returns a DataFrame with those columns.
data[['country', 'population']]

Unnamed: 0,country,population
0,Afghanistan,40560636
1,Albania,2871945
2,Algeria,45325517
3,Andorra,77495
4,Angola,34769277
...,...,...
221,Wallis And Futuna Islands,10873
222,Western Sahara,624681
223,Yemen,31049015
224,Zambia,19342381


## Indexing and Slicing

### Integer Indexing (iloc)

In [65]:
# Display the frame for reference before indexing into it.
data

Unnamed: 0,country,continent,total_confirmed,total_deaths,total_recovered,active_cases,serious_or_critical,total_cases_per_1m_population,total_deaths_per_1m_population,total_tests,total_tests_per_1m_population,population
0,Afghanistan,Asia,179267,7690.0,162202.0,9375.0,1124.0,4420,190.0,951337.0,23455.0,40560636
1,Albania,Europe,275574,3497.0,271826.0,251.0,2.0,95954,1218.0,1817530.0,632857.0,2871945
2,Algeria,Africa,265816,6875.0,178371.0,80570.0,6.0,5865,152.0,230861.0,5093.0,45325517
3,Andorra,Europe,42156,153.0,41021.0,982.0,14.0,543983,1974.0,249838.0,3223924.0,77495
4,Angola,Africa,99194,1900.0,97149.0,145.0,,2853,55.0,1499795.0,43136.0,34769277
...,...,...,...,...,...,...,...,...,...,...,...,...
221,Wallis And Futuna Islands,Australia/Oceania,454,7.0,438.0,9.0,,41755,644.0,20508.0,1886140.0,10873
222,Western Sahara,Africa,10,1.0,9.0,0.0,,16,2.0,,,624681
223,Yemen,Asia,11819,2149.0,9009.0,661.0,23.0,381,69.0,265253.0,8543.0,31049015
224,Zambia,Africa,320591,3983.0,315997.0,611.0,,16575,206.0,3452554.0,178497.0,19342381


In [66]:
# Positions are zero-based: row 2, column 1 (the `continent` of the third row).
data.iloc[2,1]   # df.iloc[row_index, column_index]

'Africa'

In [69]:
# Positional slices exclude the stop value: rows 0-4 and the first 3 columns.
data.iloc[:5, :3]

Unnamed: 0,country,continent,total_confirmed
0,Afghanistan,Asia,179267
1,Albania,Europe,275574
2,Algeria,Africa,265816
3,Andorra,Europe,42156
4,Angola,Africa,99194


In [70]:
# A list of positions picks non-adjacent columns: 0, 1 and 11
# (country, continent, population).
data.iloc[:5, [0, 1, 11]]

Unnamed: 0,country,continent,population
0,Afghanistan,Asia,40560636
1,Albania,Europe,2871945
2,Algeria,Africa,45325517
3,Andorra,Europe,77495
4,Angola,Africa,34769277


### Using loc

In [71]:
# loc selects by label. Unlike iloc, a label slice includes its end point,
# so `:5` returns rows 0 through 5 (six rows).
data.loc[:5, ['country', 'continent', 'population']]

Unnamed: 0,country,continent,population
0,Afghanistan,Asia,40560636
1,Albania,Europe,2871945
2,Algeria,Africa,45325517
3,Andorra,Europe,77495
4,Angola,Africa,34769277
5,Anguilla,North America,15252


In [72]:
# A list of row labels selects exactly those rows (150 and 157).
data.loc[[150, 157], ['country', 'continent', 'population']]

Unnamed: 0,country,continent,population
150,Niue,Australia/Oceania,1647
157,Paraguay,South America,7295705


## Different Methods

### Sum

In [73]:
# Sum of the `population` column over all rows.
data['population'].sum()

7899878348

### Sort

In [77]:
# Rows ordered from most to least populous. sort_values returns a new frame,
# so `data` keeps its original order.
sorted_by_pop = data.sort_values(
    'population',
    ascending=False,
)

In [78]:
# Display the sorted frame; each row keeps its original index label.
sorted_by_pop

Unnamed: 0,country,continent,total_confirmed,total_deaths,total_recovered,active_cases,serious_or_critical,total_cases_per_1m_population,total_deaths_per_1m_population,total_tests,total_tests_per_1m_population,population
44,China,Asia,221804,5209.0,210454.0,6141.0,383.0,154,4.0,1.600000e+08,111163.0,1439323776
94,India,Asia,43121599,524214.0,42579693.0,17692.0,698.0,30686,373.0,8.438369e+08,600479.0,1405273033
216,USA,North America,84209473,1026646.0,81244260.0,1938567.0,1941.0,251659,3068.0,1.016883e+09,3038939.0,334617623
95,Indonesia,Asia,6050776,156458.0,5889534.0,4784.0,2771.0,21694,561.0,9.724539e+07,348662.0,278910317
153,Pakistan,Asia,1529249,30376.0,1494141.0,4732.0,109.0,6681,133.0,2.835045e+07,123867.0,228878790
...,...,...,...,...,...,...,...,...,...,...,...,...
172,Saint Pierre And Miquelon,North America,2739,1.0,2449.0,289.0,1.0,477095,174.0,2.368700e+04,4125936.0,5741
137,Montserrat,North America,747,2.0,390.0,355.0,,149460,400.0,1.123800e+04,2248499.0,4998
69,Falkland Islands Malvinas,South America,1126,,,,,306896,,8.632000e+03,2352685.0,3669
150,Niue,Australia/Oceania,9,,9.0,0.0,,5464,,,,1647


In [82]:
# Replace the out-of-order row labels left over from sorting with a fresh
# 0..n-1 index. drop=True discards the old labels instead of keeping them as
# a column. Rebinding the name (rather than inplace=True) avoids mutating the
# frame in place across cells and keeps the call chainable.
sorted_by_pop = sorted_by_pop.reset_index(drop=True)

In [83]:
# Display the sorted frame again; the index now runs sequentially from 0.
sorted_by_pop

Unnamed: 0,country,continent,total_confirmed,total_deaths,total_recovered,active_cases,serious_or_critical,total_cases_per_1m_population,total_deaths_per_1m_population,total_tests,total_tests_per_1m_population,population
0,China,Asia,221804,5209.0,210454.0,6141.0,383.0,154,4.0,1.600000e+08,111163.0,1439323776
1,India,Asia,43121599,524214.0,42579693.0,17692.0,698.0,30686,373.0,8.438369e+08,600479.0,1405273033
2,USA,North America,84209473,1026646.0,81244260.0,1938567.0,1941.0,251659,3068.0,1.016883e+09,3038939.0,334617623
3,Indonesia,Asia,6050776,156458.0,5889534.0,4784.0,2771.0,21694,561.0,9.724539e+07,348662.0,278910317
4,Pakistan,Asia,1529249,30376.0,1494141.0,4732.0,109.0,6681,133.0,2.835045e+07,123867.0,228878790
...,...,...,...,...,...,...,...,...,...,...,...,...
221,Saint Pierre And Miquelon,North America,2739,1.0,2449.0,289.0,1.0,477095,174.0,2.368700e+04,4125936.0,5741
222,Montserrat,North America,747,2.0,390.0,355.0,,149460,400.0,1.123800e+04,2248499.0,4998
223,Falkland Islands Malvinas,South America,1126,,,,,306896,,8.632000e+03,2352685.0,3669
224,Niue,Australia/Oceania,9,,9.0,0.0,,5464,,,,1647


### Mean

In [108]:
# Arithmetic mean of the `total_confirmed` column.
data['total_confirmed'].mean()

2305651.115044248

Create a column holding each country's population as a percentage of the total population across all rows.

In [85]:
# Denominator for the percentage column: population summed over all rows.
total_population = data['population'].sum()

In [86]:
# Display the total.
total_population

7899878348

In [87]:
# Add a new column to `data`: each row's share of the total population, as a
# percentage rounded to 2 decimal places.
data['percentage_population'] = (
    100 * data['population'] / total_population
).round(2)

In [89]:
# Show rows ordered by population share, largest first. The result is only
# displayed, not assigned, so `data` itself is not reordered.
data.sort_values('percentage_population', ascending=False)

Unnamed: 0,country,continent,total_confirmed,total_deaths,total_recovered,active_cases,serious_or_critical,total_cases_per_1m_population,total_deaths_per_1m_population,total_tests,total_tests_per_1m_population,population,percentage_population
44,China,Asia,221804,5209.0,210454.0,6141.0,383.0,154,4.0,1.600000e+08,111163.0,1439323776,18.22
94,India,Asia,43121599,524214.0,42579693.0,17692.0,698.0,30686,373.0,8.438369e+08,600479.0,1405273033,17.79
216,USA,North America,84209473,1026646.0,81244260.0,1938567.0,1941.0,251659,3068.0,1.016883e+09,3038939.0,334617623,4.24
95,Indonesia,Asia,6050776,156458.0,5889534.0,4784.0,2771.0,21694,561.0,9.724539e+07,348662.0,278910317,3.53
153,Pakistan,Asia,1529249,30376.0,1494141.0,4732.0,109.0,6681,133.0,2.835045e+07,123867.0,228878790,2.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,Sint Maarten,North America,10123,86.0,9985.0,52.0,10.0,231208,1964.0,6.205600e+04,1417354.0,43783,0.00
48,Cook Islands,Australia/Oceania,5410,1.0,5291.0,118.0,,307491,57.0,1.746900e+04,992895.0,17594,0.00
107,Kiribati,Australia/Oceania,3093,13.0,2602.0,478.0,3.0,25175,106.0,,,122862,0.00
27,British Virgin Islands,North America,6573,62.0,,,1.0,214762,2026.0,1.036370e+05,3386166.0,30606,0.00


## Filtering

In [91]:
# Display the frame, which now includes the `percentage_population` column.
data

Unnamed: 0,country,continent,total_confirmed,total_deaths,total_recovered,active_cases,serious_or_critical,total_cases_per_1m_population,total_deaths_per_1m_population,total_tests,total_tests_per_1m_population,population,percentage_population
0,Afghanistan,Asia,179267,7690.0,162202.0,9375.0,1124.0,4420,190.0,951337.0,23455.0,40560636,0.51
1,Albania,Europe,275574,3497.0,271826.0,251.0,2.0,95954,1218.0,1817530.0,632857.0,2871945,0.04
2,Algeria,Africa,265816,6875.0,178371.0,80570.0,6.0,5865,152.0,230861.0,5093.0,45325517,0.57
3,Andorra,Europe,42156,153.0,41021.0,982.0,14.0,543983,1974.0,249838.0,3223924.0,77495,0.00
4,Angola,Africa,99194,1900.0,97149.0,145.0,,2853,55.0,1499795.0,43136.0,34769277,0.44
...,...,...,...,...,...,...,...,...,...,...,...,...,...
221,Wallis And Futuna Islands,Australia/Oceania,454,7.0,438.0,9.0,,41755,644.0,20508.0,1886140.0,10873,0.00
222,Western Sahara,Africa,10,1.0,9.0,0.0,,16,2.0,,,624681,0.01
223,Yemen,Asia,11819,2149.0,9009.0,661.0,23.0,381,69.0,265253.0,8543.0,31049015,0.39
224,Zambia,Africa,320591,3983.0,315997.0,611.0,,16575,206.0,3452554.0,178497.0,19342381,0.24


In [92]:
# Boolean-mask filtering: keep only the rows whose continent is 'Asia'.
data.loc[data['continent'].eq('Asia')]

Unnamed: 0,country,continent,total_confirmed,total_deaths,total_recovered,active_cases,serious_or_critical,total_cases_per_1m_population,total_deaths_per_1m_population,total_tests,total_tests_per_1m_population,population,percentage_population
0,Afghanistan,Asia,179267,7690.0,162202.0,9375.0,1124.0,4420,190.0,951337.0,23455.0,40560636,0.51
8,Armenia,Asia,422896,8623.0,412048.0,2225.0,,142219,2900.0,3068217.0,1031834.0,2973558,0.04
12,Azerbaijan,Asia,792638,9709.0,782869.0,60.0,,76885,942.0,6838458.0,663324.0,10309383,0.13
14,Bahrain,Asia,576997,1479.0,569758.0,5760.0,4.0,318491,816.0,9775981.0,5396149.0,1811659,0.02
15,Bangladesh,Asia,1953012,29127.0,1899419.0,24466.0,1273.0,11643,174.0,14051455.0,83767.0,167745162,2.12
22,Bhutan,Asia,59570,21.0,59491.0,58.0,,75651,27.0,2303690.0,2925573.0,787432,0.01
28,Brunei Darussalam,Asia,144659,220.0,142788.0,1651.0,,324871,494.0,717784.0,1611980.0,445281,0.01
33,Cambodia,Asia,136262,3056.0,133194.0,12.0,,7945,178.0,2961934.0,172709.0,17149867,0.22
42,China Hong Kong Sar,Asia,1208506,9361.0,,,5.0,158800,1230.0,44972952.0,5909516.0,7610260,0.1
43,China Macao Sar,Asia,82,,82.0,0.0,,123,,5375.0,8073.0,665819,0.01


In [93]:
# Combined population share of the Asian rows. This adds up per-country
# percentages that were already rounded to 2 decimals, so the total carries
# accumulated rounding and floating-point error.
data.loc[data['continent'] == 'Asia', 'percentage_population'].sum()

59.17999999999999

Select the countries where total deaths are greater than or equal to 20,000, sorted from highest to lowest.

In [96]:
# Keep rows with at least 20000 total deaths, then order them from the
# highest death count to the lowest.
(
    data
    .loc[lambda frame: frame['total_deaths'] >= 20000]
    .sort_values(by='total_deaths', ascending=False)
)

Unnamed: 0,country,continent,total_confirmed,total_deaths,total_recovered,active_cases,serious_or_critical,total_cases_per_1m_population,total_deaths_per_1m_population,total_tests,total_tests_per_1m_population,population,percentage_population
216,USA,North America,84209473,1026646.0,81244260.0,1938567.0,1941.0,251659,3068.0,1016883000.0,3038939.0,334617623,4.24
26,Brazil,South America,30682094,664920.0,29718402.0,298772.0,8318.0,142460,3087.0,63776170.0,296119.0,215373503,2.73
94,India,Asia,43121599,524214.0,42579693.0,17692.0,698.0,30686,373.0,843836900.0,600479.0,1405273033,17.79
165,Russia,Europe,18260293,377670.0,17647179.0,235444.0,2300.0,125027,2586.0,273400000.0,1871949.0,146050996,1.85
131,Mexico,North America,5745652,324465.0,5044893.0,376294.0,4798.0,43708,2468.0,15855490.0,120615.0,131455607,1.66
158,Peru,South America,3571919,213023.0,,,248.0,105593,6297.0,30226870.0,893567.0,33827210,0.43
212,UK,Europe,22159805,176708.0,21677896.0,305201.0,253.0,323264,2578.0,519264100.0,7574950.0,68550166,0.87
101,Italy,Europe,17057873,165244.0,15894511.0,998118.0,347.0,282901,2741.0,217853700.0,3613054.0,60296265,0.76
95,Indonesia,Asia,6050776,156458.0,5889534.0,4784.0,2771.0,21694,561.0,97245390.0,348662.0,278910317,3.53
72,France,Europe,29160802,147257.0,28156674.0,856871.0,1329.0,444914,2247.0,271490200.0,4142201.0,65542502,0.83


## Group By

In [102]:
# Number of countries per continent: group the rows by continent and count
# the non-missing `country` entries in each group.
data.groupby('continent')[['country']].count()

Unnamed: 0_level_0,country
continent,Unnamed: 1_level_1
Africa,58
Asia,49
Australia/Oceania,18
Europe,48
North America,39
South America,14


In [104]:
# Total population per continent: group the rows by continent and sum the
# `population` column within each group.
data.groupby('continent')[['population']].sum()

Unnamed: 0_level_0,population
continent,Unnamed: 1_level_1
Africa,1400358940
Asia,4672455975
Australia/Oceania,43505620
Europe,748470690
North America,597707388
South America,437379735
