In [2]:
import pandas as pd # type: ignore

### Reading CSV files

##### Tips
- Prefix the path with r (e.g. r'...') to make it a raw string literal, so the backslashes in the path are not treated as escape sequences
- methods
  - read_csv(...)
  - read_table(..., sep=',' || delimiter=',')
- kwargs
  - header
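    - the row number to use as the header; the rows below it are kept as data
    - None means no row is treated as the header: columns get integer labels (0, 1, ...) unless names is given, and the file's original header line becomes a row of data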
  - names
    - creating your own header

In [4]:
# header=None: no line of the file is treated as the header, so the file's own
# header line ('Country', 'Region') is read as data row 0.
# names=[...]: supplies the column labels to use instead.
pd.read_csv(r'resources\countries of the world.csv', header = None, names = ['C', 'R'])

Unnamed: 0,C,R
0,Country,Region
1,Afghanistan,ASIA (EX. NEAR EAST)
2,Albania,EASTERN EUROPE
3,Algeria,NORTHERN AFRICA
4,American Samoa,OCEANIA
...,...,...
223,West Bank,NEAR EAST
224,Western Sahara,NORTHERN AFRICA
225,Yemen,NEAR EAST
226,Zambia,SUB-SAHARAN AFRICA


#### Reading Text Files

##### Tips
- methods
  - read_csv(..., sep='\t' || delimiter='\t')
  - read_table(...)
- kwargs


In [5]:
# delimiter and sep are two names for the same read_csv argument; both calls
# parse the same tab-separated file.
# The first result is kept as df1 for the later cells; the second is only displayed.
df1 = pd.read_csv(r'resources\countries of the world.txt', delimiter='\t')
pd.read_csv(r'resources\countries of the world.txt', sep='\t')

Unnamed: 0,Country,Region
0,Afghanistan,ASIA (EX. NEAR EAST)
1,Albania,EASTERN EUROPE
2,Algeria,NORTHERN AFRICA
3,American Samoa,OCEANIA
4,Andorra,WESTERN EUROPE
...,...,...
222,West Bank,NEAR EAST
223,Western Sahara,NORTHERN AFRICA
224,Yemen,NEAR EAST
225,Zambia,SUB-SAHARAN AFRICA


#### Reading JSON files

##### Tips
- methods
  - read_json(...)

In [6]:
df2 = pd.read_json(r'resources\json_sample.json')
df2

Unnamed: 0,12 Strong,A Fantastic Woman (Una Mujer Fantástica),All The Money In The World,Bilal: A New Breed Of Hero,Call Me By Your Name,Darkest Hour,Den Of Thieves,Ferdinand,Fifty Shades Freed,Film Stars Don'T Die In Liverpool,...,The 15:17 To Paris,The Commuter,The Disaster Artist,The Greatest Showman,The Insult (L'Insulte),The Post,The Shape Of Water,"Three Billboards Outside Ebbing, Missouri",Till The End Of The World,Winchester
0,"{'Genre': 'Action', 'Gross': '$453,173', 'IMDB...","{'popcornscore': 83, 'rating': 'R', 'tomatosco...","{'popcornscore': 71, 'rating': 'R', 'tomatosco...","{'popcornscore': 91, 'rating': 'PG13', 'tomato...","{'popcornscore': 87, 'rating': 'R', 'tomatosco...","{'popcornscore': 84, 'rating': 'PG13', 'tomato...","{'Genre': 'Action', 'Gross': '$491,898', 'IMDB...","{'popcornscore': 49, 'rating': 'PG', 'tomatosc...","{'Genre': 'Drama', 'Gross': 'unknown', 'IMDB M...","{'popcornscore': 69, 'rating': 'R', 'tomatosco...",...,"{'Genre': 'Drama', 'Gross': 'unknown', 'IMDB M...","{'popcornscore': 48, 'rating': 'PG13', 'tomato...","{'popcornscore': 89, 'rating': 'R', 'tomatosco...","{'Genre': 'Biography', 'Gross': '$627,248', 'I...","{'popcornscore': 86, 'rating': 'R', 'tomatosco...","{'Genre': 'Biography', 'Gross': '$463,228', 'I...","{'Genre': 'Adventure', 'Gross': '$448,287', 'I...","{'popcornscore': 87, 'rating': 'R', 'tomatosco...","{'popcornscore': -1, 'rating': 'NR', 'tomatosc...","{'Genre': 'Biography', 'Gross': '$696,786', 'I..."


#### Reading excel files 

##### Tips
- methods
  - read_excel(...)

In [7]:
# Reading .xlsx files requires the optional 'openpyxl' package; if it is not
# installed, pandas raises ImportError (install it with pip or conda first).
pd.read_excel(r'resources\world_population_excel_workbook.xlsx')

ImportError: Missing optional dependency 'openpyxl'.  Use pip or conda to install openpyxl.

#### Settings
- Max Rows/Columns to show

In [8]:
# Use the documented option keys (underscore, not dot). 'display.max.rows' only
# worked because pandas matches option names as regular expressions ('.' matches
# '_'); that lookup raises OptionError once the pattern matches more than one option.
pd.set_option('display.max_rows', 235)     # show up to 235 rows before truncating
pd.set_option('display.max_columns', 40)   # show up to 40 columns before truncating

#### Information about dataframes

In [9]:
df1.shape

(227, 2)

In [10]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 227 entries, 0 to 226
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Country  227 non-null    object
 1   Region   227 non-null    object
dtypes: object(2)
memory usage: 3.7+ KB


In [11]:
df1.describe()

Unnamed: 0,Country,Region
count,227,227
unique,227,11
top,Afghanistan,SUB-SAHARAN AFRICA
freq,1,51


#### Sneak peek at the dataframe
- methods
  - head(x)
  - tail(x)
    - x - number of rows, by default it's 5

In [12]:
df1.head(10)

Unnamed: 0,Country,Region
0,Afghanistan,ASIA (EX. NEAR EAST)
1,Albania,EASTERN EUROPE
2,Algeria,NORTHERN AFRICA
3,American Samoa,OCEANIA
4,Andorra,WESTERN EUROPE
5,Angola,SUB-SAHARAN AFRICA
6,Anguilla,LATIN AMER. & CARIB
7,Antigua & Barbuda,LATIN AMER. & CARIB
8,Argentina,LATIN AMER. & CARIB
9,Armenia,C.W. OF IND. STATES


In [13]:
df1.tail(10)

Unnamed: 0,Country,Region
217,Vanuatu,OCEANIA
218,Venezuela,LATIN AMER. & CARIB
219,Vietnam,ASIA (EX. NEAR EAST)
220,Virgin Islands,LATIN AMER. & CARIB
221,Wallis and Futuna,OCEANIA
222,West Bank,NEAR EAST
223,Western Sahara,NORTHERN AFRICA
224,Yemen,NEAR EAST
225,Zambia,SUB-SAHARAN AFRICA
226,Zimbabwe,SUB-SAHARAN AFRICA


In [14]:
# Shrink the display limits again so the following outputs are truncated to a
# few rows/columns. Use the documented option keys: the dotted spelling
# ('display.max.rows') only resolves through pandas' regex matching of option names.
pd.set_option('display.max_rows', 10)
pd.set_option('display.max_columns', 10)

##### Reading specific columns
- df[col_name]
- df[[col_names]]

In [15]:
df1['Country']

0         Afghanistan 
1             Albania 
2             Algeria 
3      American Samoa 
4             Andorra 
            ...       
222         West Bank 
223    Western Sahara 
224             Yemen 
225            Zambia 
226          Zimbabwe 
Name: Country, Length: 227, dtype: object

In [16]:
df1[['Country', 'Region']]

Unnamed: 0,Country,Region
0,Afghanistan,ASIA (EX. NEAR EAST)
1,Albania,EASTERN EUROPE
2,Algeria,NORTHERN AFRICA
3,American Samoa,OCEANIA
4,Andorra,WESTERN EUROPE
...,...,...
222,West Bank,NEAR EAST
223,Western Sahara,NORTHERN AFRICA
224,Yemen,NEAR EAST
225,Zambia,SUB-SAHARAN AFRICA


##### Reading specific rows
- df.iloc[row_num] - using the row number no matter how indexed it is
- df.loc[index_val] - using a value in the index column

In [17]:
df1.iloc[3]

Country                        American Samoa 
Region     OCEANIA                            
Name: 3, dtype: object

In [18]:
df1.loc[3]

Country                        American Samoa 
Region     OCEANIA                            
Name: 3, dtype: object

### Indexing

##### Setting Index Column

In [19]:
# index_col='Rank' makes the Rank column the row index (the row labels)
# instead of the default 0..n-1 integer index.
df = pd.read_csv(r'resources\world_population.csv', index_col='Rank')
df

Unnamed: 0_level_0,CCA3,Country,Capital,Continent,2022 Population,...,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
36,AFG,Afghanistan,Kabul,Asia,41128771.0,...,10752971.0,652230.0,63.0587,1.0257,0.52
138,ALB,Albania,Tirana,Europe,2842321.0,...,2324731.0,28748.0,98.8702,0.9957,0.04
34,DZA,Algeria,Algiers,Africa,44903225.0,...,13795915.0,2381741.0,18.8531,1.0164,0.56
213,ASM,American Samoa,Pago Pago,Oceania,44273.0,...,27075.0,199.0,222.4774,0.9831,0.00
203,AND,Andorra,Andorra la Vella,Europe,79824.0,...,19860.0,468.0,170.5641,1.0100,0.00
...,...,...,...,...,...,...,...,...,...,...,...
226,WLF,Wallis and Futuna,Mata-Utu,Oceania,11572.0,...,9377.0,142.0,81.4930,0.9953,0.00
172,ESH,Western Sahara,El Aaiún,Africa,575986.0,...,76371.0,266000.0,2.1654,1.0184,0.01
46,YEM,Yemen,Sanaa,Asia,33696614.0,...,6843607.0,527968.0,63.8232,1.0217,0.42
63,ZMB,Zambia,Lusaka,Africa,20017675.0,...,4281671.0,752612.0,26.5976,1.0280,0.25


<i>Now to see the difference on loc and iloc...</i>

In [20]:
# iloc is positional: [[1]] selects the second row of the frame, whatever its
# index label is (here the row with Rank 138). The list form returns a DataFrame.
df.iloc[[1]]

Unnamed: 0_level_0,CCA3,Country,Capital,Continent,2022 Population,...,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
138,ALB,Albania,Tirana,Europe,2842321.0,...,2324731.0,28748.0,98.8702,0.9957,0.04


In [21]:
# loc is label-based: [[1]] selects the row(s) whose index label (Rank) equals 1,
# wherever they sit in the frame.
df.loc[[1]]

Unnamed: 0_level_0,CCA3,Country,Capital,Continent,2022 Population,...,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,CHN,China,Beijing,Asia,1425887000.0,...,822534450.0,9706961.0,146.8933,1.0,17.88


##### <i>Note</i>
- before setting a different column as the index, reset the index first (reset_index) if the current index column must be kept as a regular data column; otherwise set_index discards the old index
- the inplace kwarg is false by default, if true it changes the current df instead of returning a new one

In [22]:
# Moves the current index (Rank) back into an ordinary column and restores the
# default 0..n-1 index. inplace=True mutates df itself instead of returning a new frame.
df.reset_index(inplace=True)
df

Unnamed: 0,Rank,CCA3,Country,Capital,Continent,...,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,36,AFG,Afghanistan,Kabul,Asia,...,10752971.0,652230.0,63.0587,1.0257,0.52
1,138,ALB,Albania,Tirana,Europe,...,2324731.0,28748.0,98.8702,0.9957,0.04
2,34,DZA,Algeria,Algiers,Africa,...,13795915.0,2381741.0,18.8531,1.0164,0.56
3,213,ASM,American Samoa,Pago Pago,Oceania,...,27075.0,199.0,222.4774,0.9831,0.00
4,203,AND,Andorra,Andorra la Vella,Europe,...,19860.0,468.0,170.5641,1.0100,0.00
...,...,...,...,...,...,...,...,...,...,...,...
229,226,WLF,Wallis and Futuna,Mata-Utu,Oceania,...,9377.0,142.0,81.4930,0.9953,0.00
230,172,ESH,Western Sahara,El Aaiún,Africa,...,76371.0,266000.0,2.1654,1.0184,0.01
231,46,YEM,Yemen,Sanaa,Asia,...,6843607.0,527968.0,63.8232,1.0217,0.42
232,63,ZMB,Zambia,Lusaka,Africa,...,4281671.0,752612.0,26.5976,1.0280,0.25


##### <i>Note</i>
- the set_index method sets a new index column, just like the index_col keyword argument we used when we read the file

In [23]:
df.set_index('Country', inplace=True)
df

Unnamed: 0_level_0,Rank,CCA3,Capital,Continent,2022 Population,...,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Afghanistan,36,AFG,Kabul,Asia,41128771.0,...,10752971.0,652230.0,63.0587,1.0257,0.52
Albania,138,ALB,Tirana,Europe,2842321.0,...,2324731.0,28748.0,98.8702,0.9957,0.04
Algeria,34,DZA,Algiers,Africa,44903225.0,...,13795915.0,2381741.0,18.8531,1.0164,0.56
American Samoa,213,ASM,Pago Pago,Oceania,44273.0,...,27075.0,199.0,222.4774,0.9831,0.00
Andorra,203,AND,Andorra la Vella,Europe,79824.0,...,19860.0,468.0,170.5641,1.0100,0.00
...,...,...,...,...,...,...,...,...,...,...,...
Wallis and Futuna,226,WLF,Mata-Utu,Oceania,11572.0,...,9377.0,142.0,81.4930,0.9953,0.00
Western Sahara,172,ESH,El Aaiún,Africa,575986.0,...,76371.0,266000.0,2.1654,1.0184,0.01
Yemen,46,YEM,Sanaa,Asia,33696614.0,...,6843607.0,527968.0,63.8232,1.0217,0.42
Zambia,63,ZMB,Lusaka,Africa,20017675.0,...,4281671.0,752612.0,26.5976,1.0280,0.25


In [24]:
df.loc[['Andorra']]

Unnamed: 0_level_0,Rank,CCA3,Capital,Continent,2022 Population,...,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Andorra,203,AND,Andorra la Vella,Europe,79824.0,...,19860.0,468.0,170.5641,1.01,0.0


##### <i>Almost forgot resetting the index 😅</i>

In [25]:
df.reset_index(inplace=True)

In [14]:
# Continent values repeat across rows, so the resulting index is non-unique:
# several rows share the same label.
df.set_index('Continent', inplace=True)
df

Unnamed: 0_level_0,Rank,CCA3,Country,Capital,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Asia,36,AFG,Afghanistan,Kabul,41128771.0,38972230.0,33753499.0,28189672.0,19542982.0,10694796.0,12486631.0,10752971.0,652230.0,63.0587,1.0257,0.52
Europe,138,ALB,Albania,Tirana,2842321.0,2866849.0,2882481.0,2913399.0,3182021.0,3295066.0,2941651.0,2324731.0,28748.0,98.8702,0.9957,0.04
Africa,34,DZA,Algeria,Algiers,44903225.0,43451666.0,39543154.0,35856344.0,30774621.0,25518074.0,18739378.0,13795915.0,2381741.0,18.8531,1.0164,0.56
Oceania,213,ASM,American Samoa,Pago Pago,44273.0,46189.0,51368.0,54849.0,58230.0,47818.0,32886.0,27075.0,199.0,222.4774,0.9831,0.00
Europe,203,AND,Andorra,Andorra la Vella,79824.0,77700.0,71746.0,71519.0,66097.0,53569.0,35611.0,19860.0,468.0,170.5641,1.0100,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Oceania,226,WLF,Wallis and Futuna,Mata-Utu,11572.0,11655.0,12182.0,13142.0,14723.0,13454.0,11315.0,9377.0,142.0,81.4930,0.9953,0.00
Africa,172,ESH,Western Sahara,El Aaiún,575986.0,556048.0,491824.0,413296.0,270375.0,178529.0,116775.0,76371.0,266000.0,2.1654,1.0184,0.01
Asia,46,YEM,Yemen,Sanaa,33696614.0,32284046.0,28516545.0,24743946.0,18628700.0,13375121.0,9204938.0,6843607.0,527968.0,63.8232,1.0217,0.42
Africa,63,ZMB,Zambia,Lusaka,20017675.0,18927715.0,,13792086.0,9891136.0,7686401.0,5720438.0,4281671.0,752612.0,26.5976,1.0280,0.25


##### <i>Note</i>
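- loc returns every row whose index label matches, so with a non-unique index (e.g. Continent) a single label can return multiple rows; iloc selects by position, so a single position always returns exactly one row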

In [27]:
df.loc[['Europe']].head()

Unnamed: 0_level_0,Country,Rank,CCA3,Capital,2022 Population,...,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Europe,Albania,138,ALB,Tirana,2842321.0,...,2324731.0,28748.0,98.8702,0.9957,0.04
Europe,Andorra,203,AND,Andorra la Vella,79824.0,...,19860.0,468.0,170.5641,1.01,0.0
Europe,Austria,99,AUT,Vienna,8939617.0,...,7465301.0,83871.0,106.5877,1.002,0.11
Europe,Belarus,96,BLR,Minsk,9534954.0,...,9170786.0,207600.0,45.9295,0.9955,0.12
Europe,Belgium,81,BEL,Brussels,11655930.0,...,9629376.0,30528.0,381.8111,1.0038,0.15


In [28]:
df.reset_index(inplace=True)

##### <i>Note</i>
- multiple columns can be set as indexes
- an index column cannot be selected using df[column_name]

In [39]:
# Passing a list of columns builds a multi-level index:
# Continent is the outer level (0) and Country the inner level (1).
df.set_index(['Continent', 'Country'], inplace=True)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Rank,CCA3,Capital,2022 Population,2020 Population,...,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Continent,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Asia,Afghanistan,36,AFG,Kabul,41128771.0,38972230.0,...,10752971.0,652230.0,63.0587,1.0257,0.52
Europe,Albania,138,ALB,Tirana,2842321.0,2866849.0,...,2324731.0,28748.0,98.8702,0.9957,0.04
Africa,Algeria,34,DZA,Algiers,44903225.0,43451666.0,...,13795915.0,2381741.0,18.8531,1.0164,0.56
Oceania,American Samoa,213,ASM,Pago Pago,44273.0,46189.0,...,27075.0,199.0,222.4774,0.9831,0.00
Europe,Andorra,203,AND,Andorra la Vella,79824.0,77700.0,...,19860.0,468.0,170.5641,1.0100,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...
Oceania,Wallis and Futuna,226,WLF,Mata-Utu,11572.0,11655.0,...,9377.0,142.0,81.4930,0.9953,0.00
Africa,Western Sahara,172,ESH,El Aaiún,575986.0,556048.0,...,76371.0,266000.0,2.1654,1.0184,0.01
Asia,Yemen,46,YEM,Sanaa,33696614.0,32284046.0,...,6843607.0,527968.0,63.8232,1.0217,0.42
Africa,Zambia,63,ZMB,Lusaka,20017675.0,18927715.0,...,4281671.0,752612.0,26.5976,1.0280,0.25


##### Sorting using the index column
- kwargs
  - ascending
    - takes a boolean value, or a list of booleans (one per index level) for a multi-level index

In [30]:
# Only the last expression of a cell is rendered, so the ascending sort on the
# first line is computed and then discarded; the output shown is the descending sort.
# Neither call modifies df: sort_index returns a new DataFrame.
df.sort_index()
df.sort_index(ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,Rank,CCA3,Capital,2022 Population,2020 Population,...,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Continent,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
South America,Venezuela,51,VEN,Caracas,28301696.0,28490453.0,...,11355475.0,,30.8820,1.0036,0.35
South America,Uruguay,133,URY,Montevideo,3422794.0,3429086.0,...,2790265.0,181034.0,18.9069,0.9990,0.04
South America,Suriname,170,SUR,Paramaribo,618040.0,607065.0,...,379918.0,163820.0,3.7727,1.0082,0.01
South America,Peru,44,PER,Lima,34049588.0,33304756.0,...,13562371.0,1285216.0,26.4933,1.0099,0.43
South America,Paraguay,109,PRY,Asunción,6780744.0,6618695.0,...,2408787.0,406752.0,16.6705,1.0115,0.09
...,...,...,...,...,...,...,...,...,...,...,...,...
Africa,Burkina Faso,58,BFA,Ouagadougou,22673762.0,21522626.0,...,5611666.0,272967.0,83.0641,1.0259,0.28
Africa,Botswana,144,BWA,Gaborone,2630296.0,2546402.0,...,592244.0,582000.0,4.5194,1.0162,0.03
Africa,Benin,77,BEN,Porto-Novo,13352864.0,12643123.0,...,3023443.0,112622.0,118.5635,1.0274,0.17
Africa,Angola,42,AGO,Luanda,35588987.0,33428485.0,...,6029700.0,1246700.0,28.5466,1.0315,0.45


In [31]:
# One flag per index level: Continent sorted ascending (A-Z), and within each
# continent Country sorted descending (Z-A).
df.sort_index(ascending=[True, False])

Unnamed: 0_level_0,Unnamed: 1_level_0,Rank,CCA3,Capital,2022 Population,2020 Population,...,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Continent,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Africa,Zimbabwe,74,ZWE,Harare,16320537.0,15669666.0,...,5202918.0,390757.0,41.7665,1.0204,0.20
Africa,Zambia,63,ZMB,Lusaka,20017675.0,18927715.0,...,4281671.0,752612.0,26.5976,1.0280,0.25
Africa,Western Sahara,172,ESH,El Aaiún,575986.0,556048.0,...,76371.0,266000.0,2.1654,1.0184,0.01
Africa,Uganda,31,UGA,Kampala,47249585.0,44404611.0,...,10317212.0,241550.0,195.6100,1.0304,0.59
Africa,Tunisia,79,TUN,Tunis,12356117.0,12161723.0,...,5047404.0,163610.0,75.5218,1.0076,0.15
...,...,...,...,...,...,...,...,...,...,...,...,...
South America,Colombia,28,COL,Bogota,51874024.0,50930662.0,...,20905254.0,1141748.0,45.4339,1.0069,0.65
South America,Chile,65,CHL,Santiago,19603733.0,19300315.0,...,9820481.0,756102.0,25.9274,1.0057,0.25
South America,Brazil,7,BRA,Brasilia,215313498.0,213196304.0,...,96369875.0,8515767.0,25.2841,1.0046,2.70
South America,Bolivia,80,BOL,Sucre,12224110.0,11936162.0,...,4585693.0,1098581.0,11.1272,1.0120,0.15


##### Renaming Columns

In [41]:
df.reset_index(inplace=True)

In [45]:
df

Unnamed: 0,Continent,Country,Rank,CCA3,Capital,...,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,Asia,Afghanistan,36,AFG,Kabul,...,10752971.0,652230.0,63.0587,1.0257,0.52
1,Europe,Albania,138,ALB,Tirana,...,2324731.0,28748.0,98.8702,0.9957,0.04
2,Africa,Algeria,34,DZA,Algiers,...,13795915.0,2381741.0,18.8531,1.0164,0.56
3,Oceania,American Samoa,213,ASM,Pago Pago,...,27075.0,199.0,222.4774,0.9831,0.00
4,Europe,Andorra,203,AND,Andorra la Vella,...,19860.0,468.0,170.5641,1.0100,0.00
...,...,...,...,...,...,...,...,...,...,...,...
229,Oceania,Wallis and Futuna,226,WLF,Mata-Utu,...,9377.0,142.0,81.4930,0.9953,0.00
230,Africa,Western Sahara,172,ESH,El Aaiún,...,76371.0,266000.0,2.1654,1.0184,0.01
231,Asia,Yemen,46,YEM,Sanaa,...,6843607.0,527968.0,63.8232,1.0217,0.42
232,Africa,Zambia,63,ZMB,Lusaka,...,4281671.0,752612.0,26.5976,1.0280,0.25


In [15]:
# rename returns a new DataFrame with 'Country' relabelled as 'Name'; the result
# is only displayed here. Without inplace=True or re-assignment, df keeps 'Country'.
df.rename(columns={'Country': 'Name'})

Unnamed: 0_level_0,Rank,CCA3,Name,Capital,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Asia,36,AFG,Afghanistan,Kabul,41128771.0,38972230.0,33753499.0,28189672.0,19542982.0,10694796.0,12486631.0,10752971.0,652230.0,63.0587,1.0257,0.52
Europe,138,ALB,Albania,Tirana,2842321.0,2866849.0,2882481.0,2913399.0,3182021.0,3295066.0,2941651.0,2324731.0,28748.0,98.8702,0.9957,0.04
Africa,34,DZA,Algeria,Algiers,44903225.0,43451666.0,39543154.0,35856344.0,30774621.0,25518074.0,18739378.0,13795915.0,2381741.0,18.8531,1.0164,0.56
Oceania,213,ASM,American Samoa,Pago Pago,44273.0,46189.0,51368.0,54849.0,58230.0,47818.0,32886.0,27075.0,199.0,222.4774,0.9831,0.00
Europe,203,AND,Andorra,Andorra la Vella,79824.0,77700.0,71746.0,71519.0,66097.0,53569.0,35611.0,19860.0,468.0,170.5641,1.0100,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Oceania,226,WLF,Wallis and Futuna,Mata-Utu,11572.0,11655.0,12182.0,13142.0,14723.0,13454.0,11315.0,9377.0,142.0,81.4930,0.9953,0.00
Africa,172,ESH,Western Sahara,El Aaiún,575986.0,556048.0,491824.0,413296.0,270375.0,178529.0,116775.0,76371.0,266000.0,2.1654,1.0184,0.01
Asia,46,YEM,Yemen,Sanaa,33696614.0,32284046.0,28516545.0,24743946.0,18628700.0,13375121.0,9204938.0,6843607.0,527968.0,63.8232,1.0217,0.42
Africa,63,ZMB,Zambia,Lusaka,20017675.0,18927715.0,,13792086.0,9891136.0,7686401.0,5720438.0,4281671.0,752612.0,26.5976,1.0280,0.25


##### <i>Note</i>
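- iloc has no issue with multi-indexed or sorted DataFrames: it always selects by position in the DataFrame it is called on. sort_index() returns a new DataFrame and leaves df unchanged unless the result is assigned back (or inplace=True is passed), so df.iloc[[0]] still returns the first row in the original, unsorted order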

In [76]:
# iloc selects by position in df as it currently is. The output is the first row
# in file order (Asia / Afghanistan) - presumably because the earlier sort_index()
# results were never assigned back to df. NOTE(review): cell was run out of order; confirm.
df.iloc[[0]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Rank,CCA3,Capital,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Continent,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Asia,Afghanistan,36,AFG,Kabul,41128771.0,38972230.0,33753499.0,28189672.0,19542982.0,10694796.0,12486631.0,10752971.0,652230.0,63.0587,1.0257,0.52


##### Picking multiple rows and columns

In [6]:
# Reload the file without index_col, so the frame gets the default integer index,
# then select rows by position: the slice 2:8 is end-exclusive (positions 2 through 7).
df = pd.read_csv(r'resources\world_population.csv')
df.iloc[2:8]

Unnamed: 0,Rank,CCA3,Country,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
2,34,DZA,Algeria,Algiers,Africa,44903225.0,43451666.0,39543154.0,35856344.0,30774621.0,25518074.0,18739378.0,13795915.0,2381741.0,18.8531,1.0164,0.56
3,213,ASM,American Samoa,Pago Pago,Oceania,44273.0,46189.0,51368.0,54849.0,58230.0,47818.0,32886.0,27075.0,199.0,222.4774,0.9831,0.0
4,203,AND,Andorra,Andorra la Vella,Europe,79824.0,77700.0,71746.0,71519.0,66097.0,53569.0,35611.0,19860.0,468.0,170.5641,1.01,0.0
5,42,AGO,Angola,Luanda,Africa,35588987.0,33428485.0,28127721.0,23364185.0,16394062.0,11828638.0,8330047.0,6029700.0,1246700.0,28.5466,1.0315,0.45
6,224,AIA,Anguilla,The Valley,North America,15857.0,15585.0,14525.0,13172.0,11047.0,8316.0,6560.0,6283.0,91.0,174.2527,1.0066,0.0
7,201,ATG,Antigua and Barbuda,Saint John’s,North America,93763.0,92664.0,89941.0,85695.0,75055.0,63328.0,64888.0,64516.0,442.0,212.1335,1.0058,0.0


In [10]:
df[['Country', 'Capital', 'Growth Rate']]

Unnamed: 0,Country,Capital,Growth Rate
0,Afghanistan,Kabul,1.0257
1,Albania,Tirana,0.9957
2,Algeria,Algiers,1.0164
3,American Samoa,Pago Pago,0.9831
4,Andorra,Andorra la Vella,1.0100
...,...,...,...
229,Wallis and Futuna,Mata-Utu,0.9953
230,Western Sahara,El Aaiún,1.0184
231,Yemen,Sanaa,1.0217
232,Zambia,Lusaka,1.0280


##### Merging both approaches

In [13]:
# Same selection expressed the other way round: take rows at positions 2-7 first,
# then keep only the three columns of interest.
df.iloc[2:8][['Country', 'Capital', 'Growth Rate']]

Unnamed: 0,Country,Capital,Growth Rate
2,Algeria,Algiers,1.0164
3,American Samoa,Pago Pago,0.9831
4,Andorra,Andorra la Vella,1.01
5,Angola,Luanda,1.0315
6,Anguilla,The Valley,1.0066
7,Antigua and Barbuda,Saint John’s,1.0058


##### <i>To test iloc</i>

In [82]:
df = pd.read_csv(r'resources\world_population.csv')
df

Unnamed: 0,Rank,CCA3,Country,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,36,AFG,Afghanistan,Kabul,Asia,41128771.0,38972230.0,33753499.0,28189672.0,19542982.0,10694796.0,12486631.0,10752971.0,652230.0,63.0587,1.0257,0.52
1,138,ALB,Albania,Tirana,Europe,2842321.0,2866849.0,2882481.0,2913399.0,3182021.0,3295066.0,2941651.0,2324731.0,28748.0,98.8702,0.9957,0.04
2,34,DZA,Algeria,Algiers,Africa,44903225.0,43451666.0,39543154.0,35856344.0,30774621.0,25518074.0,18739378.0,13795915.0,2381741.0,18.8531,1.0164,0.56
3,213,ASM,American Samoa,Pago Pago,Oceania,44273.0,46189.0,51368.0,54849.0,58230.0,47818.0,32886.0,27075.0,199.0,222.4774,0.9831,0.00
4,203,AND,Andorra,Andorra la Vella,Europe,79824.0,77700.0,71746.0,71519.0,66097.0,53569.0,35611.0,19860.0,468.0,170.5641,1.0100,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229,226,WLF,Wallis and Futuna,Mata-Utu,Oceania,11572.0,11655.0,12182.0,13142.0,14723.0,13454.0,11315.0,9377.0,142.0,81.4930,0.9953,0.00
230,172,ESH,Western Sahara,El Aaiún,Africa,575986.0,556048.0,491824.0,413296.0,270375.0,178529.0,116775.0,76371.0,266000.0,2.1654,1.0184,0.01
231,46,YEM,Yemen,Sanaa,Asia,33696614.0,32284046.0,28516545.0,24743946.0,18628700.0,13375121.0,9204938.0,6843607.0,527968.0,63.8232,1.0217,0.42
232,63,ZMB,Zambia,Lusaka,Africa,20017675.0,18927715.0,,13792086.0,9891136.0,7686401.0,5720438.0,4281671.0,752612.0,26.5976,1.0280,0.25


In [84]:
df.iloc[[0]]

Unnamed: 0,Rank,CCA3,Country,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
0,36,AFG,Afghanistan,Kabul,Asia,41128771.0,38972230.0,33753499.0,28189672.0,19542982.0,10694796.0,12486631.0,10752971.0,652230.0,63.0587,1.0257,0.52


In [85]:
df.set_index('Country', inplace=True)
df

Unnamed: 0_level_0,Rank,CCA3,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Afghanistan,36,AFG,Kabul,Asia,41128771.0,38972230.0,33753499.0,28189672.0,19542982.0,10694796.0,12486631.0,10752971.0,652230.0,63.0587,1.0257,0.52
Albania,138,ALB,Tirana,Europe,2842321.0,2866849.0,2882481.0,2913399.0,3182021.0,3295066.0,2941651.0,2324731.0,28748.0,98.8702,0.9957,0.04
Algeria,34,DZA,Algiers,Africa,44903225.0,43451666.0,39543154.0,35856344.0,30774621.0,25518074.0,18739378.0,13795915.0,2381741.0,18.8531,1.0164,0.56
American Samoa,213,ASM,Pago Pago,Oceania,44273.0,46189.0,51368.0,54849.0,58230.0,47818.0,32886.0,27075.0,199.0,222.4774,0.9831,0.00
Andorra,203,AND,Andorra la Vella,Europe,79824.0,77700.0,71746.0,71519.0,66097.0,53569.0,35611.0,19860.0,468.0,170.5641,1.0100,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wallis and Futuna,226,WLF,Mata-Utu,Oceania,11572.0,11655.0,12182.0,13142.0,14723.0,13454.0,11315.0,9377.0,142.0,81.4930,0.9953,0.00
Western Sahara,172,ESH,El Aaiún,Africa,575986.0,556048.0,491824.0,413296.0,270375.0,178529.0,116775.0,76371.0,266000.0,2.1654,1.0184,0.01
Yemen,46,YEM,Sanaa,Asia,33696614.0,32284046.0,28516545.0,24743946.0,18628700.0,13375121.0,9204938.0,6843607.0,527968.0,63.8232,1.0217,0.42
Zambia,63,ZMB,Lusaka,Africa,20017675.0,18927715.0,,13792086.0,9891136.0,7686401.0,5720438.0,4281671.0,752612.0,26.5976,1.0280,0.25


In [86]:
# sort_index returns a new, sorted DataFrame (the one displayed below). The result
# is not assigned and inplace is not set, so df itself stays in its original order.
df.sort_index(ascending=False)

Unnamed: 0_level_0,Rank,CCA3,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Zimbabwe,74,ZWE,Harare,Africa,16320537.0,15669666.0,14154937.0,12839771.0,11834676.0,10113893.0,7049926.0,5202918.0,390757.0,41.7665,1.0204,0.20
Zambia,63,ZMB,Lusaka,Africa,20017675.0,18927715.0,,13792086.0,9891136.0,7686401.0,5720438.0,4281671.0,752612.0,26.5976,1.0280,0.25
Yemen,46,YEM,Sanaa,Asia,33696614.0,32284046.0,28516545.0,24743946.0,18628700.0,13375121.0,9204938.0,6843607.0,527968.0,63.8232,1.0217,0.42
Western Sahara,172,ESH,El Aaiún,Africa,575986.0,556048.0,491824.0,413296.0,270375.0,178529.0,116775.0,76371.0,266000.0,2.1654,1.0184,0.01
Wallis and Futuna,226,WLF,Mata-Utu,Oceania,11572.0,11655.0,12182.0,13142.0,14723.0,13454.0,11315.0,9377.0,142.0,81.4930,0.9953,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Andorra,203,AND,Andorra la Vella,Europe,79824.0,77700.0,71746.0,71519.0,66097.0,53569.0,35611.0,19860.0,468.0,170.5641,1.0100,0.00
American Samoa,213,ASM,Pago Pago,Oceania,44273.0,46189.0,51368.0,54849.0,58230.0,47818.0,32886.0,27075.0,199.0,222.4774,0.9831,0.00
Algeria,34,DZA,Algiers,Africa,44903225.0,43451666.0,39543154.0,35856344.0,30774621.0,25518074.0,18739378.0,13795915.0,2381741.0,18.8531,1.0164,0.56
Albania,138,ALB,Tirana,Europe,2842321.0,2866849.0,2882481.0,2913399.0,3182021.0,3295066.0,2941651.0,2324731.0,28748.0,98.8702,0.9957,0.04


In [88]:
# Position 0 of df, which is still unsorted because the sort above was never
# assigned back - hence Afghanistan rather than Zimbabwe.
df.iloc[[0]]

Unnamed: 0_level_0,Rank,CCA3,Capital,Continent,2022 Population,2020 Population,2015 Population,2010 Population,2000 Population,1990 Population,1980 Population,1970 Population,Area (km²),Density (per km²),Growth Rate,World Population Percentage
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Afghanistan,36,AFG,Kabul,Asia,41128771.0,38972230.0,33753499.0,28189672.0,19542982.0,10694796.0,12486631.0,10752971.0,652230.0,63.0587,1.0257,0.52


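##### <i>Note</i>
- iloc always selects by position in the DataFrame it is called on; it does not remember an "initial" index
- sort_index() returned a new, sorted DataFrame and left df unchanged (inplace is False by default and the result was not assigned), so df.iloc[[0]] is simply the first row of the still-unsorted df
- to get the first row of the sorted data, use df.sort_index(ascending=False).iloc[[0]] or assign the sorted result back to df first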