In [82]:
import numpy as np
import pandas as pd

### 1- Use the Country column (index 0) as the DataFrame index

In [30]:
# Read the CSV, then promote the 'Country' column to the row index.
countries = pd.read_csv('countries.csv').set_index('Country')

In [31]:
# Map the verbose CSV headers to the short column names used in the rest of
# the notebook. The former 'Original Name' -> 'New Name' entry was a template
# placeholder (no such column shows up in the displayed frames) and has been
# dropped; rename() ignores unknown keys by default, so it never had an effect.
name_dict = {
    'Area (sq. mi.)': 'Area',
    'Pop. Density (per sq. mi.)': 'Pop Density',
    'Coastline (coast/area ratio)': 'Coastline',
    'Net migration': 'Migration',
    'Infant mortality (per 1000 births)': 'Infant Mortality',
    'GDP ($ per capita)': 'GDP Capita',
    'Literacy (%)': 'Literacy',
    'Phones (per 1000)': 'Phones',
    'Arable (%)': 'Arable',
    'Crops (%)': 'Crops',
    'Other (%)': 'Other',
}

# Rebind the name instead of mutating with inplace=True: same resulting frame,
# but the cell reads as a plain transformation and can be chained.
countries = countries.rename(columns=name_dict)

In [32]:
# Columns that are converted to float below; their text values use ',' as the
# decimal separator, which is swapped for '.' before the cast.
cols_to_change = [
    'Pop Density',
    'Coastline',
    'Migration',
    'Infant Mortality',
    'Literacy',
    'Phones',
    'Arable',
    'Crops',
    'Other',
    'Climate',
    'Birthrate',
    'Deathrate',
    'Agriculture',
    'Industry',
    'Service'
]

for col in cols_to_change:
    values = countries[col]
    # The .str accessor only exists on text columns. Skipping the replace for
    # columns that are already numeric makes this cell safe to re-run (the
    # second run used to fail with AttributeError on the now-float columns).
    if not pd.api.types.is_numeric_dtype(values):
        # regex=False: ',' is a literal character, not a pattern.
        values = values.str.replace(',', '.', regex=False)
    countries[col] = values.astype(float)

### 2- The Birthrate is smaller than the Deathrate

In [97]:
# Keep the rows whose Birthrate is strictly below their Deathrate.
birth_vs_death = countries.loc[countries['Birthrate'].lt(countries['Deathrate'])]

### 3- The Literacy is 100%

In [98]:
# Keep the rows whose Literacy value is exactly 100.
literacy_100 = countries.loc[countries['Literacy'].eq(100)]

### 4- The GDP Capita is larger than the average value of the GDP Capita column

In [99]:
# Keep the rows whose GDP Capita exceeds the column mean.
above_average_gdp = countries.loc[
    countries['GDP Capita'].gt(countries['GDP Capita'].mean())
]

### 5- The Agriculture value is larger than the Industry value

In [96]:
# Keep the rows where the Agriculture value is larger than the Industry value.
agriculture_vs_industry = countries.loc[
    countries['Agriculture'].gt(countries['Industry'])
]

### 6- The Pop Density value is larger than or equal to 10,000

In [37]:
# Keep the rows with a Pop Density of at least 10,000, then display them.
high_density = countries.loc[countries['Pop Density'].ge(10000)]
high_density

Unnamed: 0_level_0,Region,Population,Area,Pop Density,Coastline,Migration,Infant Mortality,GDP Capita,Literacy,Phones,Arable,Crops,Other,Climate,Birthrate,Deathrate,Agriculture,Industry,Service
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Macau,ASIA (EX. NEAR EAST),453125,28,16183.0,146.43,4.86,4.39,19400.0,94.5,384.9,0.0,0.0,100.0,2.0,8.48,4.47,0.001,0.072,0.927
Monaco,WESTERN EUROPE,32543,2,16271.5,205.0,7.75,5.43,27000.0,99.0,1035.6,0.0,0.0,100.0,,9.19,12.91,0.17,,


### 7- Select the country with the lowest population density and the highest population density

In [38]:
# idxmin()/idxmax() return the index label (the country name) of the first
# occurrence of the extreme value directly, so there is no need to build a
# boolean mask and a filtered copy of the frame just to read .index[0].
lowest_density = countries['Pop Density'].idxmin()
highest_density = countries['Pop Density'].idxmax()
print(lowest_density)
print(highest_density)

Greenland 
Monaco 


### 8- Count how many countries have a population density lower than Portugal

In [77]:
# The lookup label is 'Portugal ' (with a trailing space), matching the way the
# index labels are stored in this frame.
countries_lower_pt = countries.loc[
    countries['Pop Density'].lt(countries.loc['Portugal ', 'Pop Density'])
]
# Number of rows that passed the filter.
lower_density_pt_count = len(countries_lower_pt)

### 8- The Population is greater than or equal to $10^8$ and the Area is smaller than or equal to $10^6$

In [46]:
# Population of at least 10**8 combined with an Area of at most 10**6.
my_selection = countries.loc[
    countries['Population'].ge(10**8) & countries['Area'].le(10**6)
]
my_selection

Unnamed: 0_level_0,Region,Population,Area,Pop Density,Coastline,Migration,Infant Mortality,GDP Capita,Literacy,Phones,Arable,Crops,Other,Climate,Birthrate,Deathrate,Agriculture,Industry,Service
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Bangladesh,ASIA (EX. NEAR EAST),147365352,144000,1023.4,0.4,-0.71,62.6,1900.0,43.1,7.3,62.11,3.07,34.82,2.0,29.8,8.27,0.199,0.198,0.603
Japan,ASIA (EX. NEAR EAST),127463611,377835,337.4,7.87,0.0,3.26,28200.0,99.0,461.2,12.19,0.96,86.85,3.0,9.37,9.16,0.017,0.258,0.725
Nigeria,SUB-SAHARAN AFRICA,131859731,923768,142.7,0.09,0.26,98.8,900.0,68.0,9.3,31.29,2.96,65.75,1.5,40.43,16.94,0.269,0.487,0.244
Pakistan,ASIA (EX. NEAR EAST),165803560,803940,206.2,0.13,-2.77,72.44,2100.0,45.7,31.8,27.87,0.87,71.26,1.0,29.74,8.23,0.216,0.251,0.533


### 9- The Infant Mortality is smaller than 4 or the Birthrate is bigger than 45

In [100]:
# Either condition is enough: Infant Mortality below 4, or Birthrate above 45.
my_selection = countries.loc[
    countries['Infant Mortality'].lt(4) | countries['Birthrate'].gt(45)
]

### 10- Either the Coastline is equal to 0 or the Migration is equal to 0, but not both (exactly one of the two conditions holds)

In [92]:
# Exclusive or: exactly one of the two columns is equal to 0.
my_selection = countries.loc[
    countries['Coastline'].eq(0) ^ countries['Migration'].eq(0)
]

### 11- The Region is equal to OCEANIA or equal to BALTICS and The Coastline is smaller than or equal to 1

In [76]:
# using Series.isin()
# Region is stripped before matching. The Country labels in this frame carry
# trailing whitespace ('Portugal ', 'Singapore '); if Region values are padded
# the same way (the rendered tables cannot show it -- TODO confirm), an exact
# match would silently select nothing. Stripping is a no-op on clean values.
my_selection = countries.loc[
    countries['Region'].str.strip().isin(['OCEANIA', 'BALTICS'])
    & (countries['Coastline'] <= 1),
    ['Population', 'Area'],
]

# Alternative answer, kept as real comments: the previous triple-quoted string
# was an expression, and Jupyter evaluates a cell's last expression as output.
#
# region = countries['Region'].str.strip()
# my_selection = countries.loc[
#     ((region == 'OCEANIA') | (region == 'BALTICS'))
#     & (countries['Coastline'] <= 1), ['Population', 'Area']]

### 12- The Region is not equal to WESTERN EUROPE and not equal to EASTERN EUROPE and The Arable is greater than 50

In [78]:
# Region is stripped before the membership test. The Country labels in this
# frame carry trailing whitespace ('Portugal ', 'Singapore '); if Region values
# are padded too (TODO confirm against the raw CSV), the exact match would
# never hit and -- because the test is negated -- European rows would be kept.
# Stripping is a no-op on clean values.
my_selection = countries.loc[
    ~countries['Region'].str.strip().isin(['WESTERN EUROPE', 'EASTERN EUROPE'])
    & (countries['Arable'] > 50),
    ['Coastline', 'Crops', 'Climate'],
]

### 13- Select all columns  that have missing values

In [80]:
# Column-wise mask: True for every column holding at least one missing value.
cols_with_nan = countries.loc[:, countries.isna().any()]

### 14- Select all numeric columns

In [84]:
# 'number' is the string alias pandas accepts for np.number.
numeric_cols = countries.select_dtypes(include='number')

### 15- Select all integer columns 

In [85]:
# Keep only the columns with an integer dtype.
integer_cols = countries.select_dtypes(include='int')

### 16- Select all floating-point columns

In [86]:
# Keep only the columns with a floating-point dtype.
float_cols = countries.select_dtypes(include='float')

### 17- Select all numeric columns from the countries DataFrame whose sum is at most 1,000

In [89]:
# Numeric columns first, then keep those whose column total is at most 1000.
numeric_cols = countries.select_dtypes(include='number')
numeric_cols_leq_1000 = numeric_cols.loc[:, numeric_cols.sum().le(1000)]

### 18- Select all numeric columns that contain negative values

In [90]:
# Numeric columns first, then keep those with at least one value below zero.
numeric_cols = countries.select_dtypes(include='number')
numeric_cols_negative = numeric_cols.loc[:, numeric_cols.lt(0).any()]

### 19- Select all numeric columns whose values all lie between 0 and 100 (both inclusive)

In [91]:
# A column qualifies when its smallest value is >= 0 and its largest is <= 100.
numeric_cols = countries.select_dtypes(include='number')
cols_0_to_100 = numeric_cols.loc[
    :, numeric_cols.min().ge(0) & numeric_cols.max().le(100)
]

### 20- Count how many countries have a bigger coastline than Singapore

In [95]:
# The lookup label is 'Singapore ' (with a trailing space), matching the way
# the index labels are stored in this frame.
countries_bigger_coastline_singapore = countries.loc[
    countries['Coastline'].gt(countries.loc['Singapore ', 'Coastline'])
]
# Number of rows that passed the filter.
bigger_coastline_singapore = len(countries_bigger_coastline_singapore)