In [2]:
import pandas as pd
import numpy as np

In [6]:
df = pd.read_csv("https://raw.githubusercontent.com/edlich/eternalrepo/master/DS-WAHLFACH/countries.csv")

In [7]:
# Display some basic information as rows, columns and some basic statistical info.
df

Unnamed: 0,Name,People,Area,BIP,Currency
0,Germany,82521653,357385,3466,EUR
1,Japan,126045000,377835,4938,YEN
2,Canada,36503097,9984670,1529,CAD
3,Italy,60501718,301338,1850,EUR
4,Brazilia,208360000,8515770,1798,REAL


In [8]:
df.describe()

Unnamed: 0,People,Area,BIP
count,5.0,5.0,5.0
mean,102786300.0,3907400.0,2716.2
std,67599700.0,4904957.0,1457.86083
min,36503100.0,301338.0,1529.0
25%,60501720.0,357385.0,1798.0
50%,82521650.0,377835.0,1850.0
75%,126045000.0,8515770.0,3466.0
max,208360000.0,9984670.0,4938.0


In [11]:
# Show the last 4 rows of the data frame.
df[-4:]

Unnamed: 0,Name,People,Area,BIP,Currency
1,Japan,126045000,377835,4938,YEN
2,Canada,36503097,9984670,1529,CAD
3,Italy,60501718,301338,1850,EUR
4,Brazilia,208360000,8515770,1798,REAL


In [15]:
# Show all the row of countries who have the EURO
df[df["Currency"] == "EUR"]

Unnamed: 0,Name,People,Area,BIP,Currency
0,Germany,82521653,357385,3466,EUR
3,Italy,60501718,301338,1850,EUR


In [16]:
# Show only name and Currency in a new data frame
new_df = df[["Name", "Currency"]]
new_df

Unnamed: 0,Name,Currency
0,Germany,EUR
1,Japan,YEN
2,Canada,CAD
3,Italy,EUR
4,Brazilia,REAL


In [17]:
# Show only the rows/countries that have more than 2000 BIP (it is in Milliarden USD Bruttoinlandsprodukt)
df[df["BIP"] > 2000]

Unnamed: 0,Name,People,Area,BIP,Currency
0,Germany,82521653,357385,3466,EUR
1,Japan,126045000,377835,4938,YEN


In [24]:
# Select all countries where with inhabitants between 50 and 150 Mio
df[df["People"].between(50000000, 1500000000)]

Unnamed: 0,Name,People,Area,BIP,Currency
0,Germany,82521653,357385,3466,EUR
1,Japan,126045000,377835,4938,YEN
3,Italy,60501718,301338,1850,EUR
4,Brazilia,208360000,8515770,1798,REAL


In [26]:
# Change BIP to Bip
df = df.rename(columns={"BIP": "Bip"})
df

Unnamed: 0,Name,People,Area,Bip,Currency
0,Germany,82521653,357385,3466,EUR
1,Japan,126045000,377835,4938,YEN
2,Canada,36503097,9984670,1529,CAD
3,Italy,60501718,301338,1850,EUR
4,Brazilia,208360000,8515770,1798,REAL


In [29]:
# Calculate the Bip sum
df["Bip"].sum()

13581

In [32]:
# Calculate the average people of all countries
df["People"].mean()

102786293.6

In [33]:
# Sort by name alphabetically
df.sort_values("Name")

Unnamed: 0,Name,People,Area,Bip,Currency
4,Brazilia,208360000,8515770,1798,REAL
2,Canada,36503097,9984670,1529,CAD
0,Germany,82521653,357385,3466,EUR
3,Italy,60501718,301338,1850,EUR
1,Japan,126045000,377835,4938,YEN


In [42]:
# Create a new data frame from the original where the area is changed as follows: 
# all countries with > 1000000 get BIG and <= 1000000 get SMALL in the cell replaced!
new_df = df.copy()

def transform_country(value):
    if value > 1000000:
        return "BIG"
    else:
        return "SMALL"
    
new_df.Area = new_df.Area.apply(transform_country)

new_df

Unnamed: 0,Name,People,Area,Bip,Currency
0,Germany,82521653,SMALL,3466,EUR
1,Japan,126045000,SMALL,4938,YEN
2,Canada,36503097,BIG,1529,CAD
3,Italy,60501718,SMALL,1850,EUR
4,Brazilia,208360000,BIG,1798,REAL
