## Pandas

Python Advanced Data Analysis Toolkit (Pandas)

Hauptwerkzeug zur Verarbeitung von Daten

In [1]:
import numpy as np
import pandas as pd

### Serie

Ein Numpy Array mit zusätzlichen Eigenschaften

- Benannter Index
- Name
- ...

In [2]:
pd.Series([8.6, 83.7, 0.38, 9])  # Die Zahlen sagen alleine nichts aus

0     8.60
1    83.70
2     0.38
3     9.00
dtype: float64

#### Name

In [3]:
e = pd.Series([8.6, 83.7, 0.38, 9])

In [4]:
e.name = "Einwohnerzahlen"

In [5]:
e

0     8.60
1    83.70
2     0.38
3     9.00
Name: Einwohnerzahlen, dtype: float64

#### Index

In [6]:
e.index = ["CH", "DE", "LI", "AT"]

In [7]:
e

CH     8.60
DE    83.70
LI     0.38
AT     9.00
Name: Einwohnerzahlen, dtype: float64

In [8]:
e[0]  # Warnung

  e[0]  # Warnung


np.float64(8.6)

In [9]:
e["CH"]

np.float64(8.6)

In [10]:
e["CH":"DE"]  # WICHTIG: Hier ist die Obergrenze NICHT exkludiert

CH     8.6
DE    83.7
Name: Einwohnerzahlen, dtype: float64

In [11]:
e.values  # Das unterliegende Numpy Array

array([ 8.6 , 83.7 ,  0.38,  9.  ])

In [12]:
e.index  # Index Spalte ohne die Werte

Index(['CH', 'DE', 'LI', 'AT'], dtype='object')

#### Vektorisierung

In [13]:
e > 5

CH     True
DE     True
LI    False
AT     True
Name: Einwohnerzahlen, dtype: bool

In [14]:
b = e > 5

In [15]:
e[b]

CH     8.6
DE    83.7
AT     9.0
Name: Einwohnerzahlen, dtype: float64

In [16]:
e.mean()

np.float64(25.419999999999998)

In [17]:
e[e < e.mean()]  # Alle Länder, die unterdurchschnittlich viele Einwohner haben

CH    8.60
LI    0.38
AT    9.00
Name: Einwohnerzahlen, dtype: float64

### DataFrame

Effektiv eine Tabelle

Für ein DataFrame wird generell eine Datenquelle verwendet

In [18]:
pd.DataFrame({"Spalte 1": [1, 2, 3], "Spalte 2": [4, 5, 6], "Spalte 3": [7, 8, 9]})

Unnamed: 0,Spalte 1,Spalte 2,Spalte 3
0,1,4,7
1,2,5,8
2,3,6,9


#### read_csv

Daten aus einer CSV-Quelle einlesen

Wird per Parameter konfiguriert

Beispiele:
- delimiter/sep
- thousands
- decimal
- parse_dates
- index_col
- ...

In [19]:
data = pd.read_csv("Data/PopulationData.csv", delimiter=";", thousands=",", decimal=".")

In [20]:
data

Unnamed: 0,#,Country (or dependency),Population(2020),YearlyChange,NetChange,Density(P/Km²),Land Area(Km²),Migrants(net),Fert.Rate,Med.Age,UrbanPop %,WorldShare
0,1,China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
1,2,India,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
2,3,United States,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
3,4,Indonesia,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
4,5,Pakistan,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...,...,...
230,231,Montserrat,4992,0.06 %,3,50,100,,N.A.,N.A.,10 %,0.00 %
231,232,Falkland Islands,3480,3.05 %,103,0,12170,,N.A.,N.A.,66 %,0.00 %
232,233,Niue,1626,0.68 %,11,6,260,,N.A.,N.A.,46 %,0.00 %
233,234,Tokelau,1357,1.27 %,17,136,10,,N.A.,N.A.,0 %,0.00 %


In [21]:
data.info()  # Verschafft einen Überblick über das Datenset

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 235 entries, 0 to 234
Data columns (total 12 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   #                        235 non-null    int64  
 1   Country (or dependency)  235 non-null    object 
 2   Population(2020)         235 non-null    int64  
 3   YearlyChange             235 non-null    object 
 4   NetChange                235 non-null    int64  
 5   Density(P/Km²)           235 non-null    int64  
 6   Land Area(Km²)           235 non-null    int64  
 7   Migrants(net)            201 non-null    float64
 8   Fert.Rate                235 non-null    object 
 9   Med.Age                  235 non-null    object 
 10  UrbanPop %               235 non-null    object 
 11  WorldShare               235 non-null    object 
dtypes: float64(1), int64(5), object(6)
memory usage: 22.2+ KB


#### Probleme mit dem Datenset

DataFrame:
- Schlechte Spaltennamen
- Doppelter Index

Daten selbst:
- Prozentzeichen
- Leere Felder
- N.A.
- Datentypen
- Tausendertrennzeichen/Dezimalzeichen

#### Index setzen

- set_index("Spaltenname")
- index_col = "Spalte" (bei read_csv)
- data.index = [...]

In [22]:
# Nimmt eine Spalte und bewegt diese zum Index
# WICHTIG: Alle Funktionen verändern NIEMALS das originale DataFrame
# Mithilfe von inplace=True kann die Änderung auf das original angewandt werden
data.set_index("Country (or dependency)", inplace=True)

In [23]:
data

Unnamed: 0_level_0,#,Population(2020),YearlyChange,NetChange,Density(P/Km²),Land Area(Km²),Migrants(net),Fert.Rate,Med.Age,UrbanPop %,WorldShare
Country (or dependency),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
China,1,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,2,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,3,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,4,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,5,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...,...
Montserrat,231,4992,0.06 %,3,50,100,,N.A.,N.A.,10 %,0.00 %
Falkland Islands,232,3480,3.05 %,103,0,12170,,N.A.,N.A.,66 %,0.00 %
Niue,233,1626,0.68 %,11,6,260,,N.A.,N.A.,46 %,0.00 %
Tokelau,234,1357,1.27 %,17,136,10,,N.A.,N.A.,0 %,0.00 %


In [24]:
data.index.name = "Country"

In [25]:
data

Unnamed: 0_level_0,#,Population(2020),YearlyChange,NetChange,Density(P/Km²),Land Area(Km²),Migrants(net),Fert.Rate,Med.Age,UrbanPop %,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
China,1,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,2,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,3,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,4,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,5,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...,...
Montserrat,231,4992,0.06 %,3,50,100,,N.A.,N.A.,10 %,0.00 %
Falkland Islands,232,3480,3.05 %,103,0,12170,,N.A.,N.A.,66 %,0.00 %
Niue,233,1626,0.68 %,11,6,260,,N.A.,N.A.,46 %,0.00 %
Tokelau,234,1357,1.27 %,17,136,10,,N.A.,N.A.,0 %,0.00 %


#### Spalten anpassen

Spalten entfernen: drop()

Spalten umbenennen: rename()

Beide dieser Funktionen besitzen auch den inplace Parameter

In [26]:
data.drop(columns=["#"], inplace=True)

In [27]:
data

Unnamed: 0_level_0,Population(2020),YearlyChange,NetChange,Density(P/Km²),Land Area(Km²),Migrants(net),Fert.Rate,Med.Age,UrbanPop %,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...
Montserrat,4992,0.06 %,3,50,100,,N.A.,N.A.,10 %,0.00 %
Falkland Islands,3480,3.05 %,103,0,12170,,N.A.,N.A.,66 %,0.00 %
Niue,1626,0.68 %,11,6,260,,N.A.,N.A.,46 %,0.00 %
Tokelau,1357,1.27 %,17,136,10,,N.A.,N.A.,0 %,0.00 %


In [28]:
# Die rename Funktion kann auch Zeilennamen ändern
# Dafür kann columns= weggelassen werden
data.rename(columns={
    "Population(2020)": "Pop",
    "Density(P/Km²)": "Density",
    "Land Area(Km²)": "LandArea",
    "Migrants(net)": "Migrants",
    "Fert.Rate": "FertRate",
    "Med.Age": "MedAge",
    "UrbanPop %": "UrbanPop"
}, inplace=True)

In [29]:
data

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,LandArea,Migrants,FertRate,MedAge,UrbanPop,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...
Montserrat,4992,0.06 %,3,50,100,,N.A.,N.A.,10 %,0.00 %
Falkland Islands,3480,3.05 %,103,0,12170,,N.A.,N.A.,66 %,0.00 %
Niue,1626,0.68 %,11,6,260,,N.A.,N.A.,46 %,0.00 %
Tokelau,1357,1.27 %,17,136,10,,N.A.,N.A.,0 %,0.00 %


#### Daten speichern

In [30]:
data.to_csv("Data/PopulationDataFastFertig.csv")

#### Daten analysieren

Erkenntnisse aus den Daten ziehen / mit den Daten arbeiten

In [31]:
data.head(3)  # Die obersten X Datensätze

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,LandArea,Migrants,FertRate,MedAge,UrbanPop,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %


In [32]:
data.tail(3)  # Die untersten X Datensätze

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,LandArea,Migrants,FertRate,MedAge,UrbanPop,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Niue,1626,0.68 %,11,6,260,,N.A.,N.A.,46 %,0.00 %
Tokelau,1357,1.27 %,17,136,10,,N.A.,N.A.,0 %,0.00 %
Vatican State,801,0.25 %,2,2003,0,,N.A.,N.A.,N.A.,0.00 %


#### Sortieren

- sort_index()
- sort_values("Spaltenname")

Alle Sortierungen können mithilfe von ascending=True/False angegeben werden

In [33]:
data.sort_index()

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,LandArea,Migrants,FertRate,MedAge,UrbanPop,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Afghanistan,38928346,2.33 %,886592,60,652860,-62920.0,4.6,18,25 %,0.50 %
Albania,2877797,-0.11 %,-3120,105,27400,-14000.0,1.6,36,63 %,0.04 %
Algeria,43851044,1.85 %,797990,18,2381740,-10000.0,3.1,29,73 %,0.56 %
American Samoa,55191,-0.22 %,-121,276,200,,N.A.,N.A.,88 %,0.00 %
Andorra,77265,0.16 %,123,164,470,,N.A.,N.A.,88 %,0.00 %
...,...,...,...,...,...,...,...,...,...,...
Wallis & Futuna,11239,-1.69 %,-193,80,140,,N.A.,N.A.,0 %,0.00 %
Western Sahara,597339,2.55 %,14876,2,266000,5582.0,2.4,28,87 %,0.01 %
Yemen,29825964,2.28 %,664042,56,527970,-30000.0,3.8,20,38 %,0.38 %
Zambia,18383955,2.93 %,522925,25,743390,-8000.0,4.7,18,45 %,0.24 %


In [34]:
data.sort_index(ascending=False)

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,LandArea,Migrants,FertRate,MedAge,UrbanPop,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Zimbabwe,14862924,1.48 %,217456,38,386850,-116858.0,3.6,19,38 %,0.19 %
Zambia,18383955,2.93 %,522925,25,743390,-8000.0,4.7,18,45 %,0.24 %
Yemen,29825964,2.28 %,664042,56,527970,-30000.0,3.8,20,38 %,0.38 %
Western Sahara,597339,2.55 %,14876,2,266000,5582.0,2.4,28,87 %,0.01 %
Wallis & Futuna,11239,-1.69 %,-193,80,140,,N.A.,N.A.,0 %,0.00 %
...,...,...,...,...,...,...,...,...,...,...
Andorra,77265,0.16 %,123,164,470,,N.A.,N.A.,88 %,0.00 %
American Samoa,55191,-0.22 %,-121,276,200,,N.A.,N.A.,88 %,0.00 %
Algeria,43851044,1.85 %,797990,18,2381740,-10000.0,3.1,29,73 %,0.56 %
Albania,2877797,-0.11 %,-3120,105,27400,-14000.0,1.6,36,63 %,0.04 %


In [37]:
data.sort_values("Pop", ascending=False)

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,LandArea,Migrants,FertRate,MedAge,UrbanPop,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...
Montserrat,4992,0.06 %,3,50,100,,N.A.,N.A.,10 %,0.00 %
Falkland Islands,3480,3.05 %,103,0,12170,,N.A.,N.A.,66 %,0.00 %
Niue,1626,0.68 %,11,6,260,,N.A.,N.A.,46 %,0.00 %
Tokelau,1357,1.27 %,17,136,10,,N.A.,N.A.,0 %,0.00 %


In [42]:
data.sort_values(["MedAge", "Pop"], ascending=True).head(20)  # Subsequente Sortierung

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,LandArea,Migrants,FertRate,MedAge,UrbanPop,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Niger,24206644,3.84 %,895929,19,1266700,4000.0,7.0,15,17 %,0.31 %
Mali,20250833,3.02 %,592802,17,1220190,-40000.0,5.9,16,44 %,0.26 %
Burundi,11890784,3.12 %,360204,463,25680,2001.0,5.5,17,14 %,0.15 %
Somalia,15893222,2.92 %,450317,25,627340,-40000.0,6.1,17,47 %,0.20 %
Chad,16425864,3.00 %,478988,13,1259200,2000.0,5.8,17,23 %,0.21 %
Angola,32866272,3.27 %,1040977,26,1246700,6413.0,5.6,17,67 %,0.42 %
Uganda,45741007,3.32 %,1471413,229,199810,168694.0,5.0,17,26 %,0.59 %
DR Congo,89561403,3.19 %,2770836,40,2267050,23861.0,6.0,17,46 %,1.15 %
Gambia,2416668,2.94 %,68962,239,10120,-3087.0,5.3,18,59 %,0.03 %
Central African Republic,4829767,1.78 %,84582,8,622980,-40000.0,4.8,18,43 %,0.06 %


#### Daten verarbeiten

Drei Möglichkeiten:

- Index: Spalten
- loc: Zeilen
- iloc: Selbige Funktionalität wie loc, verwendet aber Zahlen

In [55]:
# Aufgabe: Alle Länder finden, mit mind. 100m Einwohner
data["Pop"]

Country
China               1439323776
India               1380004385
United States        331002651
Indonesia            273523615
Pakistan             220892340
                       ...    
Montserrat                4992
Falkland Islands          3480
Niue                      1626
Tokelau                   1357
Vatican State              801
Name: Pop, Length: 235, dtype: int64

In [56]:
data["Pop"] >= 100_000_000

Country
China                True
India                True
United States        True
Indonesia            True
Pakistan             True
                    ...  
Montserrat          False
Falkland Islands    False
Niue                False
Tokelau             False
Vatican State       False
Name: Pop, Length: 235, dtype: bool

In [57]:
x = data["Pop"] >= 100_000_000

In [59]:
data[x]  # Kurzschreibweise: data[data["Pop"] >= 100_000_000]

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,LandArea,Migrants,FertRate,MedAge,UrbanPop,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
Brazil,212559417,0.72 %,1509890,25,8358140,21200.0,1.7,33,88 %,2.73 %
Nigeria,206139589,2.58 %,5175990,226,910770,-60000.0,5.4,18,52 %,2.64 %
Bangladesh,164689383,1.01 %,1643222,1265,130170,-369501.0,2.1,28,39 %,2.11 %
Russia,145934462,0.04 %,62206,9,16376870,182456.0,1.8,40,74 %,1.87 %
Mexico,128932753,1.06 %,1357224,66,1943950,-60000.0,2.1,29,84 %,1.65 %


In [63]:
data["China":"Brazil"]  # Normaler Index wird bei einem Bereich auf Zeilen angewandt

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,LandArea,Migrants,FertRate,MedAge,UrbanPop,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
Brazil,212559417,0.72 %,1509890,25,8358140,21200.0,1.7,33,88 %,2.73 %


In [65]:
data.loc["China":"Brazil"]

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,LandArea,Migrants,FertRate,MedAge,UrbanPop,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
Brazil,212559417,0.72 %,1509890,25,8358140,21200.0,1.7,33,88 %,2.73 %


In [69]:
data.loc[:, "Pop":"NetChange"]  # : Alleine -> Alles

Unnamed: 0_level_0,Pop,YearlyChange,NetChange
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
China,1439323776,0.39 %,5540090
India,1380004385,0.99 %,13586631
United States,331002651,0.59 %,1937734
Indonesia,273523615,1.07 %,2898047
Pakistan,220892340,2.00 %,4327022
...,...,...,...
Montserrat,4992,0.06 %,3
Falkland Islands,3480,3.05 %,103
Niue,1626,0.68 %,11
Tokelau,1357,1.27 %,17


In [74]:
data.loc[:, ("Pop", "NetChange", "WorldShare")]  # Einzelne Spalten auswählen (ohne Doppelpunkt)

Unnamed: 0_level_0,Pop,NetChange,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
China,1439323776,5540090,18.47 %
India,1380004385,13586631,17.70 %
United States,331002651,1937734,4.25 %
Indonesia,273523615,2898047,3.51 %
Pakistan,220892340,4327022,2.83 %
...,...,...,...
Montserrat,4992,3,0.00 %
Falkland Islands,3480,103,0.00 %
Niue,1626,11,0.00 %
Tokelau,1357,17,0.00 %


In [82]:
data.loc[("China", "Germany"), :]  # Wähle China und Germany UND alle Spalten

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,LandArea,Migrants,FertRate,MedAge,UrbanPop,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
Germany,83783942,0.32 %,266897,240,348560,543822.0,1.6,46,76 %,1.07 %


#### Daten filtern

In [93]:
# Aufgabe: alle Länder finden, bei denen das MedAge 20 ist, und die Pop > 10m
x = data["MedAge"] == "20"

In [94]:
x

Country
China               False
India               False
United States       False
Indonesia           False
Pakistan            False
                    ...  
Montserrat          False
Falkland Islands    False
Niue                False
Tokelau             False
Vatican State       False
Name: MedAge, Length: 235, dtype: bool

In [96]:
y = data[x]

In [97]:
y

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,LandArea,Migrants,FertRate,MedAge,UrbanPop,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Kenya,53771296,2.28 %,1197323,94,569140,-10000.0,3.5,20,28 %,0.69 %
Sudan,43849260,2.42 %,1036022,25,1765048,-50000.0,4.4,20,35 %,0.56 %
Yemen,29825964,2.28 %,664042,56,527970,-30000.0,3.8,20,38 %,0.38 %
Madagascar,27691018,2.68 %,721711,48,581795,-1500.0,4.1,20,39 %,0.36 %
Rwanda,12952218,2.58 %,325268,525,24670,-9000.0,4.1,20,18 %,0.17 %
Mauritania,4649658,2.74 %,123962,5,1030700,5000.0,4.6,20,57 %,0.06 %
Comoros,869601,2.20 %,18715,467,1861,-2000.0,4.2,20,29 %,0.01 %
Solomon Islands,686884,2.55 %,17061,25,27990,-1600.0,4.4,20,23 %,0.01 %
Mayotte,272815,2.50 %,6665,728,375,0.0,3.7,20,46 %,0.00 %


In [98]:
z = y["Pop"] >= 10_000_000

In [99]:
z

Country
Kenya               True
Sudan               True
Yemen               True
Madagascar          True
Rwanda              True
Mauritania         False
Comoros            False
Solomon Islands    False
Mayotte            False
Name: Pop, dtype: bool

In [100]:
y[z]

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,LandArea,Migrants,FertRate,MedAge,UrbanPop,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Kenya,53771296,2.28 %,1197323,94,569140,-10000.0,3.5,20,28 %,0.69 %
Sudan,43849260,2.42 %,1036022,25,1765048,-50000.0,4.4,20,35 %,0.56 %
Yemen,29825964,2.28 %,664042,56,527970,-30000.0,3.8,20,38 %,0.38 %
Madagascar,27691018,2.68 %,721711,48,581795,-1500.0,4.1,20,39 %,0.36 %
Rwanda,12952218,2.58 %,325268,525,24670,-9000.0,4.1,20,18 %,0.17 %


In [103]:
data[(data["MedAge"] == "20") & (data["Pop"] >= 10_000_000)]  # Kurzform

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,LandArea,Migrants,FertRate,MedAge,UrbanPop,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Kenya,53771296,2.28 %,1197323,94,569140,-10000.0,3.5,20,28 %,0.69 %
Sudan,43849260,2.42 %,1036022,25,1765048,-50000.0,4.4,20,35 %,0.56 %
Yemen,29825964,2.28 %,664042,56,527970,-30000.0,3.8,20,38 %,0.38 %
Madagascar,27691018,2.68 %,721711,48,581795,-1500.0,4.1,20,39 %,0.36 %
Rwanda,12952218,2.58 %,325268,525,24670,-9000.0,4.1,20,18 %,0.17 %
