## Fehlerbehandlung mit Pandas

Datensätze haben oft Fehler, welche für die Analyse ausgebessert werden müssen.

z.B. Prozente, NaN, ...

In [1]:
import numpy as np
import pandas as pd

In [2]:
x = 5

In [3]:
pd.isnull(x)

False

In [4]:
pd.notnull(x)

True

In [5]:
data = pd.read_csv("Data/PopulationDataFastFertig.csv", index_col="Country")

In [6]:
data

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,Area,Migrants,Fert.Rate,Med.Age,UrbanPct,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...
Montserrat,4992,0.06 %,3,50,100,,N.A.,N.A.,10 %,0.00 %
Falkland Islands,3480,3.05 %,103,0,12170,,N.A.,N.A.,66 %,0.00 %
Niue,1626,0.68 %,11,6,260,,N.A.,N.A.,46 %,0.00 %
Tokelau,1357,1.27 %,17,136,10,,N.A.,N.A.,0 %,0.00 %


In [7]:
pd.isnull(data)

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,Area,Migrants,Fert.Rate,Med.Age,UrbanPct,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,False,False,False,False,False,False,False,False,False,False
India,False,False,False,False,False,False,False,False,False,False
United States,False,False,False,False,False,False,False,False,False,False
Indonesia,False,False,False,False,False,False,False,False,False,False
Pakistan,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...
Montserrat,False,False,False,False,False,True,False,False,False,False
Falkland Islands,False,False,False,False,False,True,False,False,False,False
Niue,False,False,False,False,False,True,False,False,False,False
Tokelau,False,False,False,False,False,True,False,False,False,False


### Automatisches Auffüllen von NaN

In [8]:
# Forward fill: every NaN takes the last valid value above it in its column.
# DataFrame.ffill() is the supported spelling; passing method="ffill" to
# fillna() is deprecated. Returns a new frame, `data` itself is not modified.
data.ffill()

  data.fillna(method="ffill")


Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,Area,Migrants,Fert.Rate,Med.Age,UrbanPct,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...
Montserrat,4992,0.06 %,3,50,100,0.0,N.A.,N.A.,10 %,0.00 %
Falkland Islands,3480,3.05 %,103,0,12170,0.0,N.A.,N.A.,66 %,0.00 %
Niue,1626,0.68 %,11,6,260,0.0,N.A.,N.A.,46 %,0.00 %
Tokelau,1357,1.27 %,17,136,10,0.0,N.A.,N.A.,0 %,0.00 %


In [9]:
# Backward fill: every NaN takes the next valid value below it in its column,
# so NaNs at the very end of a column stay NaN.
# DataFrame.bfill() is the supported spelling; passing method="bfill" to
# fillna() is deprecated. Returns a new frame, `data` itself is not modified.
data.bfill()

  data.fillna(method="bfill")


Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,Area,Migrants,Fert.Rate,Med.Age,UrbanPct,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...
Montserrat,4992,0.06 %,3,50,100,,N.A.,N.A.,10 %,0.00 %
Falkland Islands,3480,3.05 %,103,0,12170,,N.A.,N.A.,66 %,0.00 %
Niue,1626,0.68 %,11,6,260,,N.A.,N.A.,46 %,0.00 %
Tokelau,1357,1.27 %,17,136,10,,N.A.,N.A.,0 %,0.00 %


In [10]:
data.ffill(inplace=True)

In [11]:
data.bfill()

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,Area,Migrants,Fert.Rate,Med.Age,UrbanPct,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...
Montserrat,4992,0.06 %,3,50,100,0.0,N.A.,N.A.,10 %,0.00 %
Falkland Islands,3480,3.05 %,103,0,12170,0.0,N.A.,N.A.,66 %,0.00 %
Niue,1626,0.68 %,11,6,260,0.0,N.A.,N.A.,46 %,0.00 %
Tokelau,1357,1.27 %,17,136,10,0.0,N.A.,N.A.,0 %,0.00 %


In [12]:
# Count the missing values per column in a single vectorized pass.
# isnull().sum() also works for columns that are entirely null or entirely
# non-null, so no per-column lookup can fail.
missing_per_column = data.isnull().sum()
for col, n_missing in missing_per_column.items():
    print(f"{col}: {n_missing}")

Pop: 0
YearlyChange: 0
NetChange: 0
Density: 0
Area: 0
Migrants: 0
Fert.Rate: 0
Med.Age: 0
UrbanPct: 0
WorldShare: 0


### NaN manuell beheben

In [57]:
x = data["Migrants"][data["Migrants"].isnull()]  # NaN in einer gegebenen Spalte finden

In [58]:
data.loc[x.index, "Migrants"] = 0  # NaN in der Spalte austauschen

In [63]:
def fixNaN(col: str, newValue):
    """Replace every missing value (NaN/None) in one column of ``data``.

    Operates in place on the module-level ``data`` DataFrame.

    Args:
        col: Label of the column to clean.
        newValue: Value written into each row where ``data[col]`` is null.
    """
    # Select the rows with a boolean mask instead of by index label: when the
    # index contains duplicate labels, a label-based lookup would also
    # overwrite valid rows that merely share a label with a NaN row.
    missing = data[col].isnull()
    data.loc[missing, col] = newValue

### Weitere Schritte

- Prozente

- N.A.

- Datentypen

In [13]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 235 entries, China to Vatican State
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Pop           235 non-null    int64  
 1   YearlyChange  235 non-null    object 
 2   NetChange     235 non-null    int64  
 3   Density       235 non-null    int64  
 4   Area          235 non-null    int64  
 5   Migrants      235 non-null    float64
 6   Fert.Rate     235 non-null    object 
 7   Med.Age       235 non-null    object 
 8   UrbanPct      235 non-null    object 
 9   WorldShare    235 non-null    object 
dtypes: float64(1), int64(4), object(5)
memory usage: 20.2+ KB


#### N.A.

In [14]:
data["Fert.Rate"] == "N.A."  # Bei einer Spalte N.A. finden

Country
China               False
India               False
United States       False
Indonesia           False
Pakistan            False
                    ...  
Montserrat           True
Falkland Islands     True
Niue                 True
Tokelau              True
Vatican State        True
Name: Fert.Rate, Length: 235, dtype: bool

In [15]:
data[data["Fert.Rate"] == "N.A."]  # Datensätze mit N.A. filtern

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,Area,Migrants,Fert.Rate,Med.Age,UrbanPct,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Isle of Man,85033,0.53 %,449,149,570,0.0,N.A.,N.A.,53 %,0.00 %
Andorra,77265,0.16 %,123,164,470,0.0,N.A.,N.A.,88 %,0.00 %
Dominica,71986,0.25 %,178,96,750,0.0,N.A.,N.A.,74 %,0.00 %
Cayman Islands,65722,1.19 %,774,274,240,0.0,N.A.,N.A.,97 %,0.00 %
Bermuda,62278,-0.36 %,-228,1246,50,0.0,N.A.,N.A.,97 %,0.00 %
Marshall Islands,59190,0.68 %,399,329,180,0.0,N.A.,N.A.,70 %,0.00 %
Northern Mariana Islands,57559,0.60 %,343,125,460,0.0,N.A.,N.A.,88 %,0.00 %
Greenland,56770,0.17 %,98,0,410450,0.0,N.A.,N.A.,87 %,0.00 %
American Samoa,55191,-0.22 %,-121,276,200,0.0,N.A.,N.A.,88 %,0.00 %
Saint Kitts & Nevis,53199,0.71 %,376,205,260,0.0,N.A.,N.A.,33 %,0.00 %


In [16]:
data[data["Fert.Rate"] == "N.A."]["Fert.Rate"]

Country
Isle of Man                 N.A.
Andorra                     N.A.
Dominica                    N.A.
Cayman Islands              N.A.
Bermuda                     N.A.
Marshall Islands            N.A.
Northern Mariana Islands    N.A.
Greenland                   N.A.
American Samoa              N.A.
Saint Kitts & Nevis         N.A.
Faeroe Islands              N.A.
Sint Maarten                N.A.
Monaco                      N.A.
Turks and Caicos            N.A.
Saint Martin                N.A.
Liechtenstein               N.A.
San Marino                  N.A.
Gibraltar                   N.A.
British Virgin Islands      N.A.
Caribbean Netherlands       N.A.
Palau                       N.A.
Cook Islands                N.A.
Anguilla                    N.A.
Tuvalu                      N.A.
Wallis & Futuna             N.A.
Nauru                       N.A.
Saint Barthelemy            N.A.
Saint Helena                N.A.
Saint Pierre & Miquelon     N.A.
Montserrat                  N.A.
Fa

In [17]:
x = data[data["Fert.Rate"] == "N.A."]["Fert.Rate"]

In [18]:
data.loc[x.index, "Fert.Rate"] = 0

In [19]:
data

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,Area,Migrants,Fert.Rate,Med.Age,UrbanPct,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...
Montserrat,4992,0.06 %,3,50,100,0.0,0,N.A.,10 %,0.00 %
Falkland Islands,3480,3.05 %,103,0,12170,0.0,0,N.A.,66 %,0.00 %
Niue,1626,0.68 %,11,6,260,0.0,0,N.A.,46 %,0.00 %
Tokelau,1357,1.27 %,17,136,10,0.0,0,N.A.,0 %,0.00 %


In [20]:
def fixNA(col: str, newValue):
    """Replace the placeholder string "N.A." in one column of ``data``.

    Operates in place on the module-level ``data`` DataFrame. Columns that
    contain no "N.A." entry are left unchanged.

    Args:
        col: Label of the column to clean.
        newValue: Value written into each row whose entry equals "N.A.".
    """
    # Find the faulty rows with a boolean mask instead of by index label:
    # with duplicate index labels, a label-based lookup would also overwrite
    # valid rows that merely share a label with an "N.A." row.
    is_placeholder = data[col] == "N.A."
    # Write newValue into every faulty row of this column.
    data.loc[is_placeholder, col] = newValue

In [21]:
fixNA("Med.Age", 0)

In [22]:
data

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,Area,Migrants,Fert.Rate,Med.Age,UrbanPct,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...
Montserrat,4992,0.06 %,3,50,100,0.0,0,0,10 %,0.00 %
Falkland Islands,3480,3.05 %,103,0,12170,0.0,0,0,66 %,0.00 %
Niue,1626,0.68 %,11,6,260,0.0,0,0,46 %,0.00 %
Tokelau,1357,1.27 %,17,136,10,0.0,0,0,0 %,0.00 %


In [23]:
for col in data.columns:
    fixNA(col, 0)

In [24]:
data

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,Area,Migrants,Fert.Rate,Med.Age,UrbanPct,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39 %,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1380004385,0.99 %,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,331002651,0.59 %,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,273523615,1.07 %,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,220892340,2.00 %,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...
Montserrat,4992,0.06 %,3,50,100,0.0,0,0,10 %,0.00 %
Falkland Islands,3480,3.05 %,103,0,12170,0.0,0,0,66 %,0.00 %
Niue,1626,0.68 %,11,6,260,0.0,0,0,46 %,0.00 %
Tokelau,1357,1.27 %,17,136,10,0.0,0,0,0 %,0.00 %


#### Prozente

In [25]:
data["YearlyChange"].replace(" %", "", regex=True)

Country
China               0.39
India               0.99
United States       0.59
Indonesia           1.07
Pakistan            2.00
                    ... 
Montserrat          0.06
Falkland Islands    3.05
Niue                0.68
Tokelau             1.27
Vatican State       0.25
Name: YearlyChange, Length: 235, dtype: object

In [26]:
data["YearlyChange"] = data["YearlyChange"].replace(" %", "", regex=True)

In [27]:
data

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,Area,Migrants,Fert.Rate,Med.Age,UrbanPct,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39,5540090,153,9388211,-348399.0,1.7,38,61 %,18.47 %
India,1380004385,0.99,13586631,464,2973190,-532687.0,2.2,28,35 %,17.70 %
United States,331002651,0.59,1937734,36,9147420,954806.0,1.8,38,83 %,4.25 %
Indonesia,273523615,1.07,2898047,151,1811570,-98955.0,2.3,30,56 %,3.51 %
Pakistan,220892340,2.00,4327022,287,770880,-233379.0,3.6,23,35 %,2.83 %
...,...,...,...,...,...,...,...,...,...,...
Montserrat,4992,0.06,3,50,100,0.0,0,0,10 %,0.00 %
Falkland Islands,3480,3.05,103,0,12170,0.0,0,0,66 %,0.00 %
Niue,1626,0.68,11,6,260,0.0,0,0,46 %,0.00 %
Tokelau,1357,1.27,17,136,10,0.0,0,0,0 %,0.00 %


In [28]:
def fixPct(col: str):
    """Remove every occurrence of " %" from the values of one column of ``data``.

    The column of the module-level ``data`` DataFrame is overwritten with the
    result of a regex replace of " %" by the empty string. The values are not
    converted to a numeric dtype here.

    Args:
        col: Label of the column to clean.
    """
    without_pct = data[col].replace(to_replace=" %", value="", regex=True)
    data[col] = without_pct

In [29]:
for col in data.columns:
    fixPct(col)

In [30]:
data

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,Area,Migrants,Fert.Rate,Med.Age,UrbanPct,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39,5540090,153,9388211,-348399.0,1.7,38,61,18.47
India,1380004385,0.99,13586631,464,2973190,-532687.0,2.2,28,35,17.70
United States,331002651,0.59,1937734,36,9147420,954806.0,1.8,38,83,4.25
Indonesia,273523615,1.07,2898047,151,1811570,-98955.0,2.3,30,56,3.51
Pakistan,220892340,2.00,4327022,287,770880,-233379.0,3.6,23,35,2.83
...,...,...,...,...,...,...,...,...,...,...
Montserrat,4992,0.06,3,50,100,0.0,0,0,10,0.00
Falkland Islands,3480,3.05,103,0,12170,0.0,0,0,66,0.00
Niue,1626,0.68,11,6,260,0.0,0,0,46,0.00
Tokelau,1357,1.27,17,136,10,0.0,0,0,0,0.00


#### Datentypen

In [31]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 235 entries, China to Vatican State
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Pop           235 non-null    int64  
 1   YearlyChange  235 non-null    object 
 2   NetChange     235 non-null    int64  
 3   Density       235 non-null    int64  
 4   Area          235 non-null    int64  
 5   Migrants      235 non-null    float64
 6   Fert.Rate     235 non-null    object 
 7   Med.Age       235 non-null    object 
 8   UrbanPct      235 non-null    object 
 9   WorldShare    235 non-null    object 
dtypes: float64(1), int64(4), object(5)
memory usage: 28.3+ KB


In [32]:
data = data.convert_dtypes()

In [33]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 235 entries, China to Vatican State
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Pop           235 non-null    Int64 
 1   YearlyChange  235 non-null    string
 2   NetChange     235 non-null    Int64 
 3   Density       235 non-null    Int64 
 4   Area          235 non-null    Int64 
 5   Migrants      235 non-null    Int64 
 6   Fert.Rate     235 non-null    object
 7   Med.Age       235 non-null    object
 8   UrbanPct      235 non-null    object
 9   WorldShare    235 non-null    string
dtypes: Int64(5), object(3), string(2)
memory usage: 29.4+ KB


In [34]:
data["YearlyChange"] = data["YearlyChange"].astype(np.float32)

In [35]:
data

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,Area,Migrants,Fert.Rate,Med.Age,UrbanPct,WorldShare
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,1439323776,0.39,5540090,153,9388211,-348399,1.7,38,61,18.47
India,1380004385,0.99,13586631,464,2973190,-532687,2.2,28,35,17.70
United States,331002651,0.59,1937734,36,9147420,954806,1.8,38,83,4.25
Indonesia,273523615,1.07,2898047,151,1811570,-98955,2.3,30,56,3.51
Pakistan,220892340,2.00,4327022,287,770880,-233379,3.6,23,35,2.83
...,...,...,...,...,...,...,...,...,...,...
Montserrat,4992,0.06,3,50,100,0,0,0,10,0.00
Falkland Islands,3480,3.05,103,0,12170,0,0,0,66,0.00
Niue,1626,0.68,11,6,260,0,0,0,46,0.00
Tokelau,1357,1.27,17,136,10,0,0,0,0,0.00


In [36]:
def fixType(col: str, newType):
    """Cast one column of ``data`` to a new dtype.

    The column of the module-level ``data`` DataFrame is overwritten with the
    result of ``astype(newType)``.

    Args:
        col: Label of the column to convert.
        newType: Target dtype, passed unchanged to ``Series.astype``.
    """
    converted = data[col].astype(newType)
    data[col] = converted

In [37]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 235 entries, China to Vatican State
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Pop           235 non-null    Int64  
 1   YearlyChange  235 non-null    float32
 2   NetChange     235 non-null    Int64  
 3   Density       235 non-null    Int64  
 4   Area          235 non-null    Int64  
 5   Migrants      235 non-null    Int64  
 6   Fert.Rate     235 non-null    object 
 7   Med.Age       235 non-null    object 
 8   UrbanPct      235 non-null    object 
 9   WorldShare    235 non-null    string 
dtypes: Int64(5), float32(1), object(3), string(1)
memory usage: 28.5+ KB


In [38]:
fixType("Pop", np.int32)

In [39]:
fixType("NetChange", np.int32)

In [40]:
fixType("Density", np.int16)

In [44]:
fixType("Area", np.int32)

In [45]:
fixType("Migrants", np.int32)

In [46]:
fixType("Fert.Rate", np.float32)

In [47]:
fixType("Med.Age", np.int8)

In [48]:
fixType("UrbanPct", np.float32)

In [49]:
fixType("WorldShare", np.float32)

In [50]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 235 entries, China to Vatican State
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Pop           235 non-null    int32  
 1   YearlyChange  235 non-null    float32
 2   NetChange     235 non-null    int32  
 3   Density       235 non-null    int16  
 4   Area          235 non-null    int32  
 5   Migrants      235 non-null    int32  
 6   Fert.Rate     235 non-null    float32
 7   Med.Age       235 non-null    int8   
 8   UrbanPct      235 non-null    float32
 9   WorldShare    235 non-null    float32
dtypes: float32(4), int16(1), int32(4), int8(1)
memory usage: 18.0+ KB


#### Neue Spalten hinzufügen

In [52]:
data["Pop/Area"] = data["Pop"] / data["Area"]

In [53]:
data

Unnamed: 0_level_0,Pop,YearlyChange,NetChange,Density,Area,Migrants,Fert.Rate,Med.Age,UrbanPct,WorldShare,Pop/Area
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
China,1439323776,0.39,5540090,153,9388211,-348399,1.7,38,61.0,18.469999,153.311827
India,1380004385,0.99,13586631,464,2973190,-532687,2.2,28,35.0,17.700001,464.149410
United States,331002651,0.59,1937734,36,9147420,954806,1.8,38,83.0,4.250000,36.185356
Indonesia,273523615,1.07,2898047,151,1811570,-98955,2.3,30,56.0,3.510000,150.987053
Pakistan,220892340,2.00,4327022,287,770880,-233379,3.6,23,35.0,2.830000,286.545688
...,...,...,...,...,...,...,...,...,...,...,...
Montserrat,4992,0.06,3,50,100,0,0.0,0,10.0,0.000000,49.920000
Falkland Islands,3480,3.05,103,0,12170,0,0.0,0,66.0,0.000000,0.285949
Niue,1626,0.68,11,6,260,0,0.0,0,46.0,0.000000,6.253846
Tokelau,1357,1.27,17,136,10,0,0.0,0,0.0,0.000000,135.700000


In [56]:
data.to_csv("Data/PopulationDataFertig.csv")