In [1]:
import pandas as pd
import matplotlib.pyplot as plt

## Initial Inspection

In [2]:
df = pd.read_csv("life_expectancy.csv")
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 5 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   Country                  120 non-null    object
 1   Life expectancy males    120 non-null    object
 2   Life expectancy females  120 non-null    object
 3   Birth rate               120 non-null    object
 4   Death rate               120 non-null    object
dtypes: object(5)
memory usage: 4.8+ KB


In [3]:
df


Unnamed: 0,Country,Life expectancy males,Life expectancy females,Birth rate,Death rate
0,Hong Kong *,83.2 years,87.9 years,5.0 ‰,6.9 ‰
1,Macao *,82.8 years,87.9 years,10.1 ‰,4.1 ‰
2,Switzerland,81.9 years,85.9 years,10.3 ‰,8.2 ‰
3,Iceland,81.8 years,84.5 years,13.1 ‰,6.3 ‰
4,Norway,81.7 years,84.7 years,10.4 ‰,7.8 ‰
...,...,...,...,...,...
115,Congo (Dem. Republic),57.0 years,61.5 years,42.0 ‰,9.7 ‰
116,Zimbabwe,56.2 years,62.0 years,30.5 ‰,9.1 ‰
117,Somalia,53.2 years,57.4 years,43.6 ‰,11.6 ‰
118,Nigeria,52.3 years,53.1 years,37.1 ‰,13.1 ‰


In [4]:
df.head()

Unnamed: 0,Country,Life expectancy males,Life expectancy females,Birth rate,Death rate
0,Hong Kong *,83.2 years,87.9 years,5.0 ‰,6.9 ‰
1,Macao *,82.8 years,87.9 years,10.1 ‰,4.1 ‰
2,Switzerland,81.9 years,85.9 years,10.3 ‰,8.2 ‰
3,Iceland,81.8 years,84.5 years,13.1 ‰,6.3 ‰
4,Norway,81.7 years,84.7 years,10.4 ‰,7.8 ‰


In [5]:
df.tail()

Unnamed: 0,Country,Life expectancy males,Life expectancy females,Birth rate,Death rate
115,Congo (Dem. Republic),57.0 years,61.5 years,42.0 ‰,9.7 ‰
116,Zimbabwe,56.2 years,62.0 years,30.5 ‰,9.1 ‰
117,Somalia,53.2 years,57.4 years,43.6 ‰,11.6 ‰
118,Nigeria,52.3 years,53.1 years,37.1 ‰,13.1 ‰
119,Chad,50.8 years,54.3 years,43.4 ‰,12.5 ‰


In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 5 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   Country                  120 non-null    object
 1   Life expectancy males    120 non-null    object
 2   Life expectancy females  120 non-null    object
 3   Birth rate               120 non-null    object
 4   Death rate               120 non-null    object
dtypes: object(5)
memory usage: 4.8+ KB


# Data Cleaning 

## Checking For Missing Values 

In [8]:
df[df["Country"].isna()]

Unnamed: 0,Country,Life expectancy males,Life expectancy females,Birth rate,Death rate


In [9]:
df[df["Life expectancy males"].isna()]

Unnamed: 0,Country,Life expectancy males,Life expectancy females,Birth rate,Death rate


In [10]:
df[df["Life expectancy females"].isna()]

Unnamed: 0,Country,Life expectancy males,Life expectancy females,Birth rate,Death rate


In [11]:
df[df["Birth rate"].isna()]

Unnamed: 0,Country,Life expectancy males,Life expectancy females,Birth rate,Death rate


In [12]:
df[df["Death rate"].isna()]

Unnamed: 0,Country,Life expectancy males,Life expectancy females,Birth rate,Death rate


### No Missing Values 

## Modifying Country Column 

In [13]:
df["Country"]

0                Hong Kong *
1                    Macao *
2                Switzerland
3                    Iceland
4                     Norway
               ...          
115    Congo (Dem. Republic)
116                 Zimbabwe
117                  Somalia
118                  Nigeria
119                     Chad
Name: Country, Length: 120, dtype: object

In [14]:
df[["Country"]]

Unnamed: 0,Country
0,Hong Kong *
1,Macao *
2,Switzerland
3,Iceland
4,Norway
...,...
115,Congo (Dem. Republic)
116,Zimbabwe
117,Somalia
118,Nigeria


Convert this to string datatype 

In [15]:
df.describe()

Unnamed: 0,Country,Life expectancy males,Life expectancy females,Birth rate,Death rate
count,120,120,120,120,120
unique,120,100,98,96,82
top,Hong Kong *,66.1 years,76.4 years,9.6 ‰,6.8 ‰
freq,1,3,3,4,5


In [18]:
df["Country"] = df["Country"].astype("string")

In [21]:
df["Country"]

0                Hong Kong *
1                    Macao *
2                Switzerland
3                    Iceland
4                     Norway
               ...          
115    Congo (Dem. Republic)
116                 Zimbabwe
117                  Somalia
118                  Nigeria
119                     Chad
Name: Country, Length: 120, dtype: string

## Changing Values 0 and 1

In [19]:
df.loc[[0], ["Country"]] = "Hong Kong"
df.loc[[0], ["Country"]]

Unnamed: 0,Country
0,Hong Kong


In [20]:
df.loc[[1], ["Country"]] = "Macao"
df.loc[[1], ["Country"]]

Unnamed: 0,Country
1,Macao


## Modifying Life expectency male and female columns

In [25]:
df["Life expectancy males"] = df["Life expectancy males"].astype("string")
df["Life expectancy females"] = df["Life expectancy females"].astype("string")

In [26]:
df["Life expectancy males"].str.contains("years")
df[df["Life expectancy males"].str.contains("years", na = False)]

Unnamed: 0,Country,Life expectancy males,Life expectancy females,Birth rate,Death rate
0,Hong Kong,83.2 years,87.9 years,5.0 ‰,6.9 ‰
1,Macao,82.8 years,87.9 years,10.1 ‰,4.1 ‰
2,Switzerland,81.9 years,85.9 years,10.3 ‰,8.2 ‰
3,Iceland,81.8 years,84.5 years,13.1 ‰,6.3 ‰
4,Norway,81.7 years,84.7 years,10.4 ‰,7.8 ‰
...,...,...,...,...,...
115,Congo (Dem. Republic),57.0 years,61.5 years,42.0 ‰,9.7 ‰
116,Zimbabwe,56.2 years,62.0 years,30.5 ‰,9.1 ‰
117,Somalia,53.2 years,57.4 years,43.6 ‰,11.6 ‰
118,Nigeria,52.3 years,53.1 years,37.1 ‰,13.1 ‰


In [27]:
df["Life expectancy females"].str.contains("years")
df[df["Life expectancy females"].str.contains("years", na = False)]

Unnamed: 0,Country,Life expectancy males,Life expectancy females,Birth rate,Death rate
0,Hong Kong,83.2 years,87.9 years,5.0 ‰,6.9 ‰
1,Macao,82.8 years,87.9 years,10.1 ‰,4.1 ‰
2,Switzerland,81.9 years,85.9 years,10.3 ‰,8.2 ‰
3,Iceland,81.8 years,84.5 years,13.1 ‰,6.3 ‰
4,Norway,81.7 years,84.7 years,10.4 ‰,7.8 ‰
...,...,...,...,...,...
115,Congo (Dem. Republic),57.0 years,61.5 years,42.0 ‰,9.7 ‰
116,Zimbabwe,56.2 years,62.0 years,30.5 ‰,9.1 ‰
117,Somalia,53.2 years,57.4 years,43.6 ‰,11.6 ‰
118,Nigeria,52.3 years,53.1 years,37.1 ‰,13.1 ‰


In [29]:
df["Life expectancy males"] = df["Life expectancy males"].str.replace("years", "")

In [30]:
df["Life expectancy females"] = df["Life expectancy females"].str.replace("years", "")

## Renaming all Columns 

In [33]:
mapper = {"Country" : "country","Life expectancy males" : "life_expectancy_males_(years)", "Life expectancy females" : "life_expectancy_females_(years)", "Birth rate" : "birth_rate(%)", "Death rate" : "death_rate(%)"}
df.rename(columns = mapper, inplace = True)

In [35]:
df

Unnamed: 0,country,life_expectancy_males_(years),life_expectancy_females_(years),birth_rate(%),death_rate(%)
0,Hong Kong,83.2,87.9,5.0 ‰,6.9 ‰
1,Macao,82.8,87.9,10.1 ‰,4.1 ‰
2,Switzerland,81.9,85.9,10.3 ‰,8.2 ‰
3,Iceland,81.8,84.5,13.1 ‰,6.3 ‰
4,Norway,81.7,84.7,10.4 ‰,7.8 ‰
...,...,...,...,...,...
115,Congo (Dem. Republic),57.0,61.5,42.0 ‰,9.7 ‰
116,Zimbabwe,56.2,62.0,30.5 ‰,9.1 ‰
117,Somalia,53.2,57.4,43.6 ‰,11.6 ‰
118,Nigeria,52.3,53.1,37.1 ‰,13.1 ‰


In [36]:
df["life_expectancy_males_(years)"] = df["life_expectancy_males_(years)"].astype("float")

In [37]:
df["life_expectancy_females_(years)"] = df["life_expectancy_females_(years)"].astype("float")

In [50]:
df["birth_rate(%)"] = df["birth_rate(%)"].astype("string")

## Modifying Death and Birth Rate Columns

In [39]:
df["death_rate(%)"] = df["death_rate(%)"].astype("string")

In [40]:
df["birth_rate(%)"] = df["birth_rate(%)"].str.replace("‰", "")

In [41]:
df ["death_rate(%)"] = df["death_rate(%)"].str.replace("‰", "")

In [42]:
df["birth_rate(%)"] = df["birth_rate(%)"].astype("float")

In [43]:
df["death_rate(%)"] = df["death_rate(%)"].astype("float")

In [58]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 5 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   country                          120 non-null    string 
 1   life_expectancy_males_(years)    120 non-null    float64
 2   life_expectancy_females_(years)  120 non-null    float64
 3   birth_rate(%)                    120 non-null    float64
 4   death_rate(%)                    120 non-null    float64
dtypes: float64(4), string(1)
memory usage: 4.8 KB


## Creating Bins For Life Expectancy Male and Female and Birth and Death Rate 

In [45]:
df["life_expectancy_males_(years)"].nunique()

100

In [46]:
df["life_expectancy_males_(years)"].min()

50.8

In [47]:
df["life_expectancy_males_(years)"].max()

83.2

In [48]:
df["life_expectancy_males_(years)"].value_counts()

66.1    3
80.6    3
58.9    2
66.3    2
70.3    2
       ..
74.5    1
74.9    1
75.5    1
75.6    1
50.8    1
Name: life_expectancy_males_(years), Length: 100, dtype: int64

In [49]:
df["life_expectancy_females_(years)"].nunique()

98

In [50]:
df["life_expectancy_females_(years)"].max()

87.9

In [51]:
df["life_expectancy_females_(years)"].min()

53.1

In [52]:
df["life_expectancy_males_(years)"].value_counts()

66.1    3
80.6    3
58.9    2
66.3    2
70.3    2
       ..
74.5    1
74.9    1
75.5    1
75.6    1
50.8    1
Name: life_expectancy_males_(years), Length: 100, dtype: int64

In [53]:
df.head()

Unnamed: 0,country,life_expectancy_males_(years),life_expectancy_females_(years),birth_rate(%),death_rate(%)
0,Hong Kong,83.2,87.9,5.0,6.9
1,Macao,82.8,87.9,10.1,4.1
2,Switzerland,81.9,85.9,10.3,8.2
3,Iceland,81.8,84.5,13.1,6.3
4,Norway,81.7,84.7,10.4,7.8


In [71]:
range_bins = [50,60,70,80,90]
range_labels = ['50-60', '60-70', '70-80', '80-90']

In [72]:
df["range_males(years)"] = pd.cut(df["life_expectancy_males_(years)"], range_bins, labels = range_labels)
df["range_females(years)"] = pd.cut(df["life_expectancy_females_(years)"], range_bins, labels = range_labels)

In [74]:
df

Unnamed: 0,country,life_expectancy_males_(years),life_expectancy_females_(years),birth_rate(%),death_rate(%),range_males(years),range_females(years)
0,Hong Kong,83.2,87.9,5.0,6.9,80-90,80-90
1,Macao,82.8,87.9,10.1,4.1,80-90,80-90
2,Switzerland,81.9,85.9,10.3,8.2,80-90,80-90
3,Iceland,81.8,84.5,13.1,6.3,80-90,80-90
4,Norway,81.7,84.7,10.4,7.8,80-90,80-90
...,...,...,...,...,...,...,...
115,Congo (Dem. Republic),57.0,61.5,42.0,9.7,50-60,60-70
116,Zimbabwe,56.2,62.0,30.5,9.1,50-60,60-70
117,Somalia,53.2,57.4,43.6,11.6,50-60,50-60
118,Nigeria,52.3,53.1,37.1,13.1,50-60,50-60


In [75]:
df["range_males(years)"].value_counts()

70-80    47
60-70    43
80-90    18
50-60    12
Name: range_males(years), dtype: int64

In [76]:
df["range_females(years)"].value_counts()

70-80    54
80-90    39
60-70    23
50-60     4
Name: range_females(years), dtype: int64

In [77]:
df["birth_rate(%)"].value_counts()

9.6     4
10.3    3
10.2    3
10.1    2
22.0    2
       ..
14.7    1
17.7    1
14.0    1
9.4     1
43.4    1
Name: birth_rate(%), Length: 96, dtype: int64

In [78]:
df["death_rate(%)"].value_counts()

6.8     5
9.7     4
7.2     4
6.7     3
6.2     3
       ..
13.8    1
14.7    1
16.1    1
3.8     1
12.5    1
Name: death_rate(%), Length: 82, dtype: int64

In [79]:
df["birth_rate(%)"].max()

45.3

In [80]:
df["birth_rate(%)"].min()

5.0

In [81]:
df["death_rate(%)"].min()

1.3

In [82]:
df["death_rate(%)"].max()

21.7

In [83]:
birth_bins = [0,10,20,30,50]
birth_labels = ['0-10', '10-20', '20-30', '30-50']

In [84]:
df["birth_range(%)"] = pd.cut(df["birth_rate(%)"], birth_bins, labels = birth_labels)

In [85]:
df

Unnamed: 0,country,life_expectancy_males_(years),life_expectancy_females_(years),birth_rate(%),death_rate(%),range_males(years),range_females(years),birth_range(%)
0,Hong Kong,83.2,87.9,5.0,6.9,80-90,80-90,0-10
1,Macao,82.8,87.9,10.1,4.1,80-90,80-90,10-20
2,Switzerland,81.9,85.9,10.3,8.2,80-90,80-90,10-20
3,Iceland,81.8,84.5,13.1,6.3,80-90,80-90,10-20
4,Norway,81.7,84.7,10.4,7.8,80-90,80-90,10-20
...,...,...,...,...,...,...,...,...
115,Congo (Dem. Republic),57.0,61.5,42.0,9.7,50-60,60-70,30-50
116,Zimbabwe,56.2,62.0,30.5,9.1,50-60,60-70,30-50
117,Somalia,53.2,57.4,43.6,11.6,50-60,50-60,30-50
118,Nigeria,52.3,53.1,37.1,13.1,50-60,50-60,30-50


In [86]:
df["birth_range(%)"].value_counts()

10-20    56
0-10     30
20-30    18
30-50    16
Name: birth_range(%), dtype: int64

In [87]:
death_bins = [0,5,10,15,25]
death_labels = ['0-5', '5-10', '10-15', '15-25']

In [88]:
df["death_range(%)"] = pd.cut(df["death_rate(%)"], death_bins, labels = death_labels)

In [89]:
df

Unnamed: 0,country,life_expectancy_males_(years),life_expectancy_females_(years),birth_rate(%),death_rate(%),range_males(years),range_females(years),birth_range(%),death_range(%)
0,Hong Kong,83.2,87.9,5.0,6.9,80-90,80-90,0-10,5-10
1,Macao,82.8,87.9,10.1,4.1,80-90,80-90,10-20,0-5
2,Switzerland,81.9,85.9,10.3,8.2,80-90,80-90,10-20,5-10
3,Iceland,81.8,84.5,13.1,6.3,80-90,80-90,10-20,5-10
4,Norway,81.7,84.7,10.4,7.8,80-90,80-90,10-20,5-10
...,...,...,...,...,...,...,...,...,...
115,Congo (Dem. Republic),57.0,61.5,42.0,9.7,50-60,60-70,30-50,5-10
116,Zimbabwe,56.2,62.0,30.5,9.1,50-60,60-70,30-50,5-10
117,Somalia,53.2,57.4,43.6,11.6,50-60,50-60,30-50,10-15
118,Nigeria,52.3,53.1,37.1,13.1,50-60,50-60,30-50,10-15


In [91]:
df["death_range(%)"].value_counts()

5-10     76
10-15    22
0-5      12
15-25    10
Name: death_range(%), dtype: int64

# Data Exploration 

Which Country has the highest life expectency?

In [None]:
Which Country has the lowest life expectency? 