In [2]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

In [3]:
# Load the raw emissions table and preview its first ten rows.
df = pd.read_csv(filepath_or_buffer="CO2 emission by countries.csv")
df.head(n=10)

Unnamed: 0,Country,Code,Calling Code,Year,CO2 emission (Tons),Population(2022),Area,% of World,Density(km2)
0,Afghanistan,AF,93,1750,0.0,41128771.0,652230.0,0.40%,63/km²
1,Afghanistan,AF,93,1751,0.0,41128771.0,652230.0,0.40%,63/km²
2,Afghanistan,AF,93,1752,0.0,41128771.0,652230.0,0.40%,63/km²
3,Afghanistan,AF,93,1753,0.0,41128771.0,652230.0,0.40%,63/km²
4,Afghanistan,AF,93,1754,0.0,41128771.0,652230.0,0.40%,63/km²
5,Afghanistan,AF,93,1755,0.0,41128771.0,652230.0,0.40%,63/km²
6,Afghanistan,AF,93,1756,0.0,41128771.0,652230.0,0.40%,63/km²
7,Afghanistan,AF,93,1757,0.0,41128771.0,652230.0,0.40%,63/km²
8,Afghanistan,AF,93,1758,0.0,41128771.0,652230.0,0.40%,63/km²
9,Afghanistan,AF,93,1759,0.0,41128771.0,652230.0,0.40%,63/km²


Fill in missing country codes

In [7]:
# Two-letter codes to write into "Code" for the countries listed below.
# One mapping plus a loop replaces six copy-pasted single-assignment cells;
# the assignments are applied in the same order as before.
# NOTE(review): Namibia's code is the literal string "NA", which pandas.read_csv
# parses as a missing value by default — presumably why it needs filling; confirm.
missing_country_codes = {
    "Russia": "RU",
    "Namibia": "NA",
    "Tanzania": "TZ",
    "Taiwan": "TW",
    "Syria": "SY",
    "South Korea": "KR",
}
for country_name, country_code in missing_country_codes.items():
    # Overwrite the code on every row (all years) belonging to this country.
    df.loc[df["Country"] == country_name, "Code"] = country_code

In [19]:
# Count the missing values in each column.
df.isnull().sum(axis=0)

Country                   0
Code                    542
Calling Code           3523
Year                      0
CO2 emission (Tons)       0
Population(2022)       6504
Area                   4336
% of World             4336
Density(km2)           6504
dtype: int64

In [21]:
# The calling code is not used in this notebook, so remove the column.
df = df.drop(columns="Calling Code")

In [23]:
#Remove formatting
# Strip the "/km²" unit suffix and the thousands separators from the density text.
# regex=False makes both patterns literal regardless of the installed pandas
# version's default (older releases default to regex matching and warn on
# single-character patterns such as ",").
df["Density(km2)"] = (
    df["Density(km2)"]
    .str.replace("/km²", "", regex=False)
    .str.replace(",", "", regex=False)
)

In [25]:
# Strip the percent sign from the share-of-world text; the values remain strings.
# regex=False treats "%" as a literal on every pandas version instead of relying
# on the version-dependent default.
df["% of World"] = df["% of World"].str.replace("%", "", regex=False)

In [27]:
# Show each column's dtype to see which columns are still stored as text (object).
df.dtypes

Country                 object
Code                    object
Year                     int64
CO2 emission (Tons)    float64
Population(2022)       float64
Area                   float64
% of World              object
Density(km2)            object
dtype: object

In [29]:
# Discard every row that has a missing value in any column.
df = df.dropna(axis=0, how="any")

In [31]:
#Make density an integer
# Vectorised cast instead of calling int() once per row through apply();
# "int64" pins the same dtype the element-wise conversion produced.
df["Density(km2)"] = df["Density(km2)"].astype("int64")

In [33]:
# Tons of CO2 per person, rounded to two decimals.
# The denominator is the 2022 population column for every year's row; a zero
# population is turned into NaN so the division yields NaN rather than inf.
df["CO2_per_capita"] = (
    df["CO2 emission (Tons)"]
    / df["Population(2022)"].replace(0, float("nan"))
).round(2)

In [35]:
# Tons of CO2 per unit of the "Area" column, rounded to two decimals.
# A zero area is turned into NaN so the division yields NaN rather than inf.
df["CO2_per_km2"] = (
    df["CO2 emission (Tons)"]
    / df["Area"].replace(0, float("nan"))
).round(2)

In [37]:
# Replace spaces and parentheses in the column labels with underscore-style names.
df.rename(
    columns={
        "CO2 emission (Tons)": "CO2_emission_Tons",
        "Population(2022)": "Population_2022",
        "Density(km2)": "Density_km2",
        "% of World": "%_of_World",
    },
    inplace=True,
)

In [39]:
#Map each country to a continent
# Country name -> continent label, looked up by the exact spelling in df["Country"].
# Caribbean islands are all grouped under North America: Martinique and
# Trinidad and Tobago were previously listed under South America, which was
# inconsistent with Guadeloupe, Grenada, Barbados and the other islands here.
# Transcontinental / associated territories (Russia, Turkey, Greenland) keep the
# assignment they already had.
continent_map = {
    #North America
    'United States': 'North America', 'Canada': 'North America', 'Mexico': 'North America',
    'Guatemala': 'North America', 'Belize': 'North America', 'El Salvador': 'North America',
    'Honduras': 'North America', 'Nicaragua': 'North America', 'Costa Rica': 'North America',
    'Panama': 'North America', 'Bahamas': 'North America', 'Cuba': 'North America',
    'Jamaica': 'North America', 'Haiti': 'North America', 'Dominican Republic': 'North America',
    'Antigua and Barbuda' :'North America','Barbados': 'North America', 'Saint Vincent and the Grenadines':'North America',
    'Saint Lucia':'North America','Dominica':'North America','Saint Kitts and Nevis':'North America',
    'Puerto Rico':'North America','Grenada':'North America', 'Guadeloupe':'North America',
    'Trinidad and Tobago':'North America','Martinique':'North America',

    #South America
    'Brazil': 'South America', 'Argentina': 'South America', 'Colombia': 'South America',
    'Chile': 'South America', 'Peru': 'South America', 'Venezuela': 'South America',
    'Ecuador': 'South America', 'Bolivia': 'South America', 'Paraguay': 'South America',
    'Uruguay': 'South America', 'Guyana': 'South America', 'Suriname': 'South America',
    'French Guiana': 'South America',

    #Europe
    'Germany': 'Europe', 'France': 'Europe', 'United Kingdom': 'Europe', 'Italy': 'Europe', 'Spain': 'Europe',
    'Netherlands': 'Europe', 'Belgium': 'Europe', 'Sweden': 'Europe', 'Norway': 'Europe', 'Denmark': 'Europe',
    'Switzerland': 'Europe', 'Portugal': 'Europe', 'Greece': 'Europe', 'Ireland': 'Europe', 'Poland': 'Europe',
    'Czech Republic': 'Europe', 'Hungary': 'Europe', 'Austria': 'Europe', 'Finland': 'Europe', 'Slovakia': 'Europe',
    'Slovenia': 'Europe', 'Croatia': 'Europe', 'Serbia': 'Europe', 'Bosnia and Herzegovina': 'Europe',
    'Montenegro': 'Europe', 'North Macedonia': 'Europe', 'Albania': 'Europe', 'Lithuania': 'Europe',
    'Latvia': 'Europe', 'Estonia': 'Europe', 'Belarus': 'Europe', 'Ukraine': 'Europe', 'Moldova': 'Europe',
    'Russia': 'Europe', 'Turkey': 'Europe', 'Iceland': 'Europe', 'Andorra':'Europe','Bulgaria':'Europe',
    'Cyprus':'Europe', 'Romania':'Europe','Greenland':'Europe','Malta':'Europe', 'Liechtenstein':'Europe',
    'Luxembourg':'Europe',
    #Asia
    'India': 'Asia', 'China': 'Asia', 'Japan': 'Asia', 'South Korea': 'Asia', 'Indonesia': 'Asia',
    'Pakistan': 'Asia', 'Bangladesh': 'Asia', 'Sri Lanka': 'Asia', 'Thailand': 'Asia', 'Vietnam': 'Asia',
    'Philippines': 'Asia', 'Malaysia': 'Asia', 'Singapore': 'Asia', 'Myanmar': 'Asia', 'Cambodia': 'Asia',
    'Laos': 'Asia', 'Mongolia': 'Asia', 'Nepal': 'Asia', 'Bhutan': 'Asia', 'Kazakhstan': 'Asia',
    'Uzbekistan': 'Asia', 'Turkmenistan': 'Asia', 'Kyrgyzstan': 'Asia', 'Tajikistan': 'Asia',
    'Afghanistan': 'Asia', 'Iran': 'Asia', 'Iraq': 'Asia', 'Saudi Arabia': 'Asia', 'Yemen': 'Asia',
    'United Arab Emirates': 'Asia', 'Qatar': 'Asia', 'Bahrain': 'Asia', 'Kuwait': 'Asia',
    'Oman': 'Asia', 'Lebanon': 'Asia', 'Jordan': 'Asia', 'Israel': 'Asia', 'Syria': 'Asia',
    'Georgia': 'Asia', 'Armenia': 'Asia', 'Azerbaijan': 'Asia','Taiwan': 'Asia','North Korea': 'Asia',
    'Maldives':'Asia',

    #Africa
    'South Africa': 'Africa', 'Nigeria': 'Africa', 'Egypt': 'Africa', 'Kenya': 'Africa',
    'Morocco': 'Africa', 'Algeria': 'Africa', 'Ethiopia': 'Africa', 'Uganda': 'Africa',
    'Zimbabwe': 'Africa', 'Zambia': 'Africa', 'Ghana': 'Africa', 'Angola': 'Africa',
    'Mozambique': 'Africa', 'Madagascar': 'Africa', 'Cameroon': 'Africa', 'Ivory Coast': 'Africa',
    'Tunisia': 'Africa', 'Senegal': 'Africa', 'Mali': 'Africa', 'Sudan': 'Africa', 'South Sudan': 'Africa',
    'Democratic Republic of the Congo': 'Africa', 'Republic of the Congo': 'Africa',
    'Burkina Faso': 'Africa', 'Botswana': 'Africa', 'Namibia': 'Africa', 'Gabon': 'Africa',
    'Sierra Leone': 'Africa', 'Liberia': 'Africa', 'Chad': 'Africa', 'Niger': 'Africa',
    'Mauritania': 'Africa', 'Eritrea': 'Africa', 'Djibouti': 'Africa', 'Rwanda': 'Africa', 'Tanzania': 'Africa',
    'Togo': 'Africa','Benin': 'Africa','Somalia': 'Africa','Burundi': 'Africa','Seychelles': 'Africa',
    'Central African Republic': 'Africa','Sao Tome and Principe': 'Africa','Comoros': 'Africa',
    'Equatorial Guinea':'Africa', 'Eswatini':'Africa','Gambia': 'Africa','Mayotte':'Africa','Guinea':'Africa',
    'Mauritius':'Africa','Lesotho':'Africa','Libya':'Africa', 'Malawi':'Africa',

    #Oceania
    'Australia': 'Oceania', 'New Zealand': 'Oceania', 'Fiji': 'Oceania', 'Papua New Guinea': 'Oceania',
    'Samoa': 'Oceania', 'Tonga': 'Oceania', 'Vanuatu': 'Oceania', 'Solomon Islands': 'Oceania',
    'Kiribati': 'Oceania', 'Micronesia': 'Oceania', 'Marshall Islands': 'Oceania', 'Palau': 'Oceania',
    'Tuvalu': 'Oceania', 'Nauru':'Oceania',
}
# Countries missing from the dictionary get NaN from .map().
df["Continent"] = df["Country"].map(continent_map)

# Report any country the mapping does not cover so the gap is visible instead of
# silently ending up as an empty Continent value in the exported data.
unmapped_countries = sorted(df.loc[df["Continent"].isna(), "Country"].unique())
if unmapped_countries:
    print(f"No continent assigned for: {unmapped_countries}")

In [41]:
# Preview the first ten rows of the transformed table.
df.head(n=10)

Unnamed: 0,Country,Code,Year,CO2_emission_Tons,Population_2022,Area,%_of_World,Density_km2,CO2_per_capita,CO2_per_km2,Continent
0,Afghanistan,AF,1750,0.0,41128771.0,652230.0,0.4,63,0.0,0.0,Asia
1,Afghanistan,AF,1751,0.0,41128771.0,652230.0,0.4,63,0.0,0.0,Asia
2,Afghanistan,AF,1752,0.0,41128771.0,652230.0,0.4,63,0.0,0.0,Asia
3,Afghanistan,AF,1753,0.0,41128771.0,652230.0,0.4,63,0.0,0.0,Asia
4,Afghanistan,AF,1754,0.0,41128771.0,652230.0,0.4,63,0.0,0.0,Asia
5,Afghanistan,AF,1755,0.0,41128771.0,652230.0,0.4,63,0.0,0.0,Asia
6,Afghanistan,AF,1756,0.0,41128771.0,652230.0,0.4,63,0.0,0.0,Asia
7,Afghanistan,AF,1757,0.0,41128771.0,652230.0,0.4,63,0.0,0.0,Asia
8,Afghanistan,AF,1758,0.0,41128771.0,652230.0,0.4,63,0.0,0.0,Asia
9,Afghanistan,AF,1759,0.0,41128771.0,652230.0,0.4,63,0.0,0.0,Asia


In [43]:
# Export the cleaned table; with pandas' defaults the row index is written as
# the first, unnamed column of the file.
df.to_csv(path_or_buf="data3.csv")