In [3]:
!pip install pycountry



In [4]:
import pandas as pd
import numpy as np
import pycountry    # finding country names
import difflib      # quantifying string differences

In [5]:
# Function to add difference and percent-change columns comparing two columns
def add_difference_and_percent(df, col1, col2):
    """Add ``col1`` vs ``col2`` comparison columns to ``df`` and return it.

    Two columns are written directly onto ``df`` (the frame is modified in
    place and the same object is returned):

    * ``'<col1>_minus_<col2>'``      -- ``col1 - col2``
    * ``'<col1>_pct_change_<col2>'`` -- ``(col1 - col2) / col2 * 100``

    No guard is applied to the division, so for float columns a zero in
    ``col2`` gives ``inf`` (or ``NaN`` for 0/0) in the percent-change column.
    """
    baseline = df[col2]
    delta = df[col1] - baseline

    df[f'{col1}_minus_{col2}'] = delta
    df[f'{col1}_pct_change_{col2}'] = delta / baseline * 100

    return df

In [6]:
# Functions to get the closest ISO3 code from a country name
def _iso3_by_country_name():
    """Return a ``{country name: alpha-3 code}`` dict for every pycountry country.

    The dict is built on first use and stored on the function object, so the
    pycountry database is only walked once no matter how many names are looked up.
    """
    lookup = getattr(_iso3_by_country_name, '_lookup', None)
    if lookup is None:
        lookup = {country.name: country.alpha_3 for country in pycountry.countries}
        _iso3_by_country_name._lookup = lookup
    return lookup


def get_closest_iso3_code(country_name, cutoff=0.76):
    """Return the ISO 3166-1 alpha-3 code of the country whose name best matches.

    Parameters
    ----------
    country_name : object
        Name to look up. Anything that is not a ``str`` (e.g. NaN/None) yields ``None``.
    cutoff : float, default 0.76
        Minimum ``difflib`` similarity ratio (0-1) a candidate must reach.

    Returns
    -------
    str or None
        The alpha-3 code of the single closest pycountry ``name``, or ``None``
        when no candidate reaches ``cutoff``.

    NOTE(review): only pycountry's ``name`` field is searched; if a dataset uses
    a shorter everyday name than the one pycountry lists, it may fall below the
    cutoff and come back as ``None`` -- worth checking the unmatched rows.
    """
    if not isinstance(country_name, str):
        return None

    iso3_by_name = _iso3_by_country_name()
    closest_match = difflib.get_close_matches(
        country_name, list(iso3_by_name), n=1, cutoff=cutoff)  # best single candidate at or above the cutoff
    if closest_match:
        return iso3_by_name[closest_match[0]]

    return None

In [15]:
# Read in volumes of production in 2023 and key the table by ISO3 country code
volumes = (
    pd.read_csv("./data/country_percent_scale_080124.csv")
    .set_index('iso3_country')
)

In [10]:
def _rows_within_year(frame, year='2023'):
    """Return the rows of ``frame`` whose start_date and end_date both contain ``year``.

    ``na=False`` makes a missing start_date/end_date count as "no match", so such
    rows are dropped instead of producing a NaN mask that pandas refuses to index with.
    """
    starts_in_year = frame['start_date'].str.contains(year, na=False)
    ends_in_year = frame['end_date'].str.contains(year, na=False)
    return frame[starts_in_year & ends_in_year]


# Read the production and transport emission tables, discarding rows with no start_date
new_prod_in = pd.read_csv(
    "./data/country-climate-trace_oil-and-gas-production_100724.csv"
).dropna(subset=['start_date'])
new_trans_in = pd.read_csv(
    "./data/country-climate-trace_oil-and-gas-transport_100724.csv"
).dropna(subset=['start_date'])

# Keep only the records that start and end in 2023
new_prod = _rows_within_year(new_prod_in)
new_trans = _rows_within_year(new_trans_in)

In [11]:
# Join production and transport emissions country by country.
# Both frames are keyed on iso3_country before joining: joining on the default
# row index would pair rows by their position in the two CSVs, not by country.
# NOTE(review): assumes at most one 2023 row per country in each table -- TODO confirm.
new_joined = new_prod.set_index('iso3_country').join(
    new_trans.set_index('iso3_country'), how='outer',
    lsuffix='_prod', rsuffix='_trans')

# Totals count a missing side as 0 (the tables are zero-filled later on), so a
# country present in only one table keeps that table's value as its total
# instead of turning into NaN.
new_joined['CO2_total'] = new_joined['CO2_emissions_tonnes_prod'].add(
    new_joined['CO2_emissions_tonnes_trans'], fill_value=0)
new_joined['CH4_total'] = new_joined['CH4_emissions_tonnes_prod'].add(
    new_joined['CH4_emissions_tonnes_trans'], fill_value=0)

# Drop the N2O and date columns, which are not used in the comparison
new_joined = new_joined.drop(
    columns=[
        'N2O_emissions_tonnes_prod', 'N2O_emissions_tonnes_trans',
        'start_date_prod', 'end_date_prod',
        'start_date_trans', 'end_date_trans',
    ])

In [12]:
# Load the field-level table, remove exact duplicate rows, and attach an ISO3
# code to every row by fuzzy-matching its country name
version4 = (
    pd.read_csv("./data/all_fields_transposed.csv")
    .drop_duplicates()
    .assign(iso3_country=lambda fields: fields['country'].apply(get_closest_iso3_code))
)

In [13]:
# Sum the field-level emissions per country, rename the columns to match the
# new-method table, then add CO2/CH4 totals (production + transport)
country4 = (
    version4[[
        'iso3_country',
        'upstream_co2_tonnes_per_year', 'transport_co2_tonnes_per_year',
        'upstream_ch4_tonnes_per_year', 'transport_ch4_tonnes_per_year',
        'upstream_100_co2e_tonnes_per_year', 'transport_100_co2e_tonnes_per_year',
        'upstream_20_co2e_tonnes_per_year', 'transport_20_co2e_tonnes_per_year',
    ]]
    .groupby('iso3_country')
    .sum()
    .rename(columns={
        'upstream_co2_tonnes_per_year': 'CO2_emissions_tonnes_prod',
        'transport_co2_tonnes_per_year': 'CO2_emissions_tonnes_trans',
        'upstream_ch4_tonnes_per_year': 'CH4_emissions_tonnes_prod',
        'transport_ch4_tonnes_per_year': 'CH4_emissions_tonnes_trans',
        'upstream_100_co2e_tonnes_per_year': 'total_CO2e_100yrGWP_prod',
        'transport_100_co2e_tonnes_per_year': 'total_CO2e_100yrGWP_trans',
        'upstream_20_co2e_tonnes_per_year': 'total_CO2e_20yrGWP_prod',
        'transport_20_co2e_tonnes_per_year': 'total_CO2e_20yrGWP_trans',
    })
    .assign(
        CO2_total=lambda totals: totals['CO2_emissions_tonnes_prod'] + totals['CO2_emissions_tonnes_trans'],
        CH4_total=lambda totals: totals['CH4_emissions_tonnes_prod'] + totals['CH4_emissions_tonnes_trans'],
    )
)


In [16]:

# Line up volumes, new-method emissions and v4 emissions by ISO3 code.
# Columns shared by the two emission tables get the suffixes "_new" / "_v4g";
# countries missing from a table are zero-filled.
merged = (
    volumes
    .join(new_joined, how='outer')
    .join(country4, how='outer', lsuffix='_new', rsuffix='_v4g')
    .fillna(0)
    .reset_index()
)

# Append a "World" row holding the column sums. The iso3_country column is
# skipped when summing: "summing" it would only concatenate every country code
# into one long string that is then thrown away.
world_totals = {
    col: merged[col].sum() for col in merged.columns if col != 'iso3_country'
}
world_totals['iso3_country'] = 'World'
world_row = pd.DataFrame([world_totals], columns=merged.columns)
merged = pd.concat([merged, world_row], ignore_index=True)

In [17]:
out_table = merged  # same object as `merged`; the comparison columns are added to it in place

# For every emission metric, add "<metric>_v4g minus <metric>_new" and the
# matching percent-change column (v4 relative to the new method)
for metric in (
    'CO2_emissions_tonnes_prod',
    'CO2_emissions_tonnes_trans',
    'CH4_emissions_tonnes_prod',
    'CH4_emissions_tonnes_trans',
    'CO2_total',
    'CH4_total',
    'total_CO2e_100yrGWP_prod',
    'total_CO2e_20yrGWP_prod',
    'total_CO2e_100yrGWP_trans',
    'total_CO2e_20yrGWP_trans',
):
    out_table = add_difference_and_percent(out_table, f'{metric}_v4g', f'{metric}_new')

In [18]:
# Write the comparison table to CSV, leaving out the row index
out_table.to_csv(
    path_or_buf='./data/new_method_&_v4_100924.csv',
    index=False,
)

In [88]:
# Spot check: total upstream CO2 across all rows whose country is Mozambique
version4.loc[version4['country'] == 'Mozambique', 'upstream_co2_tonnes_per_year'].sum()

594193.2838468128

In [89]:
# Spot check: look for a TZA row in the per-country v4 totals
test = country4.reset_index()
test.loc[test['iso3_country'] == 'TZA']

Unnamed: 0,iso3_country,CO2_emissions_tonnes_prod,CO2_emissions_tonnes_trans,CH4_emissions_tonnes_prod,CH4_emissions_tonnes_trans,total_CO2e_100yrGWP_prod,total_CO2e_100yrGWP_trans,total_CO2e_20yrGWP_prod,total_CO2e_20yrGWP_trans,CO2_total,CH4_total


In [90]:
# Spot check: the 2023 production record(s) for MOZ
new_prod.loc[new_prod['iso3_country'] == 'MOZ']

Unnamed: 0,iso3_country,start_date,end_date,CO2_emissions_tonnes,CH4_emissions_tonnes,N2O_emissions_tonnes,total_CO2e_100yrGWP,total_CO2e_20yrGWP
633,MOZ,2023-01-01,2023-12-31,508785.071598,35313.986661,0.0,1561142.0,3422189.0
