# share_plastic_fate.csv

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path

In [None]:
# Folder that receives every figure and CSV written by this notebook;
# it is created on first run and reused afterwards.
out_dir = Path("output")
out_dir.mkdir(exist_ok=True)

In [None]:
# Country-level table; later cells read its "Year", "Country Name" and
# "Continent Name" columns plus several numeric indicator columns.
data_countries = pd.read_csv(Path("data") / "Countries.csv")
data_countries

In [None]:
# Load the plastic-fate table and give the long recycled-share column a
# short name that is easier to reference in the cells below.
data_share_plastic_fate = pd.read_csv(
    Path("data") / "share-plastic-fate.csv"
).rename(
    columns={
        "Share of waste recycled from total regional waste": "share_waste_recycled",
    }
)

data_share_plastic_fate

In [None]:
# Sorted distinct years present in the countries table.
np.unique(data_countries["Year"])

In [None]:
# Sorted distinct years present in the plastic-fate table.
np.unique(data_share_plastic_fate["Year"])

In [None]:
# Sorted distinct entity labels present in the plastic-fate table.
np.unique(data_share_plastic_fate["Entity"])

In [None]:
# Show only the rows of the "Americas (excl. USA)" entity.
data_share_plastic_fate[
    data_share_plastic_fate["Entity"].eq("Americas (excl. USA)")
]

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# NOTE: this binds a second name to the SAME DataFrame (no copy is made), so
# the in-place subtraction below also changes data_share_plastic_fate["Year"].
# The line fits further down read those shifted years, and the final merge
# cell adds 2000 back. Re-running this cell on its own shifts the years again.
data_plastic_year_shifted = data_share_plastic_fate
data_plastic_year_shifted["Year"] -= 2000
data_plastic_year_shifted

In [None]:
# One line per entity: recycled share against the shifted year (Year - 2000),
# saved as a PNG in the output folder.
plt.figure()
sns_plt = sns.lineplot(
    data=data_plastic_year_shifted,
    x="Year",
    y="share_waste_recycled",
    hue="Entity",
)
sns_plt.set_xticks(range(20))
sns_plt.set_xlabel("Year - 2000")
plt.savefig(out_dir / "share_waste_recycled_on_year.png")

In [None]:
# Straight-line fit of the recycled share against "Year" for the
# United States only; prints the slope and intercept.
per_entity_plastic_fate = data_share_plastic_fate.loc[
    data_share_plastic_fate["Entity"] == "United States"
]
line_tan, line_bias = np.polyfit(
    per_entity_plastic_fate["Year"],
    per_entity_plastic_fate["share_waste_recycled"],
    1,
)
print(f"y={line_tan:.3f}*x+{line_bias:.3f}")

In [None]:
# Fit share_waste_recycled = Tan * Year + Bias separately for every entity
# and write the resulting table to the output folder. When the notebook is
# run top to bottom, "Year" holds the shifted value (Year - 2000) here.
fitted_rows = []
for entity in np.unique(data_share_plastic_fate["Entity"]):
    per_entity_plastic_fate = data_share_plastic_fate[
        data_share_plastic_fate["Entity"] == entity
    ]
    line_tan, line_bias = np.polyfit(
        per_entity_plastic_fate["Year"],
        per_entity_plastic_fate["share_waste_recycled"],
        1,
    )
    fitted_rows.append((entity, line_tan, line_bias))

# Pivot the collected (entity, slope, intercept) rows into column lists.
dict_of_coefficients = {
    "Entity": [row[0] for row in fitted_rows],
    "Tan": [row[1] for row in fitted_rows],
    "Bias": [row[2] for row in fitted_rows],
}
df_coefficients = pd.DataFrame(dict_of_coefficients)
df_coefficients.to_csv(out_dir / "coefficients.csv")

# Countries.csv

In [None]:
# Display the countries table again before regions are derived from it.
data_countries

In [None]:
# Sorted distinct continent names; get_entity below branches on these values.
np.unique(data_countries["Continent Name"])

In [None]:
#sns.PairGrid(data = data_countries, vars = ["GDP", "Population", "Land"])

In [None]:
# Country names counted as North Africa when the continent is "Africa".
# Source: https://en.wikipedia.org/wiki/North_Africa
north_africa_countries = {
    "Algeria",
    "Egypt",
    "Libya",
    "Morocco",
    "Sudan",
    "Tunisia",
}

# Country names counted as the Middle East when the continent is "Asia".
# Source: https://en.wikipedia.org/wiki/Middle_East, with these renames:
# Turkey -> Turkiye, State of Palestine -> West Bank and Gaza,
# Yemen -> Yemen, Rep., Syria -> Syrian Arab Republic.
# NOTE(review): "Egypt" and "Iran" are kept in their short form; if the
# countries table spells them differently (e.g. "Egypt, Arab Rep." or
# "Iran, Islamic Rep.") they will never match -- confirm against the data.
middle_east_countries = {
    "Akrotiri and Dhekelia",
    "Bahrain",
    "Cyprus",
    "Egypt",
    "Iran",
    "Iraq",
    "Israel",
    "Jordan",
    "Kuwait",
    "Lebanon",
    "Oman",
    "West Bank and Gaza",
    "Qatar",
    "Saudi Arabia",
    "Syrian Arab Republic",
    "Turkiye",
    "United Arab Emirates",
    "Yemen, Rep.",
}


def get_entity(country_name: str, continent_name: str) -> str:
    """Return the region label ("Entity") for a country.

    China, India and the United States are their own entities. Every other
    country is assigned by continent, with Asia split into Middle East and
    the rest, and Africa split into North Africa and Sub-Saharan Africa.

    Args:
        country_name: Country name as spelled in the countries table.
        continent_name: Continent name of that row.

    Returns:
        The entity label the row is later grouped and merged on.

    Raises:
        ValueError: If ``continent_name`` is not one of the handled continents.
    """
    # Stand-alone countries take priority over their continent.
    if country_name in ("China", "India", "United States"):
        return country_name
    if continent_name in ("North America", "South America"):
        return "Americas (excl. USA)"
    if continent_name == "Asia":
        if country_name in middle_east_countries:
            return "Middle East & North Africa"
        return "Asia (excl. China and India)"
    if continent_name in ("Oceania", "Europe"):
        return continent_name
    if continent_name == "Africa":
        if country_name in north_africa_countries:
            return "Middle East & North Africa"
        return "Sub-Saharan Africa"
    # Name the offending values so a bad row can be found in the data.
    raise ValueError(
        f"unknown continent {continent_name!r} for country {country_name!r}"
    )

# Label every row with its entity. The two needed columns are walked in
# lockstep instead of using iterrows(), which builds a Series per row.
data_countries["Entity"] = [
    get_entity(country_name=country_name, continent_name=continent_name)
    for country_name, continent_name in zip(
        data_countries["Country Name"], data_countries["Continent Name"]
    )
]
# Spot-check the assignment on the 2019 rows.
data_countries.loc[
    data_countries["Year"] == 2019,
    ["Country Name", "Continent Name", "Entity"],
]

In [None]:
# Boolean mask (displayed only, not stored) marking rows whose country is in
# the list that a later cell drops from the Europe subset.
data_countries["Country Name"].isin(["Armenia", "Azerbaijan", "Georgia", "Kazakhstan", "Russian Federation", "Turkiye"]) 

In [None]:
# Rows whose continent is Europe.
data_countries_Europe = data_countries[
    data_countries["Continent Name"].eq("Europe")
]

In [None]:
# Index labels of the Europe rows belonging to the listed countries;
# the next cell drops exactly these rows from the countries table.
indexes_to_drop = data_countries_Europe[
    data_countries_Europe["Country Name"].isin(
        [
            "Armenia",
            "Azerbaijan",
            "Georgia",
            "Kazakhstan",
            "Russian Federation",
            "Turkiye",
        ]
    )
].index

In [None]:
# Remove the selected Europe rows; reset_index() keeps the old labels
# in an extra "index" column.
data_countries_filtered = data_countries.drop(index=indexes_to_drop).reset_index()

In [None]:
# Drop the Cyprus rows that are filed under Asia, then keep only the entity,
# the year and the numeric indicators that are aggregated in the next cell.
data_countries_filtered_Asia = data_countries_filtered[
    data_countries_filtered["Continent Name"].eq("Asia")
]
indexes_to_drop_Cyprus = data_countries_filtered_Asia[
    data_countries_filtered_Asia["Country Name"].eq("Cyprus")
].index
# The index columns added by the two reset_index() calls are not selected below.
data_countries_filtered_2 = data_countries_filtered.drop(
    index=indexes_to_drop_Cyprus
).reset_index()
data_countries_filtered_3 = data_countries_filtered_2[
    [
        "Entity",
        "Year",
        "GDP",
        "R&D",
        "Population",
        "Land",
        "Export",
        "Import",
        "Education Expenditure",
        "Health Expenditure",
        "Net Trade",
    ]
]

In [None]:
# Total every indicator per (Entity, Year); the grouping keys stay as
# ordinary columns. sum() skips missing values.
data_entities = data_countries_filtered_3.groupby(
    ["Entity", "Year"], as_index=False
).sum()
data_entities

In [None]:
# Undo the earlier "Year -= 2000" shift so years line up with data_entities.
# NOTE: this mutates the frame in place, so running the cell twice adds
# 2000 twice and the join below then matches nothing.
data_share_plastic_fate["Year"] += 2000
merged_df = data_entities.merge(
    data_share_plastic_fate[["share_waste_recycled", "Entity", "Year"]],
    on=["Entity", "Year"],
)
merged_df.to_csv(out_dir / "data_merged.csv")