In [74]:
import numpy as numpy
import pandas as pd

In [75]:
# education data
# from archimedes import get_dat_path
# datasource = "owd-tertiary-education"  # Example datasource, can be changed
# datasource = "owd-government-expenditure-on-education-gdp"

# military expenditure data
# datasource = "owd-armed-forces-personnel"  # Example datasource, can be changed
# datasource = "military-spending-as-a-share-of-gdp-sipri"

# economic data
datasource = "merchandise-exports-gdp-cepii"

match datasource:
    case "owd-tertiary-education":
        dataName = "share-of-the-population-with-completed-tertiary-education"
        saveName = "owd_education_tertiary_completion_rate"
        columnName = "Combined - percentage of 25-64 years adults with incomplete tertiary education"
        newColumnName = "TERTIARYCOMPLETIONRATE"
    case "owd-government-expenditure-on-education-gdp":
        dataName = "total-government-expenditure-on-education-gdp"
        saveName = "owd_education_government_expenditure_gdp"
        columnName = "Public spending on education as a share of GDP (historical and recent)"
        newColumnName = "EDUCATIONGDPSHARE"
    case "owd-armed-forces-personnel":
        dataName = "victor/armed-forces-personnel-percent"
        saveName = "owd-armed-forces-personnel"
        columnName = "Armed forces personnel (% of total population)"
        newColumnName = "MILITARYPERSONNELPERCENT"
    case "merchandise-exports-gdp-cepii":
        dataName = "victor/merchandise-exports-gdp-cepii"
        saveName = "owd_exported_goods_as_share_of_gdp"
        columnName = "Value of global merchandise exports as a share of GDP (Fouquin and Hugot; CEPII 2016; National data)"
        newColumnName = "MERCHANDISEEXPORTSGDP"
    case "military-spending-as-a-share-of-gdp-sipri":
        dataName = "victor/military-spending-as-a-share-of-gdp-sipri"
        saveName = "owd_military_spending_as_share_of_gdp"
        columnName = "Military expenditure (% of GDP)"
        newColumnName = "MILITARYSPENDINGGDP"
        
        

        
        
# Read in the share-of-the-population-with-completed-tertiary-education dataset
df_raw = pd.read_csv(f'data/in/{dataName}.csv')



In [76]:
df_raw

Unnamed: 0,Entity,Code,Year,Value of global merchandise exports as a share of GDP (Fouquin and Hugot; CEPII 2016; National data)
0,Afghanistan,AFG,1956,21.65766
1,Afghanistan,AFG,1957,28.71579
2,Afghanistan,AFG,1958,16.64957
3,Afghanistan,AFG,1959,20.01660
4,Afghanistan,AFG,1960,12.42149
...,...,...,...,...
13803,Zollverein,,1866,10.77171
13804,Zollverein,,1867,10.39549
13805,Zollverein,,1868,14.46323
13806,Zollverein,,1869,14.94143


In [77]:
regions = ['Africa','Asia', 'Europe', 'North America', 'Oceania', 'South America']
code = {"Africa": "AFR",
        "Asia": "ASI",
        "Europe": "EUR",
        "North America": "AMN",
        "Oceania": "OCE",
        "South America": "AMS"}

for region in regions:
    df_raw["Code"].loc[df_raw["Entity"]==region] = code[region]
    print(df_raw["Code"].loc[df_raw["Entity"]==region])
    



48     AFR
49     AFR
50     AFR
51     AFR
52     AFR
53     AFR
54     AFR
55     AFR
56     AFR
57     AFR
58     AFR
59     AFR
60     AFR
61     AFR
62     AFR
63     AFR
64     AFR
65     AFR
66     AFR
67     AFR
68     AFR
69     AFR
70     AFR
71     AFR
72     AFR
73     AFR
74     AFR
75     AFR
76     AFR
77     AFR
78     AFR
79     AFR
80     AFR
81     AFR
82     AFR
83     AFR
84     AFR
85     AFR
86     AFR
87     AFR
88     AFR
89     AFR
90     AFR
91     AFR
92     AFR
93     AFR
94     AFR
95     AFR
96     AFR
97     AFR
98     AFR
99     AFR
100    AFR
101    AFR
102    AFR
Name: Code, dtype: object
536    ASI
537    ASI
538    ASI
539    ASI
540    ASI
541    ASI
542    ASI
543    ASI
544    ASI
545    ASI
546    ASI
547    ASI
548    ASI
549    ASI
550    ASI
551    ASI
552    ASI
553    ASI
554    ASI
555    ASI
556    ASI
557    ASI
558    ASI
559    ASI
560    ASI
561    ASI
562    ASI
563    ASI
564    ASI
565    ASI
566    ASI
567    ASI
568    ASI
569   

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  df_raw["Code"].loc[df_raw["Entity"]==region] = code[region]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_

In [78]:
df = df_raw[["Code", "Year", columnName]]
df = df.rename(columns={"Code": "COUNTRYCODE"})
df = df.rename(columns={"Year": "YEAR"})
df = df.rename(columns={columnName: newColumnName})
display(df)

Unnamed: 0,COUNTRYCODE,YEAR,MERCHANDISEEXPORTSGDP
0,AFG,1956,21.65766
1,AFG,1957,28.71579
2,AFG,1958,16.64957
3,AFG,1959,20.01660
4,AFG,1960,12.42149
...,...,...,...
13803,,1866,10.77171
13804,,1867,10.39549
13805,,1868,14.46323
13806,,1869,14.94143


In [79]:

df.to_csv(f'data/out/{saveName}.csv', index=False)