In [1]:
import pandas as pd
# Read the raw economy dataset from the project folder.
economy_csv_path = "C:\\Users\\krist\\OneDrive\\Visualisation project\\economy_data.csv"
df_economy = pd.read_csv(economy_csv_path)

In [3]:
from dateutil.parser import parse

def fiscal_year_to_md(fy_string):
    """
    Convert a fiscal-year description into month-day boundary strings.

    Accepted inputs (case-insensitive, surrounding whitespace ignored):
      * 'calendar year'         -> ('01-01', '12-31')
      * 'day Month - day Month' -> e.g. '1 July - 30 June' -> ('07-01', '06-30')
        The separator may be a hyphen, an en dash or an em dash.

    Parameters
    ----------
    fy_string : str or missing value
        Raw fiscal-year text. Any value for which ``pd.isna`` is true is
        treated as missing.

    Returns
    -------
    tuple
        ``(start, end)`` as zero-padded 'MM-DD' strings (no year), or
        ``(None, None)`` when the value is missing, does not have exactly
        two dash-separated parts, or either part cannot be parsed as a date.
    """
    from datetime import datetime

    if pd.isna(fy_string):
        return None, None

    fy_string = str(fy_string).strip().lower()

    if fy_string == "calendar year":
        return "01-01", "12-31"

    # Treat en/em dashes like a plain hyphen so '1 April – 31 March' is
    # recognised instead of silently falling through to (None, None).
    parts = fy_string.replace("\u2013", "-").replace("\u2014", "-").split("-")
    if len(parts) != 2:
        # No separator, or more than one: not a 'start - end' pair.
        return None, None

    # dateutil copies any field missing from the text out of `default`
    # (today's date when omitted). Pinning a leap year keeps '29 February'
    # valid and makes the result independent of the day the notebook runs.
    leap_year_default = datetime(2000, 1, 1)

    try:
        start_md, end_md = (
            parse(part.strip(), dayfirst=True, default=leap_year_default)
            for part in parts
        )
    except (ValueError, OverflowError):
        # Unparseable text gets the same "unknown" result as any other
        # unrecognised value, rather than aborting a whole-column .apply().
        return None, None

    start_str = f"{start_md.month:02d}-{start_md.day:02d}"
    end_str = f"{end_md.month:02d}-{end_md.day:02d}"

    return start_str, end_str

# Run the converter over every row; each result is a (start, end) pair.
fiscal_bounds = df_economy["Fiscal_Year"].apply(fiscal_year_to_md)

# Transpose the pairs into one tuple of starts and one tuple of ends.
fy_starts, fy_ends = zip(*fiscal_bounds)
df_economy["Fiscal_Year_Start_Date"] = fy_starts
df_economy["Fiscal_Year_End_Date"] = fy_ends

# The raw text column is superseded by the two derived columns.
df_economy = df_economy.drop(columns=['Fiscal_Year'])

In [9]:
# Move the two fiscal-year columns so they sit directly after
# Public_Debt_percent_of_GDP.
fiscal_cols = ["Fiscal_Year_Start_Date", "Fiscal_Year_End_Date"]

# Column order with the fiscal-year columns taken out, wherever they are.
cols = [name for name in df_economy.columns if name not in fiscal_cols]

# Compute the insertion point on the list that no longer contains the moved
# columns. Computing it before the removal is only correct when both moved
# columns already sit after the anchor column.
insert_at = cols.index("Public_Debt_percent_of_GDP") + 1

# Splice them back in, start date first.
cols[insert_at:insert_at] = fiscal_cols

# Reorder the dataframe; a missing fiscal-year column raises KeyError here.
df_economy = df_economy[cols]


In [11]:
# Show the reordered frame as the cell output to check the new column order.
df_economy

Unnamed: 0,Country,Real_GDP_PPP_billion_USD,GDP_Official_Exchange_Rate_billion_USD,Real_GDP_Growth_Rate_percent,Real_GDP_per_Capita_USD,Unemployment_Rate_percent,Youth_Unemployment_Rate_percent,Budget_billion_USD,Budget_Surplus_billion_USD,Budget_Deficit_percent_of_GDP,Public_Debt_percent_of_GDP,Fiscal_Year_Start_Date,Fiscal_Year_End_Date,Exports_billion_USD,Imports_billion_USD,Exchange_Rate_per_USD,Population_Below_Poverty_Line_percent
0,AFGHANISTAN,60.80,20.24,-20.74,1500.0,13.28,20.2,5.09,15.1,-15.1,7.00,03-21,03-20,1.48,6.980000e+00,76.814,54.5
1,AKROTIRI,,,,,,,,,,,,,,,,
2,ALBANIA,40.82,15.27,8.52,14500.0,11.82,27.8,4.19,2.0,-2.0,84.06,01-01,12-31,5.61,8.000000e+00,103.520,14.3
3,ALGERIA,487.72,169.91,3.50,11000.0,12.70,31.9,55.19,9.6,-9.6,27.50,01-01,12-31,41.78,4.432000e+01,135.064,5.5
4,AMERICAN SAMOA,658.00,658.00,-1.87,11200.0,29.80,,249.00,2.1,-2.1,12.20,10-01,09-30,428.00,6.150000e+02,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
254,WEST BANK,27.78,9.83,7.05,5600.0,24.90,39.6,3.80,0.4,0.4,24.40,01-01,12-31,3.18,1.024000e+01,3.606,18.0
255,WORLD,134080.00,80270.00,5.87,17000.0,6.18,17.9,21680.00,3.0,-3.0,67.20,,,28108.00,2.712016e+13,,
256,YEMEN,73.63,54.36,-5.90,2500.0,13.57,25.5,2.21,5.2,-5.2,74.50,01-01,12-31,384.50,4.080000e+00,1.000,48.6
257,ZAMBIA,63.03,25.71,4.60,3200.0,13.03,26.1,4.76,7.3,-7.3,103.70,01-01,12-31,11.72,7.700000e+00,20.018,54.4


In [29]:
# Write the cleaned economy table to disk; index=False leaves the row index out of the file.
economy_cleaned_csv_path = "C:\\Users\\krist\\OneDrive\\Visualisation project\\economy_data_cleaned.csv"
df_economy.to_csv(economy_cleaned_csv_path, index=False)

In [25]:
df_demographics = pd.read_csv('C:\\Users\\krist\\OneDrive\\Visualisation project\\demographics_data.csv')

# Columns whose raw values may carry a '%' sign
percent_cols = [
    'Population_Growth_Rate',
    'Total_Literacy_Rate',
    'Male_Literacy_Rate',
    'Female_Literacy_Rate',
    'Youth_Unemployment_Rate',
]

# Only the percentage columns that are actually present in this file
present_percent_cols = [name for name in percent_cols if name in df_demographics.columns]

# Strip the literal '%' and convert to numbers; anything that still is not
# numeric afterwards (including missing values) becomes NaN.
for col in present_percent_cols:
    without_sign = df_demographics[col].astype(str).str.replace('%', '', regex=False)
    df_demographics[col] = pd.to_numeric(without_sign, errors='coerce')

# Record the unit in the column name now that it is gone from the values
rename_map = {name: f"{name} [%]" for name in present_percent_cols}
df_demographics = df_demographics.rename(columns=rename_map)


In [27]:
# Show the cleaned frame as the cell output to check the converted and renamed percentage columns.
df_demographics

Unnamed: 0,Country,Total_Population,Population_Growth_Rate [%],Birth_Rate,Death_Rate,Net_Migration_Rate,Median_Age,Sex_Ratio,Infant_Mortality_Rate,Total_Fertility_Rate,Total_Literacy_Rate [%],Male_Literacy_Rate [%],Female_Literacy_Rate [%],Youth_Unemployment_Rate [%]
0,AFGHANISTAN,39232003,2.26,34.79,12.08,0.10,19.5,1.02,103.06,4.53,37.3,39.4,7.2,20.2
1,AKROTIRI,,,,,,,,,,,,,
2,ALBANIA,3101621,0.19,12.48,7.36,3.22,34.3,0.97,10.54,1.55,98.4,38.8,6.0,27.8
3,ALGERIA,44758398,1.27,17.84,4.33,0.81,28.9,1.03,19.22,2.47,81.4,41.3,0.7,31.9
4,AMERICAN SAMOA,44620,1.74,16.18,6.19,27.36,27.2,0.99,9.87,2.13,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
254,WEST BANK,3176549,2.10,28.30,3.50,3.90,21.7,1.03,16.10,3.54,97.5,98.8,96.2,39.6
255,WORLD,7979261010,1.03,18.10,7.70,,31.0,1.01,30.80,2.42,86.7,90.1,83.3,17.9
256,YEMEN,31565602,1.83,24.10,5.50,0.20,21.6,1.02,45.50,2.91,70.1,32.5,8.1,25.5
257,ZAMBIA,20216029,2.86,34.50,6.00,0.20,18.2,1.00,36.30,4.49,86.7,25.1,3.7,26.1


In [31]:
# Write the cleaned demographics table to disk; index=False leaves the row index out of the file.
demographics_cleaned_csv_path = "C:\\Users\\krist\\OneDrive\\Visualisation project\\demographics_data_cleaned.csv"
df_demographics.to_csv(demographics_cleaned_csv_path, index=False)