<a href="https://colab.research.google.com/github/olilambourn/olilambourn.github.io/blob/main/OECDGDPForecast.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd  # library for data analysis
import requests  # library to handle requests
from bs4 import BeautifulSoup  # library to parse HTML documents
import numpy as np

# Customised download from https://stats.oecd.org/index.aspx?DataSetCode=EO
# Downloading real GDP figures over time in USD for all available countries.
# Importing the csv file as a dataframe
# Read the downloaded CSV, keep only the location / year / value columns,
# and restrict the rows to the seven location codes listed below.
df = (
    pd.read_csv('OECDREALGDPOUTLOOK.csv')[['LOCATION', 'TIME', 'Value']]
    .loc[lambda frame: frame['LOCATION'].isin(
        ['FRA', 'DEU', 'GBR', 'USA', 'CHN', 'JPN', 'IND'])]
)

# Bare last expression so the notebook renders the filtered frame
df

Unnamed: 0,LOCATION,TIME,Value
0,FRA,2020,2.669280e+06
1,FRA,2021,2.823768e+06
2,FRA,2022,2.935513e+06
3,FRA,2023,2.992562e+06
4,FRA,2024,3.034071e+06
...,...,...,...
282,IND,2056,3.853272e+07
283,IND,2057,3.943112e+07
284,IND,2058,4.034179e+07
285,IND,2059,4.126573e+07


In [None]:
import json

# Flatten the dataframe into one dict per row, keyed by column name
data = df.to_dict('records')

# Per-location series: {location: {'data': [{'x': time, 'y': value}, ...]}}
graphs = {}

for record in data:
    # setdefault creates the empty series the first time a location appears
    # and hands back the existing one on every later row
    series = graphs.setdefault(record['LOCATION'], {'data': []})
    series['data'].append({'x': record['TIME'], 'y': record['Value']})

# Serialise the nested structure to a JSON string and show it
json_data = json.dumps(graphs)
print(json_data)


{"FRA": {"data": [{"x": 2020, "y": 2669280.2573897}, {"x": 2021, "y": 2823768.09620666}, {"x": 2022, "y": 2935513.43703451}, {"x": 2023, "y": 2992562.0}, {"x": 2024, "y": 3034071.0}, {"x": 2025, "y": 3071890.0}, {"x": 2026, "y": 3109641.0}, {"x": 2027, "y": 3148105.0}, {"x": 2028, "y": 3187267.0}, {"x": 2029, "y": 3226815.0}, {"x": 2030, "y": 3266564.0}, {"x": 2031, "y": 3306480.0}, {"x": 2032, "y": 3346445.0}, {"x": 2033, "y": 3386504.0}, {"x": 2034, "y": 3426783.0}, {"x": 2035, "y": 3467468.0}, {"x": 2036, "y": 3508727.0}, {"x": 2037, "y": 3550510.0}, {"x": 2038, "y": 3592766.0}, {"x": 2039, "y": 3635469.0}, {"x": 2040, "y": 3678671.0}, {"x": 2041, "y": 3722469.0}, {"x": 2042, "y": 3766844.0}, {"x": 2043, "y": 3811787.0}, {"x": 2044, "y": 3857318.0}, {"x": 2045, "y": 3903525.0}, {"x": 2046, "y": 3950559.0}, {"x": 2047, "y": 3998416.0}, {"x": 2048, "y": 4047162.0}, {"x": 2049, "y": 4096935.0}, {"x": 2050, "y": 4147962.0}, {"x": 2051, "y": 4200476.0}, {"x": 2052, "y": 4254467.0}, {"x":

In [None]:
import json

# Decode the JSON text back into nested dicts and lists
parsed_json = json.loads(json_data)

# Print each location as a heading, followed by its indented data points
for location in parsed_json:
    print(f'Location: {location}')

    for point in parsed_json[location]['data']:
        print(f'  Time: {point["x"]}, Value: {point["y"]}')



Location: FRA
  Time: 2020, Value: 2669280.2573897
  Time: 2021, Value: 2823768.09620666
  Time: 2022, Value: 2935513.43703451
  Time: 2023, Value: 2992562.0
  Time: 2024, Value: 3034071.0
  Time: 2025, Value: 3071890.0
  Time: 2026, Value: 3109641.0
  Time: 2027, Value: 3148105.0
  Time: 2028, Value: 3187267.0
  Time: 2029, Value: 3226815.0
  Time: 2030, Value: 3266564.0
  Time: 2031, Value: 3306480.0
  Time: 2032, Value: 3346445.0
  Time: 2033, Value: 3386504.0
  Time: 2034, Value: 3426783.0
  Time: 2035, Value: 3467468.0
  Time: 2036, Value: 3508727.0
  Time: 2037, Value: 3550510.0
  Time: 2038, Value: 3592766.0
  Time: 2039, Value: 3635469.0
  Time: 2040, Value: 3678671.0
  Time: 2041, Value: 3722469.0
  Time: 2042, Value: 3766844.0
  Time: 2043, Value: 3811787.0
  Time: 2044, Value: 3857318.0
  Time: 2045, Value: 3903525.0
  Time: 2046, Value: 3950559.0
  Time: 2047, Value: 3998416.0
  Time: 2048, Value: 4047162.0
  Time: 2049, Value: 4096935.0
  Time: 2050, Value: 4147962.0
  Tim

In [31]:
import csv

# Export the filtered frame to data.csv with title-cased column headers.
# newline='' leaves line-ending handling to the csv module.
with open('data.csv', 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=['Location', 'Time', 'Value'])
    writer.writeheader()

    # Map each dataframe row onto the output header names and write them all
    writer.writerows(
        {'Location': record['LOCATION'], 'Time': record['TIME'], 'Value': record['Value']}
        for _, record in df.iterrows()
    )