# In [6]:
import codecs
import json
import csv

# Per-year aggregates, one entry per input file, shaped as:
#   {'year': <int>, 'data': {<neighbourhood code>: {'total_age': <float>, 'count': <int>}}}
aggregated_data = []

# One CSV per year. range() excludes its stop value, so this covers 2018 through 2022.
for year in range(2018, 2023):
    filename = f'{year}_edificacions_edat_mitjana.csv'
    # 'utf-8-sig' drops a leading BOM if the file has one. newline='' leaves line-ending
    # handling to the csv module, which is what the csv documentation asks for.
    with open(filename, 'r', encoding='utf-8-sig', newline='') as file:
        csv_reader = csv.reader(file)
        # Skip the header row
        next(csv_reader)

        # Running totals for the current year, keyed by neighbourhood code
        year_data = {'year': year, 'data': {}}

        for row in csv_reader:
            # csv.reader yields an empty list for blank lines; skip those instead of
            # failing on the unpacking below
            if not row:
                continue

            # Every record must have exactly seven fields. Only the fourth (used as the
            # neighbourhood code) and the seventh (the age value) are needed here.
            _, _, _, codi_barri, _, _, age = row

            # Accumulate the sum and the number of rows so a mean can be derived later
            totals = year_data['data'].setdefault(codi_barri, {'total_age': 0.0, 'count': 0})
            totals['total_age'] += float(age)
            totals['count'] += 1

        # Keep the aggregated data for the current year
        aggregated_data.append(year_data)

# Neighbourhood and district details keyed by neighbourhood code. The reference file is read
# once up front instead of being reopened and rescanned for every neighbourhood.
barri_details = {}
filename = '2023_edificacions_edat_mitjana.csv'
with open(filename, 'r', encoding='utf-8-sig', newline='') as file:
    csv_reader = csv.reader(file)
    next(csv_reader)  # Skip the header row

    for row in csv_reader:
        # csv.reader yields an empty list for blank lines; nothing to extract from those
        if not row:
            continue
        _, districte, nom_districte, current_codi_barri, nom_barri, _, _ = row
        # Keep the first row seen for each code; later rows for the same code are ignored
        barri_details.setdefault(current_codi_barri, (nom_barri, districte, nom_districte))

# Every neighbourhood code that occurs in any year, in first-seen order. Using dict keys
# removes repeats, so each code produces exactly one output record (one unique '_id').
codis_barri = dict.fromkeys(
    codi_barri for entry in aggregated_data for codi_barri in entry['data']
)

# Final JSON structure: one record per neighbourhood code
final_data = []

for codi_barri in codis_barri:
    # Codes missing from the reference file keep empty-string details
    nom_barri, districte, nom_districte = barri_details.get(codi_barri, ('', '', ''))
    bari_data = {
        '_id': codi_barri,
        'neigh_name': nom_barri,
        'district_id': districte,
        'district_name': nom_districte,
        'info': [],
    }

    # One {'year', 'mean_age'} item per aggregated year, in the order the years were read
    for year_entry in aggregated_data:
        age_data = year_entry['data'].get(codi_barri)
        # A year with no rows for this neighbourhood is reported with a mean age of 0
        mean_age = age_data['total_age'] / age_data['count'] if age_data else 0
        bari_data['info'].append({'year': year_entry['year'], 'mean_age': mean_age})

    final_data.append(bari_data)

# Serialise the collected records and store them as UTF-8 JSON. ensure_ascii=False keeps
# non-ASCII characters as-is instead of emitting \uXXXX escapes.
with open('building_age.json', 'w', encoding='utf-8') as out:
    out.write(json.dumps(final_data, ensure_ascii=False))
