In [2]:
# Importazione e dati vari
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from utils.constant import DIR_CLEANED_DATA, DIR_BAR, DIR_CARTESIAN
from time import time
from tqdm import tqdm
import os


# Definire le fasce d'età
age_columns = {
    'Age_0_9': "Fascia 0-9 anni", 
    'Age_9_19': "Fascia 9-19 anni", 
    'Age_19_29': "Fascia 19-29 anni", 
    'Age_29_39': "Fascia 29-39 anni", 
    'Age_39_49': "Fascia 39-49 anni",
    'Age_49_59': "Fascia 49-59 anni", 
    'Age_59_69': "Fascia 59-69 anni", 
    'Age_69_79': "Fascia 69-79 anni", 
    'Age_79_89': "Fascia 79-89 anni", 
    'Age_89_99': "Fascia 89-99 anni", 
    'Age_99_109': "Fascia 99+ anni"
}

# Dati utili per creare i grafici
file_path = DIR_CLEANED_DATA + 'merged_dataset1726321073.parquet'
data = pd.read_parquet(file_path)

### Creazione Istogrammi per fascie di età divisi tra maschi e femmine in ogni anno
Tutti i dati sono salvati in `charts/bar/population_hist_<numero>`

In [3]:
def plot_population_by_age_and_sex(data, year_to_analyze, territory_to_analyze, output_dir):
    data_filtered = data[(data['Year'] == year_to_analyze) & (data['Territory'] == territory_to_analyze)]

    male_data = data_filtered[data_filtered['Sex'] == 1]
    female_data = data_filtered[data_filtered['Sex'] == 2]


    # Assicurati che le colonne esistano nel dataframe
    age_keys = list(age_columns.keys())
    if not all(key in male_data.columns for key in age_keys) or not all(key in female_data.columns for key in age_keys):
        raise ValueError("Le colonne delle fasce di età non sono presenti nel dataframe.")

    male_population = male_data[age_keys].sum().values
    female_population = female_data[age_keys].sum().values

    bar_width = 0.4
    indices = np.arange(len(age_columns))

    plt.figure(figsize=(10, 8))

    plt.barh(indices - bar_width/2, male_population, bar_width, color='blue', label='Maschi')
    plt.barh(indices + bar_width/2, female_population, bar_width, color='violet', label='Femmine')

    plt.yticks(indices, list(age_columns.values()))

    plt.xlabel('Popolazione')
    plt.ylabel('Fascia di Età')
    plt.title(f'Popolazione per Fascia di Età e Sesso in {territory_to_analyze} nell\'anno {year_to_analyze}')

    plt.legend()
    plt.grid(axis='x', linestyle='--')

    # Salvare il grafico in formato JPEG
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    plt.savefig(os.path.join(output_dir, f'population_{year_to_analyze}.jpeg'))
    plt.close()


# Esempio di utilizzo della funzione
print("Starting create charts...")
output_dir = DIR_BAR + f'population_hist_{int(time())}'

for year in tqdm(range(1952, 2024)): 
    plot_population_by_age_and_sex(data, year, 'Italia', output_dir)

print("Done!")

Starting create charts...


100%|██████████| 72/72 [00:12<00:00,  5.95it/s]

Done!





## Creazione del grafico cartesiono per ogni fascia di eta

In [15]:
def plot_age_group_growth(data, age_start, age_end, territory, output_dir):
    years = np.arange(1952, 2025)
    
    format_col_age = f"Age_{age_start}_{age_end}"

    if format_col_age not in age_columns.keys():
        raise Exception()
    
    age_group = data[(data['Territory'] == territory)]
    
    male_population = []
    female_population = []
    
    for year in years:
        year_data = age_group[age_group['Year'] == year]
        male_population.append(year_data[year_data['Sex'] == 1][format_col_age].values[0])
        female_population.append(year_data[year_data['Sex'] == 2][format_col_age].values[0])
    

print("Starting drawing...")

print("Starting create charts...")
output_dir = DIR_CARTESIAN + f'age_popolation_{int(time())}'

ante = 0
for post in range(9, 99, 10):
    plot_age_group_growth(data, ante, post, "Italia", output_dir)
    ante = post

print("Done!")


Starting drawing...
Starting create charts...
7621822
7624632
7683768
7771660
7678724
7593568
7500956
7472352
7484078
7540808
7607882
7724870
7860490
8042718
8205202
8339678
8438030
8489902
8484406
8446080
8368512
8344348
8259424
8193988
8087504
7975588
7817574
7610454
7389314
7125206
6821992
6575484
6313433
6076667
5876823
5692170
5533383
5433934
5344526
5266584
4959505
4976680
4977755
4970444
4972818
4994317
4989410
4973326
4945108
4911794
4854974
4882075
4930259
4995620
5048134
5096021
5153182
5203624
5217309
5218895
5200709
5187836
5136122
5061557
4966141
4855364
4733459
4598942
8928808
8753814
8500008
8303964
4050099
8318644
8241720
8158850
8050066
8141712
8204922
8239440
8218438
8181028
8197876
8195154
8196468
8239786
8332690
8246132
8180728
8090922
8106900
8143278
8249500
8370132
8525074
8711786
8879038
9033520
9151230
9260994
9346630
9376970
9403202
9419622
9351085
9225088
9081232
8897835
8703050
8494720
8234260
7977152
7696644
7379637
7095492
6802547
6546758
6326104
6153959
60