## Funções

### Parte 2


In [33]:
def read_csv_dict(name_file: str, sep: str = ',') -> dict:
    """
    Read a delimited text file into a dictionary of column lists.

    The first line of the file is treated as the header: each of its fields
    becomes a key of the returned dictionary. Every following line is split
    on ``sep`` and its values are appended, in order, to the list of the
    matching column. Lines whose number of fields differs from the header
    are skipped.

    Values are always kept as strings. Splitting is done with plain
    ``str.split``, so quoting/escaping of the separator is not interpreted.
    The file is opened with the platform's default text encoding.

    Args:
        name_file (str): Path or name of the CSV file to read.
        sep (str, optional): Column separator. Defaults to ','.

    Returns:
        dict: Maps each column name to the list of that column's values,
            one entry per accepted data line. An empty dictionary is
            returned when the file contains no lines at all.

    Example:
        >>> dados = read_csv_dict('vendas.csv', sep=';')
        >>> print(dados)
        {'Produto': ['Teclado', 'Mouse'], 'Preço': ['150', '80']}
    """
    # Read everything into memory; the context manager guarantees the file
    # is closed. Only the line terminator is stripped, so the last line
    # keeps its final character even when the file has no trailing newline.
    with open(name_file) as file:
        lines = [line.rstrip('\n') for line in file]

    # An empty file has no header, hence no columns.
    if not lines:
        return {}

    # The first line holds the column names; the rest are data rows.
    header, *rows = lines
    fields = header.split(sep)

    # One empty list per column, ready to receive the values.
    dados = {campo: [] for campo in fields}

    for row in rows:
        infos = row.split(sep)

        # Skip rows that do not match the header's number of columns.
        if len(infos) != len(fields):
            continue

        for campo, valor in zip(fields, infos):
            dados[campo].append(valor)

    return dados

In [27]:
# Load the salary dataset using the default ',' separator.
name_file = '../files/Dataset salary 2024.csv'

# Result maps each header field to the list of that column's values.
dados_processados = read_csv_dict(name_file)

In [28]:
# Inspect one column; the values come back as strings, not numbers.
dados_processados['salary_in_usd']

['202730',
 '92118',
 '130500',
 '96000',
 '190000',
 '160000',
 '400000',
 '65000',
 '101520',
 '45864',
 '172469',
 '114945',
 '200000',
 '150000',
 '156450',
 '119200',
 '170000',
 '130000',
 '222200',
 '136000',
 '128000',
 '81500',
 '93838',
 '69535',
 '75000',
 '65000',
 '150000',
 '125000',
 '260570',
 '167030',
 '120000',
 '100000',
 '190000',
 '155000',
 '174000',
 '116000',
 '100000',
 '80000',
 '145000',
 '95000',
 '120000',
 '70000',
 '130000',
 '110000',
 '240000',
 '180000',
 '202800',
 '115000',
 '100000',
 '56250',
 '69230',
 '57692',
 '112300',
 '75100',
 '178200',
 '87000',
 '138000',
 '72000',
 '252000',
 '168000',
 '140000',
 '130000',
 '190000',
 '100000',
 '150650',
 '106700',
 '192300',
 '107900',
 '115000',
 '95500',
 '195000',
 '160000',
 '257600',
 '146000',
 '90000',
 '70000',
 '122222',
 '77777',
 '158200',
 '74100',
 '231400',
 '170955',
 '137000',
 '60300',
 '101098',
 '59469',
 '166363',
 '103977',
 '166000',
 '104000',
 '170400',
 '123800',
 '94763',
 '6

In [29]:
# Load a second dataset; this rebinds name_file and dados_processados.
name_file = '../files/student_performance_data.csv'

dados_processados = read_csv_dict(name_file)

# Display the whole dictionary of columns.
dados_processados

{'StudentID': ['1001',
  '1002',
  '1003',
  '1004',
  '1005',
  '1006',
  '1007',
  '1008',
  '1009',
  '1010',
  '1011',
  '1012',
  '1013',
  '1014',
  '1015',
  '1016',
  '1017',
  '1018',
  '1019',
  '1020',
  '1021',
  '1022',
  '1023',
  '1024',
  '1025',
  '1026',
  '1027',
  '1028',
  '1029',
  '1030',
  '1031',
  '1032',
  '1033',
  '1034',
  '1035',
  '1036',
  '1037',
  '1038',
  '1039',
  '1040',
  '1041',
  '1042',
  '1043',
  '1044',
  '1045',
  '1046',
  '1047',
  '1048',
  '1049',
  '1050',
  '1051',
  '1052',
  '1053',
  '1054',
  '1055',
  '1056',
  '1057',
  '1058',
  '1059',
  '1060',
  '1061',
  '1062',
  '1063',
  '1064',
  '1065',
  '1066',
  '1067',
  '1068',
  '1069',
  '1070',
  '1071',
  '1072',
  '1073',
  '1074',
  '1075',
  '1076',
  '1077',
  '1078',
  '1079',
  '1080',
  '1081',
  '1082',
  '1083',
  '1084',
  '1085',
  '1086',
  '1087',
  '1088',
  '1089',
  '1090',
  '1091',
  '1092',
  '1093',
  '1094',
  '1095',
  '1096',
  '1097',
  '1098',
  '1099

In [30]:
# Inspect the 'Age' column; the values are strings.
dados_processados['Age']

['17',
 '18',
 '15',
 '17',
 '17',
 '18',
 '15',
 '15',
 '17',
 '16',
 '17',
 '17',
 '17',
 '17',
 '18',
 '15',
 '18',
 '18',
 '18',
 '17',
 '16',
 '15',
 '16',
 '18',
 '18',
 '16',
 '16',
 '16',
 '18',
 '18',
 '15',
 '15',
 '18',
 '16',
 '16',
 '15',
 '18',
 '15',
 '15',
 '17',
 '17',
 '17',
 '16',
 '18',
 '18',
 '18',
 '18',
 '17',
 '16',
 '16',
 '17',
 '16',
 '17',
 '18',
 '17',
 '18',
 '18',
 '15',
 '17',
 '15',
 '17',
 '17',
 '15',
 '15',
 '17',
 '16',
 '18',
 '15',
 '18',
 '16',
 '16',
 '16',
 '15',
 '16',
 '15',
 '16',
 '18',
 '18',
 '17',
 '18',
 '17',
 '18',
 '15',
 '18',
 '17',
 '17',
 '16',
 '15',
 '18',
 '16',
 '18',
 '18',
 '16',
 '16',
 '16',
 '16',
 '16',
 '18',
 '16',
 '15',
 '17',
 '16',
 '16',
 '18',
 '16',
 '16',
 '16',
 '18',
 '16',
 '17',
 '18',
 '17',
 '18',
 '16',
 '17',
 '18',
 '15',
 '16',
 '18',
 '15',
 '18',
 '15',
 '16',
 '17',
 '15',
 '18',
 '16',
 '15',
 '18',
 '18',
 '18',
 '15',
 '15',
 '15',
 '17',
 '15',
 '15',
 '15',
 '17',
 '15',
 '18',
 '15',
 '18',

In [31]:
# Load the world population dataset.
file_name = '../files/populacao_mundial.csv'

# ';' is passed positionally as the column separator (the `sep` parameter).
populacao = read_csv_dict(file_name, ';')

In [32]:
# Inspect the 'Population' column; the values are strings.
populacao['Population']

['1428627663',
 '1425671352',
 '339996563',
 '277534122',
 '240485658',
 '223804632',
 '216422446',
 '172954319',
 '144444359',
 '128455567',
 '126527060',
 '123294513',
 '117337368',
 '112716598',
 '102262808',
 '98858950',
 '89172767',
 '85816199',
 '83294633',
 '71801279',
 '67736802',
 '67438106',
 '64756584',
 '60414495',
 '58870762',
 '55100586',
 '54577997',
 '52085168',
 '51784059',
 '48582334',
 '48109006',
 '47519628',
 '45773884',
 '45606480',
 '45504560',
 '42239854',
 '41026067',
 '38781291',
 '37840044',
 '36947025',
 '36744634',
 '36684202',
 '35163944',
 '34449825',
 '34352719',
 '34308525',
 '34121985',
 '33897354',
 '30896590',
 '30325732',
 '28873034',
 '28838499',
 '28647293',
 '27202843',
 '26439111',
 '26160821',
 '23923276',
 '23293698',
 '23251485',
 '23227014',
 '21893579',
 '20931751',
 '20569737',
 '19892812',
 '19629590',
 '19606633',
 '18278568',
 '18190484',
 '18143378',
 '18092026',
 '17763163',
 '17618299',
 '16944826',
 '16665409',
 '14190612',
 '140946