In [1]:
import re
import typing as t
import numpy as np

In [3]:
class TableCsv:
    def __init__(self,
                 columns: t.List[str],
                 values: t.List[t.List[t.Any]]):
        self.columns = columns
        self.values = values


def remove_commas_in_quotes(line):
    return re.sub(r'"[^"]*"', lambda m: m.group(0).replace(',', ''), line)

class Table:
    def __init__(self,
                 table_name: str,
                 csv_location: str,
                 attributes: t.Dict[str, t.Any],
                 ):
        self.table_name = table_name
        self.csv_location = csv_location
        self.attributes = attributes

    @staticmethod
    def parse_table(csv_path: str) -> 'Table':
        with open(csv_path, 'r', encoding='utf-8', errors='replace') as f:
            raw_contents = f.readlines()
        table_name = raw_contents[0].replace(',', '').strip()
        attributes: t.Dict[str, t.Any] = {}
        current_block = []
        first_table = True
        for line in raw_contents[1:]:
            cleaned_line = remove_commas_in_quotes(line)
            cleaned_line = cleaned_line.replace('"', '').strip()
            if cleaned_line.replace(',', '').strip() == '':
                # start of the next attribute
                if len(current_block) <= 0:
                    continue
                sub_table_name = [v.strip() for v in current_block[0].split(',') if v.strip() != '']
                if (first_table or len(sub_table_name) == 1) and len(current_block) >= 3:
                    # treat it as a csv table.
                    if first_table:
                        sub_table_name = table_name
                    else:
                        sub_table_name = sub_table_name[0]
                    columns = [col.strip() for col in current_block[1].split(',')]
                    values = []
                    for l in current_block[2:]:
                        value = [val.strip() for val in l.split(',')]
                        values.append(value)
                    attributes[sub_table_name] = TableCsv(columns, values)
                else:
                    # treat it an unorganized texts
                    for l in current_block:
                        attr_name = l.split(',')[0]
                        attr_content = '|'.join([v.strip() for v in l.split(',')[1:] if v.strip() != ''])
                        attributes[attr_name] = attr_content
                current_block = []
                first_table = False
            else:
                current_block.append(cleaned_line)
        return Table(table_name, csv_path, attributes)

In [4]:
table1 = Table.parse_table("raw/CSVs/2024_CSN_State_Identity_Theft_Reports.csv")

In [8]:
def solve_q5() -> float:
    sub_table1 = table1.attributes['State: Identity Theft Reports']
    total_alamaba = 0
    for row in sub_table1.values:
        if row[0] == 'Alabama':
            total_alamaba += int(row[2])
    return total_alamaba
solve_q5()

15387

In [7]:
table1.attributes['State: Identity Theft Reports'].values

[['Alabama', 'Credit Card', '5245', '39%'],
 ['Alabama', 'Employment or Tax-Related', '746', '5%'],
 ['Alabama', 'Government Documents or Benefits', '696', '5%'],
 ['Alabama', 'Loan or Lease', '2846', '21%'],
 ['Alabama', 'Other Identity Theft', '4842', '36%'],
 ['Alabama', 'Phone or Utilities', '1012', '7%'],
 ['Alaska', 'Bank Account', '131', '17%'],
 ['Alaska', 'Credit Card', '252', '32%'],
 ['Alaska', 'Employment or Tax-Related', '69', '9%'],
 ['Alaska', 'Government Documents or Benefits', '79', '10%'],
 ['Alaska', 'Loan or Lease', '116', '15%'],
 ['Alaska', 'Other Identity Theft', '219', '28%'],
 ['Alaska', 'Phone or Utilities', '72', '9%'],
 ['Arizona', 'Bank Account', '1987', '10%'],
 ['Arizona', 'Credit Card', '8097', '39%'],
 ['Arizona', 'Employment or Tax-Related', '2502', '12%'],
 ['Arizona', 'Government Documents or Benefits', '1134', '5%'],
 ['Arizona', 'Loan or Lease', '3541', '17%'],
 ['Arizona', 'Other Identity Theft', '5617', '27%'],
 ['Arizona', 'Phone or Utilities', 

Q1:
