In [142]:
def get_database():
    class CustomCSVParser:
        def __init__(self, delimiter=','):
            self.delimiter = delimiter
            self.db = self.MyCustomDB()

        def load(self):
            self._create_tables()
            self._load_all_csvs()
            return self.db

        def _create_tables(self):
            self.db.create_table("zip_code", primary_key="Zip_Code_ID", indexes=["Zip_Code"])
            self.db.create_table("demographics_info", primary_key="Demographics_Info_ID", indexes=["F_Zip_Code_ID"])
            self.db.create_table("inspection_info", primary_key="Inspection_Info_ID", indexes=["F_Restaurant_Info_ID"])
            self.db.create_table("restaurant_info", primary_key="Restaurant_Info_ID", indexes=["F_Zip_Code_ID", "Restaurant_Name", "Categories"])

        def _load_all_csvs(self):
            self.load_csv("../data/zip_code.csv", table_name="zip_code")
            self.load_csv("../data/demographics_info.csv", table_name="demographics_info")
            self.load_csv("../data/inspection_info.csv", table_name="inspection_info")
            self.load_csv("../data/restaurant_info.csv", table_name="restaurant_info")

        def load_csv(self, filepath, table_name):
            import csv
            def infer(value):
                value = value.strip()
                if not value:
                    return None
                for cast in (int, float):
                    try:
                        return cast(value)
                    except ValueError:
                        continue
                return value

            with open(filepath, 'r', encoding='utf-8', newline='') as f:
                reader = csv.reader(f, delimiter=self.delimiter)
                headers = [h.strip() for h in next(reader)]

                for row_values in reader:
                    values = [infer(val) for val in row_values]
                    row = dict(zip(headers, values))
                    self.db.insert(table_name, row)

        class MyCustomDB:
            def __init__(self):
                self.database = {}

            def create_table(self, name, primary_key="id", indexes=None, foreign_keys=None):
                self.database[name] = {
                    "rows": {},
                    "next_id": 1,
                    "primary_key": primary_key,
                    "indexes": {col: {} for col in (indexes or [])},
                    "foreign_keys": foreign_keys or {}
                }

            def insert(self, table_name, row):
                table = self.database[table_name]
                pk = table["primary_key"]

                if pk not in row:
                    row[pk] = table["next_id"]
                    table["next_id"] += 1

                for fk_col, (ref_table, ref_col) in table["foreign_keys"].items():
                    if row[fk_col] not in self.database[ref_table]["indexes"].get(ref_col, {}):
                        raise ValueError(f"Foreign key constraint failed: {fk_col}={row[fk_col]} not found in {ref_table}.{ref_col}")

                key = row[pk]
                table["rows"][key] = row

                for index_col in table["indexes"]:
                    val = row.get(index_col)
                    if val is not None:
                        if val not in table["indexes"][index_col]:
                            table["indexes"][index_col][val] = []
                        table["indexes"][index_col][val].append(key)

            def get_all(self, table_name):
                return list(self.database[table_name]["rows"].values())

            def print_table(self, table_name):
                for row in self.get_all(table_name):
                    print(row)

            def print_result_table(self, rows):
                if not rows:
                    print("No data found.")
                    return

                def clean(col): return col.split('.', 1)[-1]

                raw_cols = list(rows[0].keys())
                disp_cols = [clean(c) for c in raw_cols]

                col_widths = {
                    disp_col: max(len(disp_col), max(len(str(row.get(raw_col, ""))) for row in rows))
                    for raw_col, disp_col in zip(raw_cols, disp_cols)
                }

                header = " | ".join(f"{disp_col:<{col_widths[disp_col]}}" for disp_col in disp_cols)
                separator = "-+-".join("-" * col_widths[disp_col] for disp_col in disp_cols)
                print(header)
                print(separator)

                for row in rows:
                    line = " | ".join(
                        f"{str(row.get(raw_col, '')):<{col_widths[clean(raw_col)]}}"
                        for raw_col in raw_cols
                    )
                    print(line)

            def inner_join(self, left_table, right_table, left_key, right_key):
                left_rows = self.get_all(left_table)
                right_rows = self.get_all(right_table)
                joined = []

                for l in left_rows:
                    for r in right_rows:
                        if l.get(left_key) == r.get(right_key):
                            joined.append({f"{left_table}.{k}": v for k, v in l.items()} | {f"{right_table}.{k}": v for k, v in r.items()})
                return joined

            def left_join(self, left_table, right_table, left_key, right_key):
                left_rows = self.get_all(left_table)
                right_rows = self.get_all(right_table)
                joined = []

                for l in left_rows:
                    match_found = False
                    for r in right_rows:
                        if l.get(left_key) == r.get(right_key):
                            joined.append({f"{left_table}.{k}": v for k, v in l.items()} | {f"{right_table}.{k}": v for k, v in r.items()})
                            match_found = True
                    if not match_found:
                        joined.append({f"{left_table}.{k}": v for k, v in l.items()} | {f"{right_table}.{k}": None for k in right_rows[0].keys()})
                return joined

            def select_where(self, rows, where):
                return [row for row in rows if where(row)]

            def group_by(self, rows, group_key, agg_col, agg_func, agg_func_text):
                from collections import defaultdict
                grouped = defaultdict(list)
                for row in rows:
                    group_val = row.get(group_key)
                    if group_val is not None:
                        grouped[group_val].append(row.get(agg_col))

                if group_key:
                    table, col = agg_col.split(".", 1)
                    return [
                        {
                            group_key: k,
                            f"{agg_func_text}_{table}.{col}": agg_func([v for v in vals if isinstance(v, (int, float)) and v is not None])
                        }
                        for k, vals in grouped.items()
                    ]
                else:
                    table, col = agg_col.split(".", 1)
                    return [
                        {
                            f"{agg_func_text}_{table}.{col}": agg_func([v for v in grouped[None] if isinstance(v, (int, float)) and v is not None])
                        }
                    ]

            def project_columns(self, rows, select):
                return [{col: row.get(col, None) for col in select} for row in rows]

            def order_by_rows(self, rows, order_by, descending=False):
                return sorted(rows, key=lambda r: r.get(order_by), reverse=descending)

            def select_query(self,
                             from_table,
                             joins=None,
                             where=None,
                             group_by=None,
                             agg_col=None,
                             agg_fn=None,
                             columns=None,
                             order_by=None,
                             descending=False):
                rows = list(self.database[from_table]["rows"].values())
                base = from_table

                if joins:
                    for join_table, on_keys, join_type in joins:
                        left_key, right_key = on_keys
                        if join_type == "inner":
                            rows = self.inner_join(base, join_table, left_key, right_key)
                        elif join_type == "left":
                            rows = self.left_join(base, join_table, left_key, right_key)
                        else:
                            raise ValueError("Only 'inner' and 'left' joins are supported.")
                        base = "tmpTable"
                        self.create_table(base, primary_key="id")
                        for i, row in enumerate(rows):
                            row_with_id = {"id": i + 1}
                            row_with_id.update(row)
                            self.insert(base, row_with_id)

                if where:
                    rows = self.select_where(rows, where)

                if group_by and agg_col and agg_fn:
                    func_map = {
                        "avg": lambda vals: round(sum(vals) / len(vals), 2) if vals else None,
                        "sum": lambda vals: sum(vals) if vals else 0,
                        "count": lambda vals: len(vals),
                        "max": lambda vals: max(vals) if vals else None,
                        "min": lambda vals: min(vals) if vals else None
                    }
                    if agg_fn not in func_map:
                        raise ValueError(f"Invalid agg_fn: {agg_fn}")
                    rows = self.group_by(rows, group_by, agg_col, func_map[agg_fn], agg_fn)

                if columns:
                    rows = self.project_columns(rows, columns)

                self.database.pop("tmpTable", None)
                return rows

    return CustomCSVParser().load()


In [143]:
#db.print_table("restaurant_info")
result = db.select_query(
    from_table="restaurant_info",
    joins=[("inspection_info", ("Restaurant_Info_ID", "F_Restaurant_Info_ID"), "inner")],
    group_by="restaurant_info.Categories",
    agg_col="inspection_info.Score",
    agg_fn="sum",
    columns=["restaurant_info.Categories","sum_inspection_info.Score"]
)
##db.print_table("restaurant_info")
db.print_result_table(result)
for row in result:
    print(row)

6016
<function get_database.<locals>.CustomCSVParser.MyCustomDB.select_query.<locals>.<lambda> at 0x117bda660>
Categories                                                    | Score  
--------------------------------------------------------------+--------
Mexican                                                       | 37644.0
Mexican, Breakfast & Brunch, Salvadoran                       | 187.0  
Fast Food                                                     | 845.0  
Food Trucks, American, Comfort Food                           | 92.0   
Burgers                                                       | 5076.0 
Sushi Bars                                                    | 4016.0 
Seafood, Cajun/Creole                                         | 381.0  
Soul Food, Southern                                           | 93.0   
Southern, Breakfast & Brunch, Cocktail Bars                   | 96.0   
Mexican, Seafood                                              | 2412.0 
Sports Bars, Mexican, Coc

In [150]:
result = db.select_query(
    from_table="demographics_info",
    agg_col="demographics_info.Total_Population",
    agg_fn="sum",
    columns=["sum_demographics_info.Total_Population"]
)

for row in result:
    print(row)
db.print_result_table(result)

182
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographics_info.Total_Population': None}
{'sum_demographi

In [111]:
result = db.select_query(
    from_table="demographics_info",
    group_by="demographics_info.F_Zip_Code_ID",
    agg_col="demographics_info.Population",
    agg_fn="sum",
    columns=["sum_demographics_info.Population"]
)# SUM
for row in result:
    print(row)
db.print_result_table(result)

result = db.select_query(
    from_table="demographics_info",
    group_by="demographics_info.F_Zip_Code_ID",
    agg_col="demographics_info.Population",
    agg_fn="sum",
    columns=["demographics_info.F_Zip_Code_ID", "sum_demographics_info.Population"]
)

# AVG
result = db.select_query(
    from_table="restaurant_info",
    joins=[("inspection_info", ("Restaurant_Info_ID", "F_Restaurant_Info_ID"), "inner")],
    group_by="restaurant_info.Restaurant_Name",
    agg_col="inspection_info.Score",
    agg_fn="avg",
    columns=["restaurant_info.Restaurant_Name", "avg_inspection_info.Score"]
)

# COUNT
result = db.select_query(
    from_table="restaurant_info",
    group_by="restaurant_info.Categories",
    agg_col="restaurant_info.Categories",
    agg_fn="count",
    columns=["restaurant_info.Categories", "count_restaurant_info.Categories"]
)

# MIN / MAX
result = db.select_query(
    from_table="demographics_info",
    group_by="demographics_info.F_Zip_Code_ID",
    agg_col="demographics_info.Population",
    agg_fn="min",  # or min
    columns=["demographics_info.F_Zip_Code_ID", "min_demographics_info.Population"]
)



182
<function get_database.<locals>.CustomCSVParser.MyCustomDB.select_query.<locals>.<lambda> at 0x1118b71a0>
Population
----------
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         
0         

In [91]:
result = db.select_query(
    from_table="restaurant_info",
    where=lambda r: r["Categories"] == "Fast Food",
    columns=["Restaurant_Name", "Categories", "Review_Count"]
)
db.print_result_table(result)
for row in result:
    print(row)

Restaurant_Name                | Categories | Review_Count
-------------------------------+------------+-------------
Bangin Buns                    | Fast Food  | 110         
Pioneer Chicken                | Fast Food  | 564         
Golden Ox                      | Fast Food  | 62          
Amanecer Ylobasqence Y Familia | Fast Food  | 13          
Louisiana Fried Chicken        | Fast Food  | 12          
Cafe El Colibri                | Fast Food  | 1           
SKECHERS Food Spot             | Fast Food  | 273         
Basil Thai Kitchen             | Fast Food  | 18          
Fry Shack                      | Fast Food  | 8           
{'Restaurant_Name': 'Bangin Buns', 'Categories': 'Fast Food', 'Review_Count': 110}
{'Restaurant_Name': 'Pioneer Chicken', 'Categories': 'Fast Food', 'Review_Count': 564}
{'Restaurant_Name': 'Golden Ox', 'Categories': 'Fast Food', 'Review_Count': 62}
{'Restaurant_Name': 'Amanecer Ylobasqence Y Familia', 'Categories': 'Fast Food', 'Review_Count': 13}


In [92]:
result = db.select_query(
    from_table="restaurant_info",
    joins=[("inspection_info", ("Restaurant_Info_ID", "F_Restaurant_Info_ID"), "inner")],
    where=lambda r: r["restaurant_info.Categories"] == "Fast Food",
    columns=["restaurant_info.Restaurant_Name","restaurant_info.Created_At", "inspection_info.Score","restaurant_info.Review_Count", "inspection_info.Created_At"]
)
db.print_result_table(result)
for row in result:
    print(row)
    

Restaurant_Name                | Created_At          | Score | Review_Count | Created_At         
-------------------------------+---------------------+-------+--------------+--------------------
Bangin Buns                    | 2025-04-13 14:16:56 | 93.0  | 110          | 2025-04-14 00:50:50
Pioneer Chicken                | 2025-04-13 14:16:56 | 95.0  | 564          | 2025-04-14 02:11:44
Golden Ox                      | 2025-04-13 14:16:56 | 94.0  | 62           | 2025-04-14 02:54:45
Amanecer Ylobasqence Y Familia | 2025-04-13 14:31:06 | 92.0  | 13           | 2025-04-14 21:56:56
Louisiana Fried Chicken        | 2025-04-13 14:31:06 | 91.0  | 12           | 2025-04-14 18:21:39
Cafe El Colibri                | 2025-04-13 16:12:36 | 90.0  | 1            | 2025-04-14 20:28:41
SKECHERS Food Spot             | 2025-04-13 21:48:19 | 97.0  | 273          | 2025-04-16 04:40:56
Basil Thai Kitchen             | 2025-04-14 01:19:41 | 97.0  | 18           | 2025-04-16 07:10:34
Fry Shack           