In [4]:
# Import Library
import os

## Universal Data Processing
import numpy as np
import pandas as pd

## Regular Expression for Text Data
import re

## JSON Files Manipulation
import json
from pathlib import Path

In [5]:
# Path
path = Path("C:/03. Other/op-shops/Datasets/opshops_with_coords.json")

# Load dataset
all_data = []

print(f"Loading file: {path}")

with open(path, "r", encoding="utf-8") as json_file:
    data = json.load(json_file)
    all_data.append(data)

print("Files loaded:", len(all_data))
print("Records in file:", len(all_data[0]) if all_data else 0)

Loading file: C:\03. Other\op-shops\Datasets\opshops_with_coords.json
Files loaded: 1
Records in file: 2297


In [14]:
all_data[:5]

[[['Koo Wee Rup and District Lions Community Opportunity Shop',
   '290 Rossiter Road, Koo Wee Rup',
   -38.1993407,
   145.4912486,
   'Mon - Fri 10am-4pm',
   'Sat 9am-12pm',
   'Sun 10am-3pm selected Sundays'],
  ['Rose Lodge Opportunity Shop',
   '42 McBride Avenue, Wonthaggi, 3996',
   -38.6063217,
   145.5909252,
   'Mon - Fri 9am - 4pm',
   'Sat 9am - 12pm'],
  ['The Green Shed Underground',
   'Basement level, 34 E Row Canberra ACT 2601 Australia',
   None,
   None,
   'Tue - Fri 10:30am - 6pm',
   'Sat 10:30am - 4pm'],
  ['Hopeworks Seaford Op Shop',
   '2/36 Hartnett Dr 3198 Seaford, Victoria',
   -38.1132955,
   145.1415596,
   'Mon - Fri 9am-4pm',
   'Sat 9am-3pm',
   'Sun CLOSED'],
  ['The Collective',
   '270 Rossiter Road, Koo Wee Rup',
   -38.1996897,
   145.4903231,
   'Mon - Tue 10am-4pm',
   'Fri - Sun 10am-4pm'],
  ['All Nations Christian Church Op Shop',
   '170 Seacombe Rd, Seaview Downs',
   -35.0292948,
   138.5415819,
   'Wed - Thu 10-4pm'],
  ['Anglican Op Sho

In [16]:
# Define custom headers
columns = [
    "name",
    "address",
    "latitude",
    "longitude",
    "hours_1",
    "hours_2",
    "hours_3"
    ]

# Normalize rows: pad with None so all rows have same length
max_len = len(columns)

normalized = []

for row in data:
    row = list(row)
    if len(row) < max_len:
        row = row + [None] * (max_len - len(row))
    else:
        row = row[:max_len]  # trim if too long
    normalized.append(row)

# Create DataFrame
df_opshop = pd.DataFrame(normalized, columns=columns)

# Show only first 5 rows
df_opshop.head(5)

Unnamed: 0,name,address,latitude,longitude,hours_1,hours_2,hours_3
0,Koo Wee Rup and District Lions Community Oppor...,"290 Rossiter Road, Koo Wee Rup",-38.199341,145.491249,Mon - Fri 10am-4pm,Sat 9am-12pm,Sun 10am-3pm selected Sundays
1,Rose Lodge Opportunity Shop,"42 McBride Avenue, Wonthaggi, 3996",-38.606322,145.590925,Mon - Fri 9am - 4pm,Sat 9am - 12pm,
2,The Green Shed Underground,"Basement level, 34 E Row Canberra ACT 2601 Aus...",,,Tue - Fri 10:30am - 6pm,Sat 10:30am - 4pm,
3,Hopeworks Seaford Op Shop,"2/36 Hartnett Dr 3198 Seaford, Victoria",-38.113295,145.14156,Mon - Fri 9am-4pm,Sat 9am-3pm,Sun CLOSED
4,The Collective,"270 Rossiter Road, Koo Wee Rup",-38.19969,145.490323,Mon - Tue 10am-4pm,Fri - Sun 10am-4pm,
