In [9]:
import json
import pandas as pd

# Open the file and read it line by line
address_list = []
with open("statewide-addresses-state.geojson", "r", encoding="utf-8") as file:
    for line in file:
        try:
            feature = json.loads(line.strip())  # Load each line as a separate JSON object
            address_list.append({
                "street": feature["properties"].get("street", ""),
                "city": feature["properties"].get("city", ""),
                "postcode": feature["properties"].get("postcode", ""),
                "state": "VERMONT",
                "country": "USA"
            })
        except json.JSONDecodeError as e:
            print(f"Skipping invalid line: {e}")

# Convert to DataFrame
df = pd.DataFrame(address_list)

# Display the extracted data
df.head(10)


Unnamed: 0,street,city,postcode,state,country
0,EASTBURN RD,STRAFFORD,5072,VERMONT,USA
1,CATKIN DR,SOUTH BURLINGTON,5403,VERMONT,USA
2,POMFRET RD,POMFRET,5084,VERMONT,USA
3,PARKER RD,VERSHIRE,5079,VERMONT,USA
4,MOUNTAIN RD,MONTGOMERY,5471,VERMONT,USA
5,ASPEN CIR,SHELBURNE,5482,VERMONT,USA
6,MONUMENT HILL RD,HUBBARDTON,5735,VERMONT,USA
7,GEORGIA MIDDLE RD,GEORGIA,5478,VERMONT,USA
8,SLATE LEDGE RD,WATERFORD,5819,VERMONT,USA
9,ALPINE DR,MOUNT HOLLY,5758,VERMONT,USA


In [10]:
df.shape

(351096, 5)

In [16]:
# remove records with null or empty values in "postcode"
df = df[df["postcode"].notnull() & (df["postcode"] != "")]



In [17]:
df.shape

(0, 5)

In [11]:


df_unique = df.drop_duplicates()

df_unique.shape

(29355, 5)

In [12]:
df_unique.to_csv("vermont.csv", index=False)

In [13]:
import pandas as pd
import os

# Directory containing the CSV files
csv_dir = './final'

# List to hold DataFrames
dfs = []

# Iterate over all files in the directory
for filename in os.listdir(csv_dir):
    if filename.endswith('.csv'):
        file_path = os.path.join(csv_dir, filename)
        df = pd.read_csv(file_path)
        dfs.append(df)

# Concatenate all DataFrames
merged_df = pd.concat(dfs, ignore_index=True)

# Save the merged DataFrame to a new CSV file
merged_df.to_csv('merged_output.csv', index=False)

# Display the merged DataFrame
merged_df.head(10)

Unnamed: 0,street,city,postcode,state,country
0,Fairview Avenue,Orange,7050.0,NEW JERSEY,USA
1,Smull Avenue,Caldwell,7006.0,NEW JERSEY,USA
2,Hillside Avenue,Orange,7050.0,NEW JERSEY,USA
3,Duane Street,Orange,7050.0,NEW JERSEY,USA
4,North Essex Avenue,Orange,7050.0,NEW JERSEY,USA
5,Mechanic Street,Orange,7050.0,NEW JERSEY,USA
6,Wood Street,Tuckerton,8087.0,NEW JERSEY,USA
7,Fairway Drive,Tuckerton,8087.0,NEW JERSEY,USA
8,East Main Street,Tuckerton,8087.0,NEW JERSEY,USA
9,Admiral Drive,Tuckerton,8087.0,NEW JERSEY,USA


In [14]:
merged_df.shape

(633430, 5)

In [15]:
merged_df = merged_df[merged_df["postcode"].notnull() & (merged_df["postcode"] != "")]
merged_df.shape

(633024, 5)