In [None]:
# SAS02 - Total Population (1981 Census)
# Loads the five SAS02 CSV parts, cleans them, and writes one aggregate row
# for the Manchester district to manchester_sas02_1981.csv.
import pandas as pd

# The URL points at a specific commit of the data repository.
base = "https://raw.githubusercontent.com/jourdee-lab/manchester-spatial-analysis-data-lab/1e2a6499d4f7997c50b0a150b60d7ab3e8857fde/data/raw/census_1981/sas02_total_population"
files = [f"{base}/81sas02ews_{i}.csv" for i in range(5)]

# zoneid is read as text so that codes are never parsed as numbers.
dfs = [pd.read_csv(url, dtype={"zoneid": "string"}) for url in files]
sas02_1981 = pd.concat(dfs, ignore_index=True)

# Clean column names and zoneid
sas02_1981.columns = sas02_1981.columns.str.strip()
sas02_1981["zoneid"] = sas02_1981["zoneid"].str.strip()

# Every column other than zoneid is a count; unparseable cells become NaN.
value_cols = [c for c in sas02_1981.columns if c != "zoneid"]
sas02_1981[value_cols] = sas02_1981[value_cols].apply(pd.to_numeric, errors="coerce")

# Specify Manchester district
MANCHESTER_DISTRICT = "03BN"
# na=False keeps rows with a missing zoneid out of the selection explicitly.
in_manchester = sas02_1981["zoneid"].str.startswith(MANCHESTER_DISTRICT, na=False)
manchester_rows = sas02_1981.loc[in_manchester]
manchester_zones = manchester_rows["zoneid"].unique()

print(f"Found {len(manchester_zones)} Manchester zones")
print(f"Sample zones: {sorted(manchester_zones)[:5]}")

# Aggregate Manchester at ONE geographic level only.
# Zone codes are nested (e.g. "03BN", "03BNAA", "03BNAA01" -- presumably
# district / ward / enumeration district), so summing every row with the
# district prefix counts each area once per level and inflates the totals.
district_rows = manchester_rows.loc[manchester_rows["zoneid"] == MANCHESTER_DISTRICT]
if not district_rows.empty:
    # Prefer the district's own row; drop_duplicates guards against the same
    # row appearing in more than one file part.
    single_level_rows = district_rows.drop_duplicates(subset="zoneid")
else:
    # No district row: fall back to the finest level (longest codes).
    # NOTE(review): lower-level sums may not match published district figures.
    code_length = manchester_rows["zoneid"].str.len()
    single_level_rows = manchester_rows.loc[code_length == code_length.max()]

print(f"Aggregating {len(single_level_rows)} row(s) from a single geographic level")

manchester_agg = pd.DataFrame(
    [single_level_rows[value_cols].sum(numeric_only=True)],
    columns=value_cols
)

manchester_agg.insert(0, "zoneid", MANCHESTER_DISTRICT)

print("\n=== Manchester City (1981) - SAS02 All Residents ===")
print(manchester_agg)

manchester_agg.to_csv("manchester_sas02_1981.csv", index=False)
print("\nFull aggregate saved to: manchester_sas02_1981.csv")

Found 1053 Manchester zones
Sample zones: ['03BN', '03BNAA', '03BNAA01', '03BNAA02', '03BNAA03']

=== Manchester City (1981) - SAS02 All Residents ===
  zoneid  81sas020050  81sas020051  81sas020052  81sas020053  81sas020054  \
0   03BN      1312935       638552       366571       271981       674383   

   81sas020055  81sas020056  81sas020057  81sas020058  ...  81sas020200  \
0       399825       274558        78115        40378  ...           42   

   81sas020201  81sas020202  81sas020203  81sas020204  81sas020205  \
0           88          224          590         3443        25475   

   81sas020206  81sas020207  81sas020208  81sas020209  
0        21184        12726         5736         2382  

[1 rows x 161 columns]

Full aggregate saved to: manchester_sas02_1981.csv


In [None]:
# SAS04 Country of Birth (1981 Census)
# Loads the five SAS04 CSV parts, cleans them, and builds one aggregate row
# for the Manchester district.
import pandas as pd

# The URL points at a specific commit of the data repository.
base = "https://raw.githubusercontent.com/jourdee-lab/FYP_Data_Pipeline/64e81707fa5cec98445e6b48c671f1ad71a3f153/81sas04"
files = [f"{base}/81sas04ews_{i}.csv" for i in range(5)]

# zoneid is read as text so that codes are never parsed as numbers.
dfs = [pd.read_csv(url, dtype={"zoneid": "string"}) for url in files]
sas04_1981 = pd.concat(dfs, ignore_index=True)

# Clean column names and zoneid
sas04_1981.columns = sas04_1981.columns.str.strip()
sas04_1981["zoneid"] = sas04_1981["zoneid"].str.strip()
# Every column other than zoneid is a count; unparseable cells become NaN.
value_cols = [c for c in sas04_1981.columns if c != "zoneid"]
sas04_1981[value_cols] = sas04_1981[value_cols].apply(pd.to_numeric, errors="coerce")

# Specify Manchester district
MANCHESTER_DISTRICT = "03BN"
# na=False keeps rows with a missing zoneid out of the selection explicitly.
in_manchester = sas04_1981["zoneid"].str.startswith(MANCHESTER_DISTRICT, na=False)
manchester_rows = sas04_1981.loc[in_manchester]
manchester_zones = manchester_rows["zoneid"].unique()

print(f"Found {len(manchester_zones)} Manchester zones")
print(f"Sample zones: {sorted(manchester_zones)[:5]}")

# Aggregate Manchester at ONE geographic level only.
# The prefix match selects the district code itself plus every longer code
# nested beneath it (presumably wards and enumeration districts), so summing
# all matched rows would count each area once per level.
district_rows = manchester_rows.loc[manchester_rows["zoneid"] == MANCHESTER_DISTRICT]
if not district_rows.empty:
    # Prefer the district's own row; drop_duplicates guards against the same
    # row appearing in more than one file part.
    single_level_rows = district_rows.drop_duplicates(subset="zoneid")
else:
    # No district row: fall back to the finest level (longest codes).
    # NOTE(review): lower-level sums may not match published district figures.
    code_length = manchester_rows["zoneid"].str.len()
    single_level_rows = manchester_rows.loc[code_length == code_length.max()]

print(f"Aggregating {len(single_level_rows)} row(s) from a single geographic level")

manchester_agg = pd.DataFrame(
    [single_level_rows[value_cols].sum(numeric_only=True)],
    columns=value_cols
)

manchester_agg.insert(0, "zoneid", MANCHESTER_DISTRICT)

print("\n=== Manchester City (1981) - SAS04 Country of Birth ===")
print(manchester_agg)

In [None]:
# SAS07 Employment Data for Manchester (1981 Census)
# Loads the five SAS07 CSV parts, cleans them, and writes one aggregate row
# for the Manchester district to sas07_1981_combined.csv.
import pandas as pd

# The URL points at a specific commit of the data repository.
base = "https://raw.githubusercontent.com/jourdee-lab/manchester-spatial-analysis-data-lab/1e2a6499d4f7997c50b0a150b60d7ab3e8857fde/data/raw/census_1981/sas07_employment"
files = [f"{base}/81sas07ews_{i}.csv" for i in range(5)]

# zoneid is read as text so that codes are never parsed as numbers.
dfs = [pd.read_csv(url, dtype={"zoneid": "string"}) for url in files]
sas07_1981 = pd.concat(dfs, ignore_index=True)
sas07_1981.columns = sas07_1981.columns.str.strip()
sas07_1981["zoneid"] = sas07_1981["zoneid"].str.strip()

# Every column other than zoneid is a count; unparseable cells become NaN.
value_cols = [c for c in sas07_1981.columns if c != "zoneid"]
sas07_1981[value_cols] = sas07_1981[value_cols].apply(pd.to_numeric, errors="coerce")

# Specify Manchester district
MANCHESTER_DISTRICT = "03BN"
# na=False keeps rows with a missing zoneid out of the selection explicitly.
in_manchester = sas07_1981["zoneid"].str.startswith(MANCHESTER_DISTRICT, na=False)
manchester_rows = sas07_1981.loc[in_manchester]
manchester_zones = manchester_rows["zoneid"].unique()

print(f"Found {len(manchester_zones)} Manchester zones")
print(f"Sample zones: {sorted(manchester_zones)[:5]}")

# Aggregate Manchester at ONE geographic level only.
# The prefix match selects the district code itself plus every longer code
# nested beneath it (presumably wards and enumeration districts), so summing
# all matched rows would count each area once per level.
district_rows = manchester_rows.loc[manchester_rows["zoneid"] == MANCHESTER_DISTRICT]
if not district_rows.empty:
    # Prefer the district's own row; drop_duplicates guards against the same
    # row appearing in more than one file part.
    single_level_rows = district_rows.drop_duplicates(subset="zoneid")
else:
    # No district row: fall back to the finest level (longest codes).
    # NOTE(review): lower-level sums may not match published district figures.
    code_length = manchester_rows["zoneid"].str.len()
    single_level_rows = manchester_rows.loc[code_length == code_length.max()]

print(f"Aggregating {len(single_level_rows)} row(s) from a single geographic level")

manchester_agg = pd.DataFrame(
    [single_level_rows[value_cols].sum(numeric_only=True)],
    columns=value_cols
)

manchester_agg.insert(0, "zoneid", MANCHESTER_DISTRICT)

print("\n=== Manchester City (1981) - SAS07 Employment ===")
print(manchester_agg)
# Spot-check: echo the first SAS07 metric with thousands separators.
first_metric = next((c for c in value_cols if c.startswith("81sas07")), None)
if first_metric:
    print(f"\nExample check ({first_metric}): {int(manchester_agg[first_metric].iloc[0]):,}")

manchester_agg.to_csv("sas07_1981_combined.csv", index=False)
print("\nFull aggregate saved to: sas07_1981_combined.csv")

In [None]:
# SAS10 Housing Data for Manchester (1981 Census)
# Loads the five SAS10 CSV parts, cleans them, builds one aggregate row for
# the Manchester district, prints owner-occupied shares, and writes the row
# to manchester_sas10_1981.csv.
import pandas as pd

# The URL points at a specific commit of the data repository.
base = "https://raw.githubusercontent.com/jourdee-lab/manchester-spatial-analysis-data-lab/63a3d8ec45a814faeb5c821b7e184aee3cd49247/data/raw/census_1981/sas10_housing"
files = [f"{base}/81sas10ews_{i}.csv" for i in range(5)]
# zoneid is read as text so that codes are never parsed as numbers.
dfs = [pd.read_csv(url, dtype={"zoneid": "string"}) for url in files]
sas10_1981 = pd.concat(dfs, ignore_index=True)

# Cleaning
sas10_1981.columns = sas10_1981.columns.str.strip()
sas10_1981["zoneid"] = sas10_1981["zoneid"].str.strip()
value_cols = [c for c in sas10_1981.columns if c != "zoneid"]

# Coerce every count column to numeric; unparseable or missing cells become 0.
sas10_1981[value_cols] = (
    sas10_1981[value_cols].apply(pd.to_numeric, errors="coerce").fillna(0)
)

MANCHESTER_DISTRICT = "03BN"
mask = sas10_1981["zoneid"].str.startswith(MANCHESTER_DISTRICT, na=False)
manchester_rows = sas10_1981.loc[mask]

# Aggregate Manchester at ONE geographic level only.
# The prefix match selects the district code itself plus every longer code
# nested beneath it (presumably wards and enumeration districts), so summing
# all matched rows would count each household once per level.
district_rows = manchester_rows.loc[manchester_rows["zoneid"] == MANCHESTER_DISTRICT]
if not district_rows.empty:
    # Prefer the district's own row; drop_duplicates guards against the same
    # row appearing in more than one file part.
    single_level_rows = district_rows.drop_duplicates(subset="zoneid")
else:
    # No district row: fall back to the finest level (longest codes).
    # NOTE(review): lower-level sums may not match published district figures.
    code_length = manchester_rows["zoneid"].str.len()
    single_level_rows = manchester_rows.loc[code_length == code_length.max()]

manchester_sum = single_level_rows[value_cols].sum()
manchester_sas10 = pd.DataFrame([manchester_sum])
manchester_sas10.insert(0, "zoneid", MANCHESTER_DISTRICT)

# Denominator checks: the household-count column must exist and be positive
# before it is used to compute percentages.
DENOM_A = "81sas100929"
required = [DENOM_A]

missing = [c for c in required if c not in manchester_sas10.columns]
if missing:
    raise KeyError(f"Missing expected SAS10 columns: {missing}")
if manchester_sas10[DENOM_A].iloc[0] <= 0:
    raise ValueError(f"{DENOM_A} is <= 0; check filtering/inputs.")

# Owner-occupied percentages, one per candidate numerator column that exists.
total_hh = manchester_sas10[DENOM_A].iloc[0]
for col in ["81sas100967", "81sas100968"]:
    if col in manchester_sas10.columns:
        owner_hh = manchester_sas10[col].iloc[0]
        print(col, "owner-occupied % =", (owner_hh / total_hh) * 100)

# Write zoneid first, then the SAS columns in sorted order.
sas_cols = sorted([c for c in manchester_sas10.columns if c != "zoneid"])
manchester_sas10 = manchester_sas10[["zoneid"] + sas_cols]
out_path = "manchester_sas10_1981.csv"
manchester_sas10.to_csv(out_path, index=False)

print("Manchester SAS10 aggregate saved:", out_path)
print("Manchester households (Option A, 81sas100929):", int(manchester_sas10[DENOM_A].iloc[0]))