In [4]:
import pandas as pd

# Build the Manchester aggregate (zone ids starting with "03BN") of the 1981
# census SAS10 housing table, save it to CSV, and print owner-occupation
# shares against a single, explicitly validated denominator column.
base = "https://raw.githubusercontent.com/jourdee-lab/manchester-spatial-analysis-data-lab/63a3d8ec45a814faeb5c821b7e184aee3cd49247/data/raw/census_1981/sas10_housing"
files = [f"{base}/81sas10ews_{i}.csv" for i in range(5)]

# zoneid is read with the pandas string dtype; every other column is coerced
# to numeric further down.
dfs = [pd.read_csv(url, dtype={"zoneid": "string"}) for url in files]
sas10_1981 = pd.concat(dfs, ignore_index=True)
sas10_1981.columns = sas10_1981.columns.str.strip()
sas10_1981["zoneid"] = sas10_1981["zoneid"].str.strip()

value_cols = [c for c in sas10_1981.columns if c != "zoneid"]
# Cells that cannot be parsed become NaN, which .sum() skips by default.
sas10_1981[value_cols] = sas10_1981[value_cols].apply(pd.to_numeric, errors="coerce")

# Manchester SAS10 aggregate
# na=False makes rows with a missing zoneid drop out of the mask explicitly
# instead of leaving NA values in the boolean indexer.
# NOTE(review): the prefix match sums every row whose zoneid starts with
# "03BN"; if the files carry more than one geographic level for Manchester
# the totals would be counted more than once -- confirm against the raw data.
mask = sas10_1981["zoneid"].str.startswith("03BN", na=False)
manchester_sas10 = pd.DataFrame(
    [sas10_1981.loc[mask, value_cols].sum()],
    columns=value_cols
)
manchester_sas10.insert(0, "zoneid", "03BN")
manchester_sas10.to_csv("manchester_sas10_1981.csv", index=False)

print("Manchester SAS10 aggregate saved")
print(manchester_sas10.head())
print(sas10_1981.columns[:30].tolist())

sas10_cols = [c for c in sas10_1981.columns if c.startswith("81sas10")]
candidates = [c for c in sas10_cols if c.endswith(("0929", "0951", "0967", "0968"))]

# The percentages below have always been divided by 81sas100929, so report
# that same column as the household count; previously a different column
# (81sas100951) could be printed while 81sas100929 was used in the division.
denom = "81sas100929"

print("SAS10 column count:", len(sas10_cols))
print("Candidate columns:", candidates)

if denom not in manchester_sas10.columns:
    raise KeyError(f"Missing expected SAS10 column: {denom}")
total_hh = manchester_sas10[denom].iloc[0]
# A zero total means no zone matched the prefix (or the column is empty);
# fail loudly rather than printing inf/nan percentages.
if total_hh <= 0:
    raise ValueError(f"{denom} is <= 0; check filtering/inputs.")

print(f"Manchester households ({denom}): {int(total_hh):,}")

for col in ["81sas100967", "81sas100968"]:
    if col in manchester_sas10.columns:
        owner_hh = manchester_sas10[col].iloc[0]
        print(col, "owner-occupied % =", (owner_hh / total_hh) * 100)


Manchester SAS10 aggregate saved
  zoneid  81sas100929  81sas100930  81sas100931  81sas100932  81sas100933  \
0   03BN       492010       455337        14794        16159         5720   

   81sas100934  81sas100935  81sas100936  81sas100937  ...  81sas101139  \
0         6763        20836        14313      1291176  ...          230   

   81sas101140  81sas101141  81sas101142  81sas101143  81sas101144  \
0          230          260            0           58           12   

   81sas101145  81sas101146  81sas101147  81sas101148  
0          177           43           54           93  

[1 rows x 221 columns]
['zoneid', '81sas100929', '81sas100930', '81sas100931', '81sas100932', '81sas100933', '81sas100934', '81sas100935', '81sas100936', '81sas100937', '81sas100938', '81sas100939', '81sas100940', '81sas100941', '81sas100942', '81sas100943', '81sas100944', '81sas100945', '81sas100946', '81sas100947', '81sas100948', '81sas100949', '81sas100950', '81sas100951', '81sas100952', '81sas100953'

In [None]:
import pandas as pd

# Rebuild the Manchester ("03BN" prefix) SAS10 aggregate from the five raw CSV
# parts, validate the denominator column, print owner-occupation shares and
# write the aggregate to CSV with its value columns in sorted order.
base = "https://raw.githubusercontent.com/jourdee-lab/manchester-spatial-analysis-data-lab/63a3d8ec45a814faeb5c821b7e184aee3cd49247/data/raw/census_1981/sas10_housing"
files = [f"{base}/81sas10ews_{i}.csv" for i in range(5)]

# Load each part with zoneid kept as a pandas string column, then stack them.
dfs = []
for url in files:
    dfs.append(pd.read_csv(url, dtype={"zoneid": "string"}))
sas10_1981 = pd.concat(dfs, ignore_index=True)

# Strip stray whitespace from the header names and from the zone ids.
sas10_1981.columns = sas10_1981.columns.str.strip()
sas10_1981["zoneid"] = sas10_1981["zoneid"].str.strip()
value_cols = sas10_1981.columns[sas10_1981.columns != "zoneid"].tolist()

# Coerce every value column to numeric; unparseable cells become 0.
sas10_1981[value_cols] = sas10_1981[value_cols].apply(
    lambda series: pd.to_numeric(series, errors="coerce").fillna(0)
)

# Sum all rows whose zone id starts with "03BN" (missing ids never match)
# into a single-row frame labelled "03BN".
mask = sas10_1981["zoneid"].str.startswith("03BN", na=False)
manchester_sum = sas10_1981.loc[mask, value_cols].sum()
manchester_sas10 = pd.DataFrame([manchester_sum])
manchester_sas10.insert(0, "zoneid", "03BN")

# Sanity checks on the denominator column before any division happens.
DENOM_A = "81sas100929"
required = [DENOM_A]

missing = [name for name in required if name not in manchester_sas10.columns]
if missing:
    raise KeyError(f"Missing expected SAS10 columns: {missing}")
if manchester_sas10[DENOM_A].iloc[0] <= 0:
    raise ValueError(f"{DENOM_A} is <= 0; check filtering/inputs.")

# Share of each owner-occupied column relative to DENOM_A, as a percentage.
total_hh = manchester_sas10[DENOM_A].iloc[0]
for col in ("81sas100967", "81sas100968"):
    if col not in manchester_sas10.columns:
        continue
    owner_hh = manchester_sas10[col].iloc[0]
    print(col, "owner-occupied % =", (owner_hh / total_hh) * 100)

# Persist with zoneid first and the remaining columns in sorted order.
sas_cols = sorted(name for name in manchester_sas10.columns if name != "zoneid")
manchester_sas10 = manchester_sas10[["zoneid", *sas_cols]]
out_path = "manchester_sas10_1981.csv"
manchester_sas10.to_csv(out_path, index=False)

print("Manchester SAS10 aggregate saved:", out_path)
print("Manchester households (Option A, 81sas100929):", int(manchester_sas10[DENOM_A].iloc[0]))
