# Charity Commission - Register of Charities

In [1]:
import pandas as pd
from pathlib import Path
import os
from pipeline_utils.reference.onspd import normalise_postcode, postcode_lookup

In [2]:
# NOTE: despite its name, DATA_DIR is the path of the register CSV itself.
DATA_DIR = Path('../raw/charitydetails_2024.csv')
# Folder that the CSVs produced by this notebook are written into.
OUT_DIR = Path('../src/data/charity-commission/_data/')

# Read only the register columns listed here.
data = pd.read_csv(
    DATA_DIR,
    usecols={
        'Charity Name',
        'Charity Postcode',
        'Charity Type',
        'How the charity helps',
        'What the charity does',
        'Who the charity helps',
        'Activities',
    },
)

# Postcode reference table, re-indexed by its 'pcd' column.
ons_pcd_lookup = pd.read_csv('../data/reference/pcd_lad_aug_2023_NE.csv')
ons_pcd_lookup = ons_pcd_lookup.set_index('pcd')


In [3]:
def get_lat_long(postcode, lookup):
    """Return the latitude and longitude stored for a postcode.

    The postcode is passed through ``normalise_postcode`` and the result is
    used as the key for ``lookup.get``.

    Args:
        postcode: Postcode to look up.
        lookup: Object with a ``get`` method that returns a record exposing
            ``'lat'`` and ``'long'`` entries, or a falsy value on a miss.

    Returns:
        A two-element ``pd.Series``: ``[lat, long]`` when a truthy record is
        found, otherwise ``[None, None]``.
    """
    record = lookup.get(normalise_postcode(postcode))
    if not record:
        return pd.Series([None, None])
    return pd.Series([record['lat'], record['long']])

### Create headline stats

In [4]:
# Start the headline table with a single row: the number of records loaded.
headlines = pd.DataFrame({'headline': ['Total'], 'count': [len(data)]})
# TODO: Add breakdown of charity classifications

### Filter by Newcastle LAD

In [5]:
# Rows with no postcode cannot be geocoded or matched to an area: drop them.
data = data.dropna(subset=['Charity Postcode'])

# Put every postcode into normalised form before it is used as a join key.
data['Charity Postcode'] = data['Charity Postcode'].apply(normalise_postcode)

# Look up coordinates for each postcode. get_lat_long returns a two-element
# Series per postcode, so the result expands into two columns that are
# assigned positionally to Latitude and Longitude.
coordinates = data['Charity Postcode'].apply(
    lambda pcd: get_lat_long(pcd, postcode_lookup)
)
data[['Latitude', 'Longitude']] = coordinates

# Inner-join onto the ONS postcode lookup so that only charities whose
# postcode appears in that lookup remain (intended as the filter down to the
# Newcastle-upon-Tyne LA; the lookup's coverage is not visible here).
ne_lad = pd.merge(
    data,
    ons_pcd_lookup,
    how='inner',
    left_on='Charity Postcode',
    right_on='pcd',
)

# Column names become lower case with spaces swapped for underscores.
tidy_names = ne_lad.columns.str.lower().str.replace(' ', '_')
ne_lad.columns = tidy_names


### Add filtered LA data to headlines

In [6]:
# One-row table holding the number of charities left after the area filter.
headline_to_add = pd.DataFrame(
    [{'headline': 'Total Newcastle LA', 'count': len(ne_lad)}]
)

# Append it to the running headline table, renumbering the index from zero.
headlines = pd.concat([headlines, headline_to_add], ignore_index=True)

# Save the combined headline figures without the index column.
headlines.to_csv(OUT_DIR / 'headlines.csv', index=False)


### Visualise by location

In [7]:
import re

# An apostrophe plus one letter that ends a word directly after another
# letter, e.g. the "'S" in "John'S".
_TRAILING_APOSTROPHE_LETTER = re.compile(r"(?<=[^\W\d_])(['’])([^\W\d_])\b")


def _title_case(name):
    """Title-case ``name`` while keeping possessives/contractions lower case.

    ``str.title`` treats an apostrophe as a word boundary, so "ST JOHN'S"
    comes out as "St John'S". This lowers a single letter that follows an
    apostrophe at the end of a word, giving "St John's"; names such as
    "O'Brien" are unaffected because more letters follow the apostrophe.
    """
    return _TRAILING_APOSTROPHE_LETTER.sub(
        lambda match: match.group(1) + match.group(2).lower(),
        name.title(),
    )


# Keep only rows that have a name, postcode and both coordinates. The
# explicit copy makes geo_data an independent frame before a column is added.
geo_data = ne_lad[
    ['charity_name', 'charity_postcode', 'latitude', 'longitude']
].dropna().copy()

# Build the HTML tooltip per row. A list comprehension (rather than a
# row-wise DataFrame.apply) also works when no rows survive the filter.
geo_data['tooltip'] = [
    f"<strong>{_title_case(name)}</strong><br>Postcode: {postcode}"
    for name, postcode in zip(
        geo_data['charity_name'], geo_data['charity_postcode']
    )
]

# Write the map data without the index column.
geo_data.to_csv(os.path.join(OUT_DIR, 'charity_geo.csv'), index=False)
