# Charity Commission - Register of Charities

In [3]:
from pipeline_utils.db import connect
import pandas as pd
from pathlib import Path

In [4]:
# Open the project database via pipeline_utils; read_only=True is requested
# because this notebook only reads from it (apart from a temporary table).
db = connect(read_only=True)

In [5]:
# Stage the rows of onspd_extract.csv whose `oslaua` is 'E08000021' in a
# temporary table named `onspd`, so a later query can join against them.
onspd_setup_sql = '''
CREATE OR REPLACE TEMP TABLE onspd AS SELECT * FROM read_csv('../data/reference/onspd_extract.csv') WHERE oslaua == 'E08000021';
'''
db.query(onspd_setup_sql)

In [6]:
# Pull the charity records together with the postcode, area code and
# coordinates from the `onspd` temporary table. Because this is a LEFT JOIN,
# every row of Charities is kept; rows whose contact postcode has no match in
# `onspd` come back with the onspd-derived columns empty.
# NOTE(review): the unqualified name/number columns presumably come from
# Charities - confirm against the table schema.
charity_sql = '''
SELECT
    charity_name,
    pcds as charity_postcode,
    organisation_number,
    registered_charity_number,
    oslaua,
    lat as latitude,
    long as longitude
FROM Charities
LEFT JOIN onspd ON charity_contact_postcode == pcds;
'''
charity_rows = db.query(charity_sql)
# Materialise the query result as a pandas DataFrame for the cells below.
data = charity_rows.df()

In [7]:
# Close the database handle; the query result has already been copied into
# the `data` DataFrame, so nothing below needs the connection.
db.close()

In [8]:
# Directory that receives the JSON/CSV files written by the cells below.
# The path is relative, so it resolves against the current working directory.
# NOTE(review): nothing here creates the directory - presumably it already
# exists in the repository; confirm before running on a fresh checkout.
OUT_DIR = Path('../src/data/charity-commission/_data/')

### Create headline stats

In [9]:
# Single-row table of headline figures: 'Total' is the number of rows in `data`.
headlines = pd.DataFrame({'headline': ['Total'], 'count': [len(data)]})

In [10]:
# Key the table by headline name so that, with orient='index', the JSON maps
# each headline to its column values.
headline_table = headlines.set_index('headline')
headline_table.to_json(OUT_DIR / 'headlines.json', orient='index')

### Visualise by location

In [11]:
from html import escape

# Build the table of charities that can be placed on a map and write it to
# charity_geo.csv.
#
# Only rows with a name, postcode, latitude and longitude are kept: dropna()
# removes any row missing one of the four selected columns, which also covers
# rows without coordinates, so no separate latitude filter is needed.
geo_data = (
    data[['charity_name', 'charity_postcode', 'latitude', 'longitude']]
    .dropna()
    .sort_values('charity_name')
)

# HTML tooltip per charity: title-cased name in bold, then the postcode.
# - str.title() already lower-cases the remainder of each word, so a prior
#   .lower() is unnecessary.
# - The values are interpolated into markup, so '&', '<' and '>' are escaped
#   to keep the HTML well-formed (quotes are left alone because the text is
#   element content, not an attribute value).
# - A list comprehension is used instead of DataFrame.apply(axis=1) because
#   apply returns a DataFrame rather than a Series when there are no rows,
#   which makes the column assignment fail on an empty table.
geo_data['tooltip'] = [
    f"<strong>{escape(str(name).title(), quote=False)}</strong>"
    f"<br>Postcode: {escape(str(postcode), quote=False)}"
    for name, postcode in zip(geo_data['charity_name'], geo_data['charity_postcode'])
]

geo_data.to_csv(OUT_DIR / 'charity_geo.csv', index=False)