In [2]:
import pandas as pd

In [3]:
# Load the performance-level concert records from the CSV export.
csv_path = "pcv_final.csv"
df = pd.read_csv(csv_path)

In [4]:
# Preview the first rows to sanity-check that the file loaded as expected.
df.head()

Unnamed: 0,parent,id,Performance Title,composer_text,Concert Title,original_date,date_start,orchestra,Venue Name,Venue Town,seonametown
0,1,101,Der Freischutz; Overture,Weber,1st Concert of the 1st Season,30th Jan 1858,1858-01-30,Hallé Orchestra; 1857-,Free Trade Hall,Manchester,manchester-free-trade-hall
1,1,1798,Symphony No1 in C,Beethoven,1st Concert of the 1st Season,30th Jan 1858,1858-01-30,Hallé Orchestra; 1857-,Free Trade Hall,Manchester,manchester-free-trade-hall
2,1,2987,Symphony in E flat; Andante in A flat,Mozart,1st Concert of the 1st Season,30th Jan 1858,1858-01-30,Hallé Orchestra; 1857-,Free Trade Hall,Manchester,manchester-free-trade-hall
3,1,3950,The Syren; Overture,Auber,1st Concert of the 1st Season,30th Jan 1858,1858-01-30,Hallé Orchestra; 1857-,Free Trade Hall,Manchester,manchester-free-trade-hall
4,1,3952,The Siege of Corinth; Overture,Rossini,1st Concert of the 1st Season,30th Jan 1858,1858-01-30,Hallé Orchestra; 1857-,Free Trade Hall,Manchester,manchester-free-trade-hall


In [5]:
# Count the distinct values in each column.
df.nunique()

parent                8149
id                   37217
Performance Title     8145
composer_text         1368
Concert Title          793
original_date         7804
date_start            7784
orchestra               58
Venue Name             254
Venue Town             262
seonametown            378
dtype: int64

In [6]:
# Show each column's dtype and non-null count to spot missing data.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37217 entries, 0 to 37216
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   parent             37217 non-null  int64 
 1   id                 37217 non-null  int64 
 2   Performance Title  37216 non-null  object
 3   composer_text      37202 non-null  object
 4   Concert Title      6098 non-null   object
 5   original_date      37217 non-null  object
 6   date_start         37217 non-null  object
 7   orchestra          37217 non-null  object
 8   Venue Name         37217 non-null  object
 9   Venue Town         37217 non-null  object
 10  seonametown        37217 non-null  object
dtypes: int64(2), object(9)
memory usage: 3.1+ MB


In [7]:
# Remove the sparsely populated 'Concert Title' column.
# Rebinding `df` with errors="ignore" (instead of inplace=True) keeps this cell
# safe to re-run: a second execution is a no-op rather than a KeyError for the
# column that has already been removed.
df = df.drop(columns=["Concert Title"], errors="ignore")

In [8]:
# Keep only the rows that have a value in every remaining column.
df = df.dropna(axis=0, how="any")

In [9]:
# Filter out performances whose venue is recorded as 'unlisted'.
df = df.loc[df["Venue Name"].ne('unlisted')]

In [10]:
# Group the performances so that each group is one concert at one venue:
# (venue name, venue town, concert id, concert date).
group_keys = ['Venue Name', 'Venue Town', 'parent', 'date_start']
grouped = df.groupby(by=group_keys)

def make_concert_info(group):
    """Turn one concert's rows into a list of performance dictionaries.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to a single concert; must contain the columns
        'Performance Title', 'composer_text' and 'orchestra'.

    Returns
    -------
    list of dict
        One ``{"title", "composer", "orchestra"}`` dictionary per row,
        in row order. An empty group yields an empty list.
    """
    return [
        {
            "title": record['Performance Title'],
            "composer": record['composer_text'],
            "orchestra": record['orchestra'],
        }
        for _, record in group.iterrows()
    ]

# Collapse every concert group into a single row holding its list of
# performances, then give the key columns concert-oriented names.
concerts = (
    grouped.apply(make_concert_info)
    .reset_index(name='performances')
    .rename(columns={'parent': 'concert_id', 'date_start': 'date'})
)


def _concert_records(venue_rows):
    """Return one venue's concerts as a list of plain dictionaries."""
    return venue_rows[['concert_id', 'date', 'performances']].to_dict('records')


# Nest the concert records under the venue (name + town) they took place at.
venues = (
    concerts.groupby(['Venue Name', 'Venue Town'])
    .apply(_concert_records)
    .reset_index(name='concerts')
)
print(venues.head())


        Venue Name   Venue Town  \
0       ABC Cinema  Northampton   
1      ABC Theatre    Blackpool   
2     Abbey Cinema      Wexford   
3       Abbey Road       London   
4  Adam Smith Hall     Kirkaldy   

                                            concerts  
0  [{'concert_id': 6162, 'date': '1969-03-02', 'p...  
1  [{'concert_id': 7143, 'date': '1976-06-06', 'p...  
2  [{'concert_id': 4394, 'date': '1958-10-26', 'p...  
3  [{'concert_id': 2555, 'date': '1948-12-02', 'p...  
4  [{'concert_id': 1964, 'date': '1946-06-19', 'p...  


In [11]:
def count_unique_concerts(concert_list):
    """Return how many distinct 'concert_id' values appear in ``concert_list``.

    ``concert_list`` is an iterable of dictionaries that each carry a
    'concert_id' key; repeated ids are counted once.
    """
    seen_ids = set()
    for entry in concert_list:
        seen_ids.add(entry['concert_id'])
    return len(seen_ids)

# Store the number of distinct concerts held at each venue.
venues['concert_count'] = venues['concerts'].map(count_unique_concerts)

# Show the per-venue counts without the bulky nested 'concerts' column.
count_columns = ['Venue Name', 'Venue Town', 'concert_count']
print(venues[count_columns])

          Venue Name   Venue Town  concert_count
0         ABC Cinema  Northampton              1
1        ABC Theatre    Blackpool              1
2       Abbey Cinema      Wexford              1
3         Abbey Road       London             20
4    Adam Smith Hall     Kirkaldy              5
..               ...          ...            ...
370   Winter Gardens  Bournemouth             10
371   Winter Gardens   Eastbourne              1
372   Winter Gardens      Malvern              1
373   Winter Gardens      Margate              3
374   Winter Gardens    Morecambe              6

[375 rows x 3 columns]


In [12]:
# Get summary statistics for the number of concerts per venue.
summary_stats = venues['concert_count'].describe()
print(summary_stats)

# Report the headline figures explicitly, one per line. The three strings are
# passed as separate arguments with sep='\n'; written as adjacent f-string
# literals they were concatenated into a single run-together line.
median = venues['concert_count'].median()
print(f'Median number of concerts: {median}',
      f'Minimum number of concerts: {summary_stats["min"]}',
      f'Maximum number of concerts: {summary_stats["max"]}',
      sep='\n')

count     375.000000
mean       21.712000
std       174.955311
min         1.000000
25%         1.000000
50%         2.000000
75%         6.000000
max      3221.000000
Name: concert_count, dtype: float64
Median number of concerts: 2.0Minimum number of concerts: 1.0Maximum number of concerts: 3221.0


In [13]:
# Select the venue(s) that hosted the largest number of concerts.
max_concerts = venues['concert_count'].max()
is_busiest = venues['concert_count'].eq(max_concerts)
max_venues = venues.loc[is_busiest]
print(max_venues)


          Venue Name  Venue Town  \
120  Free Trade Hall  Manchester   

                                              concerts  concert_count  
120  [{'concert_id': 1, 'date': '1858-01-30', 'perf...           3221  


In [14]:
# Display the venue table (venue, nested concerts, concert count).
venues

Unnamed: 0,Venue Name,Venue Town,concerts,concert_count
0,ABC Cinema,Northampton,"[{'concert_id': 6162, 'date': '1969-03-02', 'p...",1
1,ABC Theatre,Blackpool,"[{'concert_id': 7143, 'date': '1976-06-06', 'p...",1
2,Abbey Cinema,Wexford,"[{'concert_id': 4394, 'date': '1958-10-26', 'p...",1
3,Abbey Road,London,"[{'concert_id': 2555, 'date': '1948-12-02', 'p...",20
4,Adam Smith Hall,Kirkaldy,"[{'concert_id': 1964, 'date': '1946-06-19', 'p...",5
...,...,...,...,...
370,Winter Gardens,Bournemouth,"[{'concert_id': 3042, 'date': '1952-06-06', 'p...",10
371,Winter Gardens,Eastbourne,"[{'concert_id': 2503, 'date': '1949-05-28', 'p...",1
372,Winter Gardens,Malvern,"[{'concert_id': 7500, 'date': '1978-06-04', 'p...",1
373,Winter Gardens,Margate,"[{'concert_id': 4939, 'date': '1962-04-29', 'p...",3


In [18]:
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import time

# Initialize the geocoder with a specific user_agent.
# geopy's default request timeout is only 1 second; the captured run shows
# repeated network read failures being caught by the rate limiter, so allow
# the service more time to answer before a request counts as failed.
geolocator = Nominatim(user_agent="my_geocode_app", timeout=10)

# Wrap the lookup in a rate limiter: wait at least 1 second between requests
# and pause 10 seconds after an error before retrying.
geocode_with_delay = RateLimiter(geolocator.geocode, min_delay_seconds=1, error_wait_seconds=10)

def geocode_venue(row, geocoder=None):
    """Look up the coordinates of a single venue.

    Parameters
    ----------
    row : mapping
        A DataFrame row (or any mapping) with 'Venue Name' and 'Venue Town'.
    geocoder : callable, optional
        Function that takes the query string and returns an object with
        ``latitude`` / ``longitude`` attributes, or a falsy value when nothing
        is found. Defaults to the notebook-level ``geocode_with_delay``.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` on success, ``(None, None)`` when the venue
        is not found or the lookup raises.
    """
    # Build the query before the try block so the error message below can
    # always reference it (previously a failure here made the handler itself
    # raise UnboundLocalError).
    # Combining venue name and town gives a more precise geocode.
    search_query = f"{row['Venue Name']}, {row['Venue Town']}"

    if geocoder is None:
        geocoder = geocode_with_delay

    try:
        location = geocoder(search_query)
    except Exception as e:
        # Best effort: one failed lookup must not abort the whole batch.
        print(f"Error geocoding {search_query}: {e}")
        return None, None

    if not location:
        print(f"Location not found: {search_query}")
        return None, None

    print(f"Geocoded: {search_query} -> Latitude: {location.latitude}, Longitude: {location.longitude}")
    return location.latitude, location.longitude

# Apply the geocoding function to the venues DataFrame
venues[['latitude', 'longitude']] = venues.apply(geocode_venue, axis=1, result_type='expand')

# Display the DataFrame to confirm coordinates are added
print(venues)


Location not found: ABC Cinema, Northampton
Location not found: ABC Theatre, Blackpool
Location not found: Abbey Cinema, Wexford
Geocoded: Abbey Road, London -> Latitude: 51.5325414, Longitude: 0.0036092
Location not found: Adam Smith Hall, Kirkaldy
Geocoded: Albert Hall, Manchester -> Latitude: 53.478299050000004, Longitude: -2.247901788881243
Geocoded: Albert Hall, Nottingham -> Latitude: 52.954312, Longitude: -1.156231121262001
Location not found: Arcadia Theatre, Llandudno
Geocoded: Ashton Hall, Lancaster -> Latitude: 54.0088778, Longitude: -2.8227046930252007
Geocoded: Assembly Hall, Tunbridge Wells -> Latitude: 51.13257285, Longitude: 0.2645546442675571
Geocoded: Assembly Hall, Walthamstow -> Latitude: 51.5908778, Longitude: -0.012399405402629649
Location not found: Assembly Hall, Wembley
Geocoded: Assembly Rooms, Derby -> Latitude: 52.923651250000006, Longitude: -1.476435354528702
Location not found: Auditorium der Universitat, Regensburg
Geocoded: BASF, Ludwigshafen -> Latitude

RateLimiter caught an error, retrying (0/2 tries). Called with (*('Civic Hall, Solihull',), **{}).
Traceback (most recent call last):
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 1377, in getresponse
    response.begin()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 320, in begin
    version, status, reason = self._read_status()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 281, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/socket.py", lin

Location not found: Civic Hall, Solihull
Location not found: Civic Hall, Southampton
Geocoded: Civic Hall, Whitehaven -> Latitude: 54.546689349999994, Longitude: -3.5862061431153807


RateLimiter caught an error, retrying (0/2 tries). Called with (*('Civic Hall, Wolverhampton',), **{}).
Traceback (most recent call last):
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 1377, in getresponse
    response.begin()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 320, in begin
    version, status, reason = self._read_status()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 281, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/socket.py"

Geocoded: Civic Hall, Wolverhampton -> Latitude: 52.58659075, Longitude: -2.1308165862918873
Location not found: Civic Theatre, Chesterfield


RateLimiter caught an error, retrying (0/2 tries). Called with (*('Civic Theatre, Corby',), **{}).
Traceback (most recent call last):
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 1377, in getresponse
    response.begin()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 320, in begin
    version, status, reason = self._read_status()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 281, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/socket.py", lin

Location not found: Civic Theatre, Corby
Location not found: Civic Theatre, Halifax
Geocoded: Cliffs Pavilion, Southend -> Latitude: 51.535223200000004, Longitude: 0.6969037184232711


RateLimiter caught an error, retrying (0/2 tries). Called with (*('Co-operative Hall, Oldham',), **{}).
Traceback (most recent call last):
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 1377, in getresponse
    response.begin()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 320, in begin
    version, status, reason = self._read_status()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 281, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/socket.py"

Location not found: Co-operative Hall, Oldham
Location not found: College Theatre, Billingham
Geocoded: Colston Hall, Bristol -> Latitude: 51.45470895, Longitude: -2.5982672862666463
Location not found: Concert Hall, Bergen
Location not found: Concert Hall, Gothenberg
Location not found: Concert Hall, Tivoli
Geocoded: Congress Theatre, Eastbourne -> Latitude: 50.7627894, Longitude: 0.28340345715925674
Location not found: Conservatory Guiseppe Verdi, Milan
Geocoded: Corn Exchange, Bedford -> Latitude: 52.136064250000004, Longitude: -0.46742707411525675
Geocoded: Corn Exchange, King's Lynn -> Latitude: 52.7564062, Longitude: 0.39336478041473855
Location not found: County Pavilion, Newtown
Location not found: County Secondary School, Haverfordwest
Location not found: County Secondary School, Llangefni
Location not found: County Secondary School Theatre, Fishguard


RateLimiter caught an error, retrying (0/2 tries). Called with (*('Covered Market Hall, Carlisle',), **{}).
Traceback (most recent call last):
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 1377, in getresponse
    response.begin()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 320, in begin
    version, status, reason = self._read_status()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 281, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/socket

Location not found: Covered Market Hall, Carlisle
Geocoded: Crystal Palace, London -> Latitude: 51.4193664, Longitude: -0.0711328
Location not found: Davis Theatre, Croydon
Geocoded: De La Warr Pavilion, Bexhill-on-Sea -> Latitude: 50.837523250000004, Longitude: 0.47122965124256166
Geocoded: De Montfort Hall, Leicester -> Latitude: 52.62502235, Longitude: -1.1220200960612958
Geocoded: Deutsches Museum, Munich -> Latitude: 48.1300409, Longitude: 11.582908990594504
Location not found: Diestenweg Schule, Linz
Geocoded: Dorking Halls, Dorking -> Latitude: 51.2347671, Longitude: -0.3258118
Location not found: Drill Hall, Macclesfield
Geocoded: Drill Hall, Wellingborough -> Latitude: 52.30453625, Longitude: -0.692248087347942
Location not found: Eastbrook Hall, Bradford
Geocoded: Eden Court Theatre, Inverness -> Latitude: 57.4731036, Longitude: -4.230864222497448
Location not found: Embassy Theatre, Peterborough
Location not found: Empire Theatre, Cardiff
Location not found: Empire Theatre, 

RateLimiter caught an error, retrying (0/2 tries). Called with (*('Fairfield Hall, Croydon',), **{}).
Traceback (most recent call last):
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 1377, in getresponse
    response.begin()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 320, in begin
    version, status, reason = self._read_status()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 281, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/socket.py", 

Location not found: Fairfield Hall, Croydon
Geocoded: Festhalle, Viersen -> Latitude: 51.25298085, Longitude: 6.391427662020734
Geocoded: Festival Theatre, Chichester -> Latitude: 50.8429048, Longitude: -0.7772216561704477
Geocoded: Festspielhaus, Salzburg -> Latitude: 47.7987295, Longitude: 13.040982237102769
Geocoded: Filharmonia, Lodz -> Latitude: 51.7713322, Longitude: 19.45987410839987
Geocoded: Filharmonia, Warsaw -> Latitude: 52.23436315, Longitude: 21.011280545053896
Geocoded: Floral Hall, Southport -> Latitude: 53.6517693, Longitude: -3.0060319
Location not found: Foxwood School, Leeds
Geocoded: Free Trade Hall, Manchester -> Latitude: 53.47767965, Longitude: -2.247247210739584
Geocoded: Gaiety Theatre, Dublin -> Latitude: 53.3403737, Longitude: -6.261642751088101
Location not found: Gaumont Theatre, Chester
Location not found: Gaumont Theatre, Ipswich
Location not found: Granada Cinema, Shrewsbury
Location not found: Grand Hall, Scarborough
Location not found: Grand Theatre, 

RateLimiter caught an error, retrying (0/2 tries). Called with (*('Heidelberg, Staadthalle',), **{}).
Traceback (most recent call last):
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 1377, in getresponse
    response.begin()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 320, in begin
    version, status, reason = self._read_status()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 281, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/socket.py", 

Location not found: Heidelberg, Staadthalle
Geocoded: Herod Atticus Odeon, Athens -> Latitude: 37.9711894, Longitude: 23.7248137
Location not found: Hippodrome, Leigh
Geocoded: Hippodrome Theatre, Derby -> Latitude: 52.9201446, Longitude: -1.4784938201467677
Location not found: Hippodrome at Ardwick Green, Manchester
Geocoded: Holland Park, London -> Latitude: 51.5030158, Longitude: -0.20411700053696852
Location not found: Ice Rink, Grangemouth
Location not found: Iveagh Bequest, Kenwood
Geocoded: Jahrhunderthalle, Frankfurt -> Latitude: 50.0994964, Longitude: 8.5189652
Location not found: Jephson Gardens Pavilion, Leamington Spa
Location not found: Kelsey-Kerridge Sports Hall, Cambridge
Geocoded: Kelvin Hall, Glasgow -> Latitude: 55.8685321, Longitude: -4.294295312007534
Location not found: Kiel, Schloss
Geocoded: King Edward's School, Birmingham -> Latitude: 52.450589, Longitude: -1.9229599026191502
Location not found: King George's Hall, Blackburn
Geocoded: King's College Chapel, Ca

RateLimiter caught an error, retrying (0/2 tries). Called with (*('Liederhalle, Stuttgart',), **{}).
Traceback (most recent call last):
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 1377, in getresponse
    response.begin()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 320, in begin
    version, status, reason = self._read_status()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 281, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/socket.py", l

Geocoded: Liederhalle, Stuttgart -> Latitude: 48.7791209, Longitude: 9.1678901
Location not found: Longford Theatre, Stretford
Location not found: Lonsdale Cinema, Carlisle
Geocoded: Maida Vale Studios, London -> Latitude: 51.5257722, Longitude: -0.1904117028895975
Location not found: Marquee, Llangollen
Location not found: Masshallen, Helsinki
Geocoded: Maxwell Hall, Salford -> Latitude: 53.48472125, Longitude: -2.271237261517636
Location not found: Mechanics Institute, Burnley
Geocoded: Meistersingerhalle, Nurnberg -> Latitude: 49.4372928, Longitude: 11.105157586014384
Location not found: Memorial Hall, Barry
Location not found: Memorial Theatre, Stratford-on-Avon
Geocoded: Mercatorhalle, Duisburg -> Latitude: 51.4330116, Longitude: 6.7707369
Location not found: Messehalle, Dornbirm


RateLimiter caught an error, retrying (0/2 tries). Called with (*('Minster, Southwell',), **{}).
Traceback (most recent call last):
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 1377, in getresponse
    response.begin()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 320, in begin
    version, status, reason = self._read_status()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 281, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/socket.py", line 

Geocoded: Minster, Southwell -> Latitude: 53.07684715, Longitude: -0.954137578991153
Geocoded: Minster, York -> Latitude: 53.9623561, Longitude: -1.0820834019803218


RateLimiter caught an error, retrying (0/2 tries). Called with (*('Morley College, London',), **{}).
Traceback (most recent call last):
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/site-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 1377, in getresponse
    response.begin()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 320, in begin
    version, status, reason = self._read_status()
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/http/client.py", line 281, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/Users/danielgonzalez/opt/anaconda3/lib/python3.9/socket.py", l

Geocoded: Morley College, London -> Latitude: 51.4979117, Longitude: -0.11017231779578437
Geocoded: Music Hall, Aberdeen -> Latitude: 57.1453568, Longitude: -2.105056233356427
Geocoded: Music Hall, Shrewsbury -> Latitude: 52.7070111, Longitude: -2.7548057556552106
Geocoded: Musikverein, Vienna -> Latitude: 48.20048645, Longitude: 16.37272518981993
Location not found: New Hippopdrome, Darlington
Location not found: New Opera House, Blackpool
Geocoded: New Theatre, Oxford -> Latitude: 51.7538627, Longitude: -1.2598378
Location not found: New Victoria Cinema, Preston
Location not found: New Victoria Hall, Halifax
Geocoded: Nicholas Chamberlaine School, Bedworth -> Latitude: 52.48039685, Longitude: -1.4584463492112585
Location not found: Northern Grammar School, Portsmouth
Geocoded: Northgate Arena, Chester -> Latitude: 53.1960472, Longitude: -2.8910302602957243
Location not found: Odeon, Prestwich
Location not found: Odeon Cinema, Sale
Location not found: Odeon Theatre, Chelmsford
Locatio

In [19]:
# Keep only the venues that were successfully geocoded. Restricting the check
# to the coordinate columns states the intent explicitly, and rebinding rather
# than using inplace=True avoids mutating the frame earlier cells displayed.
venues = venues.dropna(subset=['latitude', 'longitude'])

In [20]:
def format_data(row):
    """Map one venue row onto the snake_case record used for export.

    ``row`` must provide 'Venue Name', 'Venue Town', 'latitude', 'longitude',
    'concerts' and 'concert_count'; the returned dict exposes them as
    venue_name, venue_town, latitude, longitude, concerts and concert_count,
    in that order.
    """
    source_column = {
        "venue_name": 'Venue Name',
        "venue_town": 'Venue Town',
        "latitude": 'latitude',
        "longitude": 'longitude',
        "concerts": 'concerts',
        "concert_count": 'concert_count',
    }
    return {field: row[column] for field, column in source_column.items()}

import json

# Build one export record per venue, then serialise the list.
venue_records = venues.apply(format_data, axis=1)
formatted_data = venue_records.tolist()
json_output = json.dumps(formatted_data, indent=4)  # indent=4 for human-readable output


In [21]:
# Save the serialised venue records to disk.
with open('venue_data_new.json', 'w') as json_file:
    json_file.write(json_output)


In [23]:
# Convert every venue record into a GeoJSON Point feature. The nested
# 'concerts' list is left out; only the summary properties are exported.
features = []
for venue in formatted_data:
    point = {
        "type": "Point",
        # Coordinates are written longitude first, then latitude.
        "coordinates": [venue["longitude"], venue["latitude"]],
    }
    attributes = {
        "venue_name": venue["venue_name"],
        "venue_town": venue["venue_town"],
        "concert_count": venue["concert_count"],
    }
    features.append({"type": "Feature", "geometry": point, "properties": attributes})

geojson = {"type": "FeatureCollection", "features": features}


In [24]:


# Save the feature collection as pretty-printed GeoJSON.
with open('venues.geojson', 'w') as geojson_file:
    json.dump(geojson, geojson_file, indent=4)
