# Append lat/lon to Olympic venues

In [2]:
import os
import urllib.parse

import pandas as pd
import requests

def geocode_venue(venue_name, access_token, timeout=10):
    """Look up a venue's coordinates with the Mapbox forward-geocoding API.

    The search is limited to ``country=FR`` and to the bounding box
    ``2.224199,48.815573,2.469921,48.902145`` (presumably the Paris area --
    confirm), and only the top-ranked hit is requested.

    Args:
        venue_name: Free-text venue name used as the search query.
        access_token: Mapbox token sent as the ``access_token`` query parameter.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``(latitude, longitude)`` tuple, or ``(None, None)`` when the
        response contains no features.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status
            (for example an invalid token).
        requests.RequestException: On connection failures or timeouts.
    """
    base_url = "https://api.mapbox.com/geocoding/v5/mapbox.places/"
    # safe='' also percent-encodes '/', which quote() leaves alone by default
    # and which would otherwise split the query into extra URL path segments.
    encoded_venue = urllib.parse.quote(venue_name, safe='')
    url = f"{base_url}{encoded_venue}.json"
    params = {
        'access_token': access_token,
        'country': 'FR',
        'bbox': '2.224199,48.815573,2.469921,48.902145',
        'limit': 1
    }

    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, params=params, timeout=timeout)
    # Fail loudly on HTTP errors instead of tripping over a missing
    # 'features' key in the error payload.
    response.raise_for_status()
    data = response.json()

    features = data.get('features')
    if features:
        # 'center' is indexed as [lon, lat] here; swap to (lat, lon).
        coordinates = features[0]['center']
        return coordinates[1], coordinates[0]
    return None, None

# Usage
# Read the token from the environment so the real secret never has to be
# written into the notebook; the placeholder is kept as a fallback.
access_token = os.environ.get('MAPBOX_ACCESS_TOKEN', 'YOUR_MAPBOX_ACCESS_TOKEN')

# Read the CSV
df = pd.read_csv('paris_2024_data/venues.csv')

# Add new columns for latitude and longitude
df['latitude'] = None
df['longitude'] = None

# Iterate through venues and geocode.
# Each distinct venue name is sent to the API only once, and rows without a
# venue name are left with empty coordinates instead of crashing the request.
coords_by_venue = {}
for index, venue in df['venue'].items():
    if pd.isna(venue):
        continue
    if venue not in coords_by_venue:
        coords_by_venue[venue] = geocode_venue(venue, access_token)
    lat, lon = coords_by_venue[venue]
    df.at[index, 'latitude'] = lat
    df.at[index, 'longitude'] = lon

# Save the enriched dataset
df.to_csv('paris_2024_data/venues_with_coordinates.csv', index=False)


# Append neighborhood

In [8]:
import pandas as pd
import json
from shapely.geometry import Point, Polygon

# Read the CSV file with venue coordinates
venues_df = pd.read_csv('paris_2024_data/venues_with_coordinates.csv')

# Read the GeoJSON file. The encoding is given explicitly so accented
# neighbourhood names are decoded the same way on every platform.
with open('geojson/neighbourhoods.geojson', 'r', encoding='utf-8') as f:
    neighborhoods_data = json.load(f)

# Create a list of (name, polygon) pairs, one entry per polygon part
neighborhoods = []
for feature in neighborhoods_data['features']:
    name = feature['properties']['neighbourhood']
    geometry = feature['geometry']
    # Normalise both supported geometry types to a list of polygons, where
    # each polygon is a list of rings; other geometry types are skipped.
    if geometry['type'] == 'Polygon':
        polygons = [geometry['coordinates']]
    elif geometry['type'] == 'MultiPolygon':
        polygons = geometry['coordinates']
    else:
        polygons = []
    for rings in polygons:
        # rings[0] is the outer boundary; any further rings are holes and
        # must be kept so points inside a hole are not reported as contained.
        poly = Polygon(rings[0], rings[1:])
        neighborhoods.append((name, poly))

# Function to find the neighborhood for a point
def find_neighborhood(lat, lon):
    """Return the name of the first neighbourhood polygon containing a point.

    Args:
        lat: Latitude of the point; may be missing (``None``/NaN).
        lon: Longitude of the point; may be missing (``None``/NaN).

    Returns:
        The name paired with the first polygon in the module-level
        ``neighborhoods`` list that contains the point, or ``None`` when the
        coordinates are missing or no polygon contains the point.
    """
    # Venues that could not be geocoded have no coordinates. Skip them up
    # front rather than building a Point from NaN values, which appears to
    # be what makes shapely's contains() emit a runtime warning.
    if pd.isna(lat) or pd.isna(lon):
        return None

    # Point takes (x, y), i.e. (lon, lat).
    point = Point(lon, lat)
    for name, poly in neighborhoods:
        if poly.contains(point):
            return name
    return None

# Resolve the neighbourhood of every venue from its latitude/longitude
venues_df['neighbourhood'] = venues_df.apply(
    lambda venue: find_neighborhood(venue['latitude'], venue['longitude']),
    axis=1,
)

# Write the enriched table out as a new CSV (row index omitted)
venues_df.to_csv(
    'paris_2024_data/venues_with_neighborhoods.csv',
    index=False,
)

print("Process completed. Check 'paris_2024_data/venues_with_neighborhoods.csv' for the result.")

Process completed. Check 'paris_2024_data/venues_with_neighborhoods.csv' for the result.


  return lib.contains(a, b, **kwargs)


# Fix neighborhood names

In [9]:
import pandas as pd

# Canonical neighbourhood spellings that observed names are matched against
correct_names = [
    "Observatoire",
    "Hôtel-de-Ville",
    "Entrepôt",
    "Opéra",
    "Vaugirard",
    "Louvre",
    "Luxembourg",
    "Popincourt",
    "Gobelins",
    "Bourse",
    "Buttes-Montmartre",
    "Buttes-Chaumont",
    "Temple",
    "Reuilly",
    "Élysée",
    "Panthéon",
    "Batignolles-Monceau",
    "Ménilmontant",
    "Palais-Bourbon",
    "Passy",
]

# Load the venues table that already carries a 'neighbourhood' column
df = pd.read_csv('paris_2024_data/venues_with_neighborhoods.csv')

# Function to find the closest match
def find_closest_match(name, correct_list):
    """Return the entry of ``correct_list`` most similar to ``name``.

    Similarity is the case-insensitive ``difflib.SequenceMatcher`` ratio.
    Unlike a position-by-position comparison, this accounts for the full
    length of both strings and tolerates inserted, dropped or mis-encoded
    characters that shift the rest of the name.

    Args:
        name: The (possibly misspelled or garbled) name to correct.
        correct_list: Non-empty sequence of canonical names.

    Returns:
        The best-matching canonical name; on ties the earliest entry wins.

    Raises:
        ValueError: If ``correct_list`` is empty.
    """
    from difflib import SequenceMatcher

    lowered = name.lower()
    return max(
        correct_list,
        key=lambda candidate: SequenceMatcher(None, lowered, candidate.lower()).ratio(),
    )

# Map every non-null neighbourhood onto its closest canonical spelling;
# missing values are passed through unchanged.
df['neighbourhood'] = df['neighbourhood'].apply(
    lambda raw_name: raw_name
    if pd.isnull(raw_name)
    else find_closest_match(str(raw_name), correct_names)
)

# Write the corrected table to its own CSV (row index omitted)
df.to_csv(
    'paris_2024_data/venues_with_corrected_neighborhoods.csv',
    index=False,
)

print("Neighborhood names have been corrected. Check 'venues_with_corrected_neighborhoods.csv' for the result.")

Neighborhood names have been corrected. Check 'venues_with_corrected_neighborhoods.csv' for the result.


## Convert the neighborhood GeoJSON to a usable CSV

In [10]:
import json
import csv

def geojson_to_csv(input_file, output_file):
    """Flatten the polygons of a GeoJSON file into a two-column CSV.

    One row is written per Polygon or MultiPolygon feature: the feature's
    ``neighbourhood`` property and a ``contour`` column holding every
    coordinate pair of every ring of the feature, concatenated into a single
    list and stored as that list's string form.

    Features whose geometry is missing or of any other type are skipped.

    Args:
        input_file: Path of the GeoJSON file to read (decoded as UTF-8).
        output_file: Path of the CSV file to create (encoded as UTF-8).
    """
    # Read the GeoJSON file
    with open(input_file, 'r', encoding='utf-8') as f:
        geojson_data = json.load(f)

    # Open the CSV file for writing
    with open(output_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)

        # Write the header
        writer.writerow(['neighbourhood', 'contour'])

        # Process each feature in the GeoJSON
        for feature in geojson_data['features']:
            neighbourhood = feature['properties']['neighbourhood']
            geometry = feature.get('geometry') or {}
            geometry_type = geometry.get('type')

            # A Polygon is a list of rings while a MultiPolygon is a list of
            # polygons; wrap the former so both share the same nesting depth.
            if geometry_type == 'Polygon':
                polygons = [geometry['coordinates']]
            elif geometry_type == 'MultiPolygon':
                polygons = geometry['coordinates']
            else:
                continue

            # Extract and format coordinates
            contour = []
            for polygon in polygons:
                for ring in polygon:
                    contour.extend(ring)

            # Write the row to CSV
            writer.writerow([neighbourhood, contour])

    print(f"CSV file '{output_file}' has been created successfully.")

# Usage: convert the neighbourhoods GeoJSON into a CSV next to it
input_file, output_file = (
    'geojson/neighbourhoods.geojson',
    'geojson/neighbourhoods.csv',
)
geojson_to_csv(input_file=input_file, output_file=output_file)

CSV file 'geojson/neighbourhoods.csv' has been created successfully.


# Appending capacity to dataset

In [13]:
import pandas as pd

# Built-in venue -> capacity table, used when the caller supplies none.
_DEFAULT_VENUE_CAPACITIES = {
    'Aquatics Centre': 5000,
    'Bercy Arena': 15000,
    'Champ de Mars Arena': 9000,
    'Château de Versailles': 80000,
    'Chateauroux Shooting Centre': 3000,
    'Eiffel Tower Stadium': 12000,
    'Elancourt Hill': 25000,
    'Bordeaux Stadium': 42000,
    'La Beaujoire Stadium': 35000,
    'Geoffroy-Guichard Stadium': 42000,
    'Parc des Princes': 50000,
    'Lyon Stadium': 60000,
    'Marseille Stadium': 68000,
    'Nice Stadium': 35000,
    'Grand Palais': 8000,
    'Invalides': 8000,
    'La Concorde': 30000,
    'Le Bourget': 5000,
    'Le Golf National': 35000,
    'Marseille Marina': 5000,
    'North Paris Arena': 6000,
    'Paris La Defense Arena': 15000,
    'Pierre Mauroy Stadium': 26000,
    'Pont Alexandre III': 1000,
    'Porte de La Chapelle Arena': 8000,
    'Stade Roland-Garros': 36000,
    'Saint-Quentin Velodrome': 5000,
    'South Paris Arena': 6000,
    'Stade de France': 77000,
    'Tahiti': 5000,
    'Trocadéro': 13000,
    'Vaires-sur-Marne Nautical Stadium': 22000,
    'Yves-du-Manoir Stadium': 15000
}


def merge_venue_capacity(existing_data_file, capacity_data, output_file):
    """Add a ``capacity`` column to a venues CSV and save the result.

    Args:
        existing_data_file: Path of the CSV to read; it must contain a
            ``venue`` column.
        capacity_data: Mapping from venue name to capacity. Pass ``None`` to
            use the built-in table. A supplied mapping replaces the built-in
            table rather than being merged with it.
        output_file: Path of the CSV to write (row index omitted).

    Venue names are matched exactly; venues absent from the mapping end up
    with a missing ``capacity`` value.
    """
    # Read the existing dataset
    existing_df = pd.read_csv(existing_data_file)

    # Honour caller-supplied capacities instead of silently ignoring them
    capacity_dict = _DEFAULT_VENUE_CAPACITIES if capacity_data is None else capacity_data

    # Add capacity column to the existing dataframe
    existing_df['capacity'] = existing_df['venue'].map(capacity_dict)

    # Save the merged dataset to a new CSV file
    existing_df.to_csv(output_file, index=False)
    print(f"Merged data saved to {output_file}")

# Example usage: enrich the corrected-neighbourhood table with capacities
existing_data_file, output_file = (
    'paris_2024_data/venues_with_corrected_neighborhoods.csv',
    'paris_2024_data/venues_with_corrected_neighborhoods_and_capacity.csv',
)

merge_venue_capacity(existing_data_file, capacity_data=None, output_file=output_file)

Merged data saved to paris_2024_data/venues_with_corrected_neighborhoods_and_capacity.csv
