## CBP Entry Ports Data

The CBP directory of ports of entry includes land ports, seaports, and airports along the southwest land border.

df_ca = https://www.cbp.gov/about/contact/ports/ca

df_az = https://www.cbp.gov/about/contact/ports/az

df_nm = https://www.cbp.gov/about/contact/ports/nm

df_tx = https://www.cbp.gov/about/contact/ports/tx


In [2]:
from bs4 import BeautifulSoup

import requests
import re

import pandas as pd

from geopy.geocoders import Photon
from geopy.geocoders import Nominatim

## All entries
### Step 1: Data Scraping

In [3]:
# CBP port-directory pages, keyed by lower-case state abbreviation.
# Order is preserved: it determines the row order of the merged table.
_PORT_DIRECTORY_PAGES = (
    ('ca', 'https://www.cbp.gov/about/contact/ports/ca'),
    ('az', 'https://www.cbp.gov/about/contact/ports/az'),
    ('nm', 'https://www.cbp.gov/about/contact/ports/nm'),
    ('tx', 'https://www.cbp.gov/about/contact/ports/tx'),
)
urls = dict(_PORT_DIRECTORY_PAGES)

# Function to scrape and create a DataFrame from a given URL
def scrape_to_df(url, timeout=30):
    """Download ``url`` and turn the first HTML table on the page into a DataFrame.

    Each ``<tr>`` that contains at least one ``<td>`` becomes one row; rows
    made only of ``<th>`` cells (headers) are skipped. Cell text is stripped
    and embedded newlines are replaced by single spaces.

    Parameters
    ----------
    url : str
        Page to fetch.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout
        ``requests.get`` can block indefinitely.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Port Name'``, ``'Location Address'`` and
        ``'Field Operation Office'``.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    ValueError
        If the page contains no ``<table>`` element.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error status instead of parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table')
    if table is None:
        # Previously this surfaced as an AttributeError on None.
        raise ValueError(f"No <table> element found at {url}")

    data = []
    for row in table.find_all('tr'):
        cols = row.find_all('td')
        if cols:
            data.append([ele.text.strip().replace('\n', ' ') for ele in cols])
    return pd.DataFrame(data, columns=['Port Name', 'Location Address', 'Field Operation Office'])

# Per-state tables, keyed 'df_<abbr>' (e.g. 'df_ca')
state_dfs = {}

# Fetch each state's page and tag its rows with the upper-case state code
for state_abbr, url in urls.items():
    df_temp = scrape_to_df(url).assign(State=state_abbr.upper())
    state_dfs['df_' + state_abbr] = df_temp

# Stack the per-state tables into a single frame with a fresh 0..n-1 index
df_sb_all = pd.concat(list(state_dfs.values()), ignore_index=True)


In [4]:
# Take out the airports
# Another option would be to filter by ZIP code: build a list of the ZIP codes that apply and drop the rest

In [5]:
# Filter the DataFrame to exclude rows that contain the word "airport" (currently disabled)
#df_sb_all = df_sb_all[~df_sb_all.apply(lambda x: x.astype(str).str.contains('airport', case=False, na=False)).any(axis=1)]


In [7]:
# Row count of the merged ports table
num_filas = len(df_sb_all)
print(f"El número total de líneas (filas) es: {num_filas}")

El número total de líneas (filas) es: 47


In [None]:
# TODO: review None values


 ### CBP Entry Ports 
 #### Step 2: Geocode Addresses to Coordinates

In [8]:
# Initialize the Photon geocoder (module-level; used by get_lat_long)
geolocator = Photon(user_agent="geoapiExercises")

# Function to get latitude and longitude
def get_lat_long(address):
    """Geocode ``address`` with the module-level ``geolocator``.

    Parameters
    ----------
    address : str
        Free-text address to look up.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` of the geocoder's match, or
        ``(None, None)`` when the address is missing/blank, nothing matched,
        or the lookup failed.
    """
    # Skip the remote call when there is nothing to look up
    # (None, NaN from pandas, or an empty/whitespace-only string).
    is_nan = isinstance(address, float) and address != address
    is_blank = isinstance(address, str) and not address.strip()
    if address is None or is_nan or is_blank:
        return (None, None)

    try:
        location = geolocator.geocode(address)
        if location:
            return (location.latitude, location.longitude)
        return (None, None)
    except Exception:
        # Best-effort: a failed lookup yields an empty result. `Exception`
        # (not a bare `except:`) lets KeyboardInterrupt/SystemExit through,
        # so a long geocoding run can still be interrupted.
        return (None, None)

# Geocode every value of 'Location Address'; each result is a (lat, lon) tuple
# NOTE(review): in the recorded output, "Calexico West" (CA 92231) resolves to
# roughly (38.89, -77.01), far from Calexico East at about (32.68, -115.39).
# Geocoder matches should be spot-checked before use.
df_sb_all['Coordinates'] = df_sb_all['Location Address'].apply(get_lat_long)

# Unpack the tuples into separate 'Latitude' and 'Longitude' columns
df_sb_all[['Latitude', 'Longitude']] = pd.DataFrame(
    df_sb_all['Coordinates'].tolist(),
    index=df_sb_all.index,
)


In [9]:
# Preview the geocoded table. head(70) asks for more rows than the 47 the
# recorded run reported, so this displays the whole frame.
df_sb_all.head(70)

Unnamed: 0,Port Name,Location Address,Field Operation Office,State,Coordinates,Latitude,Longitude
0,"Andrade - Class A, California - 2502","235 Andrade Road Winterhaven, CA 92283 United ...",San Diego,CA,"(32.7184216, -114.727919)",32.718422,-114.727919
1,"Calexico East - Class A, California - 2507","1699 East Carr, Rd Bldg A Calexico, CA 92231 U...",San Diego,CA,"(32.6753413, -115.3887997)",32.675341,-115.3888
2,"Calexico West - Class A, California - 2503","200 East First Street Calexico, CA 92231 Unite...",San Diego,CA,"(38.88981295000001, -77.00902077737487)",38.889813,-77.009021
3,"Eureka, California - 2802","317 3rd Street, Suite 6 Eureka, CA 95501 Unite...",San Francisco,CA,"(40.8035929, -124.16821893938675)",40.803593,-124.168219
4,"Fresno (2803/2882), California - 2803","5177 E. Clinton Way Fresno, CA 93727 United St...",San Francisco,CA,"(36.7722997, -119.7316224)",36.7723,-119.731622
6,"Los Angeles/Long Beach Seaport, California - 2704","301 E. Ocean Blvd. Suite 1400 Long Beach, CA 9...",Los Angeles,CA,"(33.76468345, -118.19082874479344)",33.764683,-118.190829
8,"Otay Mesa, California - 2506","9777 Via De La Amistad San Diego, CA 92154 Uni...",San Diego,CA,"(32.5518614, -116.9357264)",32.551861,-116.935726
9,"Palm Springs, California - 2781","210 N. El Cielo Road Palm Springs, CA 92262 Un...",Los Angeles,CA,"(33.8248194, -116.5147028)",33.824819,-116.514703
10,"Port Hueneme, California - 2713","720 W. Hueneme Road Oxnard, CA 93033 United St...",Los Angeles,CA,"(34.20009175, -119.20789455598106)",34.200092,-119.207895
13,"San Diego, California - 2501",3835 N. Harbor Drive Terminal 2 West San Diego...,San Diego,CA,"(32.7289725, -117.1856172)",32.728972,-117.185617


In [17]:
# Save the geocoded ports table as a CSV file
# NOTE(review): absolute, machine-specific path. It uses the folder
# "1. distribution_Institucional", while the other CSV path in this notebook
# uses "1. distribution_infraestructura" — confirm which one is intended.
path = "/Users/pablouriarte/Documents/1. Expediente Tec de Monterrey/1.Tesis/Mapa_Migracion_Irregular_Mexico/1. distribution_Institucional/3. CBP Entry Ports/cbp_entry_ports_loc.csv"

# Write the DataFrame to CSV without the index column
df_sb_all.to_csv(path, index=False)

print("Archivo guardado exitosamente en:", path)


Archivo guardado exitosamente en: /Users/pablouriarte/Documents/1. Expediente Tec de Monterrey/1.Tesis/Mapa_Migracion_Irregular_Mexico/1. distribution_Institucional/3. CBP Entry Ports/cbp_entry_ports_loc.csv


### Terrestrial Port Entries

https://en.wikipedia.org/wiki/List_of_Mexico%E2%80%93United_States_border_crossings

In [11]:
import pandas as pd

# Path to the entry-ports CSV; replace it with the actual path to your copy of the file
# NOTE(review): absolute, machine-specific path.
ruta_archivo = '/Users/pablouriarte/Documents/1. Expediente Tec de Monterrey/1.Tesis/Mapa_Migracion_Irregular_Mexico/1. distribution_infraestructura/3. CBP Entry Ports/mxusa_entry_ports_loc.csv'
df = pd.read_csv(ruta_archivo)


In [12]:
# Preview the first rows of the table loaded from the CSV
df.head()

Unnamed: 0,Name,Code,Location,Coordinates,Unnamed: 4,Unnamed: 5
0,San Ysidro,SYS,"San Ysidro, California","32.54431071396611, -117.02856441126859",,
1,Otay Mesa,OTM,"Otay Mesa, California","32.548830333720865, -116.93751776676132",,
2,Tecate,TEC,"Tecate, California","32.576259490009384, -116.62745997805513",,
3,Calexico West,CAL,"Calexico, California","32.665106, -115.496349",,
4,Calexico East,IVP,"Calexico, California","32.67541923014072, -115.38831102864438",,


In [13]:
# Split the "lat, lon" text in 'Coordinates' on the comma and cast both
# halves to float, producing numeric 'Latitude' and 'Longitude' columns
df[['Latitude', 'Longitude']] = (
    df['Coordinates'].str.split(',', expand=True).astype(float)
)

# Print the first rows to check the new columns
print(df.head())

            Name Code                Location  \
0     San Ysidro  SYS  San Ysidro, California   
1      Otay Mesa  OTM   Otay Mesa, California   
2         Tecate  TEC      Tecate, California   
3  Calexico West  CAL    Calexico, California   
4  Calexico East  IVP    Calexico, California   

                               Coordinates  Unnamed: 4  Unnamed: 5   Latitude  \
0   32.54431071396611, -117.02856441126859         NaN         NaN  32.544311   
1  32.548830333720865, -116.93751776676132         NaN         NaN  32.548830   
2  32.576259490009384, -116.62745997805513         NaN         NaN  32.576259   
3                   32.665106, -115.496349         NaN         NaN  32.665106   
4   32.67541923014072, -115.38831102864438         NaN         NaN  32.675419   

    Longitude  
0 -117.028564  
1 -116.937518  
2 -116.627460  
3 -115.496349  
4 -115.388311  


In [14]:
# Drop the placeholder columns by name (they show only NaN in the preview).
# errors='ignore' keeps this cell re-runnable: the notebook later overwrites
# the source CSV without these columns, so on a second run they are absent
# and a plain drop() would raise KeyError.
df = df.drop(columns=['Unnamed: 4', 'Unnamed: 5'], errors='ignore')

# Print the first rows to check the change
print(df.head())


            Name Code                Location  \
0     San Ysidro  SYS  San Ysidro, California   
1      Otay Mesa  OTM   Otay Mesa, California   
2         Tecate  TEC      Tecate, California   
3  Calexico West  CAL    Calexico, California   
4  Calexico East  IVP    Calexico, California   

                               Coordinates   Latitude   Longitude  
0   32.54431071396611, -117.02856441126859  32.544311 -117.028564  
1  32.548830333720865, -116.93751776676132  32.548830 -116.937518  
2  32.576259490009384, -116.62745997805513  32.576259 -116.627460  
3                   32.665106, -115.496349  32.665106 -115.496349  
4   32.67541923014072, -115.38831102864438  32.675419 -115.388311  


In [15]:
# Same path as the file that was read at the start of this section
# NOTE(review): this overwrites the input file, so a later run of the notebook
# reads the already-processed CSV (with 'Latitude'/'Longitude' and without the
# 'Unnamed' columns) rather than the original.
archivo_path = '/Users/pablouriarte/Documents/1. Expediente Tec de Monterrey/1.Tesis/Mapa_Migracion_Irregular_Mexico/1. distribution_infraestructura/3. CBP Entry Ports/mxusa_entry_ports_loc.csv'

# Save the updated DataFrame to that file, overwriting the original
df.to_csv(archivo_path, index=False, encoding='utf-8')

print(f"Archivo actualizado guardado en: {archivo_path}")


Archivo actualizado guardado en: /Users/pablouriarte/Documents/1. Expediente Tec de Monterrey/1.Tesis/Mapa_Migracion_Irregular_Mexico/1. distribution_infraestructura/3. CBP Entry Ports/mxusa_entry_ports_loc.csv
