In [5]:
# Base da AWS

import csv
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# Google API key. It is read from the environment so the secret never lives in
# the notebook. The key that used to be hardcoded here should be treated as
# leaked and rotated.
import os

API_KEY = os.environ.get('GOOGLE_MAPS_API_KEY')
if not API_KEY:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable before running this cell."
    )

# Distance Matrix API endpoint (JSON output)
base_url = "https://maps.googleapis.com/maps/api/distancematrix/json"

# Shared HTTP session: HTTPS requests are retried up to 5 times, with backoff,
# when the server answers 429 or one of the listed 5xx status codes.
session = requests.Session()
retries = Retry(total=5, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
adapter = HTTPAdapter(max_retries=retries)
session.mount('https://', adapter)

def get_distance_duration(origins, destinations):
    """Look up the distance and travel time between two locations.

    Sends one GET request to ``base_url`` through the shared ``session`` and
    reads the first element of the first row of the JSON response.

    Args:
        origins: Origin location, sent as the ``origins`` query parameter.
        destinations: Destination location, sent as the ``destinations``
            query parameter.

    Returns:
        A ``(distance, duration)`` tuple with the distance in meters and the
        travel time in seconds, or ``(None, None)`` when either input is
        empty, the request fails, or the response holds no usable result.
    """
    # A missing location can never be resolved, so skip the request entirely.
    if not origins or not destinations:
        return None, None

    params = {
        'origins': origins,
        'destinations': destinations,
        'key': API_KEY
    }
    try:
        response = session.get(base_url, params=params, timeout=10)
        result = response.json()

        status = result.get('status')
        if status != 'OK':
            # The API reports request-level failures through the body's
            # status field; surface them instead of silently returning blanks.
            print(f"API returned status {status}: {result.get('error_message', '')}")
            return None, None

        element = result['rows'][0]['elements'][0]
        if element.get('status') == 'OK':
            # distance value is in meters, duration value is in seconds
            return element['distance']['value'], element['duration']['value']
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
        # Non-JSON body or a payload missing the expected fields: treat it as
        # "no result" for this row rather than aborting the whole batch.
        print(f"Unexpected response: {e}")
    return None, None

# Input and output files: the CSV that is read row by row, and the CSV that
# receives those rows with the distance/duration columns appended.
input_csv = 'Data/Dataframelimpa_sem_latlong.csv'
output_csv = 'Data/Distancias_AWS2.csv'

def process_row(row):
    """Add distance and travel time to one CSV row.

    Reads the ``seller_zip_code_prefix`` and ``customer_zip_code_prefix``
    entries of ``row``, passes them to ``get_distance_duration`` and stores
    the two results under ``distance_meters`` and ``duration_seconds``.

    Args:
        row: Mapping for one CSV record; it is modified in place.

    Returns:
        The same ``row`` object, with the two extra keys set.
    """
    meters, seconds = get_distance_duration(
        row['seller_zip_code_prefix'],
        row['customer_zip_code_prefix'],
    )
    row.update(distance_meters=meters, duration_seconds=seconds)
    return row

# Read the input CSV, enrich every row concurrently and write the result.
# Both files are opened with newline='' as the csv module requires.
# NOTE(review): no explicit encoding is given, so the platform default is
# used for both files — confirm the CSVs match it.
with open(input_csv, mode='r', newline='') as infile, open(output_csv, mode='w', newline='') as outfile:
    reader = csv.DictReader(infile)
    if reader.fieldnames is None:
        raise ValueError(f"{input_csv} is empty: no header row found")

    # Append the new columns, skipping any the input already contains so the
    # header never ends up with duplicate names.
    new_columns = ['distance_meters', 'duration_seconds']
    fieldnames = list(reader.fieldnames) + [
        column for column in new_columns if column not in reader.fieldnames
    ]
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)

    writer.writeheader()

    rows_to_process = list(reader)

    # Rows are processed by 10 worker threads. executor.map yields results in
    # input order, so the output file is the same on every run.
    with ThreadPoolExecutor(max_workers=10) as executor:
        for processed_row in executor.map(process_row, rows_to_process):
            writer.writerow(processed_row)


In [1]:
# Base do Kaggle

import csv
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

# Google API key. It is read from the environment so the secret never lives in
# the notebook. The key that used to be hardcoded here should be treated as
# leaked and rotated.
import os

API_KEY = os.environ.get('GOOGLE_MAPS_API_KEY')
if not API_KEY:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable before running this cell."
    )

# Distance Matrix API endpoint (JSON output)
base_url = "https://maps.googleapis.com/maps/api/distancematrix/json"

# Shared HTTP session: HTTPS requests are retried up to 5 times, with backoff,
# when the server answers 429 or one of the listed 5xx status codes.
session = requests.Session()
retries = Retry(total=5, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
adapter = HTTPAdapter(max_retries=retries)
session.mount('https://', adapter)

def get_distance_duration(origins, destinations):
    """Look up the distance and travel time between two locations.

    Sends one GET request to ``base_url`` through the shared ``session`` and
    reads the first element of the first row of the JSON response.

    Args:
        origins: Origin location, sent as the ``origins`` query parameter.
        destinations: Destination location, sent as the ``destinations``
            query parameter.

    Returns:
        A ``(distance, duration)`` tuple with the distance in meters and the
        travel time in seconds, or ``(None, None)`` when either input is
        empty, the request fails, or the response holds no usable result.
    """
    # A missing location can never be resolved, so skip the request entirely.
    if not origins or not destinations:
        return None, None

    params = {
        'origins': origins,
        'destinations': destinations,
        'key': API_KEY
    }
    try:
        response = session.get(base_url, params=params, timeout=10)
        result = response.json()

        status = result.get('status')
        if status != 'OK':
            # The API reports request-level failures through the body's
            # status field; surface them instead of silently returning blanks.
            print(f"API returned status {status}: {result.get('error_message', '')}")
            return None, None

        element = result['rows'][0]['elements'][0]
        if element.get('status') == 'OK':
            # distance value is in meters, duration value is in seconds
            return element['distance']['value'], element['duration']['value']
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
    except (ValueError, KeyError, IndexError, TypeError, AttributeError) as e:
        # Non-JSON body or a payload missing the expected fields: treat it as
        # "no result" for this row rather than aborting the whole batch.
        print(f"Unexpected response: {e}")
    return None, None

# Input and output files: the CSV that is read row by row, and the CSV that
# receives those rows with the distance/duration columns appended.
input_csv = 'Data/Kaggle_enriquecido.csv'
output_csv = 'Data/Distancias_AWS_Kaggle.csv'

def process_row(row):
    """Add distance and travel time to one CSV row.

    Reads the ``seller_zip_code_prefix`` and ``customer_zip_code_prefix``
    entries of ``row``, passes them to ``get_distance_duration`` and stores
    the two results under ``distance_meters`` and ``duration_seconds``.

    Args:
        row: Mapping for one CSV record; it is modified in place.

    Returns:
        The same ``row`` object, with the two extra keys set.
    """
    meters, seconds = get_distance_duration(
        row['seller_zip_code_prefix'],
        row['customer_zip_code_prefix'],
    )
    row.update(distance_meters=meters, duration_seconds=seconds)
    return row

# Read the input CSV, enrich every row concurrently and write the result.
# Both files are opened with newline='' as the csv module requires.
# NOTE(review): no explicit encoding is given, so the platform default is
# used for both files — confirm the CSVs match it.
with open(input_csv, mode='r', newline='') as infile, open(output_csv, mode='w', newline='') as outfile:
    reader = csv.DictReader(infile)
    if reader.fieldnames is None:
        raise ValueError(f"{input_csv} is empty: no header row found")

    # Append the new columns, skipping any the input already contains so the
    # header never ends up with duplicate names.
    new_columns = ['distance_meters', 'duration_seconds']
    fieldnames = list(reader.fieldnames) + [
        column for column in new_columns if column not in reader.fieldnames
    ]
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)

    writer.writeheader()

    rows_to_process = list(reader)

    # Rows are processed by 10 worker threads. executor.map yields results in
    # input order, so the output file is the same on every run.
    with ThreadPoolExecutor(max_workers=10) as executor:
        for processed_row in executor.map(process_row, rows_to_process):
            writer.writerow(processed_row)
