In [67]:
import requests
from pyspark.sql import SparkSession, Row, Window
from pyspark.sql.functions import *
from pyspark.sql.types import *
import time
import json
from delta import DeltaTable

# Assemble the session builder one setting at a time; the order of the
# settings is the same as the order in which they are applied.
_builder = SparkSession.builder.appName('lab')  # Name the app
# Set external Hive Metastore
_builder = _builder.config("hive.metastore.uris", "thrift://metastore:9083")
# Prevent some errors
_builder = _builder.config("hive.metastore.schema.verification", "false")
_builder = _builder.config("spark.sql.repl.eagerEval.enabled", True)
# Delta Lake SQL extension and catalog implementation
_builder = _builder.config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
_builder = _builder.config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")

spark = _builder.enableHiveSupport().getOrCreate()

# Partition date (dt=...) used in the raw-zone paths read further down.
dt_now = "2024-09-24"

In [84]:
# List the databases (namespaces) visible to this Spark session.
spark.sql("show databases").show()

+---------+
|namespace|
+---------+
| business|
|  default|
|    stage|
+---------+



In [85]:
# List the tables registered in the `stage` database.
spark.sql("show tables from stage").show()

+---------+-----------+-----------+
|namespace|  tableName|isTemporary|
+---------+-----------+-----------+
|    stage|   corredor|      false|
|    stage|    posicao|      false|
|    stage|     linhas|      false|
|    stage|    paradas|      false|
|    stage|      trips|      false|
|    stage|    empresa|      false|
|    stage|   previsao|      false|
|    stage| linhas_api|      false|
|    stage|paradas_api|      false|
+---------+-----------+-----------+



In [103]:
# List the tables registered in the `business` database.
spark.sql("show tables from business").show()

+---------+-----------+-----------+
|namespace|  tableName|isTemporary|
+---------+-----------+-----------+
| business|    posicao|      false|
| business|   previsao|      false|
| business|    paradas|      false|
| business|paradas_api|      false|
| business|      trips|      false|
+---------+-----------+-----------+



In [108]:
# Show the columns and data types of business.trips; truncate=False prints
# each cell in full instead of shortening long values.
spark.sql("describe business.trips").show(truncate=False)

+---------------+---------+-------+
|col_name       |data_type|comment|
+---------------+---------+-------+
|route_id       |string   |       |
|service_id     |string   |       |
|trip_id        |string   |       |
|trip_headsign  |string   |       |
|direction_id   |string   |       |
|shape_id       |string   |       |
|               |         |       |
|# Partitioning |         |       |
|Not partitioned|         |       |
+---------------+---------+-------+



In [None]:
def autenticar():
    """Open a ``requests`` session and authenticate it against the Olho Vivo API.

    A fresh ``requests.Session`` is created and the API token is POSTed to the
    ``Login/Autenticar`` endpoint through it, so the same session object can be
    reused by the caller for subsequent requests.

    The token is taken from the ``SPTRANS_API_TOKEN`` environment variable when
    it is set; otherwise the token that was previously embedded in this
    notebook is used, so existing runs keep working unchanged.

    Returns:
        The still-open session when the endpoint answers with HTTP 200.
        ``None`` when any other status code is returned; in that case a
        failure message is printed and the session is closed.

    Raises:
        Any exception raised by ``session.post`` is propagated after the
        session has been closed.
    """
    import os

    # Authentication endpoint of the API.
    api_url = "https://api.olhovivo.sptrans.com.br/v2.1/Login/Autenticar"

    # NOTE(review): a credential committed to the notebook is a leak; prefer
    # setting SPTRANS_API_TOKEN and rotating this fallback value.
    token = os.environ.get(
        "SPTRANS_API_TOKEN",
        "fa1ae741481d20625673b2020fdd07bcfdcf5d60f27d226a069812a94de3edd0",
    )

    session = requests.Session()

    try:
        # `params` appends "?token=<token>" to the URL, properly encoded.
        response = session.post(api_url, params={"token": token})
    except Exception:
        # Nothing will be returned, so release the session before re-raising.
        session.close()
        raise

    if response.status_code == 200:
        # NOTE(review): only the status code is checked; the response body is
        # not inspected. Presumably the API also reports success/failure in
        # the body — confirm against the API documentation.
        # The session is deliberately left open: the caller is going to use it.
        return session

    # Report the status of the response already received instead of sending a
    # second POST just to read its status code.
    print(f"Falha ao acessar a API. Status Code: {response.status_code}")
    session.close()
    return None

In [71]:
def obterParadasLinha(linha_id, session, espera_segundos=60, max_tentativas=None):
    """Fetch the stops of one bus line from the ``BuscarParadasPorLinha`` endpoint.

    The request is repeated while the API answers HTTP 429, pausing between
    attempts. Any status other than 200 or 429 ends the lookup immediately.

    Args:
        linha_id: Line code interpolated into the ``codigoLinha`` query
            parameter.
        session: Object exposing ``get(url)`` whose result has ``status_code``
            and ``json()`` (e.g. the session returned by ``autenticar``).
        espera_segundos: Seconds to sleep after an HTTP 429 before retrying.
            Defaults to 60, the value that used to be hard-coded.
        max_tentativas: Maximum number of requests to send. ``None`` (the
            default) keeps retrying for as long as the API answers 429,
            which is the original behaviour.

    Returns:
        ``{"linha_id": linha_id, "data": <decoded JSON body>}`` on HTTP 200;
        ``None`` on any other non-429 status, or when ``max_tentativas``
        requests have all been answered with 429.
    """
    url = f"https://api.olhovivo.sptrans.com.br/v2.1/Parada/BuscarParadasPorLinha?codigoLinha={linha_id}"

    tentativas = 0
    while True:
        tentativas += 1
        r = session.get(url)

        if r.status_code == 200:
            return {"linha_id": linha_id, "data": r.json()}

        if r.status_code != 429:
            return None

        # HTTP 429: give up without sleeping if the attempt budget is spent,
        # otherwise wait and try again.
        if max_tentativas is not None and tentativas >= max_tentativas:
            return None
        time.sleep(espera_segundos)

# Every field of a "linhas" record is read as a nullable string.
linhas_schema = StructType([
    StructField(campo, StringType(), True)
    for campo in ("route_id", "cl", "lc", "lt", "sl", "tl", "tp", "ts")
])

# Session used by the per-line API lookups below.
session = autenticar()

linhas_df = (
    spark.read
    .format("json")
    .schema(linhas_schema)
    .load(f"s3a://raw/olhovivo/linhas/dt={dt_now}/")
)

# One record per distinct line code ("cl").
linhas_rdd = linhas_df.select("cl").distinct().rdd

# line code -> API lookup -> JSON text wrapped in a single-column Row.
paradas_rdd = (
    linhas_rdd
    .map(lambda linha: linha.asDict()["cl"])
    .map(lambda codigo: obterParadasLinha(codigo, session))
    .map(lambda resultado: Row(data=json.dumps(resultado)))
)
paradas_df = spark.createDataFrame(
    paradas_rdd,
    StructType([StructField("data", StringType(), True)]),
)

# Number of rows produced by the lookups.
paradas_df.count()

2650

In [72]:
# Read the stop records stored under the dt=<dt_now> partition of the raw zone;
# no schema is supplied, so it is inferred from the JSON files.
df_paradas_api = spark.read.format("json").load(f"s3a://raw/olhovivo/paradas/dt={dt_now}/")
df_paradas_api

cp,ed,linha_id,np,px,py
560009160,R JOSE NICOLAU DE...,1159,JOSÉ NICOLAU DE L...,-46.72303,-23.777145
560009163,R NIELS CHRISTIAN...,1159,ELEVATÓRIA SABESP...,-46.725802,-23.781379
560009164,R NIELS CHRISTIAN...,1159,NIELS C. SORENSEN...,-46.726102,-23.785907
560009167,R PAIOL VELHO/ R ...,1159,ALFREDO REIMBERG C/B,-46.727033,-23.791088
560009169,AV FERNANDO DA CR...,1159,ESTRADA DO PINHAL...,-46.729377,-23.793311
560009172,R HENRIQUE HESSEL...,1159,BALTAZARINI C/B,-46.731426,-23.799048
560009174,R CAROLINA REIMBE...,1159,ESTRADA DO PAIOL C/B,-46.734008,-23.801384
560009231,R AMARO ALVES DO ...,1159,SATÉLITE ARIEL C/B,-46.72081,-23.773559
230009829,R FREDERICO RENE ...,2393,RIO BONITO ESQUER...,-46.698916,-23.71823
230009831,R FREDERICO RENE ...,2393,RIO BONITO DIREIT...,-46.698884,-23.718222


In [74]:
# pprint is not imported by any earlier visible cell, so bring it into scope
# here; without this the cell fails with NameError on a fresh kernel.
import pprint

# Authenticate, then query the line-search endpoint for the term "8000".
session = autenticar()

r = session.get("https://api.olhovivo.sptrans.com.br/v2.1/Linha/Buscar?termosBusca=8000")

# Pretty-print the decoded JSON body of the response.
pprint.pprint(r.json())

[{'cl': 2506,
  'lc': False,
  'lt': '8000',
  'sl': 1,
  'tl': 1,
  'tp': 'PÇA. RAMOS DE AZEVEDO',
  'ts': 'TERM. LAPA'},
 {'cl': 35274,
  'lc': False,
  'lt': '8000',
  'sl': 2,
  'tl': 1,
  'tp': 'PÇA. RAMOS DE AZEVEDO',
  'ts': 'TERM. LAPA'},
 {'cl': 1273,
  'lc': False,
  'lt': '8000',
  'sl': 1,
  'tl': 10,
  'tp': 'PÇA. RAMOS DE AZEVEDO',
  'ts': 'TERM. LAPA'},
 {'cl': 34041,
  'lc': False,
  'lt': '8000',
  'sl': 2,
  'tl': 10,
  'tp': 'PÇA. RAMOS DE AZEVEDO',
  'ts': 'TERM. LAPA'}]


In [78]:
from datetime import datetime

# Current local time rendered as hours and minutes (HH:MM).
format(datetime.now(), "%H:%M")

'19:58'

In [83]:
# Re-read the stop records for the dt=<dt_now> partition of the raw zone;
# the schema is inferred again from the JSON files.
df_paradas_api = spark.read.format("json").load(f"s3a://raw/olhovivo/paradas/dt={dt_now}/")
df_paradas_api

cp,ed,hr,linha_id,np,px,py
560009160,R JOSE NICOLAU DE...,20:10,1159,JOSÉ NICOLAU DE L...,-46.72303,-23.777145
560009163,R NIELS CHRISTIAN...,20:10,1159,ELEVATÓRIA SABESP...,-46.725802,-23.781379
560009164,R NIELS CHRISTIAN...,20:10,1159,NIELS C. SORENSEN...,-46.726102,-23.785907
560009167,R PAIOL VELHO/ R ...,20:10,1159,ALFREDO REIMBERG C/B,-46.727033,-23.791088
560009169,AV FERNANDO DA CR...,20:10,1159,ESTRADA DO PINHAL...,-46.729377,-23.793311
560009172,R HENRIQUE HESSEL...,20:10,1159,BALTAZARINI C/B,-46.731426,-23.799048
560009174,R CAROLINA REIMBE...,20:10,1159,ESTRADA DO PAIOL C/B,-46.734008,-23.801384
560009231,R AMARO ALVES DO ...,20:10,1159,SATÉLITE ARIEL C/B,-46.72081,-23.773559
230009829,R FREDERICO RENE ...,20:10,2393,RIO BONITO ESQUER...,-46.698916,-23.71823
230009831,R FREDERICO RENE ...,20:10,2393,RIO BONITO DIREIT...,-46.698884,-23.718222
