### Libreta para cargar los datos en S3

Ejecutando esta libreta se crea automaticamente un Bucket en S3 y se cargan los datos necesarios para ejecutar la App desde la fuente de Streamlit

In [1]:
import boto3
import botocore
import configparser
import pandas as pd
import os
import json

In [2]:
#Credentials in case need manual credentials
AWS_S3_BUCKET = "streamlit-traffic-accidents"
aws_access_key_id='-do-not-'
aws_secret_access_key='-share-aws-'
aws_session_token='-credentials-'

In [8]:
def get_aws_credentials():
    aws_credentials_path = '../source/aws_credentials'

    if os.path.exists(aws_credentials_path):
        # Load the configuration file
        config = configparser.ConfigParser()
        config.read(aws_credentials_path)

        # Extract AWS credentials
        aws_access_key_id = config.get('default', 'aws_access_key_id')
        aws_secret_access_key = config.get('default', 'aws_secret_access_key')
        aws_session_token = config.get('default', 'aws_session_token')        
    
        return aws_access_key_id,aws_secret_access_key,aws_session_token

In [9]:
aws_access_key_id,aws_secret_access_key,aws_session_token =  get_aws_credentials()

In [10]:
s3_session = boto3.session.Session(
 region_name="us-east-1",
 aws_access_key_id=aws_access_key_id,
 aws_secret_access_key=aws_secret_access_key,
 aws_session_token=aws_session_token
).client('s3')

In [11]:
response = s3_session.list_buckets()
response

{'ResponseMetadata': {'RequestId': '3YP7JQ3RKE637V1W',
  'HostId': '5dznjiwpd03zQhGHgNeuCEQcbT2floWQAAMjUhUOqhe4WKe/b3UgXUWHYNZ4TASV1Z561Di7ZwA=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': '5dznjiwpd03zQhGHgNeuCEQcbT2floWQAAMjUhUOqhe4WKe/b3UgXUWHYNZ4TASV1Z561Di7ZwA=',
   'x-amz-request-id': '3YP7JQ3RKE637V1W',
   'date': 'Sun, 22 Oct 2023 03:36:26 GMT',
   'content-type': 'application/xml',
   'transfer-encoding': 'chunked',
   'server': 'AmazonS3'},
  'RetryAttempts': 0},
 'Buckets': [{'Name': 'jesus-mlops',
   'CreationDate': datetime.datetime(2023, 9, 22, 17, 59, 57, tzinfo=tzutc())},
  {'Name': 'mlflow-server-traffic',
   'CreationDate': datetime.datetime(2023, 10, 2, 8, 5, 6, tzinfo=tzutc())},
  {'Name': 'my-test-bucket-traffic',
   'CreationDate': datetime.datetime(2023, 10, 20, 13, 6, 21, tzinfo=tzutc())},
  {'Name': 'proyecto-airbnb-mbit',
   'CreationDate': datetime.datetime(2023, 7, 4, 15, 32, 11, tzinfo=tzutc())},
  {'Name': 'sagemaker-studio-570786501654-8vh4

In [12]:
def create_bucket_if_not_exists(s3_session, aws_s3_bucket):
    try:
        s3_session.head_bucket(Bucket=aws_s3_bucket)
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == '404':
            s3_session.create_bucket(Bucket=aws_s3_bucket)
            return True
        else:
            return False
    return True

In [13]:
def upload_folder_to_s3(s3_session,aws_s3_bucket, source_folder):
    source_directory = '../streamlit/source/'
    full_path = f'{source_directory}{source_folder}'

    for root, dirs, files in os.walk(full_path):
          for filename in files:
            local_path = os.path.join(root, filename)
            s3_path = os.path.join(source_folder, filename)            
            
            print(f'About to copy file {filename}>> source path:{local_path} destination path: {s3_path}')
            
            s3_session.upload_file(local_path, aws_s3_bucket, s3_path)
            

In [14]:
def streamlit_uploader_tool():
    AWS_S3_BUCKET = "streamlit-traffic-accidents"
    
    aws_access_key_id,aws_secret_access_key,aws_session_token =  get_aws_credentials()
    s3_session = boto3.session.Session(
     region_name="us-east-1",
     aws_access_key_id=aws_access_key_id,
     aws_secret_access_key=aws_secret_access_key,
     aws_session_token=aws_session_token
    ).client('s3')    
    
    create_bucket_if_not_exists(s3_session, AWS_S3_BUCKET)
    
    upload_folder_to_s3(s3_session, AWS_S3_BUCKET , 'data')
    upload_folder_to_s3(s3_session, AWS_S3_BUCKET ,'img')

In [10]:
streamlit_uploader_tool()

About to copy file Distritos_de_Madrid.geojson>> source path:../streamlit/source/data/Distritos_de_Madrid.geojson destination path: data/Distritos_de_Madrid.geojson
About to copy file LOV.json>> source path:../streamlit/source/data/LOV.json destination path: data/LOV.json
About to copy file 2023_Accidentalidad_c_g_s_d_clean.csv>> source path:../streamlit/source/data/2023_Accidentalidad_c_g_s_d_clean.csv destination path: data/2023_Accidentalidad_c_g_s_d_clean.csv
About to copy file portada.png>> source path:../streamlit/source/img/portada.png destination path: img/portada.png
About to copy file TrafficBW.jpeg>> source path:../streamlit/source/img/TrafficBW.jpeg destination path: img/TrafficBW.jpeg
About to copy file traffic.jpg>> source path:../streamlit/source/img/traffic.jpg destination path: img/traffic.jpg
About to copy file accidente_portada.jpg>> source path:../streamlit/source/img/accidente_portada.jpg destination path: img/accidente_portada.jpg


In [11]:
obj = s3_session.get_object(Bucket=AWS_S3_BUCKET, Key='data/2023_Accidentalidad_c_g_s_d_clean.csv')

In [12]:
obj

{'ResponseMetadata': {'RequestId': 'M9AJP5PG8496BMCR',
  'HostId': '/rdS/Oy1qoVcC8lVOSOdBgh2s1uD0YJ3v5tW8lakrXBUNPg0tCYG5dUMnm+GSBYa0jyEGsCKnpU=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': '/rdS/Oy1qoVcC8lVOSOdBgh2s1uD0YJ3v5tW8lakrXBUNPg0tCYG5dUMnm+GSBYa0jyEGsCKnpU=',
   'x-amz-request-id': 'M9AJP5PG8496BMCR',
   'date': 'Fri, 20 Oct 2023 13:21:16 GMT',
   'last-modified': 'Fri, 20 Oct 2023 13:21:04 GMT',
   'etag': '"bf6d3645033286e0669e90a30b427947-2"',
   'x-amz-server-side-encryption': 'AES256',
   'accept-ranges': 'bytes',
   'content-type': 'binary/octet-stream',
   'server': 'AmazonS3',
   'content-length': '11561896'},
  'RetryAttempts': 0},
 'AcceptRanges': 'bytes',
 'LastModified': datetime.datetime(2023, 10, 20, 13, 21, 4, tzinfo=tzutc()),
 'ContentLength': 11561896,
 'ETag': '"bf6d3645033286e0669e90a30b427947-2"',
 'ContentType': 'binary/octet-stream',
 'ServerSideEncryption': 'AES256',
 'Metadata': {},
 'Body': <botocore.response.StreamingBody at 0x11fa42a40

In [13]:
df = pd.read_csv(obj['Body'], index_col=None, sep= ";")

In [14]:
df.sample(3)

Unnamed: 0.1,Unnamed: 0,num_expediente,fecha,hora,localizacion,numero,cod_distrito,distrito,tipo_accidente,estado_meteorológico,...,coordinates_speed_api,road_name_speed_api,speedlimit_kph_speed_api,dia_semana,hora_accidente,int_real,ocupacion,vmed,high_speed_point,hora_rango
5037,5384,2023S004207,2023-02-10,16:00:00,"CALL. GOLFO DE SALONICA, 4",4,16,Hortaleza,Colisión lateral,Despejado,...,"{'latitude': 40.47508141246795, 'longitude': -...",Calle del Golfo de Salónica,30,Friday,15:58:00,106.66537,7.070039,73.6,0,16
4011,4268,2023S003161,2023-02-02,19:15:00,"AVDA. ASTURIAS, 44",44,6,Tetuán,Alcance,Despejado,...,"{'latitude': 40.47054323399534, 'longitude': -...",Avenida de Asturias,30,Thursday,19:10:00,152.861502,10.126761,0.0,0,19
7319,7821,2023S006863,2023-03-01,00:30:00,AVDA. RAFAELA YBARRA / CALL. CRISTO DE LA VICT...,73,12,Usera,Colisión fronto-lateral,Despejado,...,"{'latitude': 40.37874987674009, 'longitude': -...",Avenida de Rafaela Ybarra,20,Wednesday,0:33:00,28.871575,1.524138,66.53,0,0


In [15]:
obj = s3_session.get_object(Bucket=AWS_S3_BUCKET, Key='data/LOV.json')

In [16]:
j = obj['Body'].read()

In [17]:
j

b'[{"lov_name": "lov_district", "data": [{"cod_distr": "2", "nombre": "Arganzuela"}, {"cod_distr": "21", "nombre": "Barajas"}, {"cod_distr": "11", "nombre": "Carabanchel"}, {"cod_distr": "1", "nombre": "Centro"}, {"cod_distr": "5", "nombre": "Chamart\\u00edn"}, {"cod_distr": "7", "nombre": "Chamber\\u00ed"}, {"cod_distr": "15", "nombre": "Ciudad Lineal"}, {"cod_distr": "8", "nombre": "Fuencarral - El Pardo"}, {"cod_distr": "16", "nombre": "Hortaleza"}, {"cod_distr": "10", "nombre": "Latina"}, {"cod_distr": "9", "nombre": "Moncloa - Aravaca"}, {"cod_distr": "14", "nombre": "Moratalaz"}, {"cod_distr": "13", "nombre": "Puente de Vallecas"}, {"cod_distr": "3", "nombre": "Retiro"}, {"cod_distr": "4", "nombre": "Salamanca"}, {"cod_distr": "20", "nombre": "San Blas - Canillejas"}, {"cod_distr": "6", "nombre": "Tetu\\u00e1n"}, {"cod_distr": "12", "nombre": "Usera"}, {"cod_distr": "19", "nombre": "Vic\\u00e1lvaro"}, {"cod_distr": "18", "nombre": "Villa de Vallecas"}, {"cod_distr": "17", "nombre

In [18]:
def get_file(filename):
    data_folder = "data/"
    path= data_folder+filename

    aws_access_key_id,aws_secret_access_key,aws_session_token =  get_aws_credentials()
    
    s3_session = boto3.session.Session(
     region_name="us-east-1",
     aws_access_key_id=aws_access_key_id,
     aws_secret_access_key=aws_secret_access_key,
     aws_session_token=aws_session_token
    ).client('s3')

    obj = s3_session.get_object(Bucket=AWS_S3_BUCKET, Key=path)

    return obj['Body']

In [19]:
LOV_file='LOV.json'

json_file = get_file(LOV_file)

loaded_data = json.load(json_file)
lov_date_data = loaded_data[1]['data']
lov_date_data       

[{'fecha_display': 'January-2023', 'min': '2023-01-01', 'max': '2023-01-31'},
 {'fecha_display': 'February-2023', 'min': '2023-02-01', 'max': '2023-02-28'},
 {'fecha_display': 'March-2023', 'min': '2023-03-01', 'max': '2023-03-31'},
 {'fecha_display': 'April-2023', 'min': '2023-04-01', 'max': '2023-04-30'},
 {'fecha_display': 'May-2023', 'min': '2023-05-01', 'max': '2023-05-31'}]