# Notebook with test code

---

## Imports

### Standard library imports

In [141]:
import yaml

import sys

from datetime import (date, datetime)

import os

import pickle

import re

### Third party libraries

In [2]:
import boto3

import pandas as pd

### Local application imports

In [3]:
sys.path.append("../")

In [4]:
%load_ext autoreload
%autoreload 2

from src.pipeline.ingesta_almacenamiento import (
    
    ## Parameters
    bucket_name,
    today_info,
    hist_ingest_path,
    hist_dat_prefix,
    cont_ingest_path,
    cont_dat_prefix,
    
    ## Functions
    get_client,
    ingesta_inicial,
    ingesta_consecutiva,
    get_s3_resource,
    guardar_ingesta
    
)

from src.utils.general import (
    get_api_token
)

---

## AWS base examples

### S3

#### Initial configuration

In [None]:
# Open a boto3 session from a named profile in the local AWS configuration.
# NOTE(review): "robper_dpa" looks like a personal profile name — other users will presumably need their own.
ses = boto3.session.Session(profile_name="robper_dpa")

In [None]:
# Resource-style S3 handle built on the profile-based session.
s3 = ses.resource("s3")

In [None]:
# Names of every bucket returned by the resource handle.
[bucket_obj.name for bucket_obj in s3.buckets.all()]

In [None]:
# this yaml file lives in your conf/local/credentials.yaml
# this one goes in src/utils general.py
def read_yaml_file(yaml_file):
    """Load a YAML configuration file.

    Parameters
    ----------
    yaml_file : str or path-like
        Path of the YAML file to read.

    Returns
    -------
    object
        Whatever ``yaml.safe_load`` returns for the file contents.

    Raises
    ------
    FileNotFoundError
        If the file cannot be opened, read or parsed. Every ordinary failure
        is still reported with this type so existing callers keep working;
        the real error is available as ``__cause__``.
    """
    try:
        with open(yaml_file, 'r') as f:
            return yaml.safe_load(f)
    except Exception as exc:
        # `except Exception` (rather than a bare `except`) lets
        # KeyboardInterrupt / SystemExit propagate instead of being
        # disguised as a missing file; `from exc` keeps the root cause.
        raise FileNotFoundError('Couldnt load the file') from exc

In [None]:
# this one goes in src/utils general.py
def get_s3_credentials(credentials_file):
    """Return the ``'s3'`` section of a YAML credentials file.

    ``credentials_file`` is handed to ``read_yaml_file``; the value stored
    under the top-level ``'s3'`` key of the result is returned unchanged.
    """
    return read_yaml_file(credentials_file)['s3']

In [None]:
# Read the access key pair from the local credentials file and build a
# session with explicit credentials.
s3_creds = get_s3_credentials("../conf/local/credentials.yaml")

session = boto3.Session(
    aws_access_key_id=s3_creds['aws_access_key_id'],
    aws_secret_access_key=s3_creds['aws_secret_access_key']
)
# NOTE(review): this rebinds `s3` (previously `ses.resource("s3")`) to a
# client. Cells further down call both client-style (`list_objects_v2`) and
# resource-style (`.Bucket`, `.Object`, `.meta.client`) members on `s3` —
# confirm which object is intended.
s3 = session.client('s3')

#### Connecting to existing bucket and viewing contents

In [None]:
# Existing bucket and key prefix used by the listing example.
bucket = "dpa-robtest"
key = "rob_test_dir_s3"

In [None]:
# List the objects stored under the prefix. The response carries no
# 'Contents' entry when nothing matches, so fall back to an empty list
# instead of raising KeyError.
objects = s3.list_objects_v2(Bucket=bucket, Prefix=key).get('Contents', [])
objects

#### Creating bucket

In [None]:
# NOTE(review): this rebinds `bucket_name`, which is also imported from
# src.pipeline.ingesta_almacenamiento at the top of the notebook. When cells
# run top to bottom, later cells that read `bucket_name` see this value —
# confirm that is intended.
bucket_name = "comdline-test-bucket-rob"

# Create the bucket as private in the us-west-2 region.
s3.create_bucket(
    Bucket=bucket_name,
    CreateBucketConfiguration={'LocationConstraint': 'us-west-2'},
    ACL="private"
)

#### Uploading file to bucket

In [None]:
# Local file used in the upload example; the object key is the last path component.
file_to_upload = "../../admin/test_file3_for_s3.txt"
file_name = file_to_upload.rsplit("/", 1)[-1]

In [None]:
# Upload the local file under the key `file_name`.
# NOTE(review): `.meta.client` is how a boto3 *resource* exposes its client,
# but the configuration cell last bound `s3` to `session.client('s3')` —
# confirm which object `s3` is here. The direct client form would be:
# s3.upload_file(file_to_upload, bucket_name, file_name)

s3.meta.client.upload_file(file_to_upload, bucket_name, file_name)

In [None]:
# Materialise every object summary in the bucket.
list(s3.Bucket(bucket_name).objects.all())

#### Downloading files from bucket

In [None]:
# NOTE(review): absolute path on one machine (it embeds a local user name);
# it will not exist elsewhere — consider a path relative to the repository or
# a configurable download directory.
path_to_download = "/Users/rp_mbp/Documents/ReposRob_RobPer/ITAMmcd/semestre_2/Arquitectura_Prod_Dat/Venv_ArqPD/repos/dpa_2021/_robdir/" + file_name

# Fetch the object stored under `file_name` into the local path above.
s3.meta.client.download_file(bucket_name, file_name, path_to_download)

#### Deleting bucket files

In [None]:
# Delete the object stored under `file_name` from the bucket.
s3.Object(bucket_name, file_name).delete()

#### Deleting bucket

---

## Downloading and storing Chicago data

#### Interacting with API

In [41]:
# Identifier of the dataset requested from the API (first argument of `client.get` below).
dataset_id = "4ijn-s7e5"

In [42]:
# Load the API token from the local credentials file.
# The value is deliberately not echoed: cell outputs are saved with the
# notebook, so displaying it would store the secret in clear text.
token = get_api_token("../conf/local/credentials.yaml")

'<redacted>'

In [43]:
# Build the API client from the token; the repr below shows a sodapy Socrata instance.
client = get_client(token)
client

<sodapy.socrata.Socrata at 0x155626310>

In [174]:
# Inclusive lower bound (see the `>=` filter built next) for the inspection dates to request.
most_rec_date = '2021-01-15'

In [180]:
# Filter expression later passed as `where=` to the API call.
soql_query = "inspection_date >= '{}'".format(most_rec_date)
soql_query

"inspection_date >= '2021-01-15'"

In [181]:
# Fetch at most 10 records that satisfy the date filter.
x = client.get(dataset_id, where=soql_query, limit=10)

In [182]:
x

[{'inspection_id': '2472789',
  'dba_name': 'GEORGIS CATERING',
  'aka_name': 'GEORGIS CATERING',
  'license_': '69016',
  'facility_type': 'Restaurant',
  'risk': 'Risk 1 (High)',
  'address': '6339 S CENTRAL AVE ',
  'city': 'CHICAGO',
  'state': 'IL',
  'zip': '60638',
  'inspection_date': '2021-01-15T00:00:00.000',
  'inspection_type': 'Complaint Re-Inspection',
  'results': 'Pass',
  'violations': '37. FOOD PROPERLY LABELED; ORIGINAL CONTAINER - Comments: PRE PACKAGED INDIVIDUAL MEALS WITHOUT LABELS. INSTD TO LABEL ACCORDING TO THE FOLLOWING: (A) FOOD PACKAGED in a FOOD ESTABLISHMENT, shall be labeled as specified in LAW, including 21 CFR 101 -Food labeling, and 9 CFR 317 Labeling, marking devices, and containers. (B) Label information shall include: (1) The common name of the FOOD, or absent a common name, an adequately descriptive identity statement; (2) If made from two or more ingredients, a list of ingredients and sub-ingredients in descending order of predominance by weight,

In [183]:
len(x)

10

#### Uploading info

#### Checking aws s3 contents

In [89]:
# NOTE(review): rebinds `s3` to whatever the project helper returns. Despite
# the helper's name, the following cells call client-style methods
# (`list_objects_v2`, `get_object`) on it — confirm the returned type.
s3 = get_s3_resource()

In [90]:
cont_ingest_path

'ingestion/consecutive/'

In [132]:
# List what is stored under the consecutive-ingestion prefix. The response
# carries no 'Contents' entry when nothing matches, so fall back to an empty
# list instead of raising KeyError.
objects = s3.list_objects_v2(Bucket=bucket_name, Prefix=cont_ingest_path).get('Contents', [])
objects

[{'Key': 'ingestion/consecutive/',
  'LastModified': datetime.datetime(2021, 2, 18, 16, 1, 56, tzinfo=tzutc()),
  'ETag': '"d41d8cd98f00b204e9800998ecf8427e"',
  'Size': 0,
  'StorageClass': 'STANDARD'},
 {'Key': 'ingestion/consecutive/consecutive-inspections-2021-02-21.pkl',
  'LastModified': datetime.datetime(2021, 2, 22, 2, 53, 54, tzinfo=tzutc()),
  'ETag': '"79cff7864a646f1dfd4d51b4e732a226"',
  'Size': 1152660,
  'StorageClass': 'STANDARD'}]

In [133]:
# Add three synthetic listing entries (they differ only in 'Key') so the
# "most recent date" logic below has several dated files to choose from.
# 'LastModified' is a plain string here, exactly as in the hand-written entries.
fake_entry_fields = {
    'LastModified': "datetime.datetime(2021, 2, 22, 2, 53, 54, tzinfo=tzutc())",
    'ETag': '"79cff7864a646f1dfd4d51b4e732a226"',
    'Size': 1152660,
    'StorageClass': 'STANDARD',
}

objects.extend([
    {'Key': 'ingestion/consecutive/consecutive-inspections-2021-02-22.pkl', **fake_entry_fields},
    {'Key': 'ingestion/consecutive/consecutive-inspections-2021-02-15.pkl', **fake_entry_fields},
    {'Key': 'ingestion/consecutive/consecutive-inspections-2021-02-10.pkl', **fake_entry_fields},
])

In [168]:
# File keys end in "<cont_dat_prefix>YYYY-MM-DD.pkl". Escape the prefix and
# the dot so both are matched literally, require a date-shaped capture, and
# anchor on the extension.
rex = re.escape(str(cont_dat_prefix)) + r"(\d{4}-\d{2}-\d{2})\.pkl$"

# Keep only keys that really match: the bare "folder" key and any stray
# object are skipped instead of crashing on `None.group(1)`.
key_matches = (re.search(rex, obj["Key"]) for obj in objects)
lx = [datetime.strptime(m.group(1), '%Y-%m-%d') for m in key_matches if m]

# None when no dated ingestion file exists (max() of an empty list would raise).
most_rec_date = max(lx).strftime('%Y-%m-%d') if lx else None
most_rec_date

'2021-02-22'

In [95]:
# Download one stored ingestion file and unpickle it.
# NOTE: unpickling can execute arbitrary code — only do this with objects
# written by this project's own pipeline.
pkl_key = cont_ingest_path + "consecutive-inspections-2021-02-21.pkl"
pkl_bytes = s3.get_object(Bucket=bucket_name, Key=pkl_key)['Body'].read()
s3x = pickle.loads(pkl_bytes)

In [96]:
# Wrap the unpickled payload in a DataFrame for inspection.
dfx = pd.DataFrame(s3x)
dfx

Unnamed: 0,inspection_id,dba_name,aka_name,license_,facility_type,risk,address,city,state,zip,inspection_date,inspection_type,results,violations,latitude,longitude,location
0,2484930,EL PAISANO TACOS INC,EL PAISANO TACOS,2501058,Restaurant,Risk 1 (High),2429 W DIVISION ST,CHICAGO,IL,60622,2021-02-19T00:00:00.000,Complaint,Pass,47. FOOD & NON-FOOD CONTACT SURFACES CLEANABLE...,41.90289443680921,-87.68823386858675,"{'latitude': '-87.68823386858675', 'longitude'..."
1,2484934,LA ESCUELITA,LA ESCUELITA,2488235,Children's Services Facility,Risk 1 (High),5400 W FULLERTON AVE,CHICAGO,IL,60639,2021-02-19T00:00:00.000,License,Pass,36. THERMOMETERS PROVIDED & ACCURATE - Comment...,41.9241535287086,-87.76113266206144,"{'latitude': '-87.76113266206144', 'longitude'..."
2,2484936,THE BAR ON BUENA,THE BAR ON BUENA,47876,Restaurant,Risk 1 (High),910 W BUENA AVE,CHICAGO,IL,60613,2021-02-19T00:00:00.000,Canvass,Fail,39. CONTAMINATION PREVENTED DURING FOOD PREPAR...,41.958550897849705,-87.65352004893188,"{'latitude': '-87.65352004893188', 'longitude'..."
3,2484931,Dunkin Donuts Baskin Robbins,Dunkin Donuts Baskin Robbins,1475713,Restaurant,Risk 2 (Medium),1651 W ROOSEVELT RD,CHICAGO,IL,60608,2021-02-19T00:00:00.000,Complaint,Fail,50. HOT & COLD WATER AVAILABLE; ADEQUATE PRESS...,41.866749382827074,-87.66846405285052,"{'latitude': '-87.66846405285052', 'longitude'..."
4,2484917,"THE GRAND CHILD CARE CENTER, INC.","THE GRAND CHILD CARE CENTER, INC.",2220002,Children's Services Facility,Risk 1 (High),5945 W GRAND AVE,CHICAGO,IL,60639,2021-02-19T00:00:00.000,License,Pass,36. THERMOMETERS PROVIDED & ACCURATE - Comment...,41.920325606712524,-87.77494740927297,"{'latitude': '-87.77494740927297', 'longitude'..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,2472820,MOGADISHU RESTAURANT INC.,MOGADISHU RESTAURANT INC.,1959848,Restaurant,Risk 1 (High),931 N ORLEANS ST,CHICAGO,IL,60610,2021-01-19T00:00:00.000,Canvass,Pass w/ Conditions,2. CITY OF CHICAGO FOOD SERVICE SANITATION CER...,41.89989401176249,-87.63715073549078,"{'latitude': '-87.63715073549078', 'longitude'..."
996,2472846,GRIDDLE 24,GRIDDLE 24,2432253,Restaurant,Risk 1 (High),334 W CHICAGO AVE,CHICAGO,IL,60654,2021-01-19T00:00:00.000,Non-Inspection,No Entry,,41.89666241521221,-87.63710138046027,"{'latitude': '-87.63710138046027', 'longitude'..."
997,2472842,TRIO,TRIO,2664694,Restaurant,Risk 1 (High),840 N ORLEANS ST,CHICAGO,IL,60610,2021-01-19T00:00:00.000,Canvass Re-Inspection,Pass w/ Conditions,29. COMPLIANCE WITH VARIANCE/SPECIALIZED PROCE...,41.897839232090995,-87.63736478047242,"{'latitude': '-87.63736478047242', 'longitude'..."
998,2472817,CHAYHANA,CHAYHANA,2442858,Restaurant,Risk 1 (High),2245 W IRVING PARK RD,CHICAGO,IL,60618,2021-01-19T00:00:00.000,Canvass Re-Inspection,Pass,51. PLUMBING INSTALLED; PROPER BACKFLOW DEVICE...,41.95394859951932,-87.68539017232962,"{'latitude': '-87.68539017232962', 'longitude'..."


In [97]:
# Parse the date strings into datetimes so they can be compared and grouped.
# This overwrites the column on the existing frame.
dfx["inspection_date"] = pd.to_datetime(dfx["inspection_date"])

In [98]:
# Series.max()/.min() are vectorised and skip missing dates (NaT); the
# builtin max()/min() walk the column element by element and give an
# order-dependent answer when a NaT is present.
print("max date: ", dfx["inspection_date"].max())
print("min date: ", dfx["inspection_date"].min())

max date:  2021-02-19 00:00:00
min date:  2021-01-19 00:00:00


In [71]:
# Number of inspections recorded per inspection date.
per_day = dfx.groupby(pd.Grouper(key="inspection_date"))
per_day[["inspection_id"]].count()

Unnamed: 0_level_0,inspection_id
inspection_date,Unnamed: 1_level_1
2021-02-19,5
2021-02-18,24
2021-02-17,28
2021-02-16,2
2021-02-11,42
...,...
2010-01-08,43
2010-01-07,62
2010-01-06,81
2010-01-05,71


---