In [None]:
import pandas as pd
import requests
import json
from dataengineeringutils import s3
from dataengineeringutils.utils import read_json
from dataengineeringutils.pd_metadata_conformance import impose_exact_conformance_on_pd_df
import python_scripts.s3_utils as s3_utils
from datetime import datetime, timedelta, date

def get_secrets():
    """Load the Matrix API credentials stored as JSON in S3.

    Returns:
        The parsed content of the secrets object, as produced by
        ``s3_utils.read_json_from_s3``. ``matrix_authenticate`` reads the
        ``username`` and ``password`` keys from it.
    """
    secrets_path = "alpha-dag-matrix/api_secrets/secrets.json"
    secrets = s3_utils.read_json_from_s3(secrets_path)
    return secrets


def matrix_authenticate(session):
    """Log ``session`` in to the Matrix Booking API.

    Posts the stored username and password to the login endpoint using
    ``session``, so that whatever the server sets on the session (presumably
    login cookies) is available to later requests made with it.

    Args:
        session: A ``requests`` session (callers in this file pass
            ``requests.session()``).

    Returns:
        The same ``session`` object that was passed in.

    Raises:
        requests.HTTPError: If the login endpoint answers with a 4xx/5xx
            status. Previously the response was discarded and a failed login
            went unnoticed until later requests failed.
    """
    secrets = get_secrets()
    credentials = {
        'username': secrets['username'],
        'password': secrets['password'],
    }

    url = "https://app.matrixbooking.com/api/v1/user/login"
    response = session.post(url, json=credentials)
    # Fail loudly here instead of letting an unauthenticated session be used.
    response.raise_for_status()
    return session


def make_booking_params(time_from, time_to, status = None, pageSize = None, pageNum = 0):
    """Build the query parameters for a room-booking request.

    Args:
        time_from: Value sent as the ``f`` parameter.
        time_to: Value sent as the ``t`` parameter.
        status: Value sent as ``status`` (callers in this file pass a list
            of status names); ``None`` by default.
        pageSize: Value sent as ``pageSize``; ``None`` by default.
        pageNum: Value sent as ``pageNum``; defaults to 0.

    Returns:
        dict: The parameters, always with ``bc`` set to ``'ROOM'`` and
        ``include`` set to ``['audit', 'locations']``.
    """
    return dict(
        f=time_from,
        t=time_to,
        bc='ROOM',
        status=status,
        include=['audit', 'locations'],
        pageSize=pageSize,
        pageNum=pageNum,
    )
  

def get_payload(session, url, parameters):
    """Send a GET request through ``session`` and return the decoded JSON body.

    The requested URL and the response status code are printed as a simple
    progress log. The status code is not checked before decoding.

    Args:
        session: Object providing ``get`` and ``cookies`` (callers in this
            file pass a ``requests`` session).
        url: Endpoint to query.
        parameters: Query parameters passed as ``params``.

    Returns:
        Whatever ``response.json()`` returns for the response.
    """
    response = session.get(
        url=url,
        cookies=session.cookies,
        params=parameters,
    )
    print(f"GET {response.url}")
    print(f"response status code: {response.status_code}")
    payload = response.json()
    return payload
  
def scrape_days_from_api(start_date, end_date):
    """Scrape room bookings for a date range and write them to S3 as CSV.

    Logs in to the Matrix Booking API, requests pages of bookings until a
    page comes back with fewer than ``page_size`` rows, then writes:

    * the bookings to ``alpha-dag-matrix/bookings/{start_date}.csv``
    * the locations to ``alpha-dag-matrix/locations/data.csv``

    Both files are written without index or header row.

    Args:
        start_date: Sent to the API as the ``f`` parameter and used in the
            bookings file name (presumably a ``YYYY-MM-DD`` string).
        end_date: Sent to the API as the ``t`` parameter.

    Returns:
        tuple: ``(bookings, locations)`` — the raw booking records gathered
        from all pages and the ``locations`` value of the first page.
    """
    url = "https://app.matrixbooking.com/api/v1/booking"
    page_size = 2500
    status = ['CONFIRMED','TENTATIVE','CANCELLED']

    bookings = []
    locations = None

    # The context manager closes the session's connections once scraping is
    # finished, including when a request raises.
    with requests.session() as ses:
        matrix_authenticate(ses)

        page_num = 0
        while True:
            print(f"scraping page {page_num}")
            params = make_booking_params(start_date, end_date, pageNum = page_num, pageSize = page_size, status = status)
            data = get_payload(ses, url, params)

            page_bookings = data['bookings']
            rowcount = len(page_bookings)
            print(f"records scraped: {rowcount}")

            if page_num == 0:
                # NOTE(review): only the first page's locations are kept, as
                # before. If later pages carry additional locations they are
                # dropped — confirm against the API's behaviour.
                locations = data['locations']

            bookings.extend(page_bookings)

            # A short page means there is nothing further to fetch.
            if rowcount != page_size:
                break
            page_num += 1

    print(f"Retrieved {len(locations)} locations")

    bookings_data = get_bookings_df(bookings)
    s3.pd_write_csv_s3(bookings_data, f"alpha-dag-matrix/bookings/{start_date}.csv", index = False, header = False)

    locations_data = get_locations_df(locations)
    s3.pd_write_csv_s3(locations_data, "alpha-dag-matrix/locations/data.csv", index = False, header = False)

    return(bookings, locations)

                                                                    
def get_scrape_dates(start_date, end_date):
    """Yield every day from ``start_date`` to ``end_date`` as ``YYYY-MM-DD`` strings.

    The range is inclusive at both ends, but never extends beyond yesterday
    (relative to ``datetime.now()``): a later ``end_date`` is capped. If the
    effective end lies before ``start_date`` nothing is yielded.

    Args:
        start_date: First day, formatted ``%Y-%m-%d``.
        end_date: Requested last day, formatted ``%Y-%m-%d``.

    Returns:
        A generator of date strings in ascending order.

    Raises:
        ValueError: If either argument does not match ``%Y-%m-%d``.
    """
    date_format = "%Y-%m-%d"

    first_day = datetime.strptime(start_date, date_format).date()
    yesterday = datetime.now().date() - timedelta(days=1)
    requested_last_day = datetime.strptime(end_date, date_format).date()

    # Never scrape today or the future: cap the range at yesterday.
    last_day = min(yesterday, requested_last_day)

    day_count = int((last_day - first_day).days + 1)
    return (
        (first_day + timedelta(offset)).strftime(date_format)
        for offset in range(day_count)
    )


def get_bookings_df(bookings):
    """Flatten raw booking records into a DataFrame matching the bookings metadata.

    Args:
        bookings: Booking records as found under the ``bookings`` key of the
            API response (presumably a list of nested dicts).

    Returns:
        pandas.DataFrame: The flattened records, restricted to the columns
        listed in ``metadata/bookings_renames.json``, renamed according to
        that mapping, and passed through
        ``impose_exact_conformance_on_pd_df`` with ``metadata/bookings.json``.

    Raises:
        KeyError: If a column named in the renames file is missing from the
            flattened data.
    """
    # Use the top-level pd.json_normalize (as the rest of this notebook does);
    # the pd.io.json.json_normalize alias is deprecated.
    bookings_df = pd.json_normalize(bookings)

    renames = read_json("metadata/bookings_renames.json")
    # Select with an explicit list rather than a dict_keys view.
    source_columns = list(renames.keys())
    bookings_df = bookings_df[source_columns].rename(columns = renames)

    bookings_metadata = read_json('metadata/bookings.json')
    bookings_df = impose_exact_conformance_on_pd_df(bookings_df, bookings_metadata)

    return(bookings_df)

def get_locations_df(locations):
    """Flatten raw location records into a DataFrame matching the locations metadata.

    Args:
        locations: Location records as found under the ``locations`` key of
            the API response (presumably a list of nested dicts).

    Returns:
        pandas.DataFrame: The flattened records, restricted to the columns
        listed in ``metadata/locations_renames.json``, renamed according to
        that mapping, and passed through
        ``impose_exact_conformance_on_pd_df`` with ``metadata/locations.json``.

    Raises:
        KeyError: If a column named in the renames file is missing from the
            flattened data.
    """
    # Use the top-level pd.json_normalize (as the rest of this notebook does);
    # the pd.io.json.json_normalize alias is deprecated.
    locations_df = pd.json_normalize(locations)

    renames = read_json("metadata/locations_renames.json")
    # Select with an explicit list rather than a dict_keys view.
    source_columns = list(renames.keys())
    locations_df = locations_df[source_columns].rename(columns = renames)

    locations_metadata = read_json('metadata/locations.json')
    locations_df = impose_exact_conformance_on_pd_df(locations_df, locations_metadata)

    return(locations_df)




In [None]:
# from etl_manager.meta import DatabaseMeta, TableMeta

# db = DatabaseMeta(name = 'matrix_db', bucket='alpha-dag-matrix')

# # Create table meta object
# bookings = TableMeta(name = 'bookings', location = 'bookings')

# # Add column definitions to the table
# bookings.add_column(name = 'id', type = 'character', description = 'Booking id')
# bookings.add_column(name = 'time_from',type = 'datetime', description = 'Start time of booking')
# bookings.add_column(name = 'time_to',type = 'datetime', description = 'End time of booking')
# bookings.add_column(name = 'created',type = 'datetime', description = 'Time the booking was created')
# bookings.add_column(name = 'location_id', type ='character', description = 'id to match to location')
# bookings.add_column(name = 'status',type = 'character', description = 'One of APPROVED, TENTATIVE or CANCELLED')
# bookings.add_column(name = 'status_reason', type ='character', description = 'Reason for cancellation where relevant')
# db.add_table(bookings)

# locations = TableMeta(name = 'locations', location = 'locations', data_format = 'csv_quoted_nodate')

# locations.add_column(name = 'id', type = 'character', description = 'locationId')
# locations.add_column(name = 'name', type = 'character', description = 'location name')
# locations.add_column(name = 'long_qualifier', type = 'character', description = 'long qualifier for location')
# locations.add_column(name = 'capacity', type = 'character', description = 'room capacity')
# db.add_table(locations)


# bookings.write_to_json('metadata/bookings.json')
# locations.write_to_json('metadata/locations.json')

#db.create_glue_database(delete_if_exists = True)

In [None]:
# Exploratory request: fetch a single small page of bookings so the raw
# response can be inspected interactively. Nothing is written to S3 here.
url = "https://app.matrixbooking.com/api/v1/booking"
# Small page size (25 rows) to keep the exploratory response light.
page_size = 25
status = ['CONFIRMED','TENTATIVE','CANCELLED']
# Fixed date window used for this exploration.
start_date = "2017-01-01"
end_date = "2019-04-15"
params = make_booking_params(start_date, end_date, pageNum = 0, pageSize = page_size, status = status)

# NOTE(review): not used again within this cell.
bookings = []

# `ses` is reused by the location query cell further down.
ses = requests.session()
matrix_authenticate(ses)
# Scrape the first page of data
print(f"scraping page 0")
data = get_payload(ses, url, params)



## Booking categories

## Locations

Desk and room locations (other location kinds exist, but only these two are queried here).

In [None]:
# Query parameter restricting the location search to the DESK and ROOM kinds
param = dict(kind=["DESK", "ROOM"]) 

# Locations URL (from API documentation)
loc_url = "https://app.matrixbooking.com/api/v1/location"

# Fetch the locations with the session `ses` from the earlier cell.
# Despite the variable name, the query covers both DESK and ROOM kinds.
desk_locations = get_payload(ses, loc_url, parameters=param)

In [None]:
# Flatten the JSON location payload into a tabular DataFrame
loc_df = pd.json_normalize(desk_locations)

# (rows, columns) of the flattened locations table
loc_df.shape


In [None]:
# Preview the first rows of the flattened locations table
loc_df.head()