In [None]:
import pandas as pd
import requests
import json
from dataengineeringutils import s3
from dataengineeringutils.utils import read_json
from dataengineeringutils.pd_metadata_conformance import impose_exact_conformance_on_pd_df
import python_scripts.s3_utils as s3_utils
from datetime import datetime, timedelta, date

def get_secrets():
    """Load the Matrix API credentials stored as JSON in S3.

    Returns:
        Whatever ``s3_utils.read_json_from_s3`` returns for the secrets
        object. ``matrix_authenticate`` reads its ``'username'`` and
        ``'password'`` entries.
    """
    secrets_path = "alpha-dag-matrix/api_secrets/secrets.json"
    return s3_utils.read_json_from_s3(secrets_path)


def matrix_authenticate(session):
    """Log *session* in to the Matrix Booking API.

    Posts the username and password returned by ``get_secrets()`` to the
    login endpoint through *session*, so that whatever the session retains
    from the response (e.g. cookies) is available to later requests made
    with the same object.

    Args:
        session: A ``requests`` session (anything exposing ``post``).

    Returns:
        The same *session* object that was passed in.

    Raises:
        requests.HTTPError: If the login endpoint answers with a 4xx/5xx
            status.
    """
    secrets = get_secrets()
    url = "https://app.matrixbooking.com/api/v1/user/login"

    response = session.post(
        url,
        json={'username': secrets['username'], 'password': secrets['password']},
    )
    # The response used to be discarded, so a rejected login went unnoticed
    # until a later request failed in a less obvious way. Fail here instead.
    response.raise_for_status()
    return session


def make_booking_params(time_from, time_to, booking_categories='ROOM', status=None, pageSize=None, pageNum=0):
    """Assemble the query-string parameters for a bookings request.

    Args:
        time_from: Value sent as ``f``.
        time_to: Value sent as ``t``.
        booking_categories: Value sent as ``bc``; defaults to ``'ROOM'``.
        status: Value sent as ``status``.
        pageSize: Value sent as ``pageSize``.
        pageNum: Value sent as ``pageNum``; defaults to ``0``.

    Returns:
        A new dict with the keys ``f``, ``t``, ``bc``, ``status``,
        ``include``, ``pageSize`` and ``pageNum`` (in that order).
        ``include`` is always a fresh ``['audit', 'locations']`` list.
    """
    query = dict(f=time_from, t=time_to, bc=booking_categories, status=status)
    query['include'] = ['audit', 'locations']
    query.update(pageSize=pageSize, pageNum=pageNum)
    return query
  

def get_payload(session, url, parameters):
    """GET *url* through *session* and return the decoded JSON body.

    The session's own cookies are passed explicitly with the request. The
    final request URL and the response status code are printed for tracing.

    Args:
        session: Object exposing ``get`` and ``cookies`` (a ``requests``
            session in this file).
        url: Endpoint to request.
        parameters: Query parameters forwarded as ``params``.

    Returns:
        The result of calling ``.json()`` on the response.
    """
    response = session.get(url=url, params=parameters, cookies=session.cookies)
    print(
        f"GET {response.url}",
        f"response status code: {response.status_code}",
        sep="\n",
    )
    return response.json()
  
def scrape_days_from_api(start_date, end_date):
    """Scrape every booking between two dates and write the results to S3.

    Pages through the bookings endpoint (2500 records per page, statuses
    CONFIRMED/TENTATIVE/CANCELLED) until a page comes back that is not
    full, then writes two header-less, index-less CSV files:

    * ``alpha-dag-matrix/bookings/{start_date}.csv`` - the bookings after
      ``get_bookings_df``;
    * ``alpha-dag-matrix/locations/data.csv`` - the locations after
      ``get_locations_df``.

    Args:
        start_date: Passed to ``make_booking_params`` as ``time_from`` and
            used in the bookings file name.
        end_date: Passed to ``make_booking_params`` as ``time_to``.

    Returns:
        A ``(bookings, locations)`` tuple holding the raw ``'bookings'``
        records from all pages and the raw ``'locations'`` value from the
        first page.
    """
    url = "https://app.matrixbooking.com/api/v1/booking"
    page_size = 2500
    status = ['CONFIRMED', 'TENTATIVE', 'CANCELLED']

    bookings = []
    locations = None

    # The session is only needed while paging; the context manager closes it
    # (and its pooled connections) even if a request raises part-way through.
    with requests.session() as ses:
        matrix_authenticate(ses)

        page = 0
        while True:
            print(f"scraping page {page}")
            params = make_booking_params(
                start_date, end_date, pageNum=page, pageSize=page_size, status=status
            )
            data = get_payload(ses, url, params)
            page_bookings = data['bookings']
            print(f"records scraped: {len(page_bookings)}")
            bookings.extend(page_bookings)

            if page == 0:
                # NOTE(review): locations are taken from the first page only,
                # exactly as before. Confirm the API repeats the full location
                # list on every page rather than only those for that page.
                locations = data['locations']

            # A page that is not full means there is nothing left to fetch.
            if len(page_bookings) != page_size:
                break
            page += 1

    print(f"Retrieved {len(locations)} locations")

    bookings_data = get_bookings_df(bookings)
    s3.pd_write_csv_s3(bookings_data, f"alpha-dag-matrix/bookings/{start_date}.csv", index=False, header=False)

    locations_data = get_locations_df(locations)
    s3.pd_write_csv_s3(locations_data, "alpha-dag-matrix/locations/data.csv", index=False, header=False)

    return (bookings, locations)

                                                                    
def get_scrape_dates(start_date, end_date):
    """Yield each day to scrape as a ``YYYY-MM-DD`` string.

    The range starts at *start_date* and ends at the earlier of *end_date*
    and yesterday (by the local clock), both ends inclusive. If that end
    falls before *start_date*, nothing is yielded.

    Args:
        start_date: First day, formatted ``%Y-%m-%d``.
        end_date: Requested last day, formatted ``%Y-%m-%d``.

    Returns:
        A generator of date strings in ascending order.

    Raises:
        ValueError: If either argument does not match ``%Y-%m-%d``.
    """
    date_format = "%Y-%m-%d"

    first_day = datetime.strptime(start_date, date_format).date()
    yesterday = datetime.now().date() - timedelta(days=1)
    requested_last_day = datetime.strptime(end_date, date_format).date()

    # Never scrape today or the future: cap the range at yesterday.
    last_day = min(yesterday, requested_last_day)
    day_count = (last_day - first_day).days + 1

    return (
        (first_day + timedelta(offset)).strftime(date_format)
        for offset in range(day_count)
    )


def get_bookings_df(bookings):
    """Flatten raw booking records into a metadata-conformant DataFrame.

    Steps:
      1. flatten the nested records with ``pd.json_normalize``;
      2. keep only the columns named as keys in
         ``metadata/bookings_renames.json`` and rename them to the
         corresponding values;
      3. pass the result through ``impose_exact_conformance_on_pd_df``
         with the metadata read from ``metadata/bookings.json``.

    Args:
        bookings: Raw booking records as accepted by ``pd.json_normalize``.

    Returns:
        The DataFrame returned by ``impose_exact_conformance_on_pd_df``.
    """
    # ``pd.io.json.json_normalize`` is the deprecated spelling; the top-level
    # function is the supported one and is what the rest of this file uses.
    bookings_df = pd.json_normalize(bookings)

    renames = read_json("metadata/bookings_renames.json")
    # Select with an explicit list of column names rather than a dict view.
    bookings_df = bookings_df[list(renames)].rename(columns=renames)

    bookings_metadata = read_json('metadata/bookings.json')
    return impose_exact_conformance_on_pd_df(bookings_df, bookings_metadata)

def get_locations_df(locations):
    """Flatten raw location records into a metadata-conformant DataFrame.

    Steps:
      1. flatten the nested records with ``pd.json_normalize``;
      2. keep only the columns named as keys in
         ``metadata/locations_renames.json`` and rename them to the
         corresponding values;
      3. pass the result through ``impose_exact_conformance_on_pd_df``
         with the metadata read from ``metadata/locations.json``.

    Args:
        locations: Raw location records as accepted by
            ``pd.json_normalize``.

    Returns:
        The DataFrame returned by ``impose_exact_conformance_on_pd_df``.
    """
    # ``pd.io.json.json_normalize`` is the deprecated spelling; the top-level
    # function is the supported one and is what the rest of this file uses.
    locations_df = pd.json_normalize(locations)

    renames = read_json("metadata/locations_renames.json")
    # Select with an explicit list of column names rather than a dict view.
    locations_df = locations_df[list(renames)].rename(columns=renames)

    locations_metadata = read_json('metadata/locations.json')
    return impose_exact_conformance_on_pd_df(locations_df, locations_metadata)




In [None]:
# Exploration setup: same bookings endpoint and status list as
# scrape_days_from_api, but with a page size of 1000 and one fixed start date.
url = "https://app.matrixbooking.com/api/v1/booking"
page_size = 1000
status = ['CONFIRMED','TENTATIVE','CANCELLED']
start_date = "2023-09-27"
# NOTE(review): "eod" is sent verbatim as the 't' query parameter; presumably
# an API shorthand for "end of day" - confirm against the Matrix Booking docs.
end_date = "eod"
# Parameters for the first page (pageNum 0); the booking category is left at
# make_booking_params' default of 'ROOM'.
params = make_booking_params(start_date, end_date, pageNum = 0, pageSize = page_size, status = status)

# Not read by any of the cells visible below.
bookings = []

# Open a session and log in; the following cells reuse `ses` for requests.
ses = requests.session()
matrix_authenticate(ses)

# Default parameters

In [None]:
# Scrape the first page of data (ROOM bookings only, per `params` above).
# `data` holds the decoded JSON response; later cells read data['bookings'].
print(f"scraping page 0")
data = get_payload(ses, url, params)

In [None]:
# Flatten the first page of bookings into a dataframe
df = pd.json_normalize(data['bookings'])
# Displayed as the cell output: (row count, column count)
df.shape


In [None]:
# Unique booking types: distinct 'locationKind' values in the first page
df['locationKind'].unique()

# All booking types

## Get booking categories available

In [None]:
# Get booking categories.
# Go through the authenticated session rather than a bare requests.get with
# hand-passed cookies, and fail loudly on an HTTP error instead of trying to
# parse an error body as the category list.
categories_response = ses.get("https://app.matrixbooking.com/api/v1/category")
categories_response.raise_for_status()
res = categories_response.json()
df_booking_categories = pd.json_normalize(res)

# List of booking types: the 'locationKind' of every category returned
booking_types = list(df_booking_categories['locationKind'])

# Print out number and types of booking
print(f"There are {len(booking_types)} booking categories: {booking_types}")

## Make request

In [None]:
# Get parameters for all booking_types (replaces the ROOM-only `params`)
params = make_booking_params(start_date, end_date, booking_categories=booking_types, pageNum = 0, pageSize = page_size, status = status)

# Make the request. Only page 0 is fetched here (no paging loop), so at most
# `page_size` bookings are requested.
data = get_payload(ses, url, params)


## Bookings

In [None]:
# Flatten the bookings from the all-categories response into a dataframe
bookings_df = pd.json_normalize(data['bookings'])
# Displayed as the cell output: (row count, column count)
bookings_df.shape

## Locations

In [None]:
# Flatten the locations from the same response into a dataframe
locations_df = pd.json_normalize(data['locations'])
# Displayed as the cell output: (row count, column count)
locations_df.shape

## Write to s3 (parquet)

In [None]:
# Paths for the extended-category test output; both file names embed
# `start_date` so each scraped start date gets its own pair of files.
parent_path = "s3://alpha-dag-matrix/extended_categories"
booking_path = f"{parent_path}/bookings_{start_date}.parquet"
locations_paths = f"{parent_path}/locations_{start_date}.parquet"

# Write out the files (the raw flattened frames, with no renaming or
# metadata conformance applied)
bookings_df.to_parquet(booking_path)
locations_df.to_parquet(locations_paths)


In [None]:
# What booking categories were used? Counts bookings per 'locationKind'.
bookings_df.locationKind.value_counts()

In [None]:
# Test reading the bookings file back in from the path written above
booking_df_test = pd.read_parquet(booking_path)
# Displayed as the cell output; compare with bookings_df.shape
booking_df_test.shape