# Loading data from Async-API with pybarb

## Set up the environment

In [1]:
import os
import re
import json

# Change the working directory to the shortest leading part of the current
# path that ends in "/barb_api" (presumably the repository root, so that the
# `pybarb` import that follows resolves — confirm).
# A raw string is used because "\/" is not a valid Python string escape; a
# plain "/" in the pattern matches exactly the same text.
repo_root_match = re.search(r'.+?/barb_api', os.getcwd())
if repo_root_match is None:
    # Without this guard, running the notebook from outside the repository
    # fails with an opaque "'NoneType' object has no attribute 'group'" error.
    raise RuntimeError(
        f"Cannot locate a 'barb_api' directory in the current path: {os.getcwd()}"
    )
sub_dir = repo_root_match.group(0)
os.chdir(sub_dir)
print(f"Working directory : {sub_dir}")

import pybarb as pb

# Enable IPython's autoreload extension so that edits to imported modules are
# picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

Working directory : /Users/simon_business/Documents/code_repos/barb_api


## Connect to the API

In [2]:
# Read in Barb API credentials.
# The location of the credentials file can be overridden with the
# BARB_CREDS_PATH environment variable; the original hard-coded path is kept
# as the fallback so existing setups keep working unchanged.
creds_path = os.environ.get(
    "BARB_CREDS_PATH",
    "/Users/simon_business/Documents/disposable/clients/BARB/as_creds.json",
)
with open(creds_path) as file:
    # json.load parses directly from the open file handle.
    creds = json.load(file)

# Create a BarbAPI object pointed at the UAT endpoint and connect
api_root = "https://uat.barb-api.co.uk/api/v1/"
barb_api = pb.BarbAPI(creds, api_root)
barb_api.connect()


## Query the Asynchronous API

We would like one day's worth of viewing data (2023-05-28) for BBC 1 on a single panel (BBC London is used as the example in the lookups below).

First we need to look up the station and panel names, and their codes, to check that we have the correct query parameters.

In [3]:
# List the panel names returned for the search term "BBC".
barb_api.list_panels("BBC")

['BBC Network',
 'BBC East Region',
 'BBC West Region',
 'BBC South West Region',
 'BBC South Region',
 'BBC Yorkshire & Lincolnshire',
 'BBC North East & Cumbria',
 'BBC North West Region',
 'BBC Scotland Region',
 'BBC Ulster Region',
 'BBC Wales Region',
 'BBC Midlands West',
 'BBC Midlands East',
 'BBC London',
 'BBC South East']

In [4]:
# List the station names returned for the search term "ITV".
# NOTE(review): the narrative asks for BBC 1, but this lookup searches for
# "ITV" — confirm which search term is intended.
barb_api.list_stations("ITV")

['ITV1',
 'ITV1 HD',
 'ITV1+1',
 'ITV4+1',
 'ITV Play',
 'ITV2+1',
 'ITV3+1',
 'ITV3',
 'ITV4',
 'CITV',
 'ITV Sport',
 'ITV Sport Select',
 'ITV2',
 'ITV2 HD',
 'ITV Encore',
 'ITV Encore +1',
 'ITVBe',
 'ITVBe +1']

In [13]:
# Look up the numeric code for the "BBC London" panel.
barb_api.get_panel_code("BBC London")

515

In [15]:
# Look up the numeric code for the "BBC1" station.
barb_api.get_station_code("BBC1")

10

Next we need to request the data sets from the asynchronous endpoint.

In [None]:
# TODO: complete the wrapper around query_asynch_endpoint

In [19]:
# Narrower query kept for reference only (not submitted): it adds a station
# code, an activity type and a different panel code.
# params = {
#     "min_session_date": "2023-05-28",
#     "max_session_date": "2023-05-29",
#     "output_format": "parquet",
#     "viewing_station_code": 10,
#     "activity_type": "tv_live",
#     "panel_code": 515,
# }

# Query actually submitted: panel code 50 over the session-date window, with
# no station or activity-type filter, requesting parquet output.
params = {
    "min_session_date": "2023-05-28",
    "max_session_date": "2023-05-29",
    "output_format": "parquet",
    "panel_code": 50,
}
barb_api.query_asynch_endpoint("async-batch/viewing/", parameters=params)

{'message': 'Job sucessfully started.',
 'job_id': '0124adbe-edf9-47fb-b4e9-76ee4a2c7452'}

In [30]:
# Retrieve the list of result-file URLs and display it.
# NOTE(review): the displayed URLs are pre-signed (they embed X-Amz-Credential
# and X-Amz-Security-Token values) — consider clearing this cell's output
# before sharing or committing the notebook.
file_urls = barb_api.get_asynch_file_urls()
file_urls

['https://barb-uat-app-async-result.s3.amazonaws.com/reports/2023-06-09/bbeb2eee-d1ca-4a1e-98c3-615f50e00c7a/viewing/PARQUET/0124adbe-edf9-47fb-b4e9-76ee4a2c7452/2023-05-29/data_01acdada-0000-7099-0000-c319009c73e2_055_4_0.snappy.parquet?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ASIAVB6CII5FKTXC2RE6%2F20230609%2Feu-west-2%2Fs3%2Faws4_request&X-Amz-Date=20230609T155951Z&X-Amz-Expires=86400&X-Amz-SignedHeaders=host&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEEQaCWV1LXdlc3QtMiJIMEYCIQD4Yoai1%2FLlE%2BlkjDnb94szt2h0XA2zT9%2FRYIVZ4azHOwIhAPS%2FFjL4aUDy%2FaV2%2B3Wa3MNGTWs5bTDklcu8r0KYn%2BybKogECI3%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEQARoMMzQ3NzYyODcwMDkwIgx7fUYBvimNVD%2FuG1wq3ANi6U67drDGzgcee2aJ5L5A%2BUo9sUiXlmqu0Uef9%2Bvq7FZeJkecC65kl%2FuqEMrMNDy8GVnwW7XEWKPqQ6O7tBdEfj2i7j94SYPY%2FAFe89M3L5uOj9F%2FSO1lhBL3ZiaIHtSx0bytTFABnbJkEhq2fYBd6GxlyeD1m%2BlO4GsSyKoLiY4Xqk3RqEjSyqqEUpdj8p1MYyU%2But11Z8vQ220%2FcY32QuClComU2uQ2lRwxmA5SNXg1hvtiGiCY2mt%2BQi9q3QGjlv4ojNzPt3Khk%2FVeiAj0riZk6oQU0Qmbzne1%2FdVjt5TI

In [42]:
# Fetch the file behind the first URL only; any further URLs in `file_urls`
# are not read here.
viewing_results_set = barb_api.get_asynch_file(file_urls[0])

In [43]:
# Convert the result set to a DataFrame and preview its first rows.
# Several columns (e.g. HOUSEHOLD, PANEL_VIEWERS, VIEWING_STATION) hold nested
# dicts/lists rather than scalar values, as the preview shows.
vr_df = viewing_results_set.to_dataframe()
vr_df.head()

Unnamed: 0,STANDARD_DATE_OF_ACTIVITY,SESSION_START,SESSION_END,HOUSEHOLD,DEVICE,PANEL_VIEWERS,GUEST_VIEWERS,PROGRAMMES_VIEWED,SPOTS_VIEWED,VIEWING_STATION,PANEL,PLATFORM,ACTIVITY_TYPE,CONTENT_ASSET_ITEM_OFFSET,PLAYBACK_TYPE,SKY_ULTRA_HD,START_OF_RECORDING,TARGETED_PROMOTION,VOD_INDICATOR,VOD_PROVIDER
0,2023-05-29,{'barb_polling_datetime': '2023-05-29 21:38:00...,{'barb_polling_datetime': '2023-05-29 21:59:00...,{'bbc_itv_segment': 'bbc scotland / scotland (...,"{'date_valid_for': '2023-05-29', 'device_numbe...","[{'date_of_birth': '1959-12-01', 'dependency_o...",{},"[{'broadcaster_premier': True, 'broadcaster_tr...","[{'break_type': 'centre break', 'broadcaster_s...","{'viewing_station_code': 4382, 'viewing_statio...","{'is_macro_region': False, 'panel_code': 50, '...",digital terrestrial,live viewing (excl targeted advertising),0,unknown,False,{},False,not identified,"{'vod_provider': 'unknown', 'vod_service': 'un..."
1,2023-05-29,{'barb_polling_datetime': '2023-05-29 19:32:00...,{'barb_polling_datetime': '2023-05-29 19:44:00...,{'bbc_itv_segment': 'bbc south east / south & ...,"{'date_valid_for': '2023-05-29', 'device_numbe...","[{'date_of_birth': '1957-10-01', 'dependency_o...",{},"[{'broadcaster_premier': False, 'broadcaster_t...","[{'break_type': 'centre break', 'broadcaster_s...","{'viewing_station_code': 5148, 'viewing_statio...","{'is_macro_region': False, 'panel_code': 50, '...",digital satellite,time-shifted/coded playback (1-7 days) (excl t...,0,pvr device,False,{'barb_polling_datetime': '2023-05-28 19:32:00...,False,not on-demand,"{'vod_provider': 'unknown', 'vod_service': 'un..."
2,2023-05-29,{'barb_polling_datetime': '2023-05-29 12:37:00...,{'barb_polling_datetime': '2023-05-29 12:44:00...,{'bbc_itv_segment': 'bbc yorkshire & lincolnsh...,"{'date_valid_for': '2023-05-29', 'device_numbe...","[{'date_of_birth': '1961-07-01', 'dependency_o...",{},"[{'broadcaster_premier': True, 'broadcaster_tr...","[{'break_type': 'centre break', 'broadcaster_s...","{'viewing_station_code': 5015, 'viewing_statio...","{'is_macro_region': False, 'panel_code': 50, '...",digital terrestrial,live viewing (excl targeted advertising),0,unknown,False,{},False,not identified,"{'vod_provider': 'unknown', 'vod_service': 'un..."
3,2023-05-29,{'barb_polling_datetime': '2023-05-29 20:11:42...,{'barb_polling_datetime': '2023-05-29 23:13:42...,{'bbc_itv_segment': 'bbc east / east of englan...,"{'date_valid_for': '2023-05-29', 'device_numbe...","[{'date_of_birth': '2001-01-01', 'dependency_o...",{},[{'broadcaster_transmission_code': '002011504'...,"[{'break_type': 'centre break', 'broadcaster_s...","{'viewing_station_code': 5200, 'viewing_statio...","{'is_macro_region': False, 'panel_code': 50, '...",online via other device (computer/tablet etc),online live/live offset,0,unknown,False,{},False,on-demand,"{'vod_provider': 'sky/ now', 'vod_service': 's..."
4,2023-05-29,{'barb_polling_datetime': '2023-05-29 22:50:58...,{'barb_polling_datetime': '2023-05-29 23:20:58...,"{'bbc_itv_segment': 'bbc london / london', 'bb...","{'date_valid_for': '2023-05-29', 'device_numbe...","[{'date_of_birth': '1968-12-01', 'dependency_o...",{},"[{'broadcaster_premier': False, 'consolidated_...",[],"{'viewing_station_code': 4980, 'viewing_statio...","{'is_macro_region': False, 'panel_code': 50, '...",online via other device (computer/tablet etc),online live/live offset,0,unknown,False,{'barb_polling_datetime': '2023-05-29 22:49:52...,False,on-demand,"{'vod_provider': 'bbc iplayer', 'vod_service':..."


In [40]:
# Inspect the nested household record attached to the row labelled 0.
vr_df.loc[0, 'HOUSEHOLD']

{'bbc_itv_segment': 'bbc scotland / scotland (north)',
 'bbc_region_code': 'bbc scotland region',
 'bbc_sub_reporting_region': 'bbc scotland (north)',
 'broadband': True,
 'date_valid_for': '2023-05-29',
 'demographic_cell': 'post family',
 'household_number': 66566,
 'language_spoken_at_home': 'undefined',
 'mosaic_classification_2014': 'unclassified/suppressed postcodes',
 'number_of_computers': 999,
 'number_of_dvd_players_not_recorders': 2,
 'number_of_dvd_recorders': 0,
 'number_of_dvds': 2,
 'number_of_other_pvrs': 1,
 'number_of_people': 1,
 'number_of_pvrs': 1,
 'number_of_sky_plus_pvrs': 0,
 'number_of_tv_sets': 2,
 'number_of_vcrs': 0,
 'panel_membership_status': 'home on panel (valid reporter)',
 'presence_of_children': 'no children',
 'replication_factor': 10,
 'social_class': 'D',
 'welsh_speaking_home': 'non welsh speaking'}

In [63]:
# Inspect the list of panel-viewer records attached to the row labelled 4454.
vr_df.loc[4454, 'PANEL_VIEWERS']

[{'date_of_birth': '2002-01-01',
  'dependency_of_children': 'unclassified',
  'disability': 'no',
  'ethnic_origin': 'white british',
  'gaelic_language': 'not gaelic speaking/not in scotland',
  'household_status': 'neither houseperson nor head of household',
  'life_stage': 'single no children with parents aged 16-34',
  'marital_status': 'single / divorced / separated',
  'panel_member_weights': {'abc1_adults_commercial_tv_viewing_sextile': 0,
   'abc1_adults_total_viewing_sextile': 0,
   'adults_commercial_tv_viewing_sextile': 6,
   'adults_commercial_tv_viewing_sextile_16_to_34': 6,
   'adults_total_viewing_sextile': 6,
   'adults_total_viewing_sextile_16_to_34': 6,
   'processing_weight': 1.3081},
  'person_membership_status': 'person on panel',
  'person_number': 7,
  'sex_code': 'female',
  'terminal_age_of_education': 'still in education',
  'welsh_language_code': 'can understand and speak some welsh',
  'working_status': 'full time education / under school age'}]

In [45]:
# Inspect the viewing-station record (code and name) for the row labelled 0.
vr_df.loc[0, 'VIEWING_STATION']

{'viewing_station_code': 4382, 'viewing_station_name': 'More4'}