In [None]:
! pip install requests beautifulsoup4 pandas

In [None]:
import html
import json
import re
import urllib.parse
from datetime import datetime

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Root under which both the bigscreen HTML pages and their JSON feeds are
# addressed by the code below.
base_url = "http://www.taphunter.com/bigscreen"
# Bigscreen page scraped by this notebook, identified by its numeric id.
b30_url = "/".join((base_url, "5469327503392768"))

In [None]:
def get_json_data(url: str) -> dict:
  """Download the JSON feed that a bigscreen page loads via ``getJSON``.

  The HTML at ``url`` is fetched and its ``<script>`` tags are scanned for a
  line containing ``getJSON``. The first single-quoted string on that line is
  taken as the feed location, every "." is removed from it, and the result is
  appended to the module-level ``base_url``. If several scripts match, the
  last one wins.

  Args:
    url: Address of the bigscreen HTML page to inspect.

  Returns:
    The decoded JSON payload. The annotation says ``dict``, but the summary
    message treats ``len()`` of it as a beer count - presumably a list of
    per-beer records; confirm against the feed.

  Raises:
    requests.HTTPError: If either HTTP request returns an error status.
    Exception: If no script yields a quoted ``getJSON`` argument.
  """
  # Fetch the page the caller asked for (not a hard-coded global).
  response = requests.get(url, timeout=30)
  response.raise_for_status()
  soup = BeautifulSoup(response.content, "html.parser")

  json_relative_uri = None
  for script in soup.find_all("script"):
    getjson_lines = [l for l in script.text.splitlines() if "getJSON" in l]
    if not getjson_lines:
      continue
    quoted_parts = getjson_lines[0].split("'")
    if len(quoted_parts) < 2:
      # No single-quoted argument on this line; keep looking rather than
      # failing with a bare IndexError.
      continue
    # Removes every "." (so a leading "./" becomes "/"); note that dots
    # elsewhere in the path are dropped as well.
    json_relative_uri = quoted_parts[1].replace(".", "")

  if not json_relative_uri:
    raise Exception("JSON relative URI not found")

  json_url = f"{base_url}{json_relative_uri}"
  json_response = requests.get(json_url, timeout=30)
  json_response.raise_for_status()
  json_data = json.loads(json_response.text)
  print(f"parsed {len(json_data)} beer entries from {json_url}")
  return json_data


def filter_brewery(brewery: str, n: str) -> str:
  """Remove a leading brewery name from a beer name.

  Both arguments are stripped of surrounding whitespace first. If the beer
  name starts with the brewery name followed by "s", by "'s", or with the
  brewery name alone (checked in that order), that prefix - and only that
  prefix - is cut off. Later occurrences of the brewery name are kept.

  Args:
    brewery: Brewery name to look for at the start of ``n``.
    n: Beer name, possibly prefixed with the brewery name.

  Returns:
    ``n`` without the matched prefix (any whitespace that followed the prefix
    is left in place), or the stripped ``n`` when nothing matches or
    ``brewery`` is empty.
  """
  brewery, n = brewery.strip(), n.strip()

  # With an empty brewery the "<brewery>s" prefix degenerates to "s", which
  # would eat the first letter of names such as "sassy stout".
  if not brewery:
    return n

  for prefix in (f"{brewery}s", f"{brewery}'s", brewery):
    if n.startswith(prefix):
      # Slice instead of str.replace so only the leading match is removed.
      return n[len(prefix):]
  return n


def compute_age(date_str) -> int:
  """Return how many whole days ago ``date_str`` was.

  Args:
    date_str: Date in ``MM/DD/YYYY`` form.

  Returns:
    Days between ``datetime.today()`` and the parsed date, or -1 when
    ``date_str`` cannot be parsed - including when it is ``None`` or not a
    string at all.
  """
  try:
    date_obj = datetime.strptime(date_str, "%m/%d/%Y")
  except (TypeError, ValueError):
    # strptime raises TypeError (not ValueError) for non-string input such as
    # None; treat it like any other unparseable date.
    return -1
  return (datetime.today() - date_obj).days


def get_untappd_beer_info(search_string: str) -> tuple[str, str]:
  """Search Untappd and return the rating and page URL of the first hit.

  Args:
    search_string: Free-text query; it is percent-encoded before being placed
      in the search URL.

  Returns:
    ``(rating, url)``. ``rating`` is the ``data-rating`` attribute of the
    first result's ``div.caps`` element, or ``"N/A"`` when that is missing or
    empty. ``url`` is the absolute address of the beer's page.

  Raises:
    requests.HTTPError: If the search request returns an error status.
    LookupError: If the results page has no ``div.beer-item`` or the first
      result carries no link.
  """
  encoded_search_string = urllib.parse.quote(search_string)
  search_url = f"https://untappd.com/search?q={encoded_search_string}"

  # Emulate chrome.
  headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
  }

  # A timeout keeps a stalled connection from hanging the per-row lookups.
  response = requests.get(search_url, headers=headers, timeout=30)
  response.raise_for_status()
  soup = BeautifulSoup(response.content, "html.parser")

  # Find the first beer in the search results list
  first_beer = soup.find("div", class_="beer-item")
  if first_beer is None:
    raise LookupError(f"no Untappd search results for {search_string!r}")

  # Extract a numeric rating, falling back to "N/A" so the declared
  # tuple[str, str] contract holds even when no rating is published.
  rating_div = first_beer.find('div', class_='caps')
  rating = rating_div.get('data-rating') if rating_div is not None else None
  rating = rating or "N/A"

  # Extract the relative URI.
  name_p = first_beer.find('p', class_='name')
  link = name_p.find('a') if name_p is not None else None
  uri = link.get('href') if link is not None else None
  if not uri:
    raise LookupError(f"first Untappd result for {search_string!r} has no link")

  return rating, f"https://untappd.com{uri}"


def safe_get_untappd_beer_info(search_string: str) -> tuple[str, str]:
  """Best-effort wrapper around ``get_untappd_beer_info``.

  Announces the lookup on stdout, then delegates. Any exception raised by the
  lookup is reported on stdout and replaced by the placeholder pair
  ``("N/A", "")`` so that one failed beer does not abort the whole run.
  """
  print(f"Fetching rating for: {search_string}")
  fallback = ("N/A", "")
  try:
    info = get_untappd_beer_info(search_string)
  except Exception as e:
    print(f"Error getting beer info for {search_string}: {e}")
    return fallback[0], fallback[1]
  return info


def create_untappd_rating_link(row) -> str:
  """Build the HTML shown in the rating cell for one beer.

  The beer is looked up on Untappd using "<brewery> <name>" from ``row``.

  Args:
    row: Mapping-like row providing ``'brewery'`` and ``'name'``.

  Returns:
    ``''`` when the lookup yields no rating; the escaped rating text alone
    when no URL is available (the lookup wrapper reports failures as
    ``("N/A", "")``); otherwise an ``<a>`` element linking the rating to the
    beer's page in a new tab.
  """
  search_string = f"{row['brewery']} {row['name']}"
  rating, url = safe_get_untappd_beer_info(search_string)
  if not rating:
    return ''

  # Both values come from a scraped page; escape them before embedding in HTML.
  rating_text = html.escape(str(rating))
  if not url:
    # Avoid emitting a dead <a href=""> link for failed lookups.
    return rating_text
  return f'<a href="{html.escape(url)}" target="_blank">{rating_text}</a>'


def create_dataframe(json_data: dict) -> pd.DataFrame:
  """Turn the tap-list feed into a DataFrame indexed by tap number.

  Each item of ``json_data`` becomes one row. Rows are ordered by tap number,
  ``abv`` is coerced to float (unparseable values become 0), rows whose ABV is
  0 are dropped, and a ``'Rating'`` column with an Untappd link is added - one
  web lookup per remaining row.

  Args:
    json_data: Iterable of per-beer records, each with ``'brewery'``,
      ``'beer'``, ``'serving_info'`` and ``'date_added'`` entries. (Annotated
      ``dict`` for historical reasons; it is iterated as a sequence of
      records.)

  Returns:
    DataFrame indexed by ``'tap number'`` with columns ``brewery``, ``name``,
    ``abv``, ``category``, ``origin``, ``style``, ``days old`` and ``Rating``.
    An empty feed yields an empty frame with those columns.
  """
  columns = [
      'tap number', 'brewery', 'name', 'abv',
      'category', 'origin', 'style', 'days old',
  ]

  data = []
  for item in json_data:
    brewery = item['brewery']['common_name'].strip()
    row = {
        'tap number': item['serving_info']['tap_number'],
        'brewery': brewery,
        'name': filter_brewery(brewery, item['beer']['beer_name']).strip(),
        'abv': item['beer']['abv'],
        'category': item['beer']['style_category'],
        'origin': item['brewery']['origin'],
        'style': item['beer']['style'],
        'days old': compute_age(item['date_added'])
    }
    data.append(row)

  # Naming the columns explicitly keeps set_index() working for an empty feed.
  df = pd.DataFrame(sorted(data, key=lambda e: e['tap number']), columns=columns)
  df = df.set_index('tap number')

  # Convert abv to float.
  df['abv'] = pd.to_numeric(df['abv'], errors='coerce').fillna(0).astype(float)

  # Filter out rows with a zero abv. Copy so that adding a column below writes
  # to an independent frame rather than a slice (no SettingWithCopyWarning).
  df = df[df['abv'] != 0].copy()

  # Rate via Untapp'd. A plain list (instead of DataFrame.apply) also works
  # when no rows are left, where apply(axis=1) would hand back a DataFrame.
  df['Rating'] = [create_untappd_rating_link(row) for _, row in df.iterrows()]

  return df


def render_table(df: pd.DataFrame):
  """Display ``df`` grouped by category with colour-coded ABV cells.

  Within each ``'category'`` group the rows are ordered by ascending ``abv``.
  Header and data cells get a 1px black border and all cell text is centred.
  The styled table is shown through ``display``; nothing is returned.
  """
  def highlight_abv(val: float) -> str:
    """
    Returns the CSS for one ABV cell: green below 6.1, yellow below 7.0,
    orange up to and including 7.5, red for everything else.
    """
    if val < 6.1:
      color = 'green'
    elif val < 7.0:
      color = 'yellow'
    else:
      color = 'orange' if val <= 7.5 else 'red'
    return f'background-color: {color}; color: black'

  # One sorted sub-frame per category; the grouping column is excluded from
  # the sub-frames handed to the lambda.
  by_category = df.groupby('category').apply(
    lambda group: group.sort_values('abv'),
    include_groups=False
  )

  # The same border rule for header (th) and data (td) cells.
  border_rules = [
      {'selector': 'th', 'props': [('border', '1px solid black')]},
      {'selector': 'td', 'props': [('border', '1px solid black')]}
  ]

  # Build the styler step by step: ABV colours, borders, then alignment.
  styler = by_category.style.map(highlight_abv, subset=['abv'])
  styler = styler.set_table_styles(border_rules)
  styler = styler.set_properties(**{'text-align': 'center'})

  # Render the DataFrame as a table
  display(styler)


In [None]:
# Fetch the tap-list feed for the configured bigscreen page, then build the
# table. create_dataframe() performs one Untappd lookup per beer with a
# non-zero ABV, so this cell is network-bound and can take a while.
json_data = get_json_data(b30_url)
df = create_dataframe(json_data)

In [None]:
# Show the styled, category-grouped table for the frame built in the previous cell.
render_table(df)