# Prep

## Imports

In [1]:
import pandas as pd
import numpy as np
import requests
import json
import os

from dotenv import load_dotenv
from bs4 import BeautifulSoup

## Auth

In [2]:
load_dotenv(r"C:\\Users\\User\\Documents\\GitHub\\movies\\tmdb_auth.env")

api_key = os.getenv("API_KEY")
access_token = os.getenv("ACCESS_TOKEN")

# TMDB API Data

In [None]:
tmdb_url = "https://api.themoviedb.org/3/account/21623434/rated/movies?language=en-US&page=1&sort_by=created_at.asc"

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {access_token}"
}

#response = requests.get(tmdb_url, headers=headers)

In [4]:
print(response.text)

{"page":1,"results":[{"adult":false,"backdrop_path":"/y4Kw8lT0BEBHaEm4hqw12AKJNoZ.jpg","genre_ids":[18,10751],"id":13841,"original_language":"en","original_title":"Rad","overview":"A BMX racer, who lives in a small town with his mother and sister, is faced with a tough decision, qualify for Helltrack or take his SATs in order to attend college.","popularity":14.588,"poster_path":"/yg7TR7U4f2xRrVrM0IyDUGuFSHa.jpg","release_date":"1986-03-21","title":"Rad","video":false,"vote_average":6.541,"vote_count":97,"rating":6.0},{"adult":false,"backdrop_path":"/gE0MLnPzXHoAJIogLhsqt0RW39R.jpg","genre_ids":[12,35,18,10751],"id":183,"original_language":"en","original_title":"The Wizard","overview":"A boy and his brother run away from home and hitch cross-country, with help from a girl they meet, to compete in the ultimate video-game championship.","popularity":10.41,"poster_path":"/dtgNNq3TBbpBX6RXHLt5gAMdwPV.jpg","release_date":"1989-12-15","title":"The Wizard","video":false,"vote_average":6.029,"

# Letterboxd Scraping Data

## Functions

In [20]:
def get_film_urls(list_url):
    content = requests.get(list_url).text
    soup = BeautifulSoup(content, 'html')

    url_list = [div['data-target-link'] for div in soup.find_all('div', class_='film-poster')]

    return url_list

In [21]:
def get_raw_film_html(film_url):
    url = "https://letterboxd.com" + film_url
    content = requests.get(url).text
    soup = BeautifulSoup(content, 'html')

    return soup

## Extraction

In [22]:
film_urls = get_film_urls("https://letterboxd.com/dromemario/list/fff-film-fueled-friends/")


In [24]:
film_urls[0]

'/film/the-wizard/'

In [25]:
get_raw_film_html(film_urls[0])


<!DOCTYPE html>

<html class="no-mobile no-js" id="html" lang="en">
<head>
<meta charset="utf-8"/>
<meta content="width=1024" name="viewport"/>
<meta content="IE=edge,chrome=1" http-equiv="X-UA-Compatible"/>
<meta content="A boy and his brother run away from home and hitch cross-country, with help from a girl they meet, to compete in the ultimate video-game championship." name="description"/>
<meta content="video.movie" property="og:type"/>
<meta content="https://letterboxd.com/film/the-wizard/" property="og:url"/>
<meta content="The Wizard (1989)" property="og:title"/>
<meta content="A boy and his brother run away from home and hitch cross-country, with help from a girl they meet, to compete in the ultimate video-game championship." property="og:description"/>
<meta content="https://a.ltrbxd.com/resized/sm/upload/f6/jy/hd/zk/wizard-1200-1200-675-675-crop-000000.jpg?v=c451310c79" property="og:image"/><meta content="1200" property="og:image:width"/><meta content="675" property="og:imag

In [13]:
#print(soup.prettify())

In [23]:
[a_tag.get_text(strip=True) for a_tag in soup.find(name='div', class_='cast-list').find_all('a')]

['Leon Lai',
 'Charlie Yeung',
 'Takeshi Kaneshiro',
 'Karen Mok Man-Wai',
 'Michelle Reis',
 'Chan Man-Lei',
 'Toru Saito',
 'Benz Kong To-Hoi',
 'Chan Fai-hung',
 'Kwan Lee-na',
 'Wu Yuk-Ho',
 'Johnnie Kong',
 'Chun Kang Wang',
 'Mak Shu-San',
 'Choi Kwok-Ping',
 'Chan Siu-Wah',
 'Chow Gam-Kong',
 'Leung Shing-Hung',
 'Wong Kim-Wai',
 'Wong Kim-Bun',
 'Choi Kwok-Keung',
 'Lee Tat-Chiu',
 'Cheung Chi-Ping']

In [22]:
soup.find(name='div', class_='cast-list').find_all('a')

[<a class="text-slug tooltip" href="/actor/leon-lai-1/" title="Wong Chi-Ming">Leon Lai</a>,
 <a class="text-slug tooltip" href="/actor/charlie-yeung/" title="Charlie">Charlie Yeung</a>,
 <a class="text-slug tooltip" href="/actor/takeshi-kaneshiro/" title="Ho Chi-mo">Takeshi Kaneshiro</a>,
 <a class="text-slug tooltip" href="/actor/karen-mok/" title="Blondie">Karen Mok Man-Wai</a>,
 <a class="text-slug tooltip" href="/actor/michelle-reis/" title="Killer's Agent">Michelle Reis</a>,
 <a class="text-slug tooltip" href="/actor/chan-man-lei/" title="Wu's Father">Chan Man-Lei</a>,
 <a class="text-slug tooltip" href="/actor/toru-saito/" title="Sato">Toru Saito</a>,
 <a class="text-slug tooltip" href="/actor/benz-kong-to-hoi/" title="Ah Hoi">Benz Kong To-Hoi</a>,
 <a class="text-slug tooltip" href="/actor/chan-fai-hung/" title="Man Forced to Eat Ice-cream">Chan Fai-hung</a>,
 <a class="text-slug tooltip" href="/actor/kwan-lee-na/" title="Woman Pressed to Buy Vegetables">Kwan Lee-na</a>,
 <a cla