In [1]:
import streamlit as st
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import altair as alt
import plotly.graph_objects as go
import plotly_express as px
import geopy
from geopy.extra.rate_limiter import  RateLimiter
import geopandas
import folium
from folium.plugins import MarkerCluster
from streamlit_folium import folium_static
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from time import sleep

In [2]:
def get_film_data(title):
    """Look up a film on IMDb and scrape the names from its cast table.

    Opens a Chrome browser, types ``title`` into the IMDb search box, opens
    the first suggested result, follows the first link found under
    ``div.ipc-title__wrapper`` and reads the rows of ``table.cast_list``.

    Args:
        title: Film title to type into the IMDb search box.

    Returns:
        ``None`` when ``title`` is empty/falsy (no browser is started).
        Otherwise a two-item list ``[actors_names, film_url]`` where
        ``actors_names`` is a list of strings and ``film_url`` is the browser
        URL after the first search result was opened.

    Raises:
        Whatever Selenium raises when an expected element is missing; the
        browser is closed before the exception propagates.
    """
    if not title:
        return None

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        driver.get("https://www.imdb.com/")

        search_box = (driver
                      .find_element(By.CSS_SELECTOR, "div.react-autosuggest__container")
                      .find_element(By.TAG_NAME, "input"))
        search_box.send_keys(title)
        sleep(2)  # give the autosuggest dropdown time to populate

        first_result = driver.find_element(
            By.CSS_SELECTOR, "a.sc-bqyKva.ehfErK.searchResult.searchResult--const")
        first_result.click()
        sleep(2)

        # Captured before navigating on to the cast page.
        film_url = driver.current_url

        cast_page = (driver
                     .find_element(By.CSS_SELECTOR, "div.ipc-title__wrapper")
                     .find_element(By.TAG_NAME, "a"))
        cast_page.click()
        sleep(1)

        cast_table = (driver
                      .find_element(By.CSS_SELECTOR, "table.cast_list")
                      .find_element(By.TAG_NAME, "tbody"))
        cast_rows = cast_table.find_elements(By.TAG_NAME, "tr")

        actors_names = []
        # The first row is skipped (presumably a header row - confirm against
        # the page). Reading stops at the first row whose class is neither
        # "odd" nor "even".
        for row in cast_rows[1:]:
            if row.get_attribute("class") not in ("odd", "even"):
                break
            cells = row.find_elements(By.TAG_NAME, "td")
            actors_names.append(cells[1].find_element(By.TAG_NAME, "a").text)

        return [actors_names, film_url]
    finally:
        # Always shut the browser down, on success and on failure, so Chrome
        # and chromedriver processes do not pile up.
        driver.quit()

def get_locations(film_url):
    """Scrape a film's IMDb filming locations and geocode them with Nominatim.

    Opens ``film_url`` in Chrome, follows the filming-locations link in the
    title details section, collects the text of every location entry and
    geocodes each one.

    Args:
        film_url: URL of the film's IMDb page.

    Returns:
        A two-item list ``[addresses, coordinates]``. ``addresses`` holds the
        location strings that could be geocoded, and ``coordinates`` holds the
        matching ``(latitude, longitude)`` tuples in the same order.
        Locations the geocoder cannot resolve are dropped from both lists, so
        both are empty when nothing was found.

    Raises:
        Whatever Selenium raises when an expected element is missing; the
        browser is closed before the exception propagates.
    """
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
    try:
        driver.get(film_url)
        sleep(1)

        details = driver.find_element(By.CSS_SELECTOR, '[data-testid="title-details-section"]')
        filming_locations = details.find_element(
            By.CSS_SELECTOR, '[data-testid="title-details-filminglocations"]')
        locations_link = filming_locations.find_element(
            By.CSS_SELECTOR,
            "a.ipc-metadata-list-item__label.ipc-metadata-list-item__label--link")
        locations_link.click()
        sleep(1)  # let the locations page load before querying it

        container = driver.find_element(By.ID, "filming_locations")
        # Only divs carrying exactly one of these two class strings are
        # location entries; every other div is ignored.
        all_locations = [
            div.find_element(By.TAG_NAME, "dt").find_element(By.TAG_NAME, "a").text
            for div in container.find_elements(By.TAG_NAME, "div")
            if div.get_attribute("class") in ("soda sodavote odd", "soda sodavote even")
        ]
    finally:
        # The browser is not needed for geocoding; close it on every path.
        driver.quit()

    locator = geopy.Nominatim(user_agent="myGeocoder")
    # The public Nominatim service allows at most one request per second.
    geocode = RateLimiter(locator.geocode, min_delay_seconds=1)

    addresses = []
    coordinates = []
    for address in all_locations:
        location = geocode(address)
        if location is None:
            # Unresolvable address: skip it so both lists stay aligned.
            continue
        addresses.append(address)
        coordinates.append((location.latitude, location.longitude))

    return [addresses, coordinates]


def draw_locations(coordinates, all_locations):
    """Render the given points as clustered markers on a folium map.

    A map is created at a fixed starting position and zoom level, one marker
    per entry of ``coordinates`` is added to a ``MarkerCluster``, and the map
    is handed to ``folium_static`` for display.

    Args:
        coordinates: Iterable of marker positions, passed to ``folium.Marker``
            unchanged.
        all_locations: Sequence of labels indexed in step with
            ``coordinates``; each label is used as both popup and tooltip.

    Raises:
        IndexError: If ``all_locations`` has fewer entries than
            ``coordinates``.
    """
    film_map = folium.Map(location=[40.76791227224293, -73.98658282967192], zoom_start=3)
    cluster = MarkerCluster().add_to(film_map)

    for idx, point in enumerate(coordinates):
        label = all_locations[idx]
        folium.Marker(point, popup=label, tooltip=label).add_to(cluster)

    folium_static(film_map)

In [None]:
title = "The Dark Knight"

# Scrape the film and persist the results. The file writes live inside the
# guard because the names they use are only bound when a title is set.
if title:
    res = get_film_data(title)
    actors_names = res[0]
    film_url = res[1]
    loc_info = get_locations(film_url)
    all_locations = loc_info[0]
    coordinates = loc_info[1]

    # Explicit UTF-8 so names containing non-ASCII characters are written the
    # same way on every platform instead of depending on the locale default.
    with open('TheDarkKnightLoc.txt', 'w', encoding='utf-8') as f:
        f.write('\n'.join(all_locations))

    with open('TheDarkKnightCoor.txt', 'w', encoding='utf-8') as f:
        f.write('\n'.join(str(x) for x in coordinates))

    with open('TheDarkKnightActors.txt', 'w', encoding='utf-8') as f:
        f.write('\n'.join(actors_names))