In [11]:
from plexapi.server import PlexServer
from tmdbv3api import TMDb, Movie, Genre
from collections import Counter
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD

In [2]:
baseurl = 'http://peterubuntuserver.ddns.net:32400'
token = 'ydWQy8X6StWBJVPHiLf2'
plex = PlexServer(baseurl, token)

tmdb = TMDb()
tmdb.api_key = 'ea023fa0879737d0cfd9ae3ca7365a6e'
movie_api = Movie()

In [21]:
def get_tmdb_ids(plex):
    ids = []
    for m in plex.library.section('Movies').all():
        for guid in m.guids:
            if guid.id.startswith('tmdb://'):
                try:
                    tmdb_str = guid.id.split('//')[-1].split('?')[0]
                    ids.append(int(tmdb_str))
                except ValueError:
                    pass
                break
    return ids

def build_counters(tmdb_ids, top_n_actors=500, top_n_directors=200):
    actor_ctr, director_ctr = Counter(), Counter()
    for tid in tmdb_ids:
        try:
            creds = movie_api.credits(tid)
            cast = list(creds.cast)
        except Exception:
            continue

        # cast is a list of dicts
        for a in cast[:5]:
            name = a.get('name')
            if name:
                actor_ctr[name] += 1

        # crew is a list of dicts
        for c in creds.crew:
            if c.get('job') == 'Director' and c.get('name'):
                director_ctr[c['name']] += 1

    top_actors   = [name for name, _ in actor_ctr.most_common(top_n_actors)]
    top_directors= [name for name, _ in director_ctr.most_common(top_n_directors)]
    return top_actors, top_directors

def vectorize_movie(md, creds, genres_master, top_actors, top_directors):
    v = []
    # — one-hot genres
    movie_genres = {g.name for g in md.genres}
    v += [1 if gm in movie_genres else 0 for gm in genres_master]

    # — normalized numeric
    year     = int(md.release_date[:4]) if md.release_date else 2000
    runtime  = md.runtime  or 90
    vote_avg = md.vote_average or 5
    v += [
        (year - 1900) / 125,
        runtime  / 300,
        vote_avg / 10,
    ]

    # — cast
    cast5 = {c['name'] for c in creds.cast[:5] if c.get('name')}
    v   += [1 if actor in cast5 else 0 for actor in top_actors]

    # — directors
    dirs = {c['name'] for c in creds.crew if c.get('job')=='Director' and c.get('name')}
    v   += [1 if d in dirs else 0 for d in top_directors]

    return np.array(v, dtype=float)

def build_dataset(plex, movie_api, top_actors, top_directors):
    genres_master = [g.name for g in Genre().movie_list()]

    content_vecs, overviews, titles = [], [], []
    for pm in plex.library.section('Movies').all():
        tmdb_id = None
        for guid in pm.guids:
            if guid.id.startswith('tmdb://'):
                try:
                    tmdb_id = int(guid.id.split('//')[-1].split('?')[0])
                except ValueError:
                    pass
                break
        if tmdb_id is None:
            continue

        try:
            md    = movie_api.details(tmdb_id)
            creds = movie_api.credits(tmdb_id)
        except Exception:
            continue

        content_vecs.append(
            vectorize_movie(md, creds, genres_master, top_actors, top_directors)
        )
        overviews.append(md.overview or "")
        titles.append(pm.title)

    content_matrix = np.vstack(content_vecs)

    # TF-IDF on plot + SVD
    tfidf = TfidfVectorizer(max_features=2000, stop_words='english')
    X_tfidf = tfidf.fit_transform(overviews)
    svd     = TruncatedSVD(n_components=200, random_state=42)
    X_text  = svd.fit_transform(X_tfidf)

    X_all = np.hstack([content_matrix, X_text])
    return { titles[i]: X_all[i] for i in range(len(titles)) }

def recommend(title, vectors, top_n=5):
    all_titles = list(vectors)
    mat        = np.vstack([vectors[t] for t in all_titles])
    target     = vectors[title].reshape(1, -1)
    sims       = cosine_similarity(target, mat)[0]
    best_idxs  = sims.argsort()[::-1][1:top_n+1]
    return [(all_titles[i], sims[i]) for i in best_idxs]

tmdb_ids      = get_tmdb_ids(plex)
top_actors, top_directors = build_counters(tmdb_ids)

vectors = build_dataset(plex, movie_api, top_actors, top_directors)

seed = "Inception"
for title, score in recommend(seed, vectors, top_n=5):
    print(f"{title:<40}  sim={score:.3f}")

TypeError: attribute name must be string, not 'slice'

In [15]:
import pandas as pd
import requests

TAUTULLI_URL  = "http://192.168.2.73:8181"      # ⚠️  change if remote / SSL
TAUTULLI_KEY  = "c0766a7cd7a24f73b8d110a118fed994"      # ⚠️  copy from Tautulli settings
MAX_HISTORY   = None     # None = fetch every record; else int for quick tests
BATCH_SIZE    = 1000 

def fetch_history_tautulli(url=TAUTULLI_URL, apikey=TAUTULLI_KEY,
                           media_type="movie", limit=MAX_HISTORY,
                           batch=BATCH_SIZE):
    """Return a DataFrame with at least user_id, rating_key, watched_status."""
    rows, start, keep_going = [], 0, True
    while keep_going:
        params = {
            "apikey": apikey,
            "cmd": "get_history",
            "media_type": media_type,
            "length": batch,
            "start": start,
            "order_column": "date",
            "order_dir": "asc",
        }
        r = requests.get(f"{url}/api/v2", params=params, timeout=20).json()
        if r["response"]["result"] != "success":
            raise RuntimeError(r["response"]["message"])
        data = r["response"]["data"]["data"]
        rows.extend(data)
        start += batch
        keep_going = data and (limit is None or start < limit)
    return pd.DataFrame(rows)[["user_id", "rating_key", "watched_status"]]

fetch_history_tautulli()

Unnamed: 0,user_id,rating_key,watched_status
0,22851609,612,1.0
1,514171027,560,0.25
2,22851609,1050,1.0
3,22851609,1923,1.0
4,27840798,560,1.0
5,22851609,1271,0.5
6,486175841,561,0.5
7,514171027,2457,0.5
8,22851609,272,0.0
9,486175841,5335,0.25


In [None]:
ids            = get_tmdb_ids(plex)
top_actors, top_directors = build_counters(ids)
vectors        = build_dataset(plex, movie_api, top_actors, top_directors)

seed = "Inception"
for t, score in recommend(seed, vectors, top_n=5):
    print(f"{t:<40}  sim={score:.3f}")

In [5]:
vectors = build_dataset(movie_api, movie_library,
                        top_actors, top_directors)

In [26]:
import requests
import numpy as np
import pandas as pd
from plexapi.server import PlexServer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import MultiLabelBinarizer, MinMaxScaler
from sklearn.neighbors import NearestNeighbors

TMDB_API_KEY='ea023fa0879737d0cfd9ae3ca7365a6e'

def fetch_plex_list():
    """Get title+tmdb_id for every movie in Plex."""
    rows = []
    for m in plex.library.section("Movies").all():
        tmdb_id = None
        for g in m.guids:
            if "tmdb" in g.id:
                tmdb_id = g.id.split("//")[-1].split("?")[0]
                break
        if tmdb_id:
            rows.append({"title": m.title, "tmdb_id": tmdb_id})
    return pd.DataFrame(rows)


def tmdb_get(path, **params):
    """Helper to call TMDb v3 API and return .json()."""
    url = f"https://api.themoviedb.org/3{path}"
    params["api_key"] = TMDB_API_KEY
    return requests.get(url, params=params).json()


def enrich_with_tmdb(df):
    """Add overview, genres, runtime, vote, release_date, cast, directors."""
    meta = []
    for _, row in df.iterrows():
        tid = row["tmdb_id"]
        info = tmdb_get(f"/movie/{tid}", language="en-US")
        creds = tmdb_get(f"/movie/{tid}/credits")
        genres = [g["name"] for g in info.get("genres", [])]
        overview = info.get("overview", "") or ""
        runtime = info.get("runtime") or 0
        vote = info.get("vote_average") or 0
        rd = info.get("release_date") or ""
        # top 5 cast names
        cast5 = [c["name"] for c in creds.get("cast", [])[:5] if c.get("name")]
        # all directors
        dirs = [c["name"] for c in creds.get("crew", []) if c.get("job") == "Director"]
        meta.append({
            "overview": overview,
            "genres": genres,
            "runtime": runtime,
            "vote": vote,
            "release_date": rd,
            "cast": cast5,
            "directors": dirs
        })
    return pd.DataFrame(meta)


def build_features(df):
    # 1️⃣ MultiLabelBinarizers for genres, cast, directors
    mlb_genre = MultiLabelBinarizer(sparse_output=False)
    G = mlb_genre.fit_transform(df["genres"])

    mlb_cast = MultiLabelBinarizer(sparse_output=False, classes=None)
    C = mlb_cast.fit_transform(df["cast"])

    mlb_dir = MultiLabelBinarizer(sparse_output=False)
    D = mlb_dir.fit_transform(df["directors"])

    # 2️⃣ Numeric features: runtime, vote, year
    years = df["release_date"].str[:4].fillna("2000").astype(int)
    nums = np.vstack([df["runtime"], df["vote"], years]).T
    scaler = MinMaxScaler()
    N = scaler.fit_transform(nums)

    # 3️⃣ TF-IDF on overview + SVD → 100 dims
    tfidf = TfidfVectorizer(max_features=2000, stop_words="english")
    Xtxt = tfidf.fit_transform(df["overview"])
    svd  = TruncatedSVD(n_components=100, random_state=42)
    Ttxt = svd.fit_transform(Xtxt)

    # 4️⃣ Stack everything
    X = np.hstack([G, C, D, N, Ttxt])
    return X, (mlb_genre, mlb_cast, mlb_dir, scaler, tfidf, svd)


def train_index(X):
    knn = NearestNeighbors(n_neighbors=6, metric="cosine").fit(X)
    return knn


def recommend(title, df, X, knn):
    if "title" not in df.columns:
        raise KeyError("DataFrame must have a 'title' column")
    idxs = df.index[df["title"] == title].tolist()
    if not idxs:
        raise ValueError(f"'{title}' not found in your library")
    dist, nn = knn.kneighbors(X[idxs[0]].reshape(1, -1), n_neighbors=6)
    recs = df.iloc[nn[0]][["title"]].copy()
    recs["score"] = 1 - dist[0]
    return recs.iloc[1:].reset_index(drop=True)

df_movies = fetch_plex_list()

# 2. Fetch all metadata from TMDb
df_meta   = enrich_with_tmdb(df_movies)

# 3. Stitch them together so we keep "title"
df = pd.concat([df_movies.reset_index(drop=True),
                df_meta.reset_index(drop=True)], axis=1)

# 4. Build feature matrix and index
X   = build_features(df)
knn = train_index(X)

# Demo
seed = "Inception"
print(f"\nRecommendations for {seed!r}:")
print(recommend(seed, df, X, knn).to_string(index=False))

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.