Skip to content

Commit

Permalink
🏋
Browse files Browse the repository at this point in the history
  • Loading branch information
transitive-bullshit committed Dec 1, 2023
1 parent 64da4d1 commit 3a82bc0
Show file tree
Hide file tree
Showing 10 changed files with 306 additions and 10 deletions.
2 changes: 1 addition & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ In addition to basic movie metadata, most movies also include:
- Letterboxd ratings
- FlickMetrix ratings
- Media
- **YouTube traileras**
- **YouTube trailers**
- Poster images
- Backdrop images

Expand Down
2 changes: 1 addition & 1 deletion src/get-imdb-movie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ async function main() {
// const imdbId = 'tt0903747' // tv series (breaking bad)
const imdbId = 'tt6443346'

// TODO
// various edge cases
// tt2322674 // CM Punk: Best in the World
// tt10702760 // National Theatre Live: Fleabag
// tt4020156 // National Theatre Live: A Streetcar Named Desire
Expand Down
10 changes: 5 additions & 5 deletions src/lib/tmdb.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import got from 'got'

import type { tmdb } from '../types'
import * as types from '../types'

const BASE_URL_V3 = 'https://api.themoviedb.org/3'

Expand Down Expand Up @@ -30,7 +30,7 @@ export class TMDB {
credits?: boolean
keywords?: boolean
} = {}
): Promise<tmdb.MovieDetails> {
): Promise<types.tmdb.MovieDetails> {
const opts =
videos || images
? {
Expand All @@ -46,13 +46,13 @@ export class TMDB {
}
: undefined

return this._get<tmdb.MovieDetails>(`/movie/${movieId}`, opts)
return this._get<types.tmdb.MovieDetails>(`/movie/${movieId}`, opts)
}

public async getMovieCredits(
movieId: string | number
): Promise<tmdb.Credits> {
return this._get<tmdb.Credits>(`/movie/${movieId}/credits`)
): Promise<types.tmdb.Credits> {
return this._get<types.tmdb.Credits>(`/movie/${movieId}/credits`)
}

private async _get<T>(path: string, opts?: GetOptions): Promise<T> {
Expand Down
89 changes: 89 additions & 0 deletions src/lib/yts.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import got from 'got'

import * as types from '../types'

const BASE_URL = 'https://yts.mx/api'

/**
 * Fields that `YTS.getMovies` accepts for its `sortBy` option; the chosen
 * value is sent to the API as the `sort_by` query parameter.
 */
export type YTSSortBy =
| 'title'
| 'year'
| 'rating'
| 'peers'
| 'seeds'
| 'download_count'
| 'like_count'
| 'date_added'

/** Sort direction for `YTS.getMovies`, sent as the `order_by` query parameter. */
export type YTSOrderBy = 'desc' | 'asc'
/** Quality filter for `YTS.getMovies`, sent as the `quality` query parameter. */
export type YTSQuality = '720p' | '1080p' | '2160p' | '3D' | 'All'

/**
 * Thin client for the YTS JSON API rooted at `BASE_URL`.
 *
 * Each method issues a single GET request via `got` and unwraps the `data`
 * envelope of the JSON response.
 */
export class YTS {
  /**
   * Lists movies via `/v2/list_movies.json`.
   *
   * Options that are left `undefined` are passed through to `got` as-is.
   *
   * @param opts.limit - Sent as `limit` (default `10`).
   * @param opts.page - Sent as `page` (default `1`).
   * @param opts.quality - Sent as `quality`.
   * @param opts.minimumIMDBRating - Sent as `minimum_rating`.
   * @param opts.query - Sent as `query_term`.
   * @param opts.genre - Sent as `genre`.
   * @param opts.sortBy - Sent as `sort_by` (default `'date_added'`).
   * @param opts.orderBy - Sent as `order_by` (default `'desc'`).
   * @param opts.withRTRatings - Sent as `with_rt_ratings`; RT ratings don't
   *   seem to work reliably.
   * @returns The `data.movies` array of the response, or an empty array when
   *   the response carries no `movies` key.
   */
  public async getMovies({
    limit = 10,
    page = 1,
    quality,
    minimumIMDBRating,
    query,
    genre,
    sortBy = 'date_added',
    orderBy = 'desc',
    withRTRatings // RT ratings don't seem to work reliably
  }: {
    limit?: number
    page?: number
    quality?: YTSQuality
    minimumIMDBRating?: number
    query?: string
    genre?: string
    sortBy?: YTSSortBy
    orderBy?: YTSOrderBy
    withRTRatings?: boolean
  } = {}): Promise<types.yts.Movie[]> {
    const url = `${BASE_URL}/v2/list_movies.json`

    const res = await got(url, {
      searchParams: {
        limit,
        page,
        quality,
        minimum_rating: minimumIMDBRating,
        query_term: query,
        genre,
        sort_by: sortBy,
        order_by: orderBy,
        with_rt_ratings: withRTRatings
      }
    }).json<types.yts.APIResponseListMovies>()

    // NOTE(review): YTS appears to omit `data.movies` entirely when a query
    // matches nothing; fall back to an empty list so callers always receive
    // the array promised by the return type.
    return res.data.movies ?? []
  }

  /**
   * Fetches a single movie via `/v2/movie_details.json`.
   *
   * @param opts.imdbId - Sent as `imdb_id`.
   * @param opts.ytsId - Sent as `movie_id`.
   * @param opts.images - Sent as `with_images` (default `false`).
   * @param opts.cast - Sent as `with_cast` (default `false`).
   * @returns The `data.movie` object of the response.
   * @throws Error if neither `imdbId` nor `ytsId` is provided (both falsy).
   */
  public async getMovie({
    imdbId,
    ytsId,
    images = false,
    cast = false
  }: {
    imdbId?: string
    ytsId?: number
    images?: boolean
    cast?: boolean
  }): Promise<types.yts.Movie> {
    const url = `${BASE_URL}/v2/movie_details.json`

    // Validate before touching the network: the endpoint needs an identifier.
    if (!imdbId && !ytsId) {
      throw new Error(`imdbId or ytsId must be provided`)
    }

    const res = await got(url, {
      searchParams: {
        imdb_id: imdbId,
        movie_id: ytsId,
        with_images: images,
        with_cast: cast
      }
    }).json<types.yts.APIResponseGetMovie>()

    return res.data.movie
  }
}
2 changes: 1 addition & 1 deletion src/populate-imdb-movies.ts
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ async function main() {
if (++numErrors >= 3) {
return null
} else {
await delay(10000 + 1000 * numErrors * numErrors)
await delay(5000 + 1000 * numErrors * numErrors)
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/populate-tmdb-movie-dump.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ async function main() {
}
},
{
concurrency: 16
concurrency: 32
}
)
).filter(Boolean)
Expand Down
102 changes: 102 additions & 0 deletions src/populate-yts-movies.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import fs from 'node:fs/promises'

import delay from 'delay'
import makeDir from 'make-dir'
import pMap from 'p-map'

import * as config from './lib/config'
import * as types from './types'
import { getNumBatches } from './lib/utils'
import { YTS } from './lib/yts'

/**
 * Fetches info on all previously downloaded movies from YTS.
 *
 * Reads each `movies-<batch>.json` file from `config.outDir`, looks up every
 * movie that has an IMDB id via `YTS.getMovie`, and logs per-batch and overall
 * counts of successful lookups.
 *
 * NOTE(review): the fetched YTS records are currently only logged (a sample)
 * and counted; nothing in this function persists them yet.
 *
 * @example
 * ```
 * npx tsx src/populate-yts-movies.ts
 * ```
 */
async function main() {
  const yts = new YTS()
  await makeDir(config.outDir)

  const numBatches = await getNumBatches()

  let batchNum = 0
  let numMoviesTotal = 0
  let numYTSMoviesDownloadedTotal = 0

  do {
    const srcFile = `${config.outDir}/movies-${batchNum}.json`
    const movies: types.Movie[] = JSON.parse(
      await fs.readFile(srcFile, { encoding: 'utf-8' })
    )

    console.log(
      `\npopulating ${movies.length} movies in batch ${batchNum} (${srcFile})\n`
    )

    // Count of successful YTS lookups within the current batch.
    let numDownloaded = 0

    await pMap(
      movies,
      async (movie, index): Promise<types.yts.Movie | null> => {
        // YTS is queried by IMDB id, so movies without one are skipped.
        if (!movie.imdbId) {
          return null
        }

        try {
          console.log(
            `${batchNum}:${index} yts ${movie.imdbId} (${movie.releaseYear}) ${movie.title}`
          )

          const ytsMovie = await yts.getMovie({ imdbId: movie.imdbId })

          ++numDownloaded

          // Dump the first result and every 50th one as a sanity check.
          if (numDownloaded === 1 || numDownloaded % 50 === 0) {
            console.log()
            console.log(JSON.stringify(ytsMovie, null, 2))
            console.log()
          }

          // Hand the record back to pMap instead of dropping it.
          return ytsMovie
        } catch (err: unknown) {
          // `String(err)` matches `err.toString()` for Error instances and is
          // also safe for non-Error throwables.
          console.error('yts error', movie.imdbId, movie.title, String(err))

          // Every failure (404 "not found" as well as any other error) yields
          // null so the mapper always honours its declared return type; failed
          // lookups are not retried.
          return null
        }
      },
      {
        concurrency: 16
      }
    )

    const numMovies = movies.length
    const numYTSMoviesDownloaded = numDownloaded

    numMoviesTotal += numMovies
    numYTSMoviesDownloadedTotal += numYTSMoviesDownloaded

    console.log()
    console.log(`batch ${batchNum} done`, {
      numMovies,
      numYTSMoviesDownloaded,
      numYTSMoviesDownloadedTotal
    })

    ++batchNum
  } while (batchNum < numBatches)

  console.log()
  console.log('done', {
    numMoviesTotal,
    numYTSMoviesDownloadedTotal
  })
}

main()
26 changes: 25 additions & 1 deletion src/process-movies.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,14 @@ async function main() {
let numTMDBMoviesTotal = 0
let numMoviesTotal = 0

let numAdult = 0
let numStatus = 0
let numIMDB = 0
let numRuntime = 0
let numTrailer = 0
let numIMDBType = 0
let numProcess = 0

console.log(`\nprocessing TMDB movies in ${numBatches} batches\n`)
do {
const srcFile = `${config.outDir}/tmdb-${batchNum}.json`
Expand All @@ -87,6 +95,7 @@ async function main() {

if (movie.adult) {
// console.log('warn adult movie', movie.tmdbId, movie.title)
++numAdult
return null
}

Expand All @@ -96,20 +105,24 @@ async function main() {
// movie.tmdbId,
// movie.title
// )
++numStatus
return null
}

if (!movie.imdbId) {
// console.log('warn missing imdb id', movie.tmdbId, movie.title)
++numIMDB
return null
}

if (movie.runtime < 60) {
++numRuntime
return null
}

if (!movie.trailerUrl) {
// console.log('warn missing trailer', movie.tmdbId, movie.title)
++numTrailer
return null
}

Expand All @@ -123,6 +136,7 @@ async function main() {
}

if (!populateMovieWithIMDBInfo(movie, { imdbRatings, imdbMovie })) {
++numIMDBType
return null
}

Expand All @@ -139,6 +153,7 @@ async function main() {
}

if (!processMovie(movie)) {
++numProcess
return null
}

Expand Down Expand Up @@ -178,7 +193,16 @@ async function main() {
console.log('done', {
numTMDBMoviesTotal,
numMoviesTotal,
percentMoviesTotal: `${((numMoviesTotal / numTMDBMoviesTotal) * 100) | 0}%`
percentMoviesTotal: `${((numMoviesTotal / numTMDBMoviesTotal) * 100) | 0}%`,
filters: {
numAdult,
numStatus,
numIMDB,
numRuntime,
numTrailer,
numIMDBType,
numProcess
}
})
}

Expand Down
2 changes: 2 additions & 0 deletions src/types/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@ import { imdb } from './imdb-types'
import { omdb } from './omdb-types'
import { tmdb } from './tmdb-types'
import { wikidata } from './wikidata-types'
import { yts } from './yts-types'

export type { tmdb }
export type { omdb }
export type { imdb }
export type { flickMetrix }
export type { wikidata }
export type { yts }

export interface Movie {
// ids
Expand Down
Loading

0 comments on commit 3a82bc0

Please sign in to comment.