Skip to content

Commit

Permalink
add image scraping support
Browse files Browse the repository at this point in the history
  • Loading branch information
keenbed committed Jan 5, 2024
1 parent bd7d4ac commit d7ddbe8
Show file tree
Hide file tree
Showing 26 changed files with 1,118 additions and 6 deletions.
21 changes: 21 additions & 0 deletions graphql/documents/data/scrapers.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,27 @@ fragment ScrapedGalleryData on ScrapedGallery {
}
}

# Selection of ScrapedImage fields requested by the image scraping queries.
# The scalar fields are selected directly; studio, tags and performers are
# selected through the ScrapedScene* fragments spread below.
fragment ScrapedImageData on ScrapedImage {
title
code
details
photographer
urls
date

# studio uses the scene studio fragment
studio {
...ScrapedSceneStudioData
}

# tags use the scene tag fragment
tags {
...ScrapedSceneTagData
}

# performers use the scene performer fragment
performers {
...ScrapedScenePerformerData
}
}

fragment ScrapedStashBoxSceneData on ScrapedScene {
title
code
Expand Down
26 changes: 26 additions & 0 deletions graphql/documents/queries/scrapers/scrapers.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,17 @@ query ListGalleryScrapers {
}
}

# Lists the scrapers of type IMAGE, returning each scraper's id and name
# together with the urls and supported_scrapes of its image spec.
query ListImageScrapers {
listScrapers(types: [IMAGE]) {
id
name
image {
urls
supported_scrapes
}
}
}

query ListMovieScrapers {
listScrapers(types: [MOVIE]) {
id
Expand Down Expand Up @@ -108,12 +119,27 @@ query ScrapeSingleGallery(
}
}

# Scrapes a single image from the given source using the supplied input.
# Both variables are required; each result is returned with the
# ScrapedImageData selection.
query ScrapeSingleImage(
$source: ScraperSourceInput!
$input: ScrapeSingleImageInput!
) {
scrapeSingleImage(source: $source, input: $input) {
...ScrapedImageData
}
}

# Scrapes a gallery from the given URL and returns the ScrapedGalleryData
# selection of the result.
query ScrapeGalleryURL($url: String!) {
scrapeGalleryURL(url: $url) {
...ScrapedGalleryData
}
}

# Scrapes an image from the given URL and returns the ScrapedImageData
# selection of the result.
query ScrapeImageURL($url: String!) {
scrapeImageURL(url: $url) {
...ScrapedImageData
}
}

query ScrapeMovieURL($url: String!) {
scrapeMovieURL(url: $url) {
...ScrapedMovieData
Expand Down
8 changes: 8 additions & 0 deletions graphql/schema/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,12 @@ type Query {
input: ScrapeSingleMovieInput!
): [ScrapedMovie!]!

"Scrape for a single image"
scrapeSingleImage(
source: ScraperSourceInput!
input: ScrapeSingleImageInput!
): [ScrapedImage!]!

"Scrapes content based on a URL"
scrapeURL(url: String!, ty: ScrapeContentType!): ScrapedContent

Expand All @@ -159,6 +165,8 @@ type Query {
scrapeSceneURL(url: String!): ScrapedScene
"Scrapes a complete gallery record based on a URL"
scrapeGalleryURL(url: String!): ScrapedGallery
"Scrapes a complete image record based on a URL"
scrapeImageURL(url: String!): ScrapedImage
"Scrapes a complete movie record based on a URL"
scrapeMovieURL(url: String!): ScrapedMovie

Expand Down
33 changes: 33 additions & 0 deletions graphql/schema/types/scraper.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ enum ScrapeType {
"Type of the content a scraper generates"
enum ScrapeContentType {
GALLERY
IMAGE
MOVIE
PERFORMER
SCENE
Expand All @@ -21,6 +22,7 @@ union ScrapedContent =
| ScrapedTag
| ScrapedScene
| ScrapedGallery
| ScrapedImage
| ScrapedMovie
| ScrapedPerformer

Expand All @@ -39,6 +41,8 @@ type Scraper {
scene: ScraperSpec
"Details for gallery scraper"
gallery: ScraperSpec
"Details for image scraper"
image: ScraperSpec
"Details for movie scraper"
movie: ScraperSpec
}
Expand Down Expand Up @@ -123,6 +127,26 @@ input ScrapedGalleryInput {
# no studio, tags or performers
}

# Image metadata produced by a scraper. Every field is nullable, so a
# scraper may populate any subset of them.
type ScrapedImage {
title: String
code: String
details: String
photographer: String
# list itself may be null; entries, when present, are non-null
urls: [String!]
# NOTE(review): date is a plain String here — expected format is not
# specified in this type; confirm against the scraper implementation
date: String
studio: ScrapedStudio
tags: [ScrapedTag!]
performers: [ScrapedPerformer!]
}

# Image fragment supplied as input to a scraper (see
# ScrapeSingleImageInput.image_input). All fields are optional.
input ScrapedImageInput {
title: String
code: String
details: String
urls: [String!]
date: String
# no photographer, studio, tags or performers
}

input ScraperSourceInput {
"Index of the configured stash-box instance to use. Should be unset if scraper_id is set"
stash_box_index: Int @deprecated(reason: "use stash_box_endpoint")
Expand Down Expand Up @@ -185,6 +209,15 @@ input ScrapeSingleGalleryInput {
gallery_input: ScrapedGalleryInput
}

# Input for scrapeSingleImage. Each field selects a different way of
# querying; all three are nullable.
# NOTE(review): the ScrapeSingleImage resolver in this commit appears to
# handle only image_id and image_input — confirm whether query is supported.
input ScrapeSingleImageInput {
"Instructs to query by string"
query: String
"Instructs to query by image id"
image_id: ID
"Instructs to query by image fragment"
image_input: ScrapedImageInput
}

input ScrapeSingleMovieInput {
"Instructs to query by string"
query: String
Expand Down
6 changes: 3 additions & 3 deletions internal/api/resolver_mutation_image.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ func (r *mutationResolver) getImage(ctx context.Context, id int) (ret *models.Im
return ret, nil
}

func (r *mutationResolver) ImageUpdate(ctx context.Context, input ImageUpdateInput) (ret *models.Image, err error) {
func (r *mutationResolver) ImageUpdate(ctx context.Context, input models.ImageUpdateInput) (ret *models.Image, err error) {
translator := changesetTranslator{
inputMap: getUpdateInputMap(ctx),
}
Expand All @@ -45,7 +45,7 @@ func (r *mutationResolver) ImageUpdate(ctx context.Context, input ImageUpdateInp
return r.getImage(ctx, ret.ID)
}

func (r *mutationResolver) ImagesUpdate(ctx context.Context, input []*ImageUpdateInput) (ret []*models.Image, err error) {
func (r *mutationResolver) ImagesUpdate(ctx context.Context, input []*models.ImageUpdateInput) (ret []*models.Image, err error) {
inputMaps := getUpdateInputMaps(ctx)

// Start the transaction and save the image
Expand Down Expand Up @@ -88,7 +88,7 @@ func (r *mutationResolver) ImagesUpdate(ctx context.Context, input []*ImageUpdat
return newRet, nil
}

func (r *mutationResolver) imageUpdate(ctx context.Context, input ImageUpdateInput, translator changesetTranslator) (*models.Image, error) {
func (r *mutationResolver) imageUpdate(ctx context.Context, input models.ImageUpdateInput, translator changesetTranslator) (*models.Image, error) {
imageID, err := strconv.Atoi(input.ID)
if err != nil {
return nil, fmt.Errorf("converting id: %w", err)
Expand Down
42 changes: 42 additions & 0 deletions internal/api/resolver_query_scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,15 @@ func (r *queryResolver) ScrapeGalleryURL(ctx context.Context, url string) (*scra
return marshalScrapedGallery(content)
}

// ScrapeImageURL scrapes an image record from the given URL. It requests
// image content from the scraper cache and marshals whatever comes back into
// a ScrapedImage. Errors from the scraper cache are returned unchanged.
func (r *queryResolver) ScrapeImageURL(ctx context.Context, url string) (*scraper.ScrapedImage, error) {
	cache := r.scraperCache()

	scraped, scrapeErr := cache.ScrapeURL(ctx, url, scraper.ScrapeContentTypeImage)
	if scrapeErr != nil {
		return nil, scrapeErr
	}

	return marshalScrapedImage(scraped)
}

func (r *queryResolver) ScrapeMovieURL(ctx context.Context, url string) (*models.ScrapedMovie, error) {
content, err := r.scraperCache().ScrapeURL(ctx, url, scraper.ScrapeContentTypeMovie)
if err != nil {
Expand Down Expand Up @@ -363,6 +372,39 @@ func (r *queryResolver) ScrapeSingleGallery(ctx context.Context, source scraper.
}
}

// ScrapeSingleImage scrapes a single image using the scraper identified by
// source.
//
// A source with StashBoxIndex set is rejected with ErrNotSupported, and a
// source without ScraperID is rejected with an ErrInput-wrapped error. The
// input is then dispatched in priority order:
//   - ImageID: the id is parsed as an integer and scraped via ScrapeID;
//   - ImageInput: the fragment is scraped via ScrapeFragment.
//
// Any other input (including one carrying only a query string) yields
// ErrNotImplemented.
func (r *queryResolver) ScrapeSingleImage(ctx context.Context, source scraper.Source, input ScrapeSingleImageInput) ([]*scraper.ScrapedImage, error) {
	if source.StashBoxIndex != nil {
		return nil, ErrNotSupported
	}

	if source.ScraperID == nil {
		return nil, fmt.Errorf("%w: scraper_id must be set", ErrInput)
	}

	// query by image id
	if input.ImageID != nil {
		id, convErr := strconv.Atoi(*input.ImageID)
		if convErr != nil {
			return nil, fmt.Errorf("%w: image id is not an integer: '%s'", ErrInput, *input.ImageID)
		}

		scraped, err := r.scraperCache().ScrapeID(ctx, *source.ScraperID, id, scraper.ScrapeContentTypeImage)
		if err != nil {
			return nil, err
		}

		return marshalScrapedImages([]scraper.ScrapedContent{scraped})
	}

	// query by image fragment
	if input.ImageInput != nil {
		scraped, err := r.scraperCache().ScrapeFragment(ctx, *source.ScraperID, scraper.Input{Image: input.ImageInput})
		if err != nil {
			return nil, err
		}

		return marshalScrapedImages([]scraper.ScrapedContent{scraped})
	}

	return nil, ErrNotImplemented
}

// ScrapeSingleMovie is not supported: regardless of source and input it
// returns a nil result and ErrNotSupported.
func (r *queryResolver) ScrapeSingleMovie(ctx context.Context, source scraper.Source, input ScrapeSingleMovieInput) ([]*models.ScrapedMovie, error) {
return nil, ErrNotSupported
}
31 changes: 31 additions & 0 deletions internal/api/scraped_content.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,27 @@ func marshalScrapedGalleries(content []scraper.ScrapedContent) ([]*scraper.Scrap
return ret, nil
}

// marshalScrapedImages converts ScrapedContent into ScrapedImage. Nil entries
// are dropped, so the result may be shorter than the input. If an entry is
// neither a ScrapedImage nor a pointer to one, a models.ErrConversion-wrapped
// error is returned.
func marshalScrapedImages(content []scraper.ScrapedContent) ([]*scraper.ScrapedImage, error) {
	var images []*scraper.ScrapedImage

	for i := range content {
		switch img := content[i].(type) {
		case nil:
			// graphql schema requires images to be non-nil, so skip the entry
		case *scraper.ScrapedImage:
			images = append(images, img)
		case scraper.ScrapedImage:
			// img is a fresh copy scoped to this clause; taking its address is safe
			images = append(images, &img)
		default:
			return nil, fmt.Errorf("%w: cannot turn ScrapedContent into ScrapedImage", models.ErrConversion)
		}
	}

	return images, nil
}

// marshalScrapedMovies converts ScrapedContent into ScrapedMovie. If conversion
// fails, an error is returned.
func marshalScrapedMovies(content []scraper.ScrapedContent) ([]*models.ScrapedMovie, error) {
Expand Down Expand Up @@ -129,6 +150,16 @@ func marshalScrapedGallery(content scraper.ScrapedContent) (*scraper.ScrapedGall
return g[0], nil
}

// marshalScrapedImage marshals a single piece of scraped content into a
// ScrapedImage.
//
// It returns a conversion error if content is not an image. If content is
// nil, it returns (nil, nil): marshalScrapedImages drops nil entries, so the
// resulting slice is empty and must not be indexed.
func marshalScrapedImage(content scraper.ScrapedContent) (*scraper.ScrapedImage, error) {
	images, err := marshalScrapedImages([]scraper.ScrapedContent{content})
	if err != nil {
		return nil, err
	}

	// nil content is filtered out by marshalScrapedImages; indexing the empty
	// result would panic with "index out of range"
	if len(images) == 0 {
		return nil, nil
	}

	return images[0], nil
}

// marshalScrapedMovie will marshal a single scraped movie
func marshalScrapedMovie(content scraper.ScrapedContent) (*models.ScrapedMovie, error) {
m, err := marshalScrapedMovies([]scraper.ScrapedContent{content})
Expand Down
22 changes: 22 additions & 0 deletions pkg/models/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,28 @@ type ImageFilterType struct {
UpdatedAt *TimestampCriterionInput `json:"updated_at"`
}

// ImageUpdateInput holds the fields accepted by the image update mutations.
// Apart from ID, every field is a pointer or a slice and may be nil.
// NOTE(review): presumably a nil field means "not provided" — confirm
// against the changeset translator used by the resolver.
type ImageUpdateInput struct {
ClientMutationID *string `json:"clientMutationId"`
// ID is the image id in string form; the resolver parses it as an integer.
ID string `json:"id"`
Title *string `json:"title"`
Code *string `json:"code"`
Urls []string `json:"urls"`
Date *string `json:"date"`
Details *string `json:"details"`
Photographer *string `json:"photographer"`
Rating100 *int `json:"rating100"`
Organized *bool `json:"organized"`
// Related object ids are carried as strings.
SceneIds []string `json:"scene_ids"`
StudioID *string `json:"studio_id"`
TagIds []string `json:"tag_ids"`
PerformerIds []string `json:"performer_ids"`
GalleryIds []string `json:"gallery_ids"`
PrimaryFileID *string `json:"primary_file_id"`

// Deprecated: presumably superseded by Urls (TODO confirm).
URL *string `json:"url"`
}

type ImageDestroyInput struct {
ID string `json:"id"`
DeleteFile *bool `json:"delete_file"`
Expand Down
1 change: 1 addition & 0 deletions pkg/scraper/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type scraperActionImpl interface {

scrapeSceneByScene(ctx context.Context, scene *models.Scene) (*ScrapedScene, error)
scrapeGalleryByGallery(ctx context.Context, gallery *models.Gallery) (*ScrapedGallery, error)
scrapeImageByImage(ctx context.Context, image *models.Image) (*ScrapedImage, error)
}

func (c config) getScraper(scraper scraperTypeConfig, client *http.Client, globalConfig GlobalConfig) scraperActionImpl {
Expand Down

0 comments on commit d7ddbe8

Please sign in to comment.