Skip to content

Commit

Permalink
Update scrapers
Browse files Browse the repository at this point in the history
  • Loading branch information
jvlflame committed Aug 11, 2020
1 parent fb8ba90 commit be1743d
Show file tree
Hide file tree
Showing 7 changed files with 163 additions and 110 deletions.
15 changes: 10 additions & 5 deletions src/Javinizer/Public/Get-DmmDataObject.ps1
Expand Up @@ -238,7 +238,7 @@ function Get-DmmActress {
)

process {
$actressArray = @()
$movieActressObject = @()
$actressHtml = ((($WebRequest.Content -split '出演者:<\/td>')[1] -split '<\/td>')[0] -split '<span id="performer">')[1]
$actressHtml = $actressHtml -replace '<a href="\/digital\/videoa\/-\/list\/=\/article=actress\/id=(.*)\/">', ''
$actressHtml = $actressHtml -split '<\/a>', ''
Expand All @@ -247,14 +247,19 @@ function Get-DmmActress {
foreach ($actress in $actressHtml) {
$actress = Convert-HtmlCharacter -String $actress
if ($actress -ne '') {
$actressArray += $actress -replace '<\/a>', ''
$movieActressObject += [pscustomobject]@{
LastName = $null
FirstName = $null
JapaneseName = $actress -replace '<\/a>', ''
ThumbUrl = $null
}
}
}
Write-Output $actressArray
} else {
$actressArray = $null
Write-Output $actressArray
$movieActressObject = $null
}
Write-Output $movieActressObject

}
}

Expand Down
89 changes: 57 additions & 32 deletions src/Javinizer/Public/Get-JavbusDataObject.ps1
Expand Up @@ -17,23 +17,22 @@ function Get-JavbusDataObject {
}

$movieDataObject = [pscustomobject]@{
Source = 'javbus'
Url = $Url
Id = Get-JavbusId -WebRequest $webRequest
Title = Get-JavbusTitle -WebRequest $webRequest
Date = Get-JavbusReleaseDate -WebRequest $webRequest
Year = Get-JavbusReleaseYear -WebRequest $webRequest
Runtime = Get-JavbusRuntime -WebRequest $webRequest
Director = Get-JavbusDirector -WebRequest $webRequest
Maker = Get-JavbusMaker -WebRequest $webRequest
Label = Get-JavbusLabel -WebRequest $webRequest
Series = Get-JavbusSeries -WebRequest $webRequest
Rating = Get-JavbusRating -WebRequest $webRequest
Actress = (Get-JavbusActress -WebRequest $webRequest).Name
ActressThumbUrl = (Get-JavbusActress -WebRequest $webRequest).ThumbUrl
Genre = Get-JavbusGenre -WebRequest $webRequest
CoverUrl = Get-JavbusCoverUrl -WebRequest $webRequest
ScreenshotUrl = Get-JavbusScreenshotUrl -WebRequest $webRequest
Source = 'javbus'
Url = $Url
Id = Get-JavbusId -WebRequest $webRequest
Title = Get-JavbusTitle -WebRequest $webRequest
Date = Get-JavbusReleaseDate -WebRequest $webRequest
Year = Get-JavbusReleaseYear -WebRequest $webRequest
Runtime = Get-JavbusRuntime -WebRequest $webRequest
Director = Get-JavbusDirector -WebRequest $webRequest
Maker = Get-JavbusMaker -WebRequest $webRequest
Label = Get-JavbusLabel -WebRequest $webRequest
Series = Get-JavbusSeries -WebRequest $webRequest
Rating = Get-JavbusRating -WebRequest $webRequest
Actress = Get-JavbusActress -WebRequest $webRequest
Genre = Get-JavbusGenre -WebRequest $webRequest
CoverUrl = Get-JavbusCoverUrl -WebRequest $webRequest
ScreenshotUrl = Get-JavbusScreenshotUrl -WebRequest $webRequest
}

Write-JLog -Level Debug -Message "JavBus data object: $($movieDataObject | ConvertTo-Json -Depth 32 -Compress)"
Expand Down Expand Up @@ -248,26 +247,52 @@ function Get-JavbusActress {
)

process {
$actress = @()
$actresses = @()
$movieActressObject = @()
$textInfo = (Get-Culture).TextInfo

try {
$actress = ($WebRequest | ForEach-Object { $_ -split '\n' } |
Select-String '<a href="(.*)\/star\/(.*)">(.*)<\/a>').Matches |
ForEach-Object { $_.Groups[3].Value } |
Where-Object { $_ -ne '' } |
Select-Object -Unique

$actressThumb = ($WebRequest | ForEach-Object { $_ -split '\n' } |
Select-String '<a href="(.*)\/star\/(.*)"><img src="(.*)" title="(.*)"><\/a>').Matches |
ForEach-Object { $_.Groups[3].Value } |
Where-Object { $_ -ne '' }

try {
$actresses = ($WebRequest | Select-String -AllMatches -Pattern '<a href="https:\/\/www\.javbus\.com\/(?:.*)\/star\/(?:.*)"><img src="(.*)" title="(.*)"><\/a>').Matches
} catch {
return
}

$movieActressObject = [pscustomobject]@{
Name = $actress
ThumbUrl = $actressThumb
foreach ($actress in $actresses) {
$thumbUrl = $actress.Groups[1].Value
if ($thumbUrl -like '*nowprinting*' -or $thumbUrl -like '*now_printing*') {
$thumbUrl = $null
}

# Match if the name contains Japanese characters
if ($actress.Groups[2].Value -match '[\u3040-\u309f]|[\u30a0-\u30ff]|[\uff66-\uff9f]|[\u4e00-\u9faf]') {
$movieActressObject += [pscustomobject]@{
LastName = $null
FirstName = $null
JapaneseName = $actress.Groups[2].Value
ThumbUrl = $thumbUrl
}
} else {
$firstName = ($actress.Groups[2].Value -split ' ')[1]
if ($null -ne $firstName) {
$firstName = $textInfo.ToTitleCase($firstName.ToLower())
}

$lastName = ($actress.Groups[2].Value -split ' ')[0]
if ($null -ne $lastName) {
$lastName = $textInfo.ToTitleCase($lastName.ToLower())
}

$movieActressObject += [pscustomobject]@{
LastName = $lastName
FirstName = $firstName
JapaneseName = $null
ThumbUrl = $thumbUrl
}
}
}
} catch {
Write-Error $_
return
}

Expand Down
2 changes: 1 addition & 1 deletion src/Javinizer/Public/Get-JavbusUrl.ps1
Expand Up @@ -3,7 +3,7 @@ function Get-JavbusUrl {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[string]$Id,
[Parameter(Mandatory = $true)]
[Parameter(Mandatory = $true, Position = 1)]
[ValidateSet('ja', 'en', 'zh')]
[string]$Language
)
Expand Down
105 changes: 62 additions & 43 deletions src/Javinizer/Public/Get-JavlibraryDataObject.ps1
Expand Up @@ -18,27 +18,28 @@ function Get-JavlibraryDataObject {
$movieDataObject = [pscustomobject]@{
Source = 'javlibrary'
Url = $Url
Id = Get-JLId -WebRequest $webRequest
AjaxId = Get-JLAjaxId -WebRequest $webRequest
Title = Get-JLTitle -WebRequest $webRequest
Date = Get-JLReleaseDate -WebRequest $webRequest
Year = Get-JLReleaseYear -WebRequest $webRequest
Runtime = Get-JLRuntime -WebRequest $webRequest
Director = Get-JLDirector -WebRequest $webRequest
Maker = Get-JLMaker -WebRequest $webRequest
Label = Get-JLLabel -WebRequest $webRequest
Rating = Get-JLRating -WebRequest $webRequest
Actress = Get-JLActress -WebRequest $webRequest
Genre = Get-JLGenre -WebRequest $webRequest
CoverUrl = Get-JLCoverUrl -WebRequest $webRequest
ScreenshotUrl = Get-JLScreenshotUrl -WebRequest $webRequest
Id = Get-JavlibraryId -WebRequest $webRequest
AjaxId = Get-JavlibraryAjaxId -WebRequest $webRequest
Title = Get-JavlibraryTitle -WebRequest $webRequest
Date = Get-JavlibraryReleaseDate -WebRequest $webRequest
Year = Get-JavlibraryReleaseYear -WebRequest $webRequest
Runtime = Get-JavlibraryRuntime -WebRequest $webRequest
Director = Get-JavlibraryDirector -WebRequest $webRequest
Maker = Get-JavlibraryMaker -WebRequest $webRequest
Label = Get-JavlibraryLabel -WebRequest $webRequest
Rating = Get-JavlibraryRating -WebRequest $webRequest
Actress = Get-JavlibraryActress -WebRequest $webRequest
Genre = Get-JavlibraryGenre -WebRequest $webRequest
CoverUrl = Get-JavlibraryCoverUrl -WebRequest $webRequest
ScreenshotUrl = Get-JavlibraryScreenshotUrl -WebRequest $webRequest
}

Write-JLog -Level Debug -Message "JAVLibrary data object: $($movieDataObject | ConvertTo-Json -Depth 32 -Compress)"
Write-Output $movieDataObject
}
}
function Get-JLId {

function Get-JavlibraryId {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
Expand All @@ -50,7 +51,7 @@ function Get-JLId {
}
}

function Get-JLAjaxId {
function Get-JavlibraryAjaxId {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
Expand All @@ -68,7 +69,7 @@ function Get-JLAjaxId {
}
}

function Get-JLTitle {
function Get-JavlibraryTitle {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
Expand All @@ -81,7 +82,7 @@ function Get-JLTitle {
}
}

function Get-JLReleaseDate {
function Get-JavlibraryReleaseDate {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
Expand All @@ -93,20 +94,20 @@ function Get-JLReleaseDate {
}
}

function Get-JLReleaseYear {
function Get-JavlibraryReleaseYear {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
)

process {
$releaseYear = Get-JLReleaseDate -WebRequest $WebRequest
$releaseYear = Get-JavlibraryReleaseDate -WebRequest $WebRequest
$releaseYear = ($releaseYear -split '-')[0]
Write-Output $releaseYear
}
}

function Get-JLRuntime {
function Get-JavlibraryRuntime {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
Expand All @@ -118,7 +119,7 @@ function Get-JLRuntime {
}
}

function Get-JLDirector {
function Get-JavlibraryDirector {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
Expand All @@ -131,12 +132,13 @@ function Get-JLDirector {
} else {
$director = $null
}

$director = Convert-HtmlCharacter -String $director
Write-Output $director
}
}

function Get-JLMaker {
function Get-JavlibraryMaker {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
Expand All @@ -149,7 +151,7 @@ function Get-JLMaker {
}
}

function Get-JLLabel {
function Get-JavlibraryLabel {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
Expand All @@ -162,7 +164,7 @@ function Get-JLLabel {
}
}

function Get-JLRating {
function Get-JavlibraryRating {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
Expand All @@ -175,7 +177,7 @@ function Get-JLRating {
}
}

function Get-JLGenre {
function Get-JavlibraryGenre {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
Expand All @@ -201,35 +203,52 @@ function Get-JLGenre {
}
}

function Get-JLActress {
function Get-JavlibraryActress {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
)

process {
$actress = @()
$actressSplitString = '<span class="star">'
$actressSplitHtml = $WebRequest.Content -split $actressSplitString

foreach ($section in $actressSplitHtml) {
$fullName = (($section -split "rel=`"tag`">")[1] -split "<\/a><\/span>")[0]
if ($fullName -ne '') {
if ($fullName.Length -lt 25) {
$actress += $fullName
}
}
$movieActressObject = @()

try {
$movieActress = ($WebRequest.Content | Select-String -Pattern '<a href="vl_star\.php\?s=(?:.*)" rel="tag">(.*)<\/a><\/span>').Matches.Groups[1].Value
} catch {
return
}

if ($actress.Count -eq 0) {
$actress = $null
foreach ($actress in $movieActress) {
if ($actress -match '[\u3040-\u309f]|[\u30a0-\u30ff]|[\uff66-\uff9f]|[\u4e00-\u9faf]') {
$movieActressObject += [pscustomobject]@{
LastName = $null
FirstName = $null
JapaneseName = $actress
ThumbUrl = $null
}
} else {
$nameParts = ($actress -split ' ').Count
if ($nameParts -eq 1) {
$lastName = $null
$firstName = $actress
} else {
$lastName = ($actress -split ' ')[0]
$firstName = ($actress -split ' ')[1]
}
$movieActressObject += [pscustomobject]@{
LastName = $lastName
FirstName = $firstName
JapaneseName = $null
ThumbUrl = $null
}
}
}

Write-Output $actress
Write-Output $movieActressObject
}
}

function Get-JLCoverUrl {
function Get-JavlibraryCoverUrl {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
Expand All @@ -246,7 +265,7 @@ function Get-JLCoverUrl {
}
}

function Get-JLScreenshotUrl {
function Get-JavlibraryScreenshotUrl {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
Expand Down

0 comments on commit be1743d

Please sign in to comment.