Skip to content

Commit

Permalink
Update scrapers
Browse files Browse the repository at this point in the history
  • Loading branch information
jvlflame committed Aug 29, 2020
1 parent e7ee92f commit 1a5fafa
Show file tree
Hide file tree
Showing 5 changed files with 174 additions and 166 deletions.
74 changes: 37 additions & 37 deletions src/Javinizer/Public/Get-DmmData.ps1
Expand Up @@ -2,7 +2,7 @@ function Get-DmmData {
[CmdletBinding()]
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true, ValueFromPipelineByPropertyName = $true)]
[string]$Url
[String]$Url
)

process {
Expand All @@ -16,7 +16,7 @@ function Get-DmmData {
Write-JLog -Level Error -Message "Error [GET] on URL [$dmmUrl]: $PSItem"
}

$movieDataObject = [pscustomobject]@{
$movieDataObject = [PSCustomObject]@{
Source = 'dmm'
Url = $dmmUrl
Id = Get-DmmContentId -WebRequest $webRequest
Expand Down Expand Up @@ -46,11 +46,11 @@ function Get-DmmData {
function Get-DmmContentId {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
$contentId = ((($WebRequest.Content -split '<td align="right" valign="top" class="nw">品番:<\/td>')[1] -split '<\/td>')[0] -split '<td>')[1]
$contentId = ((($Webrequest.Content -split '<td align="right" valign="top" class="nw">品番:<\/td>')[1] -split '<\/td>')[0] -split '<td>')[1]
$contentId = Convert-HtmlCharacter -String $contentId
Write-Output $contentId
}
Expand All @@ -59,11 +59,11 @@ function Get-DmmContentId {
function Get-DmmTitle {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
$title = (($WebRequest.Content -split '<h1 id="title" class="item fn">')[1] -split '<\/h1>')[0]
$title = (($Webrequest.Content -split '<h1 id="title" class="item fn">')[1] -split '<\/h1>')[0]
$title = Convert-HtmlCharacter -String $title
Write-Output $title
}
Expand All @@ -72,11 +72,11 @@ function Get-DmmTitle {
function Get-DmmDescription {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
$description = (($WebRequest.Content -split '<meta name="description" content=')[1] -split '\/>')[0]
$description = (($Webrequest.Content -split '<meta name="description" content=')[1] -split '\/>')[0]
# Remove the first 14 characters of the description string
# This will remove the 'Fanza' string prepending the description in the html
$description = $description.Substring(14)
Expand All @@ -93,11 +93,11 @@ function Get-DmmDescription {
function Get-DmmReleaseDate {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
$releaseDate = ((($WebRequest.Content -split '<td align="right" valign="top" class="nw">配信開始日:<\/td>')[1] -split '<\/td>')[0] -split '<td>')[1]
$releaseDate = ((($Webrequest.Content -split '<td align="right" valign="top" class="nw">配信開始日:<\/td>')[1] -split '<\/td>')[0] -split '<td>')[1]
$releaseDate = Convert-HtmlCharacter -String $releaseDate
$year, $month, $day = $releaseDate -split '/'
$releaseDate = Get-Date -Year $year -Month $month -Day $day -Format "yyyy-MM-dd"
Expand All @@ -108,11 +108,11 @@ function Get-DmmReleaseDate {
function Get-DmmReleaseYear {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
$releaseYear = Get-DmmReleaseDate -WebRequest $WebRequest
$releaseYear = Get-DmmReleaseDate -WebRequest $Webrequest
$releaseYear = ($releaseYear -split '-')[0]
Write-Output $releaseYear
}
Expand All @@ -121,11 +121,11 @@ function Get-DmmReleaseYear {
function Get-DmmRuntime {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
$length = ((($WebRequest.Content -split '<td align="right" valign="top" class="nw">収録時間:<\/td>')[1] -split '<\/td>')[0] -split '<td>')[1]
$length = ((($Webrequest.Content -split '<td align="right" valign="top" class="nw">収録時間:<\/td>')[1] -split '<\/td>')[0] -split '<td>')[1]
$length = ($length -split '')[0]
Write-Output $length
}
Expand All @@ -134,11 +134,11 @@ function Get-DmmRuntime {
function Get-DmmDirector {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
$director = ((($WebRequest.Content -split '監督:<\/td>')[1] -split '<\/a>')[0] -split '>')[2]
$director = ((($Webrequest.Content -split '監督:<\/td>')[1] -split '<\/a>')[0] -split '>')[2]
$director = Convert-HtmlCharacter -String $director

if ($director -eq '</tr') {
Expand All @@ -152,11 +152,11 @@ function Get-DmmDirector {
function Get-DmmMaker {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
$maker = ((($WebRequest.Content -split '<td align="right" valign="top" class="nw">メーカー:<\/td>')[1] -split '<\/a>')[0] -split '>')[2]
$maker = ((($Webrequest.Content -split '<td align="right" valign="top" class="nw">メーカー:<\/td>')[1] -split '<\/a>')[0] -split '>')[2]
$maker = Convert-HtmlCharacter -String $maker
Write-Output $maker
}
Expand All @@ -165,11 +165,11 @@ function Get-DmmMaker {
function Get-DmmLabel {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
$label = ((($WebRequest.Content -split '<td align="right" valign="top" class="nw">レーベル:<\/td>')[1] -split '<\/a>')[0] -split '>')[2]
$label = ((($Webrequest.Content -split '<td align="right" valign="top" class="nw">レーベル:<\/td>')[1] -split '<\/a>')[0] -split '>')[2]
$label = Convert-HtmlCharacter -String $label

if ($label -eq '</tr') {
Expand All @@ -183,11 +183,11 @@ function Get-DmmLabel {
function Get-DmmSeries {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
$series = ((($WebRequest.Content -split '<td align="right" valign="top" class="nw">シリーズ:<\/td>')[1] -split '<\/a>')[0] -split '>')[2]
$series = ((($Webrequest.Content -split '<td align="right" valign="top" class="nw">シリーズ:<\/td>')[1] -split '<\/a>')[0] -split '>')[2]
$series = Convert-HtmlCharacter -String $series

if ($series -eq '</tr') {
Expand All @@ -201,11 +201,11 @@ function Get-DmmSeries {
function Get-DmmRating {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
$rating = (((($WebRequest.Content -split '<p class="d-review__average">')[1] -split '<\/strong>')[0] -split '<strong>')[1] -split '')[0]
$rating = (((($Webrequest.Content -split '<p class="d-review__average">')[1] -split '<\/strong>')[0] -split '<strong>')[1] -split '')[0]
# Multiply the rating value by 2 to conform to 1-10 rating standard
$integer = [int]$rating * 2
if ($integer -eq 0) {
Expand All @@ -221,11 +221,11 @@ function Get-DmmRating {
function Get-DmmRatingCount {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
$ratingCount = (($WebRequest.Content -split '<p class="d-review__evaluates">')[1] -split '<\/p>')[0]
$ratingCount = (($Webrequest.Content -split '<p class="d-review__evaluates">')[1] -split '<\/p>')[0]
$ratingCount = (($ratingCount -split '<strong>')[1] -split '<\/strong>')[0]
Write-Output $ratingCount
}
Expand All @@ -234,20 +234,20 @@ function Get-DmmRatingCount {
function Get-DmmActress {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
$movieActressObject = @()
$actressHtml = ((($WebRequest.Content -split '出演者:<\/td>')[1] -split '<\/td>')[0] -split '<span id="performer">')[1]
$actressHtml = ((($Webrequest.Content -split '出演者:<\/td>')[1] -split '<\/td>')[0] -split '<span id="performer">')[1]
$actressHtml = $actressHtml -replace '<a href="\/digital\/videoa\/-\/list\/=\/article=actress\/id=(.*)\/">', ''
$actressHtml = $actressHtml -split '<\/a>', ''

if ($actressHtml[0] -ne '') {
foreach ($actress in $actressHtml) {
$actress = Convert-HtmlCharacter -String $actress
if ($actress -ne '') {
$movieActressObject += [pscustomobject]@{
$movieActressObject += [PSCustomObject]@{
LastName = $null
FirstName = $null
JapaneseName = $actress -replace '<\/a>', ''
Expand All @@ -266,12 +266,12 @@ function Get-DmmActress {
function Get-DmmGenre {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
$genreArray = @()
$genre = (((($WebRequest.Content -split 'ジャンル:<\/td>')[1] -split '<\/td>')[0] -split '<td>')[1] -split '">')
$genre = (((($Webrequest.Content -split 'ジャンル:<\/td>')[1] -split '<\/td>')[0] -split '<td>')[1] -split '">')
$genre = ($genre -replace '<\/a>', '') -replace '&nbsp;&nbsp;', ''
$genre = $genre -replace '<a href="\/digital\/videoa\/-\/list\/=\/article=keyword\/id=(.*)\/'

Expand All @@ -293,24 +293,24 @@ function Get-DmmGenre {
# Get-DmmCoverUrl: pulls the cover URL out of the page markup held in the
# .Content property of the supplied object.
#
# Parameter: a single mandatory object, bindable by position 0 or from the
#            pipeline; only its .Content property is read here (presumably
#            the response of a web request for a DMM page - TODO confirm).
# Output:    the extracted string, emitted with Write-Output.
#
# NOTE(review): this listing comes from a commit view, so the parameter
# declaration and the first $coverUrl assignment each appear twice: the
# earlier spelling followed by its replacement. Each pair differs only in
# letter casing ([object]/[Object], $WebRequest/$Webrequest).
function Get-DmmCoverUrl {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
# Three chained splits narrow the markup down to one href value:
#   1. keep the text after the '<div class="center" id="sample-video">' marker,
#   2. keep what precedes the first '" target' in that text,
#   3. keep what follows '<a href="' in the remainder.
$coverUrl = ((($WebRequest.Content -split '<div class="center" id="sample-video">')[1] -split '" target')[0] -split '<a href="')[1]
$coverUrl = ((($Webrequest.Content -split '<div class="center" id="sample-video">')[1] -split '" target')[0] -split '<a href="')[1]
# Convert-HtmlCharacter is defined elsewhere; presumably it decodes HTML
# entities in the extracted value - TODO confirm against its definition.
$coverUrl = Convert-HtmlCharacter -String $coverUrl
Write-Output $coverUrl
}
}
function Get-DmmScreenshotUrl {
param (
[Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true)]
[object]$WebRequest
[Object]$Webrequest
)

process {
$screenshotUrl = @()
$screenshotHtml = $WebRequest.Links | Where-Object { $_.name -eq 'sample-image' }
$screenshotHtml = $Webrequest.Links | Where-Object { $_.name -eq 'sample-image' }
$screenshotHtml = $screenshotHtml.'outerHTML'

foreach ($screenshot in $screenshotHtml) {
Expand All @@ -325,15 +325,15 @@ function Get-DmmScreenshotUrl {
# ! Unable to get the trailer URL via HTTP from the main DMM video page
<# function Get-DmmTrailerUrl {
param (
[object]$WebRequest
[Object]$Webrequest
)
begin {
$trailerUrl = @()
}
process {
$trailerHtml = $WebRequest.Content -split '\n'
$trailerHtml = $Webrequest.Content -split '\n'
$trailerHtml = $trailerHtml | Select-String -Pattern 'https:\/\/cc3001\.dmm\.co\.jp\/litevideo\/freepv' -AllMatches
foreach ($trailer in $trailerHtml) {
Expand Down

0 comments on commit 1a5fafa

Please sign in to comment.