Skip to content

Commit

Permalink
Merge pull request #39 from richard-muvirimi/development
Browse files Browse the repository at this point in the history
Add json data selector
  • Loading branch information
richard-muvirimi committed Apr 18, 2024
2 parents 3036316 + 7fb938d commit 9ea576d
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 33 deletions.
68 changes: 38 additions & 30 deletions app/Traits/ScrapesRates.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
use Exception;
use GuzzleHttp\Client;
use GuzzleHttp\Exception\GuzzleException;
use Illuminate\Support\Arr;
use Illuminate\Support\Carbon;
use Illuminate\Support\Facades\Cache;
use Illuminate\Support\Str;
use Matex\Evaluator;
use NumberFormatter;
use Symfony\Component\CssSelector\CssSelectorConverter;
use Symfony\Component\DomCrawler\Crawler;
use wapmorgan\TimeParser\TimeParser;

Expand All @@ -28,20 +28,22 @@ public function scrape(): void
{
$site = Cache::get($this->source_url, '');

$locale = 'en-US';

if (empty($site)) {
$site = $this->getHtmlContent();

if ($site === '') {
return;
}

$site = '<html lang="en-US"><body>'.$site.'</body></html>';
$site = "<html lang=\"$locale\"><body>$site</body></html>";

Cache::set($this->source_url, $site, CarbonInterval::minutes(30));
}

if (Str::of($site)->isNotEmpty()) {
$this->parseHtml($site);
$this->parseHtml($site, $locale);
}
}

Expand Down Expand Up @@ -114,40 +116,57 @@ private function getUserAgent(): string
/**
* Parse given html for required values
*/
private function parseHtml(string $html): void
private function parseHtml(string $html, string $locale = 'en-US'): void
{

try {
//get html dom
$crawler = new Crawler();
$crawler->addHtmlContent($html);

$converter = new CssSelectorConverter();

$selector = $this->rate_selector;
if (! $this->isXpath($selector)) {
$selector = $converter->toXPath($selector);
//rate
switch ($this->selector_type) {
case 'xpath':
$rate = $crawler->filterXPath($this->rate_selector)->text();
break;
case 'css':
$rate = $crawler->filter($this->rate_selector)->text();
break;
case 'json':
$data = json_decode($crawler->text(), true);
$rate = strval(Arr::get($data, $this->rate_selector));
break;
default:
throw new Exception('Unexpected selector type!');
}

//locale
$locale = $crawler->getNode(0)->getAttribute('lang');

//rate
$rate = $this->cleanRate($crawler->filterXPath($selector)->text(), $locale);
$rate = $this->cleanRate($rate ?? 0, $locale);

if ($rate) {
if ($this->rate !== $rate) {
$this->last_rate = $this->rate;
$this->rate = $rate;
}

$selector = $this->rate_updated_at_selector;
if (! $this->isXpath($selector)) {
$selector = $converter->toXPath($selector);
//date
$date = Carbon::now()->toDateTimeString();
switch ($this->selector_type) {
case 'xpath':
$date = $crawler->filterXPath($this->rate_updated_at_selector)->text();
break;
case 'css':
$date = $crawler->filter($this->rate_updated_at_selector)->text();
break;
case 'json':
$data = json_decode($crawler->text(), true);
$date = strval(Arr::get($data, $this->rate_updated_at_selector));
break;
default:
throw new Exception('Unexpected selector type!');
}

//date
$this->rate_updated_at = $this->cleanDate($crawler->filterXPath($selector)->text(), $this->source_timezone);
$this->rate_updated_at = $this->cleanDate($date, $this->source_timezone);

$this->status = true;
$this->status_message = '';

Expand All @@ -163,17 +182,6 @@ private function parseHtml(string $html): void
}
}

/**
 * Check if a given text is an xpath
 *
 * The literal values 'text' and 'comment' are always treated as XPath.
 * Any other selector counts as XPath when it begins with a token that
 * cannot start a CSS selector:
 *  - '/'  absolute path, which also covers the '//' descendant shorthand
 *  - './' path relative to the context node
 *  - '('  grouped expression such as "(//td)[1]"
 *
 * @param string $selector Selector text to classify.
 *
 * @return bool True when the selector should be evaluated as XPath.
 */
private function isXpath(string $selector): bool
{
    return match ($selector) {
        'text', 'comment' => true,
        // Previously only '//' was recognised, so absolute, relative and
        // grouped XPath expressions were mistaken for CSS selectors.
        default => str_starts_with($selector, '/')
            || str_starts_with($selector, './')
            || str_starts_with($selector, '('),
    };
}

/**
* Convert number to an int
*
Expand Down
1 change: 0 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
"madorin/matex": "^1.0",
"mll-lab/laravel-graphiql": "^3.1",
"nuwave/lighthouse": "^6.22",
"symfony/css-selector": "^6.3",
"symfony/dom-crawler": "^6.3",
"wapmorgan/time-parser": "^2.0",
"ext-pdo": "*"
Expand Down
4 changes: 2 additions & 2 deletions composer.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions database/migrations/2024_04_17_092016_add_selector_type_column.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?php

use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;

return new class extends Migration
{
    /**
     * Apply the migration.
     *
     * Adds a `selector_type` enum column ('css', 'xpath' or 'json') to the
     * `rates` table, placed after `rate_selector` and defaulting to 'css'.
     */
    public function up(): void
    {
        Schema::table('rates', static function (Blueprint $blueprint): void {
            $column = $blueprint->enum('selector_type', ['css', 'xpath', 'json']);

            $column->default('css')->after('rate_selector');
        });
    }

    /**
     * Roll the migration back by dropping the `selector_type` column
     * from the `rates` table.
     */
    public function down(): void
    {
        Schema::table('rates', static function (Blueprint $blueprint): void {
            $blueprint->dropColumn('selector_type');
        });
    }
};

0 comments on commit 9ea576d

Please sign in to comment.