-
Notifications
You must be signed in to change notification settings - Fork 81
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1e298e5
commit ce2d62b
Showing
7 changed files
with
429 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
# Extractors | ||
```php | ||
$job->extract($type, $source, $options); | ||
``` | ||
* `$type`: the type of the extractor (array, csv, etc). | ||
* `$source`: the data source (path to a file, url or array). | ||
* `$options`: an array containing the extractor options. | ||
|
||
|
||
## Array | ||
### Syntax | ||
```php | ||
$job->extract('array', $array, $options); | ||
``` | ||
### Options | ||
| Name | Type | Default | Description | | ||
| ---- |----- | ------- | ----------- | | ||
| columns | array | null | Columns that will be extracted. | | ||
|
||
### Examples | ||
Extract all array columns: | ||
```php | ||
$job->extract('array', $array); | ||
``` | ||
Extract specific columns: | ||
```php | ||
$options = [ | ||
'columns' => ['id', 'name'] | ||
]; | ||
|
||
$job->extract('array', $array, $options); | ||
``` | ||
|
||
|
||
## CSV | ||
### Syntax | ||
```php | ||
$job->extract('csv', 'path/to/file.csv', $options); | ||
``` | ||
### Options | ||
| Name | Type | Default | Description | | ||
| ---- |----- | ------- | ----------- | | ||
| columns | array | null | Columns that will be extracted. | | ||
| delimiter | string | ';' | Delimiter that separates items. | | ||
| enclosure | string | '"' | The value enclosure. | | ||
|
||
### Examples | ||
Extract from a CSV file with columns header: | ||
```php | ||
$job->extract('csv', 'path/to/file.csv'); | ||
``` | ||
Extract from a CSV file using custom columns: | ||
```php | ||
$options = [ | ||
'columns' => [ | ||
'id' => 1, // Index of the column. The first column is 1. | ||
'name' => 2 | ||
] | ||
]; | ||
|
||
$job->extract('csv', 'path/to/file.csv', $options); | ||
``` | ||
|
||
|
||
## Fixed Width | ||
### Syntax | ||
```php | ||
$job->extract('fixedWidth', 'path/to/file.txt', $options); | ||
``` | ||
### Options | ||
| Name | Type | Default | Description | | ||
| ---- |----- | ------- | ----------- | | ||
| columns | array | null | Columns that will be extracted. | | ||
|
||
### Examples | ||
Extract from a fixed width text file: | ||
```php | ||
$options = [ | ||
'columns' => [ | ||
'id' => [0, 5], // Start position and length of column. | ||
'name' => [5, 40], | ||
] | ||
]; | ||
|
||
$job->extract('fixedWidth', 'path/to/file.txt', $options); | ||
``` | ||
|
||
|
||
## Json | ||
### Syntax | ||
```php | ||
$job->extract('json', 'path/to/file.json', $options); | ||
``` | ||
### Options | ||
| Name | Type | Default | Description | | ||
| ---- |----- | ------- | ----------- | | ||
| columns | array | null | Columns that will be extracted. | | ||
|
||
### Examples | ||
Extract from a Json file: | ||
```php | ||
$job->extract('json', 'path/to/file.json'); | ||
``` | ||
Extract from a Json file with custom attributes path: | ||
```php | ||
$options = [ | ||
'columns' => [ | ||
'id' => '$..bindings[*].id.value', | ||
'name' => '$..bindings[*].name.value' | ||
] | ||
]; | ||
|
||
$job->extract('json', 'path/to/file.json', $options); | ||
``` | ||
|
||
|
||
## Query | ||
### Syntax | ||
```php | ||
$job->extract('query', 'select * from table', $options); | ||
``` | ||
### Options | ||
| Name | Type | Default | Description | | ||
| ---- |----- | ------- | ----------- | | ||
| columns | array | null | Columns that will be extracted. | | ||
| connection | string | 'default' | Name of the database connection to use. | | ||
|
||
### Examples | ||
Extract from a database table using a custom query: | ||
```php | ||
$query = 'select * from users'; | ||
|
||
$job->extract('query', $query); | ||
``` | ||
Extract from a database table using a custom query and bindings: | ||
```php | ||
$query = 'select * from users where status = ?'; | ||
$options = [ | ||
'bindings' => ['active'] | ||
]; | ||
|
||
$job->extract('query', $query, $options); | ||
``` | ||
|
||
|
||
## Table | ||
### Syntax | ||
```php | ||
$job->extract('table', 'table_name', $options); | ||
``` | ||
### Options | ||
| Name | Type | Default | Description | | ||
| ---- |----- | ------- | ----------- | | ||
| columns | array | null | Columns that will be extracted. | | ||
| connection | string | 'default' | Name of the database connection to use. | | ||
| where | array | [] | Array of where clauses. For example: `['status' => 'active']`. | | ||
|
||
### Examples | ||
Extract from a database table: | ||
```php | ||
$job->extract('table', 'table_name'); | ||
``` | ||
Extract specific columns from a database table and a where clause: | ||
```php | ||
$options = [ | ||
'columns' => ['id', 'name'], | ||
'where' => ['status' => 'active'] | ||
]; | ||
|
||
$job->extract('table', 'table_name', $options); | ||
``` | ||
|
||
|
||
## XML | ||
### Syntax | ||
```php | ||
$job->extract('xml', 'path/to/file.xml', $options); | ||
``` | ||
### Options | ||
| Name | Type | Default | Description | | ||
| ---- |----- | ------- | ----------- | | ||
| columns | array | null | Columns that will be extracted. | | ||
| loop | string | '/' | The path to loop. | | ||
|
||
### Examples | ||
Extract from an XML file: | ||
```php | ||
$job->extract('xml', 'path/to/file.xml'); | ||
``` | ||
Extract from an XML file with custom attributes and loop path: | ||
```php | ||
$options = [ | ||
'columns' => [ | ||
'id' => 'id/value', | ||
'name' => 'name/value', | ||
], | ||
'loop' => '/users/user' | ||
]; | ||
|
||
$job->extract('xml', 'path/to/file.xml', $options); | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
# PHP ETL | ||
Extract, Transform and Load data using PHP. | ||
|
||
|
||
## Installation | ||
In your application's folder, run: | ||
``` | ||
composer require marquine/php-etl | ||
``` | ||
|
||
|
||
## Setup | ||
### Global configuration | ||
Global configuration can be set using the `config` method. You can skip this configuration and use the full path when working with files. | ||
```php | ||
$config = [ | ||
'default_path' => '/path/to/etl/files', | ||
]; | ||
|
||
Etl::config($config); | ||
``` | ||
|
||
### Database | ||
SQLite connection: | ||
```php | ||
$connection = [ | ||
'driver' => 'sqlite', | ||
'database' => '/path/to/database.sqlite' | ||
]; | ||
``` | ||
|
||
Adding connections: | ||
```php | ||
use Marquine\Etl\Etl; | ||
|
||
// default connection | ||
Etl::addConnection($connection); | ||
|
||
// named connection | ||
Etl::addConnection($connection, 'connection_name'); | ||
``` | ||
|
||
## Laravel Setup | ||
If you are using Laravel, PHP ETL provides a default configuration file and will register all supported connections of your application. | ||
|
||
Add the ServiceProvider to the `providers` array in `config/app.php` file: | ||
```php | ||
Marquine\Etl\Providers\Laravel\EtlServiceProvider::class, | ||
``` | ||
|
||
Publish the configuration file (`config/etl.php`) using the artisan command: | ||
``` | ||
php artisan vendor:publish --provider="Marquine\Etl\Providers\Laravel\EtlServiceProvider" | ||
``` | ||
|
||
## Example | ||
In the example below, we will extract data from a csv file, trim two columns and load the data into a database: | ||
```php | ||
use Marquine\Etl\Job; | ||
|
||
Job::start()->extract('csv', 'path/to/file.csv') | ||
->transform('trim', ['columns' => ['name', 'email']]) | ||
->load('table', 'users'); | ||
``` | ||
or | ||
```php | ||
use Marquine\Etl\Job; | ||
|
||
$job = new Job; | ||
$job->extract('csv', 'path/to/file.csv') | ||
->transform('trim', ['columns' => ['name', 'email']]) | ||
->load('table', 'users'); | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# Loaders | ||
```php | ||
$job->load($type, $destination, $options); | ||
``` | ||
* `$type`: the type of the loader (table, etc). | ||
* `$destination`: the destination of the data (table_name, etc). | ||
* `$options`: an array containing the loader options. | ||
|
||
|
||
## Table | ||
### Syntax | ||
```php | ||
$job->load('table', $destination, $options); | ||
``` | ||
### Options | ||
| Name | Type | Default | Description | | ||
| ---- | ------ |-------- | ----------- | | ||
| connection | string | 'default' | Name of the database connection. | | ||
| keys | array\|string | ['id'] | List of primary keys or identifiers of the table. | | ||
| insert | boolean | true | Insert rows that are in the source but not in the destination table. | | ||
| update | boolean | true | Update rows (based on the `keys` option) that are in both source and destination and have new values from the source. | | ||
| delete | boolean\|string | false | Delete rows that are in destination table but not in the source. If set to `soft`, the row will not be deleted and the column deleted_at will be set to the current timestamp. | | ||
| skipDataCheck | boolean | false | Do not check table current data before `insert`, `update` and `delete` statements execution. | | ||
| forceUpdate | boolean | false | Do not check for differences between source and destination when updating. | | ||
| timestamps | boolean | false | Use `created_at` and `updated_at` columns when inserting or updating. | | ||
| transaction | boolean\|int | 100 | Transaction size. Set to `false` to execute statements without transactions. | | ||
|
||
### Examples | ||
Load data to a database table: | ||
```php | ||
$job->load('table', 'table_name'); | ||
``` | ||
Load data to a database table using timestamps and custom primary key: | ||
```php | ||
$options = [ | ||
'timestamps' => true, | ||
'keys' => ['id', 'company_id'] | ||
]; | ||
|
||
$job->load('table', 'table_name', $options); | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# Transformers | ||
```php | ||
$job->transform($type, $options); | ||
``` | ||
* `$type`: the type of the transformer (trim, etc). | ||
* `$options`: an array containing the transformer options. | ||
|
||
|
||
## Trim | ||
### Syntax | ||
```php | ||
$job->transform('trim', $options); | ||
``` | ||
### Options | ||
| Name | Type | Default | Description | | ||
| ---- | ------ |-------- | ----------- | | ||
| type | string | 'both' | The options `trim`, `all` or `both` will trim both sides; `ltrim`, `start` or `left` will trim the left side; and `rtrim`, `end` or `right` will trim the right side of the string. | | ||
| mask | string | " \t\n\r\0\x0B" | The stripped characters. Simply list all characters that you want to be stripped. With .. you can specify a range of characters. | | ||
|
||
### Examples | ||
Strip whitespace from the beginning and end of a string in all transformation columns: | ||
```php | ||
$job->transform('trim'); | ||
``` | ||
Strip pipes from the beginning of a string in specific transformation columns: | ||
```php | ||
$options = [ | ||
'columns' => ['id', 'name'], | ||
'type' => 'left', | ||
'mask' => '|' | ||
]; | ||
|
||
$job->transform('trim', $options); | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# Utilities | ||
```php | ||
$job->utility($type, $options); | ||
``` | ||
* `$type`: the type of the utility (command, etc). | ||
* `$options`: an array containing the utility options. | ||
|
||
|
||
## Command | ||
### Syntax | ||
```php | ||
$job->utility('command', $options); | ||
``` | ||
### Options | ||
| Name | Type | Default | Description | | ||
| ---- | ------ |-------- | ----------- | | ||
| command | string | null | Command to be executed. | | ||
| commands | array | null | Array of commands to be executed. | | ||
|
||
### Examples | ||
Execute a command: | ||
```php | ||
$options = [ | ||
'command' => 'cp /path/to/file.csv /new/path/file.csv' | ||
]; | ||
|
||
$job->utility('command', $options); | ||
``` | ||
Execute multiple commands: | ||
```php | ||
$options = [ | ||
'commands' => [ | ||
'cp /path/to/file.csv /new/path/file.csv', | ||
'chmod 777 /new/path/file.csv' | ||
] | ||
]; | ||
|
||
$job->utility('command', $options); | ||
``` |
Oops, something went wrong.