# Scraping

## Init

In [16]:
import os
import sys
sys.path.insert(0, "../../")

from core import database, util
from core.scraping import Scrapper, RawDataRepo
from core.scraping.entities import RawData

# Initialize the project's database module before any cell below touches
# database.get_conn() or RawDataRepo.
# NOTE(review): what init() actually sets up is defined in core.database,
# which is not visible here — confirm there.
database.init()

## Scraping

In [17]:
# One Scrapper instance, shared by the scrap_params / scrap / scrap_bulk
# cells that follow.
scrapper = Scrapper()

### scrap_params

In [18]:
# Fetch the parameter set exposed by the scraper; the recorded output shows
# it is a core.scraping.entities.Params object.
params = scrapper.scrap_params()
# Bare name on the last line so the notebook displays the returned object.
params

<core.scraping.entities.Params at 0x8887948>

In [19]:
# Preview the first five entries of params.kabko, the list that is later
# passed to scrap_bulk. The recorded output starts with an empty string.
# NOTE(review): "kabko" presumably abbreviates kabupaten/kota
# (regency/city) — confirm.
params.kabko[:5]

['',
 '- (STATUS PENDING)',
 'AWAK BUAH KAPAL',
 'KAB. BANGKALAN',
 'KAB. BANYUWANGI']

### scrap

In [20]:
# Scrape a single kabko for a single date; the recorded output shows the
# call returns one RawData object.
# NOTE(review): the date string appears to be YYYY-MM-DD (the stored row for
# this kabko shows 2020-06-01 as June 1) — confirm against Scrapper.scrap.
result = scrapper.scrap('KOTA SURABAYA', '2020-06-01')
# Bare name on the last line so the notebook displays the returned object.
result

<core.scraping.entities.RawData at 0x7e66708>

In [21]:
# total() yields a single integer for the scraped record (19837 in the
# recorded run).
# NOTE(review): which fields it aggregates is defined on RawData, not here.
result.total()

19837

### scrap_bulk

In [22]:
# Bulk scrape: pass the full kabko list and a list of dates (a single date
# here). The recorded output shows a list of RawData objects.
results = scrapper.scrap_bulk(params.kabko, ['2020-06-01'])
# Display only the first five results to keep the cell output short.
results[:5]

[<core.scraping.entities.RawData at 0x8053308>,
 <core.scraping.entities.RawData at 0x888b408>,
 <core.scraping.entities.RawData at 0x80539c8>,
 <core.scraping.entities.RawData at 0x8054608>,
 <core.scraping.entities.RawData at 0x8054448>]

In [23]:
# Show the totals of the first five bulk results.
# Slice before the comprehension so total() is called only on the five
# records that are displayed, not on every scraped result.
# NOTE(review): assumes total() is a pure accessor with no side effects —
# confirm on RawData.
[x.total() for x in results[:5]]

[606123, 0, 0, 23231, 11162]

## Storage

### Entities Database Helper Methods

In [24]:
# Convert the scraped RawData into its database-row representation.
db_row = result.to_db_row()

In [25]:
# Convert the row back into a mapping; the next cell unpacks it as keyword
# arguments to the RawData constructor.
keyword_row = RawData.from_db_row(db_row)

In [26]:
# Rebuild a RawData instance from the keyword mapping produced by
# RawData.from_db_row.
result2 = RawData(**keyword_row)

In [27]:
# Round-trip check: the rebuilt object must serialize to the same row as the
# original (True in the recorded run).
result2.to_db_row() == db_row

True

### Database

In [28]:
# Dump every row currently stored in main.raw_covid_data so the table
# contents can be inspected directly, bypassing RawDataRepo.
select_all_sql = """
        SELECT * FROM main.raw_covid_data
    """

# The connection and the cursor are both used as context managers; nesting
# the blocks is equivalent to listing them in a single `with` statement.
with database.get_conn() as conn:
    with conn.cursor() as cur:
        cur.execute(select_all_sql)
        fetched_rows = cur.fetchall()
        print(fetched_rows)

[('KOTA SURABAYA', datetime.date(2020, 6, 1), 5407, 3297, 3711, 0, 3205, 0, 506, 448, 0, 58, 3711, 0, 3205, 0, 506, 448, 58, 3711, 506, 3205, 0, 0, 448, 0, 0)]


### fetch_kabko

In [29]:
# First five kabko names returned by the repository.
# NOTE(review): unlike params.kabko, the recorded output has no leading ''
# entry — presumably the repository omits it; confirm in RawDataRepo.
RawDataRepo.fetch_kabko()[:5]

['- (STATUS PENDING)',
 'AWAK BUAH KAPAL',
 'KAB. BANGKALAN',
 'KAB. BANYUWANGI',
 'KAB. BLITAR']

### fetch_kabko_dict

In [30]:
# Look up the value mapped to the empty-string key in the kabko dictionary
# ('JAWA TIMUR' in the recorded run).
RawDataRepo.fetch_kabko_dict()[""]

'JAWA TIMUR'

### save_data

In [31]:
# Persist the single scraped record; save_data is given a list of db rows.
# NOTE(review): whether re-running this cell inserts a duplicate or upserts
# is decided inside RawDataRepo.save_data — confirm there.
RawDataRepo.save_data([result.to_db_row()])

### fetch_data

In [32]:
# Load the records for one kabko; the recorded output shows a list of
# RawData objects.
RawDataRepo.fetch_data("KOTA SURABAYA")

[<core.scraping.entities.RawData at 0x8111188>]

### get_latest_tanggal

In [33]:
# Most recent "tanggal" (Indonesian for "date"); the recorded output shows a
# datetime.date.
# NOTE(review): presumably the latest date among stored rows — confirm.
RawDataRepo.get_latest_tanggal()

datetime.date(2020, 6, 1)