# Manual testing of Scraper and DataCleaner
### Run the Scraper and DataCleaner manually, inspect the results, save the data to CSV files, and finally store the cleaned data in SQL.

In [1]:
import pandas as pd
from fetch_weekly_offers.utils.scraper import Scraper
from fetch_weekly_offers.utils.cleaner import DataCleaner
from fetch_weekly_offers.utils.funcs import set_up_logging, save_to_sql

In [2]:
# Weekly-offers page for ICA Maxi Stormarknad on ereklamblad.se.
url = "https://ereklamblad.se/ICA-Maxi-Stormarknad/erbjudanden"

# Instantiate Scraper with the URL to ICA Maxi's leaflet and call the scraping method.
# In the recorded run this found 223 offers; the count depends on the current leaflet.
scraper = Scraper(url)
scraped_data = scraper.scrape()

Number of offers found: 223


In [3]:
# Write the scraper's results to a CSV file.
# The recorded run reported the file name offers_2024-09-25.csv.
scraper.save_to_csv()

Data saved to offers_2024-09-25.csv


In [4]:
# Preview only the first few raw records instead of printing the whole list:
# the full dump is one very long line that buries the rest of the notebook.
# Leaving the slice as the cell's last expression lets Jupyter pretty-print it.
# Note that the raw strings still contain non-breaking spaces (\xa0) here.
scraped_data[:5]

[{'Name': 'Grönkålsblad i påse', 'Price': '25\xa0kr', 'Details': '400 g•62,50\xa0kr/kg', 'Store': 'ICA Maxi Stormarknad', 'ValidFrom': '2024-09-22T22:00:00.000Z', 'ValidThrough': '2024-09-29T22:00:00.000Z', 'ValidUntil': '2024-09-29T22:00:00.000Z'}, {'Name': 'Röda kärnfria druvor i ask', 'Price': '25\xa0kr', 'Details': '500 g•50\xa0kr/kg', 'Store': 'ICA Maxi Stormarknad', 'ValidFrom': '2024-09-22T22:00:00.000Z', 'ValidThrough': '2024-09-29T22:00:00.000Z', 'ValidUntil': '2024-09-29T22:00:00.000Z'}, {'Name': 'Babyplommontomater i ask', 'Price': '25\xa0kr', 'Details': '500 g•50\xa0kr/kg', 'Store': 'ICA Maxi Stormarknad', 'ValidFrom': '2024-09-22T22:00:00.000Z', 'ValidThrough': '2024-09-29T22:00:00.000Z', 'ValidUntil': '2024-09-29T22:00:00.000Z'}, {'Name': 'Rostbiff', 'Price': '149\xa0kr', 'Details': '1 kg•149\xa0kr/kg', 'Store': 'ICA Maxi Stormarknad', 'ValidFrom': '2024-09-22T22:00:00.000Z', 'ValidThrough': '2024-09-29T22:00:00.000Z', 'ValidUntil': '2024-09-29T22:00:00.000Z'}, {'Name': '

In [5]:
# Instantiate DataCleaner with the scraped data and call the cleaning method.
# In the recorded run, clean() returned a pandas DataFrame (see the .info() output below).
cleaner = DataCleaner(scraped_data)
clean_df = cleaner.clean()

In [6]:
# Eyeball the first ten cleaned rows: Price is numeric, the validity dates are
# trimmed to YYYY-MM-DD, and Quantity / ComparisonPrice appear as separate columns.
clean_df.head(10)

Unnamed: 0,Name,Price,Store,ValidFrom,ValidThrough,ValidUntil,Quantity,ComparisonPrice
0,Grönkålsblad i påse,25.0,ICA Maxi Stormarknad,2024-09-22,2024-09-29,2024-09-29,400 g,"62,50 kr/kg"
1,Röda kärnfria druvor i ask,25.0,ICA Maxi Stormarknad,2024-09-22,2024-09-29,2024-09-29,500 g,50 kr/kg
2,Babyplommontomater i ask,25.0,ICA Maxi Stormarknad,2024-09-22,2024-09-29,2024-09-29,500 g,50 kr/kg
3,Rostbiff,149.0,ICA Maxi Stormarknad,2024-09-22,2024-09-29,2024-09-29,1 kg,149 kr/kg
4,Bacon,79.0,ICA Maxi Stormarknad,2024-09-22,2024-09-29,2024-09-29,2x420 g,"94,05 kr/kg"
5,Avfallspåse,45.0,ICA Maxi Stormarknad,2024-09-22,2024-09-29,2024-09-29,3 stycken,15 kr/var
6,Diskborste,25.0,ICA Maxi Stormarknad,2024-09-22,2024-09-29,2024-09-29,3 stycken,"8,33 kr/var"
7,Stekpanna Jamie Oliver,299.0,ICA Maxi Stormarknad,2024-09-23,2024-10-14,2024-10-14,,
8,Kaffebryggare,599.0,ICA Maxi Stormarknad,2024-09-23,2024-10-14,2024-10-14,,
9,Ugnsform,69.9,ICA Maxi Stormarknad,2024-09-23,2024-10-14,2024-10-14,,


In [7]:
# Row count after cleaning. In the recorded run 167 rows remain of the 223
# offers the scraper reported, so the cleaner drops some records.
len(clean_df)

167

In [8]:
# Check column dtypes and null counts of the cleaned frame.
# NOTE(review), based on the recorded output:
#   - ValidFrom / ValidThrough / ValidUntil are object dtype, not datetime64.
#   - The index runs 0..222 with 167 entries, i.e. it is not reset after rows are dropped.
#   - Quantity reports no nulls although head() shows blank values for some rows;
#     presumably those are empty strings rather than NaN. Confirm in DataCleaner.
clean_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 167 entries, 0 to 222
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Name             167 non-null    object 
 1   Price            167 non-null    float64
 2   Store            167 non-null    object 
 3   ValidFrom        167 non-null    object 
 4   ValidThrough     167 non-null    object 
 5   ValidUntil       167 non-null    object 
 6   Quantity         167 non-null    object 
 7   ComparisonPrice  81 non-null     object 
dtypes: float64(1), object(7)
memory usage: 11.7+ KB


In [9]:
# Summary statistics. Price is the only numeric column, so it is the only one
# summarised in the recorded output.
clean_df.describe()

Unnamed: 0,Price
count,167.0
mean,169.690419
std,307.187106
min,12.0
25%,30.0
50%,59.9
75%,149.5
max,2199.0


In [10]:
# Write the cleaned data to CSV.
# The recorded run reported the file name cleaned_offers.csv.
cleaner.save_clean_data()

Cleaned data saved to cleaned_offers.csv


In [11]:
# Store the cleaned DataFrame in SQL via the project helper.
# NOTE(review): the target database and table are defined inside save_to_sql
# and are not visible in this notebook. Confirm them before running.
save_to_sql(clean_df)