In [1]:
import requests
import os
import json
import pandas as pd
from dotenv import load_dotenv
from google.cloud import storage

# Setting up the GCP infrastructure

In [2]:
# Load environment variables from .env file
load_dotenv()

# Check if the environment variable is set
if "GOOGLE_APPLICATION_CREDENTIALS" not in os.environ:
    raise ValueError("GOOGLE_APPLICATION_CREDENTIALS not found in environment variables.")

#os.environ['GOOGLE_APPLICATION_CREDENTIALS'] 

'credentials/farm-screener-842209070941.json'

In [3]:
raw_data_bucket = os.environ["GOOGLE_RAW_DATA_BUCKET"] 
#raw_data_bucket = "farm-screener-raw"

In [4]:
# Create a client
client = storage.Client()

In [5]:

# List buckets
print("Buckets:")
for bucket in client.list_buckets():
    print(bucket.name)

Buckets:
farm-screener-raw


In [6]:
def upload_to_gcs(data, bucket_name, file_name):
    try:
        client = storage.Client()
        bucket = client.bucket(bucket_name)
        blob = bucket.blob(file_name)
        blob.upload_from_string(json.dumps(data))
        print(f"Data uploaded to GCS: gs://{bucket_name}/{file_name}")
    except Exception as e:
        print(f"Error uploading data to GCS: {e}")

In [7]:
import requests

def download_data(url):
    try:
        response = requests.get(url)
        if response.status_code == 200:
            return response.json()
        else:
            print(f"Failed to download data. Status code: {response.status_code}")
            return None
    except requests.exceptions.RequestException as e:
        print(f"Error downloading data: {e}")
        return None


In [8]:

url = "https://www.hofsuche.schweizerbauern.ch/api/de/farms?offset=24&project=vomhof&size=24"
data = download_data(url)
if data:
    print("Data downloaded successfully!")
    # Now you can work with the downloaded data here

    for item in data["hits"]:
            # Add additional fields here as needed
            item['timestamp'] = 'value'
            
            # Upload the modified data item to GCS
            file_name = f"{item['_id']}.json"  # Using the 'id' field as the filename
            upload_to_gcs(item, raw_data_bucket, file_name)

else:
    print("Failed to download data. Please check the URL and try again.")


Data downloaded successfully!
Data uploaded to GCS: gs://farm-screener-raw/14959.json
Data uploaded to GCS: gs://farm-screener-raw/14953.json
Data uploaded to GCS: gs://farm-screener-raw/14923.json
Data uploaded to GCS: gs://farm-screener-raw/14944.json
Data uploaded to GCS: gs://farm-screener-raw/14839.json
Data uploaded to GCS: gs://farm-screener-raw/14940.json
Data uploaded to GCS: gs://farm-screener-raw/14941.json
Data uploaded to GCS: gs://farm-screener-raw/14950.json
Data uploaded to GCS: gs://farm-screener-raw/14929.json
Data uploaded to GCS: gs://farm-screener-raw/14921.json
Data uploaded to GCS: gs://farm-screener-raw/14928.json
Data uploaded to GCS: gs://farm-screener-raw/14937.json
Data uploaded to GCS: gs://farm-screener-raw/14932.json
Data uploaded to GCS: gs://farm-screener-raw/14915.json
Data uploaded to GCS: gs://farm-screener-raw/14918.json
Data uploaded to GCS: gs://farm-screener-raw/14911.json
Data uploaded to GCS: gs://farm-screener-raw/14912.json
Data uploaded to G

In [11]:
def get_farm_data(item):
    farm ={}
    farm["farm_id"] = item["_id"]
    farm["farm_name"] = item["_source"]["farmname"]
    farm["first_name"] = item["_source"]["first_name"]
    farm["last_name"] = item["_source"]["last_name"]
    farm["street"] = item["_source"]["street"]
    farm["zip"] = item["_source"]["zip"]
    farm["city"] = item["_source"]["city"]
    farm["canton"] = item["_source"]["canton"]["short"]
    farm["telephone"] = item["_source"]["telephone"]
    farm["mobile"] = item["_source"]["mobile"]
    farm["email"] = item["_source"]["email"]
    farm["website"] = item["_source"]["website"]
    farm["facebook_link"] = item["_source"]["facebook_link"]
    
    return farm

In [12]:
farm_data = []
product_offers = list()

for i, item in enumerate(data["hits"]):
    farm_data.append(get_farm_data(item))
    offers = item["_source"]["vomhof"]["offers"]
    for offer in offers:
        product = offer["product"]
        product_id = product["id"]
        product_name = product["name"]
        availability = offer["availability"]
        if len(availability) > 0:
            product_availability = [month["name"] for month in availability]
        else:
            product_availability = "Not specified"

        product_offers.append((farm_id, product_id, product_name, product_availability))
        #print(product)
        #print(product_availability)

In [13]:
columns = ["farm_id", "product_id", "product_name", "product_availability"]
product_df = pd.DataFrame.from_records(product_offers, columns=columns)
product_df

Unnamed: 0,farm_id,product_id,product_name,product_availability
0,14875,147,Äpfel,Not specified
1,14875,148,Birnen,Not specified
2,14875,38,Blumenkohl,"[Mai, Juni, Juli, August, September, Oktober, ..."
3,14875,40,Broccoli,"[Mai, Juni, Juli, August, September, Oktober, ..."
4,14875,158,Brombeeren,Not specified
...,...,...,...,...
181,14875,26,Rindfleisch,Not specified
182,14875,193,Teigwaren,Not specified
183,14875,16,Wurstwaren,Not specified
184,14875,185,Zopf,Not specified


In [18]:
farm_df = pd.DataFrame.from_records(farm_data)
farm_df

Unnamed: 0,farm_id,farm_name,first_name,last_name,street,zip,city,canton,telephone,mobile,email,website,facebook_link
0,14959,Hofladen Bötsch,Sarina,Hofmann-Bötsch,Hungerbühl 36,8599,Salmsach,TG,071 463 55 61,,buero@boetsch-gemuese.ch,https://boetsch-gemuese.ch/hofladen,
1,14953,Krähenbühl Grunderhof,Bernhard & Christa,Krähenbühl-Liechti,Hünigenstrasse 30,3504,Oberhünigen,BE,031 791 14 08,078 926 22 78,cb.kraehenbuehl@bluewin.ch,http://www.grunderhof.com,
2,14923,Natur-Hof,Albert,von Felten,Im Eich 2b,5079,Zeihen,AG,,079 327 80 41,info@natura-hof.ch,https://natura-hof.ch,https://www.facebook.com/natura-hof
3,14944,Biohof Aegerten,Rebekka,Richartz,Aegerten 8,8634,Hombrechtikon,ZH,,,info@biohof-aegerten.ch,http://www.biohof-aegerten.ch,https://facebook.com/biohof.aegerten
4,14839,Gisler Churer Spargeln und Steinobst,Gabriela,Gisler,Malixerstrasse 85,7000,Chur,GR,081 284 69 62,,,http://www.churer-spargeln.ch,https://www.facebook.com/gislerschurerspargeln...
5,14940,Hof Neumatt,Rene,Gisin,Neumatt 4,4462,Rickenbach BL,BL,,079 293 21 06,,,
6,14941,Neuhof,Sämi,Zimmermann,Hohestrasse 172,4104,Oberwil BL,BL,061 401 26 23,079 204 70 25,,https://www.team-zimmermann.ch/,https://www.facebook.com/NeuhofOberwil/?locale...
7,14950,Texas Longhorn Ranch,Daniela & Urs,Weiss,Eigenried 36,4463,Buus,BL,061 841 15 42,079 685 16 56,info@texaslonghorn.ch,https://www.texaslonghorn.ch/,https://www.facebook.com/profile.php?id=100046...
8,14929,Neufeldhof Vaduz,Heike & Christian,Konrad,Neufeldweg 9,9490,Vaduz,LI,+423 780 16 54,,hofladen@fl1.li,http://www.neufeldhof.li,https://www.facebook.com/groups/188213701720126
9,14921,Gfillhof,Werner und Cornelia,Bachmann-Michel,Fröschenthalweg 17,4852,Rothrist,AG,079 668 20 11,079 229 76 18,bachmann.werner@bluewin.ch,,


In [20]:
farm_df["farm_id"].value_counts()

farm_id
14959    1
14953    1
14801    1
14876    1
14874    1
14808    1
14886    1
14882    1
14912    1
14911    1
14918    1
14915    1
14932    1
14937    1
14928    1
14921    1
14929    1
14950    1
14941    1
14940    1
14839    1
14944    1
14923    1
14875    1
Name: count, dtype: int64

In [14]:
# item

In [47]:
farm_data

[{'farm_id': '14875',
  'farm_name': 'Guldenberg',
  'first_name': 'Anita & Michael',
  'last_name': 'Lienhard',
  'street': 'Guldenberg 120',
  'zip': '8424',
  'city': 'Embrach',
  'canton': 'ZH',
  'telephone': '044 865 23 46',
  'email': 'anita.lienhard@guldenberg.ch',
  'website': '',
  'facebook_link': 'https://www.facebook.com/profile.php?id=100067473105104'}]

In [20]:
for offers in data["hits"][0]["_source"]["vomhof"]["offers"]:
    #print(offers)
    product = offers["product"]
    availability = offers["availability"]
    if len(availability)>0:
        product_availability = [month["name"] for month in availability]
    else:
        product_availability = "Not specified"

    print(product
          )
    print(product_availability)

{'id': '147', 'name': 'Äpfel'}
Not specified
{'id': '148', 'name': 'Birnen'}
Not specified
{'id': '38', 'name': 'Blumenkohl'}
['Mai', 'Juni', 'Juli', 'August', 'September', 'Oktober', 'November']
{'id': '40', 'name': 'Broccoli'}
['Mai', 'Juni', 'Juli', 'August', 'September', 'Oktober', 'November']
{'id': '158', 'name': 'Brombeeren'}
Not specified
{'id': '2957', 'name': 'Dekoartikel'}
Not specified
{'id': '2722', 'name': 'Eingemachtes Gemüse'}
Not specified
{'id': '159', 'name': 'Erdbeeren'}
['April', 'Mai', 'Juni', 'Juli', 'August', 'September', 'Oktober']
{'id': '18', 'name': 'Essig'}
Not specified
{'id': '537', 'name': 'Geschenkkorb'}
Not specified
{'id': '52', 'name': 'Gurken'}
['Mai', 'Juni', 'Juli', 'August', 'September', 'Oktober']
{'id': '160', 'name': 'Heidelbeeren'}
Not specified
{'id': '161', 'name': 'Himbeeren'}
Not specified
{'id': '258', 'name': 'Honig'}
Not specified
{'id': '13', 'name': 'Hühnereier'}
Not specified
{'id': '55', 'name': 'Karotten'}
Not specified
{'id': '3'