In [6]:
import ee
# Run the Earth Engine authentication flow first, then initialize the client
# library against the Google Cloud project used by this notebook.
ee.Authenticate()
ee.Initialize(project='cubevents')  # Replace with your GCP project ID


In [11]:
import folium

In [12]:
def add_ee_layer(self, ee_image_object, vis_params, name):
    """Overlay an Earth Engine image on this map as a toggleable tile layer.

    Args:
        self: Object the tile layer is added to (the function is written to
            be attached to ``folium.Map`` as a method).
        ee_image_object: Value wrapped with ``ee.Image`` before rendering.
        vis_params: Visualization parameters forwarded to ``getMapId``.
        name: Name given to the tile layer.

    Returns:
        None. The layer is added to ``self`` as a side effect.
    """
    # getMapId returns a dict whose 'tile_fetcher' exposes the tile URL template.
    tile_url = ee.Image(ee_image_object).getMapId(vis_params)['tile_fetcher'].url_format
    layer = folium.raster_layers.TileLayer(
        tiles=tile_url,
        attr='Google Earth Engine',
        name=name,
        overlay=True,
        control=True,
    )
    layer.add_to(self)

# Attach the helper to folium.Map so it can be called as a method on map instances.
folium.Map.add_ee_layer = add_ee_layer

In [None]:
# Create a notebook `01_data_collection.ipynb`
from services.data_collector import DataCollector
from services.weather_service import WeatherService
from services.satellite_service import SatelliteService
from database import SessionLocal

db = SessionLocal()
collector = DataCollector(WeatherService(), SatelliteService())

# Collecter pour une wilaya
result = await collector.collect_historical_data_for_wilaya(
    wilaya_code='16',
    start_year=2018,
    end_year=2023,
    db=db
)

print(f"Collecte terminée: {result['records_collected']} enregistrements")

In [13]:

# Area of interest, built from the coordinate list [2.0, 36.0, 3.0, 37.0].
region = ee.Geometry.Rectangle([2.0, 36.0, 3.0, 37.0])

# Median composite of the COPERNICUS/S2_HARMONIZED images that intersect the
# region during May 2024. filterDate treats its end date as exclusive, so the
# end is 2024-06-01 to keep images from May 31 in the composite.
# Note: despite its name, `collection` holds the single image returned by .median().
collection = (
    ee.ImageCollection('COPERNICUS/S2_HARMONIZED')
    .filterBounds(region)
    .filterDate('2024-05-01', '2024-06-01')
    .median()
)

# Compute NDVI as the normalized difference of bands B8 and B4.
ndvi = collection.normalizedDifference(['B8', 'B4']).rename('NDVI')

# Visualization parameters
vis_params = {
    'min': 0,
    'max': 1,
    'palette': ['blue', 'white', 'green']
}

# Create a folium map. It is named `ndvi_map` rather than `map` so the Python
# builtin `map` stays usable in the rest of the notebook.
ndvi_map = folium.Map(location=[36.5, 2.5], zoom_start=8)
ndvi_map.add_ee_layer(ndvi, vis_params, 'NDVI Map')
ndvi_map.add_child(folium.LayerControl())

display(ndvi_map)


In [None]:
import base64
import json
import csv
import requests
import os

# ---------- CONFIG ----------
# Settings for the Document AI extraction below; edit these to point at a
# different PDF, output location, or processor.
PDF_PATH = "SERIE-B-2019-8-16.pdf"     # Path to the input PDF
OUTPUT_JSON = "testoutput.json"        # Where to save the Document AI response
OUTPUT_CSV = "testoutput.csv"          # Optional CSV output (typo "tetsoutput" fixed)
PROJECT_ID = "cubevents"               # Google Cloud project ID
PROCESSOR_ID = "6354d81fff6f3b8d"      # Document AI processor ID
LOCATION = "eu"                        # Processor location: eu, us, etc.
# ----------------------------

def pdf_to_base64(pdf_path):
    """Read the file at ``pdf_path`` and return its bytes as a base64 string.

    Args:
        pdf_path: Path of the file to read in binary mode.

    Returns:
        The file content, base64-encoded and decoded to a ``str``.
    """
    with open(pdf_path, "rb") as pdf_file:
        raw_bytes = pdf_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def get_access_token():
    """Return an access token obtained from the gcloud CLI.

    Requires the gcloud SDK to be installed and application-default
    credentials to be configured.

    Returns:
        The access token printed by gcloud, stripped of surrounding whitespace.

    Raises:
        RuntimeError: If gcloud exits with a non-zero status or prints no
            token. Failing here avoids sending an empty bearer token, which
            would only surface later as an HTTP 401 from the API.
    """
    # Local import keeps this function self-contained within the notebook cell.
    import subprocess

    # The command is a constant string; shell=True keeps the same command
    # resolution as the previous os.popen call (e.g. gcloud.cmd on Windows).
    completed = subprocess.run(
        "gcloud auth application-default print-access-token",
        shell=True,
        capture_output=True,
        text=True,
    )
    token = completed.stdout.strip()
    if completed.returncode != 0 or not token:
        details = completed.stderr.strip() or "no output"
        raise RuntimeError(
            "Could not obtain an access token from gcloud "
            f"(exit code {completed.returncode}): {details}"
        )
    return token

def call_document_ai(pdf_base64, token, timeout=300):
    """Send a base64-encoded PDF to the configured Document AI processor.

    The endpoint is built from the PROJECT_ID, LOCATION and PROCESSOR_ID
    configuration constants, so changing the configuration changes the
    processor that is called.
    NOTE(review): this replaces a hard-coded URL that used the literal project
    number 312473731223; confirm PROJECT_ID names that same project.

    Args:
        pdf_base64: Base64 string of the PDF content.
        token: Access token sent as a Bearer token.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the service answers with an error status.
    """
    url = (
        f"https://{LOCATION}-documentai.googleapis.com/v1/"
        f"projects/{PROJECT_ID}/locations/{LOCATION}/"
        f"processors/{PROCESSOR_ID}:process"
    )

    headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json; charset=utf-8"
    }

    body = {
        "skipHumanReview": True,
        "rawDocument": {
            "mimeType": "application/pdf",
            "content": pdf_base64
        }
    }

    response = requests.post(url, headers=headers, json=body, timeout=timeout)
    response.raise_for_status()
    return response.json()

def save_json(data, filename):
    """Write ``data`` to ``filename`` as indented UTF-8 JSON and report it.

    Args:
        data: JSON-serializable object to store.
        filename: Destination path; the file is overwritten.
    """
    with open(filename, "w", encoding="utf-8") as out_file:
        # ensure_ascii=False keeps non-ASCII characters readable in the file.
        json.dump(data, out_file, ensure_ascii=False, indent=2)
    message = f"Saved JSON output to {filename}"
    print(message)

def _layout_text(layout, full_text):
    """Return the text a table-cell layout refers to.

    A literal ``text`` key on the layout is honoured first. Otherwise the
    layout's ``textAnchor.textSegments`` are treated as start/end offsets into
    ``full_text`` (offsets may be strings, and a missing ``startIndex`` is
    read as 0). As a last resort ``textAnchor.content`` is used.
    """
    if layout.get("text"):
        return layout["text"]

    anchor = layout.get("textAnchor") or {}
    pieces = []
    for segment in anchor.get("textSegments", []):
        start = int(segment.get("startIndex", 0))
        end = int(segment.get("endIndex", 0))
        pieces.append(full_text[start:end])

    resolved = "".join(pieces) or anchor.get("content", "")
    # Strip surrounding whitespace (e.g. trailing newlines) so CSV cells stay clean.
    return resolved.strip()


def json_to_csv(document_json, csv_filename):
    """
    Convert the tables of a Document AI response to a CSV file.

    Header rows and body rows of every table on every page are written in
    order, one CSV row per table row. Cell text is resolved through each
    cell's layout text anchor against ``document.text``; previously only a
    ``text`` key on the layout was read, which left every cell empty when the
    layout carried a text anchor instead.
    For more advanced PDFs, you might need custom parsing.

    Args:
        document_json: Parsed JSON response containing a ``document`` object.
        csv_filename: Destination path of the CSV file. The file is only
            written when at least one table row is found.
    """
    document = document_json.get("document", {})
    full_text = document.get("text", "")

    rows = []
    for page in document.get("pages", []):
        for table in page.get("tables", []):
            for row in table.get("headerRows", []) + table.get("bodyRows", []):
                rows.append([
                    _layout_text(cell.get("layout", {}), full_text)
                    for cell in row.get("cells", [])
                ])

    if rows:
        with open(csv_filename, "w", newline="", encoding="utf-8") as f:
            writer = csv.writer(f)
            writer.writerows(rows)
        print(f"Saved CSV output to {csv_filename}")
    else:
        print("No table data found to convert to CSV.")

def main():
    """Run the extraction: encode the PDF, call Document AI, save JSON and CSV.

    Reads PDF_PATH, writes the raw response to OUTPUT_JSON, then writes any
    tables found in the response to OUTPUT_CSV. Progress is printed before
    each of the first three steps.
    """
    print("Converting PDF to base64...")
    encoded_pdf = pdf_to_base64(PDF_PATH)

    print("Getting access token...")
    access_token = get_access_token()

    print("Calling Google Document AI...")
    document_ai_response = call_document_ai(encoded_pdf, access_token)

    # Persist the full response first, then derive the optional table CSV from it.
    save_json(document_ai_response, OUTPUT_JSON)
    json_to_csv(document_ai_response, OUTPUT_CSV)

# Run the pipeline only when this code is executed as the main module.
if __name__ == "__main__":
    main()


Converting PDF to base64...
Getting access token...
Calling Google Document AI...


HTTPError: 401 Client Error: Unauthorized for url: https://eu-documentai.googleapis.com/v1/projects/cubevents/locations/eu/processors/6354d81fff6f3b8d:process

In [14]:
import pdfplumber
import pandas as pd

pdf_path = "SERIE-B-2019.pdf"

# Collect every table pdfplumber extracts, one DataFrame per table, in page order.
with pdfplumber.open(pdf_path) as pdf:
    data = [
        pd.DataFrame(table)
        for page in pdf.pages
        for table in page.extract_tables()
    ]

# Combine all tables into one big CSV.
# pd.concat raises ValueError on an empty list, so a PDF without any detected
# table is reported instead of failing the cell.
if data:
    final_df = pd.concat(data, ignore_index=True)
    final_df.to_csv("output.csv", index=False)
else:
    final_df = pd.DataFrame()
    print(f"No tables found in {pdf_path}; output.csv was not written.")



Cannot set gray non-stroke color because /'P264' is an invalid float value
Cannot set gray non-stroke color because /'P322' is an invalid float value
Cannot set gray non-stroke color because /'P374' is an invalid float value
Cannot set gray non-stroke color because /'P438' is an invalid float value
Cannot set gray non-stroke color because /'P495' is an invalid float value
Cannot set gray non-stroke color because /'P551' is an invalid float value
Cannot set gray non-stroke color because /'P610' is an invalid float value
Cannot set gray non-stroke color because /'P662' is an invalid float value
Cannot set gray non-stroke color because /'P714' is an invalid float value
Cannot set gray non-stroke color because /'P771' is an invalid float value
Cannot set gray non-stroke color because /'P825' is an invalid float value
Cannot set gray non-stroke color because /'P878' is an invalid float value
