In [4]:
import datetime
import json
import os

import great_expectations as gx
import influxdb_client
import pandas as pd
import requests
from influxdb_client.client.write_api import SYNCHRONOUS

# --- STEP 1: Dynamic loading from Bronze (Dataflow Gen2) ---
# The first row of the `ny_location` table is used as the active city.
# Any failure while reading it (table unavailable, no rows, missing column)
# is reported and replaced by fixed default coordinates.
try:
    df_locs = spark.read.table("ny_location").toPandas()
    active_city = df_locs.iloc[0]
    CITY_NAME, LAT, LON = (active_city[column] for column in ("City", "Lat", "Lon"))
    print(f"Coordinates ready for {CITY_NAME}: {LAT}, {LON}")
except Exception as e:
    print(f"Load error of table: {e}")
    CITY_NAME = "New York (Default)"
    LAT = 40.71
    LON = -74.00

# --- STEP 2: Weather enrichment (hourly archive, 2025-12-24 .. 2026-01-20) ---
# Hourly temperature and rain for the active coordinates are fetched from the
# Open-Meteo archive endpoint; timestamps are requested in GMT.
weather_url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": LAT,
    "longitude": LON,
    "start_date": "2025-12-24",
    "end_date": "2026-01-20",
    "hourly": "temperature_2m,rain",
    "timezone": "GMT",
}
# Without a timeout a stalled connection would hang the notebook indefinitely.
weather_response = requests.get(weather_url, params=params, timeout=30)
# Surface 4xx/5xx responses here rather than as an opaque KeyError below.
weather_response.raise_for_status()
weather_data = weather_response.json()
if "hourly" not in weather_data:
    # Report the API's own explanation when it provides one.
    raise RuntimeError(
        f"Open-Meteo response has no 'hourly' section: "
        f"{weather_data.get('reason', weather_data)}"
    )
df_weather = pd.DataFrame(weather_data["hourly"])
# Tag every hourly row with the city so downstream tables can be filtered.
df_weather["city"] = CITY_NAME

# --- STEP 3: Great Expectations (Data Quality) ---
# Validates the hourly weather frame before it is reported and persisted.
context = gx.get_context()
datasource_name = "weather_datasource"
# Make the cell re-runnable: if a data source with this name is already
# registered in the context, remove it before adding it again below.
# NOTE(review): the bare `except Exception: pass` also hides a failure of
# `delete_datasource` itself, not only the "data source does not exist" case.
try:
    context.data_sources.get(datasource_name)
    context.delete_datasource(datasource_name)
except Exception:
    pass

# Register the in-memory pandas frame as a single whole-dataframe batch.
datasource = context.data_sources.add_pandas(name=datasource_name)
asset = datasource.add_dataframe_asset(name="weather_asset")
batch_definition = asset.add_batch_definition_whole_dataframe("all_data")
batch = batch_definition.get_batch(batch_parameters={"dataframe": df_weather})

# Expectations: temperature must be present and fall within [-50, 60]
# (presumably degrees Celsius, the Open-Meteo default -- TODO confirm).
validator = context.get_validator(batch=batch)
validator.expect_column_values_to_not_be_null("temperature_2m")
validator.expect_column_values_to_be_between("temperature_2m", min_value=-50, max_value=60)
# `dq_results.success` is what the Discord report reads.
dq_results = validator.validate()

# --- STEP 4: Discord Trigger ---
def send_discord_msg(results, df, city, *, webhook_url=None, timeout=10):
    """Post a data-quality summary embed to a Discord webhook.

    The webhook URL is a credential, so it is never hard-coded: it is taken
    from ``webhook_url`` or, when that is omitted, from the
    ``DISCORD_WEBHOOK_URL`` environment variable. Sending is best-effort:
    a missing URL or a failed request is reported with ``print`` and does
    not raise, so a notification problem cannot abort the pipeline.

    Args:
        results: Validation result; only its ``success`` attribute is read.
        df: The validated data; only ``len(df)`` is reported.
        city: City label shown in the embed title.
        webhook_url: Optional Discord webhook URL overriding the environment.
        timeout: Seconds to wait for Discord before giving up.

    Returns:
        True if Discord accepted the message, False if it was skipped or
        the request failed.
    """
    webhook_url = webhook_url or os.environ.get("DISCORD_WEBHOOK_URL")
    if not webhook_url:
        print("Discord notification skipped: DISCORD_WEBHOOK_URL is not set")
        return False

    status = "Success" if results.success else "Error"
    # Embed colours as decimal RGB: 0x2ECC71 (green) / 0xE74C3C (red).
    color = 3066993 if results.success else 15158332
    payload = {
        "embeds": [{
            "title": f"DQ Report: Weather 2025 ({city})",
            "color": color,
            "description": f"Check status: **{status}**\nTotal lines (hourly): **{len(df)}**",
            "footer": {"text": "Microsoft Fabric Framework | Admin Alert"}
        }]
    }
    try:
        response = requests.post(webhook_url, json=payload, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as err:
        # The exception text embeds the webhook URL (a secret), so only the
        # error type and HTTP status are printed.
        http_status = getattr(err.response, "status_code", "n/a")
        print(f"Discord notification failed: {type(err).__name__} (HTTP status: {http_status})")
        return False
    return True

# Report the validation outcome for the fetched weather frame to Discord.
send_discord_msg(results=dq_results, df=df_weather, city=CITY_NAME)

# --- STEP 5: Saving in Silver (Delta Table) ---
# The whole frame replaces the previous contents of the Silver table.
silver_writer = spark.createDataFrame(df_weather).write
silver_writer.format("delta").mode("overwrite").saveAsTable("silver_weather_2025")

# --- STEP 6: Uploading JSON to Dropbox via API ---
# Builds a small JSON report (the latest 5 hourly rows up to the current
# New York hour) and uploads it to Dropbox, overwriting the previous report.

# The access token is a credential and must not live in the notebook: it is
# read from the environment. Any token previously committed with this file
# should be revoked.
DROPBOX_TOKEN = os.environ.get("DROPBOX_TOKEN", "")
dropbox_path = "/Reports/weather_2025_report.json"

# The API was queried in GMT. Convert to New York wall-clock time with real
# time-zone rules (a fixed -5h offset is wrong during daylight saving), then
# drop the tz info so `time_local` stays a naive datetime column as before.
df_weather['time'] = pd.to_datetime(df_weather['time'])
df_weather['time_local'] = (
    df_weather['time']
    .dt.tz_localize("UTC")
    .dt.tz_convert("America/New_York")
    .dt.tz_localize(None)
)

# Current New York time, truncated to the start of the hour.
ny_now = pd.Timestamp.now(tz="America/New_York")
current_hour_ny = ny_now.strftime("%Y-%m-%d %H:00")

# Compare datetimes with a Timestamp instead of relying on string coercion.
is_past_or_current = df_weather['time_local'] <= pd.Timestamp(current_hour_ny)
current_data = df_weather[is_past_or_current].tail(5).copy()

current_data['time_display'] = current_data['time_local'].dt.strftime("%Y-%m-%d %H:%M")
df_report = current_data[['time_display', 'temperature_2m', 'rain', 'city']].rename(columns={'time_display': 'time'})

json_report = df_report.to_json(orient="records")

dbx_headers = {
    "Authorization": f"Bearer {DROPBOX_TOKEN}",
    "Content-Type": "application/octet-stream",
    "Dropbox-API-Arg": json.dumps({"path": dropbox_path, "mode": "overwrite"})
}

print(f"Uploading report for the LAST 5 hours (ending at {current_hour_ny})")

if not DROPBOX_TOKEN:
    # Nothing to authenticate with: skip the call instead of sending a
    # request that is guaranteed to be rejected.
    dbx_res = None
    print("Error: DROPBOX_TOKEN environment variable is not set; upload skipped")
else:
    # Explicit bytes for the octet-stream body; the timeout prevents a
    # stalled upload from hanging the notebook.
    dbx_res = requests.post(
        "https://content.dropboxapi.com/2/files/upload",
        headers=dbx_headers,
        data=json_report.encode("utf-8"),
        timeout=30,
    )

    if dbx_res.status_code == 200:
        print("Success: Report sent to Dropbox")
    else:
        print(f"Error {dbx_res.status_code}: {dbx_res.text}")

# --- STEP 7: Sending Data to InfluxDB (for Grafana) ---
# Every hourly row becomes one `weather_metrics` point tagged with the city.

# InfluxDB settings
INFLUX_URL = "https://us-east-1-1.aws.cloud2.influxdata.com"
# The API token is a credential: read it from the environment rather than
# embedding it here. Any token previously committed with this file should
# be revoked.
INFLUX_TOKEN = os.environ.get("INFLUX_TOKEN", "")
INFLUX_ORG = "tk_248@outlook.com"
INFLUX_BUCKET = "weather_analytics"

print(f"Sending {len(df_weather)} rows to InfluxDB...")

try:
    if not INFLUX_TOKEN:
        raise RuntimeError("INFLUX_TOKEN environment variable is not set")

    with influxdb_client.InfluxDBClient(url=INFLUX_URL, token=INFLUX_TOKEN, org=INFLUX_ORG) as client:
        write_api = client.write_api(write_options=SYNCHRONOUS)

        # Build all points first and send them in ONE write call; writing
        # point-by-point with the synchronous API costs one HTTP request
        # per row. Timestamps are parsed once for the whole column.
        timestamps = pd.to_datetime(df_weather['time'])
        points = [
            influxdb_client.Point("weather_metrics")
            .tag("city", city)
            .field("temp", float(temp))
            .field("rain", float(rain))
            .time(ts, influxdb_client.WritePrecision.NS)
            for city, temp, rain, ts in zip(
                df_weather['city'],
                df_weather['temperature_2m'],
                df_weather['rain'],
                timestamps,
            )
        ]

        write_api.write(bucket=INFLUX_BUCKET, org=INFLUX_ORG, record=points)

    print("InfluxDB Data Upload: Success")
except Exception as e:
    # Best-effort export: report the problem without failing the notebook.
    print(f"InfluxDB Error: {e}")

StatementMeta(, 535a23df-c4e9-4f5e-bee0-4d61ff124c64, 10, Finished, Available, Finished)



Calculating Metrics:   0%|          | 0/6 [00:00<?, ?it/s]



Calculating Metrics:   0%|          | 0/8 [00:00<?, ?it/s]

Calculating Metrics:   0%|          | 0/9 [00:00<?, ?it/s]



Uploading report for the LAST 5 hours (ending at 2026-01-20 05:00)
Success: Report sent to Dropbox
Sending 672 rows to InfluxDB...
InfluxDB Data Upload: Success


StatementMeta(, 535a23df-c4e9-4f5e-bee0-4d61ff124c64, 11, Finished, Available, Finished)