In [34]:
import os
from google.cloud import bigquery

# --- CONFIGURATION ---
KEY_FILE = "ai-realtime-project-4de709b969f4.json"
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = KEY_FILE

# Update these to match your GCP project
PROJECT_ID = "ai-realtime-project"
DATASET_ID = "sensor_data_stream"
TABLE_ID = "real-weather"
full_table_path = f"{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"

client = bigquery.Client()

query = f"""
    SELECT timestamp, temperature, humidity, wind_speed 
    FROM `{full_table_path}` 
    ORDER BY timestamp ASC
"""

# This magic line turns your Cloud Table into a Pandas DataFrame
df = client.query(query).to_arrow().to_pandas()
df.head()



Unnamed: 0,timestamp,temperature,humidity,wind_speed
0,2025-12-23 06:23:24.130140+00:00,23.5,85,9.2
1,2025-12-23 06:25:33.979499+00:00,23.5,85,9.2
2,2025-12-23 06:30:40.151236+00:00,23.5,84,9.4
3,2025-12-23 06:36:06.651966+00:00,23.5,84,9.4
4,2025-12-23 06:41:19.007355+00:00,23.5,84,9.4


2. Feature Engineering for Time
A raw timestamp like 2023-10-27 14:05:00 is hard for an AI to read. We need to help it by "extracting" the useful parts:

In [35]:
import pandas as pd

# Convert the string column to actual Date objects
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Create new "feature" columns
df['hour'] = df['timestamp'].dt.hour
df['day_of_week'] = df['timestamp'].dt.dayofweek

df.head()

Unnamed: 0,timestamp,temperature,humidity,wind_speed,hour,day_of_week
0,2025-12-23 06:23:24.130140+00:00,23.5,85,9.2,6,1
1,2025-12-23 06:25:33.979499+00:00,23.5,85,9.2,6,1
2,2025-12-23 06:30:40.151236+00:00,23.5,84,9.4,6,1
3,2025-12-23 06:36:06.651966+00:00,23.5,84,9.4,6,1
4,2025-12-23 06:41:19.007355+00:00,23.5,84,9.4,6,1


3. The Goal: Predicting the Future
In your previous project, you predicted the price of a house now. In this project, you want to predict the temperature 1 hour from now.

To do this, we create a "Target" column by shifting the temperature back by one row:

In [42]:
# Create a column that is the temperature 1 hour (or 1 row) in the future
df['target_temp'] = df['temperature'].shift(-1)

# Remove the last row (it won't have a future value yet!)
df.dropna(inplace=True)

df.head()

Unnamed: 0,timestamp,temperature,humidity,wind_speed,hour,day_of_week,target_temp
0,2025-12-23 06:23:24.130140+00:00,23.5,85,9.2,6,1,23.5
1,2025-12-23 06:25:33.979499+00:00,23.5,85,9.2,6,1,23.5
2,2025-12-23 06:30:40.151236+00:00,23.5,84,9.4,6,1,23.5
3,2025-12-23 06:36:06.651966+00:00,23.5,84,9.4,6,1,23.5
4,2025-12-23 06:41:19.007355+00:00,23.5,84,9.4,6,1,23.6


In [43]:
# 4. CLEANUP (Create df_clean)
# This removes the last row because it has a 'NaN' target
df_clean = df.dropna().copy() 

print(f"âœ… Prepared {len(df_clean)} rows for training.")

âœ… Prepared 5 rows for training.


In [45]:
from sklearn.linear_model import LinearRegression
# 5. TRAIN the AI
if len(df_clean) > 2:
    X = df_clean[['temperature', 'hour', 'humidity']]
    y = df_clean['target_temp']
    
    model = LinearRegression()
    model.fit(X, y)
    
    # 6. PREDICT the actual future
    # We use the VERY LATEST row (the one we dropped from training)
    latest_now = df.tail(1) 
    prediction = model.predict(latest_now[['temperature', 'hour', 'humidity']])
    
    print(f"âœ¨ Current Temp: {latest_now['temperature'].values[0]}Â°C")
    print(f"ðŸ”® AI Prediction for next update: {prediction[0]:.2f}Â°C")
else:
    print("Wait for more data! You need at least 3 rows to start predicting.")

âœ¨ Current Temp: 23.5Â°C
ðŸ”® AI Prediction for next update: 23.53Â°C
