In [None]:

# Generated by ChatGPT.
# The idea is to build a SQLite database
# and generate spatial features for my data.


# This code is supposed to generate spatial lag features.
import sqlite3
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import normalize

# Open a connection to the SQLite database file. The path is relative, so it
# resolves against the current working directory. The connection is handed to
# create_lag_features_with_varying_k further down; no close() call is visible
# in this part of the file.
conn = sqlite3.connect('../datasets/dallas.sqlite')

# Lag window size, passed as the `lag_window` argument of
# create_lag_features_with_varying_k: lags 1..lag_window are generated.
lag_window = 3  # Number of lagged features to consider

# Helper: row-standardized k-nearest-neighbour weight matrix for a set of points
def _knn_weight_matrix(coords, k):
    """Return an (n, n) row-standardized k-nearest-neighbour weight matrix.

    Row ``i`` holds ``1 / m`` in the columns of the ``m`` points closest to
    point ``i`` and zero elsewhere, where ``m = min(k, n - 1)``. A point is
    never counted as its own neighbour.

    Args:
        coords: ``(n, 2)`` float array of point coordinates.
        k: Requested number of neighbours (must be >= 1).

    Returns:
        ``(n, n)`` float array whose non-empty rows sum to 1. All zeros when
        there is only one point (no neighbour exists).

    Raises:
        ValueError: If ``k`` is smaller than 1 or a coordinate is not finite.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")
    if not np.isfinite(coords).all():
        raise ValueError("sensor coordinates must be finite numbers")

    n_points = len(coords)
    weights = np.zeros((n_points, n_points))
    # Self is excluded, so at most n_points - 1 neighbours are available.
    n_neighbors = min(int(k), n_points - 1)
    if n_neighbors < 1:
        return weights

    # Dense pairwise Euclidean distances; O(n^2) memory, fine for sensor
    # networks of modest size.
    offsets = coords[:, np.newaxis, :] - coords[np.newaxis, :, :]
    distances = np.sqrt((offsets ** 2).sum(axis=-1))
    # Push each point's distance to itself to the end of its ranking.
    np.fill_diagonal(distances, np.inf)
    # Stable sort makes tie-breaking deterministic (lowest row position wins).
    nearest = np.argsort(distances, axis=1, kind='stable')[:, :n_neighbors]
    weights[np.arange(n_points)[:, np.newaxis], nearest] = 1.0 / n_neighbors
    return weights


# Function to create lag features using spatial weight matrices with varying k values
def create_lag_features_with_varying_k(db_conn, sensor_table, data_table, k_values, lag_window):
    """Build time-lagged spatial-lag features for every sensor and every k.

    For each ``k`` a k-nearest-neighbour weight matrix is built from the
    sensor coordinates. The spatial lag of a sensor at a time stamp is the
    mean ``value`` of its k nearest neighbours at that time stamp. Each
    feature column is that spatial lag shifted back by ``lag`` rows, for
    ``lag`` in ``1..lag_window``.

    Notes:
        * Distances are plain Euclidean distances on the raw
          ``latitude``/``longitude`` columns, as in the previous version.
          NOTE(review): if these are degrees, this is only a planar
          approximation -- confirm it is acceptable for the study area.
        * A lag is a shift by rows of the time-sorted table, not by elapsed
          time. NOTE(review): this assumes regularly spaced time stamps that
          sort chronologically -- confirm against the data.
        * Neighbours without a reading at a time stamp are skipped and the
          remaining weights are renormalized; if no neighbour reported, the
          feature is NaN.
        * Several readings for the same (time_stamp, sensor_index) pair are
          averaged.
        * If ``k`` exceeds the number of other sensors, all other sensors
          are used.

    Args:
        db_conn: Open DB connection accepted by ``pandas.read_sql_query``.
        sensor_table: Table with ``sensor_index``, ``latitude``, ``longitude``.
        data_table: Table with ``time_stamp``, ``sensor_index``, ``value``.
        k_values: Iterable of neighbour counts.
        lag_window: Number of lags to generate (lags ``1..lag_window``).

    Returns:
        DataFrame indexed by ``time_stamp`` with one column per
        (k, sensor, lag) named ``value_lag_k{k}_lag{lag}_sensor{sensor_index}``.
        An empty DataFrame when either table has no rows.

    Raises:
        ValueError: If a ``k`` is smaller than 1 or a sensor coordinate is
            missing / not finite.
    """
    # Table names cannot be bound as SQL parameters; they are interpolated
    # and must therefore come from trusted code, never from user input.
    sensors = pd.read_sql_query(
        f"SELECT sensor_index, latitude, longitude FROM {sensor_table}", db_conn
    )
    # Read all readings once instead of once per sensor and per k.
    readings = pd.read_sql_query(
        f"SELECT time_stamp, sensor_index, value FROM {data_table}", db_conn
    )
    if sensors.empty or readings.empty:
        return pd.DataFrame()

    # .tolist() keeps the ids' native type; iterrows() would upcast them to
    # float and leak a ".0" suffix into the column names.
    sensor_ids = sensors['sensor_index'].tolist()
    coords = sensors[['latitude', 'longitude']].to_numpy(dtype=float)

    # Time x sensor table, columns aligned with the rows of `sensors` so the
    # weight matrix indexes the right series. groupby sorts the time stamps.
    wide = (
        readings.groupby(['time_stamp', 'sensor_index'])['value']
        .mean()
        .unstack('sensor_index')
        .reindex(columns=sensor_ids)
    )
    values = wide.to_numpy(dtype=float)
    observed = ~np.isnan(values)
    filled = np.where(observed, values, 0.0)

    features = {}
    for k in k_values:
        weights = _knn_weight_matrix(coords, k)
        # Total weight of the neighbours that actually reported, per cell.
        weight_sum = observed.astype(float) @ weights.T
        with np.errstate(divide='ignore', invalid='ignore'):
            # 0 / 0 -> NaN when no neighbour has a reading.
            neighbor_mean = pd.DataFrame((filled @ weights.T) / weight_sum)
        shifted_by_lag = {
            lag: neighbor_mean.shift(lag).to_numpy()
            for lag in range(1, lag_window + 1)
        }
        for position, sensor_index in enumerate(sensor_ids):
            for lag, shifted in shifted_by_lag.items():
                column = f'value_lag_k{k}_lag{lag}_sensor{sensor_index}'
                features[column] = shifted[:, position]

    # Build the frame in one go; per-column insertion fragments the frame.
    return pd.DataFrame(features, index=wide.index)

# Neighbourhood sizes (k) for which spatial weight matrices are built
k_values = [5, 10, 15]

# Source tables: one queried for sensor coordinates, one for the readings
sensor_table, data_table = 'sensor_list', 'time_series_data'

# Run the feature builder once over all k values and keep the resulting frame
lagged_data = create_lag_features_with_varying_k(
    db_conn=conn,
    sensor_table=sensor_table,
    data_table=data_table,
    k_values=k_values,
    lag_window=lag_window,
)


