In [26]:
import numpy as np

def read_partial(filename, COMMON_SENSORS):
    num_common = len(COMMON_SENSORS)
    partial_observations = {}
    with open(filename, 'r') as f:
        for line in f.readlines():
            vals = line.split()
            if len(vals) >= 5: 
                sensor_id, temp = int(vals[3]), float(vals[4])
                time = vals[0] + '/' + vals[1][:4] # date and time to the tens of minutes
                # Within range of recorded temperature in Berkeley, CA
                if -4 <= temp and temp <= 42 and sensor_id in COMMON_SENSORS:
                    if time not in partial_observations:
                        partial_observations[time] = ['NA']*num_common
                    partial_observations[time][COMMON_SENSORS.index(sensor_id)] = temp                
    return partial_observations

def convert_observations(partial_observations):
    observations = []
    for time in partial_observations:
        if partial_observations[time].count('NA') == 0:
            observations += [partial_observations[time]]

    return np.array(observations).T # (num_sensors, num_observations)

def build_covariance(observations):
    xbar = np.mean(observations, 1) # (num_sensors,)
    num_sensors, num_observations = observations.shape
    S = np.empty((num_sensors,num_sensors))
    for i in range(num_observations):
        xi = observations[:,i] # (num_sensors,)
        outer = np.outer(xi-xbar, xi-xbar)/num_observations
        S = np.add(outer, S)
    return S

In [25]:
filename = 'data/temperature.txt'
# Sensors with more than 10,000 observations with temps within recorded temps of Berkeley, CA
COMMON_SENSORS = [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54]
partial_observations = read_partial(filename, COMMON_SENSORS)
observations = convert_observations(partial_observations)
S = build_covariance(observations)

[[ 7.0121907   5.61216751  6.26593107 ...  6.19684213  9.34878641
   6.59416487]
 [ 6.45372406  4.62479088  6.19156796 ...  5.29488333  5.54206585
   5.42201047]
 [ 6.26593107  5.09863716  5.74885241 ...  5.81187902  6.2183937
   9.41657952]
 ...
 [ 9.03603258  8.40483373  9.49914127 ... 18.07837117 17.59483964
  16.59672035]
 [ 9.34878729  8.50809241  9.73501499 ... 17.59483964 17.82918852
  17.09215329]
 [ 9.14123178  8.21197891  9.41657996 ... 16.59672035 17.09215329
  16.61200198]]
[[ 7.0121907   6.45372406  6.26593107 ...  9.03603258  9.34878729
   9.14123178]
 [ 5.61216751  4.62479088  5.09863716 ...  8.40483373  8.50809241
   8.21197891]
 [ 6.26593107  6.19156796  5.74885241 ...  9.49914127  9.73501499
   9.41657996]
 ...
 [ 6.19684213  5.29488333  5.81187902 ... 18.07837117 17.59483964
  16.59672035]
 [ 9.34878641  5.54206585  6.2183937  ... 17.59483964 17.82918852
  17.09215329]
 [ 6.59416487  5.42201047  9.41657952 ... 16.59672035 17.09215329
  16.61200198]]
