In [79]:
import numpy as np

def read_partial(filename, COMMON_SENSORS):
    """Read temperature readings and group them by ten-minute time bucket.

    Every line of the file is split on whitespace; lines with fewer than
    five fields are ignored. Field 3 is parsed as the integer sensor id and
    field 4 as the float temperature. The bucket key is field 0, a '/',
    and the first four characters of field 1 (date and time truncated to
    the tens of minutes).

    A reading is kept only if its temperature lies in [-4, 42] (the range
    of recorded temperatures in Berkeley, CA) and its sensor id appears in
    COMMON_SENSORS. When a sensor reports more than once in the same bucket,
    the last accepted reading in file order wins.

    Parameters
    ----------
    filename : str
        Path of the text file to read.
    COMMON_SENSORS : sequence of int
        Sensor ids to keep. The position of an id in this sequence is the
        position of its reading in each bucket's list.

    Returns
    -------
    dict
        Maps bucket key -> list of len(COMMON_SENSORS) entries, each either
        a float temperature or the string 'NA' when that sensor had no
        accepted reading in the bucket.

    Raises
    ------
    ValueError
        If a line with at least five fields has a sensor id or temperature
        field that cannot be parsed as a number.
    """
    num_common = len(COMMON_SENSORS)

    # Resolve sensor id -> slot once instead of scanning the sequence for
    # every line. setdefault keeps the first position of a repeated id,
    # which is what list.index would return.
    sensor_slot = {}
    for slot, sensor in enumerate(COMMON_SENSORS):
        sensor_slot.setdefault(sensor, slot)

    partial_observations = {}
    with open(filename, 'r') as f:
        # Iterate the file lazily rather than loading every line up front.
        for line in f:
            vals = line.split()
            if len(vals) < 5:
                continue
            sensor_id, temp = int(vals[3]), float(vals[4])
            slot = sensor_slot.get(sensor_id)
            # Within range of recorded temperature in Berkeley, CA
            if slot is None or not (-4 <= temp <= 42):
                continue
            time = vals[0] + '/' + vals[1][:4]  # date and time to the tens of minutes
            if time not in partial_observations:
                partial_observations[time] = ['NA'] * num_common
            partial_observations[time][slot] = temp
    return partial_observations

def convert_observations(partial_observations):
    """Keep only fully observed time buckets and stack them into an array.

    Parameters
    ----------
    partial_observations : dict
        Maps a time key to a list of readings in which the string 'NA'
        marks a missing value.

    Returns
    -------
    numpy.ndarray
        The readings of every bucket that contains no 'NA', in the dict's
        iteration order, transposed so that each column is one bucket:
        (num_sensors, num_observations). If no bucket qualifies the result
        is an empty 1-D array.
    """
    complete_rows = [
        readings
        for readings in partial_observations.values()
        if 'NA' not in readings
    ]
    # Rows are buckets at this point; transpose so that rows are sensors.
    return np.array(complete_rows).T

def build_covariance(observations):
    """Compute the covariance matrix of the rows of `observations`.

    The result is the average over observations of the outer product of the
    mean-centered observation with itself, i.e. it is normalized by the
    number of observations N (not N - 1).

    Parameters
    ----------
    observations : array_like, shape (num_sensors, num_observations)
        One row per sensor, one column per observation.

    Returns
    -------
    numpy.ndarray, shape (num_sensors, num_sensors)
        The covariance matrix. When there are zero observations a matrix
        of zeros is returned.

    Raises
    ------
    ValueError
        If `observations` is not two-dimensional.
    """
    observations = np.asarray(observations)
    num_sensors, num_observations = observations.shape
    if num_observations == 0:
        # Nothing to accumulate; avoid taking the mean of an empty axis.
        return np.zeros((num_sensors, num_sensors))

    xbar = np.mean(observations, 1)  # (num_sensors,)
    # Subtract each sensor's mean from its row.
    centered = observations - xbar[:, np.newaxis]  # (num_sensors, num_observations)
    # Summing the per-observation outer products is one matrix product.
    return np.dot(centered, centered.T) / num_observations

In [80]:
# Whitespace-separated sensor readings to load.
filename = 'data/temperature.txt'
# Sensors with more than 10,000 observations whose temperatures fall within
# the range recorded in Berkeley, CA: ids 1 through 54 except 5 and 15.
COMMON_SENSORS = list(range(1, 5)) + list(range(6, 15)) + list(range(16, 55))
# Group readings by time bucket, keep the buckets in which every common
# sensor reported, then build the covariance matrix across sensors.
partial_observations = read_partial(filename, COMMON_SENSORS)
observations = convert_observations(partial_observations)
sigma = build_covariance(observations)

In [83]:
# Column indices of sigma to display. Rows and columns of sigma follow the
# order of COMMON_SENSORS, so these select the entries at positions 0, 1 and 3.
S = [0,1,3]
# Show the selected columns of the covariance matrix for every row.
# NOTE(review): the saved output starts with a line `True` that this cell
# does not print -- the stored output may be stale; re-run to confirm.
print(sigma[:,S])

True
[[ 7.0121907   5.61216751  5.59848038]
 [ 5.61216751  4.62479088  4.61441738]
 [ 6.26593107  5.09863716  5.19759583]
 [ 5.59848038  4.61441738  4.88591312]
 [ 5.29777067  4.34108555  4.43222716]
 [ 5.31198769  4.37110647  4.61774799]
 [ 5.81155592  4.70114836  4.82986648]
 [ 5.98677637  4.79671388  4.9233286 ]
 [ 5.40308699  4.38359479  4.49278369]
 [ 5.86301729  4.71952819  4.77941801]
 [ 6.3054962   5.01639044  5.03314404]
 [ 5.01020413  4.06389747  4.14730336]
 [ 5.77489138  4.69459428  4.66770107]
 [ 6.33127457  5.21909326  5.26269974]
 [ 6.00832087  4.97523682  4.85850788]
 [ 6.66064884  5.58170941  5.16862529]
 [ 7.60947926  6.08296642  5.89337969]
 [ 8.06047538  6.45153984  6.3132017 ]
 [11.53027863  8.79691478  8.89607754]
 [11.25673673  8.68955681  8.71299714]
 [10.19396701  7.94907595  7.83416125]
 [12.06935546  9.36000554  9.11056784]
 [13.00214678 10.01298159  9.83263823]
 [10.93775603  8.50505814  8.23469888]
 [ 9.92456409  7.79432588  7.45094759]
 [12.09271947  9.360