In [1]:
import numpy as np
import my_statistics as stat
import scipy as sc
from datetime import datetime, timedelta
import pandas as pd

In [2]:
import sys

In [3]:
import matplotlib.pyplot as plt

In [4]:
# Select the "nbagg" matplotlib backend for every figure drawn in this notebook.
%matplotlib nbagg

### Gathering and Plotting the data (for Parts 1 and 2)

In [5]:
# Load the hourly temperature table from disk. The cells below index it as
# all_hour[row][column]: column 0 is read as the timestamp and column -3 as the
# temperature series that is analysed.
# NOTE(review): the file's column layout is not documented in this notebook —
# confirm which station each column holds.
all_hour = np.loadtxt("Data/AllStations_temperature_hour_data_2019.dat")

In [6]:
# Pull the time axis (column 0) and the analysed temperature series (column -3)
# out of the raw table with plain NumPy slicing instead of a per-row Python loop.
# Rows 0 and 1 are skipped.
# NOTE(review): presumably those two rows carry non-data header values —
# confirm against the data file.
# .copy() keeps both arrays independent of `all_hour` rather than being views
# into it.
uvic_sci_hour_time = all_hour[2:, 0].copy()
uvic_sci_hour_temp = all_hour[2:, -3].copy()

In [7]:
# Turn the file's fractional day numbers into datetime objects.
# A day number of 367.0 maps onto 0001-01-01 00:00, the earliest datetime.
# NOTE(review): that offset is what MATLAB-style datenums need — confirm that
# this is the convention used by the data file.
epoch = datetime(1, 1, 1)
uvic_sci_hour_time_pretty = []
for day_number in uvic_sci_hour_time:
    uvic_sci_hour_time_pretty.append(epoch + timedelta(days=day_number - 367.0))

In [8]:
# Sort the hourly record into the two study windows:
#   winter -> 28 Oct 2017 .. 26 Jan 2018
#   summer -> 1 Jun 2018 up to and including the first sample dated 2 Sep 2018
#             (that sample is stored because the stop test runs after the appends)
hour_res_cd_17_time, hour_res_cd_17_temp = [], []
hour_res_cd_18_time, hour_res_cd_18_temp = [], []

for stamp, reading in zip(uvic_sci_hour_time_pretty, uvic_sci_hour_temp):
    year, month, day = stamp.year, stamp.month, stamp.day

    is_winter = (
        (year == 2017 and (month >= 11 or (month == 10 and day >= 28)))
        or (year == 2018 and month == 1 and day <= 26)
    )
    is_summer = year == 2018 and month >= 6

    if is_winter:
        hour_res_cd_17_time.append(stamp)
        hour_res_cd_17_temp.append(reading)
    elif is_summer:
        hour_res_cd_18_time.append(stamp)
        hour_res_cd_18_temp.append(reading)

    # Nothing dated after 2 Sep 2018 is needed, so stop scanning there.
    if (year, month, day) == (2018, 9, 2):
        print("Done")
        break

Done


In [9]:
# Bundle each season's timestamps and temperatures into a two-column table
# ('Time', 'Temperature') so pandas can provide the summary statistics later.
hour_17_dict = {'Time': hour_res_cd_17_time, 'Temperature': hour_res_cd_17_temp}
hour_18_dict = {'Time': hour_res_cd_18_time, 'Temperature': hour_res_cd_18_temp}
hour_17_df, hour_18_df = (
    pd.DataFrame(season_columns) for season_columns in (hour_17_dict, hour_18_dict)
)

In [10]:
# Hourly temperature trace for the winter window.
fig, ax = plt.subplots(figsize=(11, 7))
ax.plot(hour_res_cd_17_time, hour_res_cd_17_temp, 'k.-', label="Hour resolution")
ax.set_title("Winter: 28th Oct. 2017 till 26th Jan. 2018, at UVic Sci. Bldg.")
ax.set_xlabel("Local Time")
# U+00B0 is the degree sign; U+2070 (superscript zero) only looks similar.
ax.set_ylabel("Temperature (in \u00B0C)")
ax.grid(True)
# The trailing semicolon keeps the Legend repr out of the cell output.
ax.legend();

<IPython.core.display.Javascript object>


To register the converters:
	>>> from pandas.plotting import register_matplotlib_converters
	>>> register_matplotlib_converters()


<matplotlib.legend.Legend at 0x7fc41ae06978>

In [11]:
# Close the current figure (the winter temperature plot) before moving on.
plt.close()

In [12]:
# Hourly temperature trace for the summer window.
fig, ax = plt.subplots(figsize=(11, 7))
ax.plot(hour_res_cd_18_time, hour_res_cd_18_temp, 'k.-', label="Hour resolution")
ax.set_title("Summer: 1st June 2018 till 1st Sept. 2018, at UVic Sci. Bldg.")
ax.set_xlabel("Local Time")
# U+00B0 is the degree sign; U+2070 (superscript zero) only looks similar.
ax.set_ylabel("Temperature (in \u00B0C)")
ax.grid(True)
# The trailing semicolon keeps the Legend repr out of the cell output.
ax.legend();

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7fc419251e10>

In [13]:
# Close the current figure (the summer temperature plot) before moving on.
plt.close()

### Part 1 - Winter Graph comparison

In [62]:
# Mean and variance of the winter window; they centre and normalise the lagged
# products computed in the following cells.
# ddof=0 (population variance) is used because the lag products are averaged
# with a plain mean (divide by N). With the default ddof=1 the lag-0
# coefficient would come out as (N-1)/N instead of 1.
wint_mean = hour_17_df["Temperature"].mean()
wint_var = hour_17_df["Temperature"].var(ddof=0)

In [63]:
# One working array per lag 0..40.
# Lag 0 is complete straight away (squared anomalies); lags 1..40 start out as
# plain anomalies and are given their lagged factor by the loop in the next cell.
winter_anomaly = np.array(hour_res_cd_17_temp) - wint_mean
auto_2d_winter = [winter_anomaly ** 2]
# Each lag needs its own array because the entries are overwritten per lag.
auto_2d_winter.extend(winter_anomaly.copy() for _ in range(40))

In [64]:
# Fill auto_2d_winter[tau] (tau = 1..40 days) with the products
#     (x(t) - mean) * (x(t - tau days) - mean)
# for the samples t of the winter window.
#
# The products are rebuilt from the raw anomalies on every execution instead of
# being multiplied into auto_2d_winter in place, so re-running this cell cannot
# compound the factors.
winter_anomaly_base = np.array(hour_res_cd_17_temp) - wint_mean

for tau in range(1, 41):
    counter = 0
    lag_products = winter_anomaly_base.copy()

    # Shift every timestamp forward by tau days: when the shifted time lands in
    # the winter window, the reading itself was taken tau days before that slot.
    buffer_time_pretty = [epoch + timedelta(days=y-367.0 + tau) for y in uvic_sci_hour_time]

    for shifted_time, reading in zip(buffer_time_pretty, uvic_sci_hour_temp):
        in_window = (
            (shifted_time.year == 2017 and shifted_time.month == 10 and shifted_time.day >= 28)
            or (shifted_time.year == 2017 and shifted_time.month >= 11)
            or (shifted_time.year == 2018 and shifted_time.month == 1 and shifted_time.day <= 26)
        )
        if in_window:
            # NOTE(review): pairing by a running counter assumes the shifted
            # series has exactly one sample per window slot (no gaps in the
            # hourly record) — confirm for this data set.
            lag_products[counter] = winter_anomaly_base[counter] * (reading - wint_mean)
            counter += 1

        # The window ends on 26 Jan 2018; the first shifted sample dated the
        # 27th ends the scan.
        if shifted_time.year == 2018 and shifted_time.month == 1 and shifted_time.day == 27:
            break

    auto_2d_winter[tau] = lag_products

In [65]:
def _winter_lag_mean(lag):
    """Return the mean of the winter lag products for `lag`, skipping the first `lag` entries."""
    # NOTE(review): the [lag:] slice trims `lag` hourly samples although the lag
    # itself is counted in days — confirm that this trimming is intended.
    frame = pd.DataFrame({
        'Time': hour_res_cd_17_time[lag:],
        'Temperature': auto_2d_winter[lag][lag:],
    })
    return frame["Temperature"].mean()


# r_xx(lag) for lag = 0..40 days: mean lag product normalised by the winter variance.
auto_correlation_winter = [_winter_lag_mean(lag) / wint_var for lag in range(41)]

In [72]:
# Winter auto-correlation coefficient against lag, with the 1/e level drawn as
# a reference line.
lag_axis = range(41)
fig, ax = plt.subplots(figsize=(11, 7))
ax.plot(lag_axis, np.array(auto_correlation_winter), 'ro-', label="Lagged Auto-Correlation Coefficient")
ax.plot(lag_axis, np.ones(41) / np.e, 'k-', label="1/e threshold")
ax.set_title("Winter: 28th Oct. 2017 till 26th Jan. 2018, at UVic Sci. Bldg.")
ax.set_xlabel("Time Lag, \u03C4 (in Days)")
ax.set_ylabel("Auto-correlation Coefficient, r_xx(\u03C4)")
ax.grid(True)
ax.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7fc4148bdac8>

In [73]:
# Close the current figure (the winter auto-correlation plot) before moving on.
plt.close()

Decorrelation time scale ≈ 4 days (the lag at which $r_{xx}(\tau)$ first drops below the $1/e$ threshold).

### Part 2 - Summer Graph comparison

In [79]:
# Mean and variance of the summer window; they centre and normalise the lagged
# products computed in the following cells.
# ddof=0 (population variance) is used because the lag products are averaged
# with a plain mean (divide by N). With the default ddof=1 the lag-0
# coefficient would come out as (N-1)/N instead of 1.
summ_mean = hour_18_df["Temperature"].mean()
summ_var = hour_18_df["Temperature"].var(ddof=0)

In [80]:
# One working array per lag 0..40.
# Lag 0 is complete straight away (squared anomalies); lags 1..40 start out as
# plain anomalies and are given their lagged factor by the loop in the next cell.
summer_anomaly = np.array(hour_res_cd_18_temp) - summ_mean
auto_2d_summer = [summer_anomaly ** 2]
# Each lag needs its own array because the entries are overwritten per lag.
auto_2d_summer.extend(summer_anomaly.copy() for _ in range(40))

In [81]:
# Fill auto_2d_summer[tau] (tau = 1..40 days) with the products
#     (x(t) - mean) * (x(t - tau days) - mean)
# for the samples t of the summer window.
#
# The products are rebuilt from the raw anomalies on every execution instead of
# being multiplied into auto_2d_summer in place, so re-running this cell cannot
# compound the factors.
summer_anomaly_base = np.array(hour_res_cd_18_temp) - summ_mean

for tau in range(1, 41):
    counter = 0
    lag_products = summer_anomaly_base.copy()

    # Shift every timestamp forward by tau days: when the shifted time lands in
    # the summer window, the reading itself was taken tau days before that slot.
    buffer_time_pretty = [epoch + timedelta(days=y-367.0 + tau) for y in uvic_sci_hour_time]

    for shifted_time, reading in zip(buffer_time_pretty, uvic_sci_hour_temp):
        if shifted_time.year == 2018 and shifted_time.month >= 6:
            # NOTE(review): pairing by a running counter assumes the shifted
            # series has exactly one sample per window slot (no gaps in the
            # hourly record) — confirm for this data set.
            lag_products[counter] = summer_anomaly_base[counter] * (reading - summ_mean)
            counter += 1

        # The first shifted sample dated 2 Sep 2018 is still multiplied above
        # and then ends the scan.
        if shifted_time.year == 2018 and shifted_time.month == 9 and shifted_time.day == 2:
            break

    auto_2d_summer[tau] = lag_products

In [82]:
def _summer_lag_mean(lag):
    """Return the mean of the summer lag products for `lag`, skipping the first `lag` entries."""
    # NOTE(review): the [lag:] slice trims `lag` hourly samples although the lag
    # itself is counted in days — confirm that this trimming is intended.
    frame = pd.DataFrame({
        'Time': hour_res_cd_18_time[lag:],
        'Temperature': auto_2d_summer[lag][lag:],
    })
    return frame["Temperature"].mean()


# r_xx(lag) for lag = 0..40 days: mean lag product normalised by the summer variance.
auto_correlation_summer = [_summer_lag_mean(lag) / summ_var for lag in range(41)]

In [87]:
# Summer auto-correlation coefficient against lag, with the 1/e level drawn as
# a reference line.
lag_axis = range(41)
fig, ax = plt.subplots(figsize=(11, 7))
ax.plot(lag_axis, np.array(auto_correlation_summer), 'ro-', label="Lagged Auto-Correlation Coefficient")
ax.plot(lag_axis, np.ones(41) / np.e, 'k-', label="1/e threshold")
# White marker at the origin.
# NOTE(review): presumably there to pull the autoscaled axes down to include 0 — confirm.
ax.plot(0, 0, 'w.')
ax.set_title("Summer: 1st June 2018 till 1st Sept. 2018, at UVic Sci. Bldg.")
ax.set_xlabel("Time Lag, \u03C4 (in Days)")
ax.set_ylabel("Auto-correlation Coefficient, r_xx(\u03C4)")
ax.grid(True)
ax.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x7fc414076da0>

In [88]:
# Close the current figure (the summer auto-correlation plot) before moving on.
plt.close()

Decorrelation time scale $\geq$ 40 days ($r_{xx}(\tau)$ does not drop below the $1/e$ threshold within the 40 days of lag computed).

### Part 3 - Inference

As is evident from Part 1 and Part 2, the auto-correlation coefficient stays above $1/e$ for more days in Part 2 (Summer) than in Part 1 (Winter). Specifically, the decorrelation time scale for Winter is MUCH smaller than for Summer $\Rightarrow$ this dummy weather forecast is better at predicting Summer weather than Winter weather, overall.

For individual offsets too (for example, $\tau = 1$, i.e. predicting today's weather using yesterday's weather), $r_{xx}(\tau)_{\text{Summer}} > r_{xx}(\tau)_{\text{Winter}}$ $\Rightarrow$ Summer predictions are better than Winter predictions for the dummy weather forecast.

This also implies that the Summer weather is more consistent, i.e. varies more slowly, than Winter weather, on a day-to-day basis.