In [None]:
import sys
sys.path.append('..')

from config import get_config
from data.random_data import get_dataloaders
from submission.resnet import ResNetPV as Model
from util import util
import submission.keys as keys

import numpy as np
import torch
import torch.nn as nn
from datetime import datetime
import matplotlib.pyplot as plt
from tqdm import tqdm

In [None]:
# Checkpoint file whose weights are restored into the model further down.
ckpt_path = '../ckpts/resnext50_imstoopid.pt.best_ema'

# Project configuration read from the ResNet YAML file; the second argument is
# passed as an empty list.
config = get_config('../configs/resnet.yaml', [])

# Override the evaluation subset size for this notebook.
# NOTE(review): presumably this caps how many samples get_dataloaders draws
# for evaluation -- confirm against data.random_data.
config.data.eval_subset_size = 50_000

In [None]:
# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the network from the model section of the config and restore its weights.
# map_location remaps the stored tensors onto `device`; without it a checkpoint
# that was saved from a CUDA run cannot be deserialised on a CPU-only machine,
# which would defeat the CPU fallback chosen above.
model = Model(config.model.config).to(device)
model.load_state_dict(torch.load(ckpt_path, map_location=device))
model.eval()  # put the module in evaluation mode

# Request the data with exactly the feature sets the model declares it needs.
# NOTE(review): load_train=False presumably skips building the training split
# and returns a single evaluation dataloader -- confirm in get_dataloaders.
dataloader = get_dataloaders(
    config=config,
    meta_features=keys.META,
    nonhrv_features=model.REQUIRED_NONHRV,
    weather_features=model.REQUIRED_WEATHER,
    future_features=None,
    load_train=False,
)

# EDA

In [None]:
# for i, (pv, meta, nonhrv, weather, target) in enumerate(dataloader):
#     for j, key in enumerate(meta[keys.META.TIME]):

In [None]:
# Take the PV table straight off the dataset object behind the dataloader and
# display it as the cell output.
eval_dataset = dataloader.dataset
pv_data = eval_dataset.pv
pv_data

In [None]:
# Number of rows drawn for the profile statistics computed in the cells below.
N_SAMPLES = 1_000_000

# Seeded generator so that a fresh "Restart & Run All" draws the same sample.
rng = np.random.default_rng(0)

# Row positions in [0, len(pv_data)), drawn independently (i.e. with replacement).
subset = rng.integers(0, len(pv_data), size=N_SAMPLES)

# Keep only the sampled rows: the values and their matching index entries.
# Both arrays are indexed with the same positions, so they stay aligned.
pv_np = pv_data.to_numpy()[subset]
pv_ind = pv_data.index.to_numpy()[subset]

## Average daily PV profile

In [None]:
# Mean PV value for every 5-minute slot of the day (12 slots/hour * 24 h = 288),
# computed over the sampled rows in pv_ind / pv_np.
ave_profile = np.zeros((24 * 12, ), dtype=np.float32)
counts = np.zeros((24 * 12, ), dtype=np.int32)

# Each index entry unpacks as (timestamp, ssid); only the timestamp is needed.
# NOTE(review): assumes each pv entry is a single scalar reading -- confirm.
for (timestamp, _ssid), pv in tqdm(zip(pv_ind, pv_np), total=len(pv_np)):
    slot = timestamp.hour * 12 + timestamp.minute // 5
    ave_profile[slot] += pv
    counts[slot] += 1

# Turn sums into means. Slots that received no sample are left at 0 instead of
# evaluating 0/0, which would emit a RuntimeWarning and put NaNs into the
# profile (and, through the moving average, into the neighbouring slots too).
np.divide(ave_profile, counts, out=ave_profile, where=counts > 0)

def smooth(x, window_size=12):
    """Return the moving average of a 1-D sequence.

    Args:
        x: 1-D array-like of values to smooth.
        window_size: Number of consecutive samples averaged per output value.

    Returns:
        The result of convolving ``x`` with a uniform kernel of length
        ``window_size`` using ``mode='same'``; values near both ends are
        computed from a partially overlapping window.
    """
    # Uniform kernel: window_size equal weights that sum to one.
    kernel = np.ones(window_size)
    kernel /= window_size
    return np.convolve(x, kernel, mode='same')

# Moving average of the raw daily profile (default window of 12 slots).
smoothed_ave = smooth(ave_profile)


plt.figure(figsize=(16, 8))
plt.plot(ave_profile, label='Average', color='red')
plt.plot(smoothed_ave, label='Smoothed', color='blue')
plt.title('Average PV profile')
# One tick per hour: the x axis is in 5-minute slots, 12 of them per hour.
plt.xticks(np.arange(0, 24 * 12, 12), [f'{i}:00' for i in range(24)], rotation=45)
plt.xlabel('Time of day')
plt.grid()
# NOTE(review): the fixed 0..1 range assumes PV values are normalised -- confirm.
plt.ylim(0, 1)
# The curve labels above are only rendered once a legend is requested.
plt.legend()
plt.show()

In [None]:
# Mean PV value per calendar month (rows, January = 0) and per 5-minute slot of
# the day (columns, 12 slots/hour * 24 h = 288), over the sampled rows.
profiles = np.zeros((12, 24 * 12, ), dtype=np.float32)
countss = np.zeros((12, 24 * 12, ), dtype=np.int32)

# Each index entry unpacks as (timestamp, ssid); only the timestamp is needed.
# NOTE(review): assumes each pv entry is a single scalar reading -- confirm.
for (timestamp, _ssid), pv in tqdm(zip(pv_ind, pv_np), total=len(pv_np)):
    m = timestamp.month - 1
    slot = timestamp.hour * 12 + timestamp.minute // 5
    profiles[m, slot] += pv
    countss[m, slot] += 1

# Turn sums into means. (month, slot) cells that received no sample are left at
# 0 instead of evaluating 0/0, which would emit a RuntimeWarning and leave NaNs
# in the array.
np.divide(profiles, countss, out=profiles, where=countss > 0)

# Month abbreviations indexed by (month number - 1); used as legend entries.
MONTHS = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
plt.figure(figsize=(16, 8))

for m, month_label in enumerate(MONTHS):
    # Spread the twelve months evenly across the viridis colormap.
    month_colour = plt.cm.viridis(m / 11)
    # Faint raw curve first, then the smoothed curve; only the smoothed one
    # carries a label and therefore a legend entry.
    plt.plot(profiles[m], color=month_colour, alpha=0.3)
    plt.plot(smooth(profiles[m], 12), label=month_label, color=month_colour)

# Overlay the smoothed all-year profile as a dashed reference line.
plt.plot(smoothed_ave, label='Average', color='red', linestyle='--')

# One tick per hour: the x axis is in 5-minute slots, 12 of them per hour.
hour_tick_positions = np.arange(0, 24 * 12, 12)
hour_tick_labels = [f'{i}:00' for i in range(24)]
plt.xticks(hour_tick_positions, hour_tick_labels, rotation=45)
plt.grid()
plt.ylim(0, 1)
plt.title('Average PV profile by month')
plt.legend()

In [None]:
# Persist the per-month, per-slot profile array as a .npy file; the path is
# relative, so it lands in the notebook's current working directory.
np.save(file='pv_profiles.npy', arr=profiles)