In [1]:
import pandas as pd
import numpy as np
from random import normalvariate as rNorm
from random import randint

In [2]:
# Load the solar data CSV and index it by timestamp.
#
# The file stores date and time as separate Year/Month/Day/Hour/Minute columns;
# these are merged into a single 'timestamp' column, which becomes the index.
# Only Tamb, DHI, DNI and GHI are kept as data columns, in that order.

solarFilePath = '/Users/nxd/Desktop/Work Files/MG Controller/forecasters/resources/data/solarData8760-Tucson.csv'

solarDf = (
    pd.read_csv(solarFilePath)
    # pd.to_datetime assembles one datetime per row from the component columns
    .assign(timestamp=lambda frame: pd.to_datetime(frame[['Year', 'Month', 'Day', 'Hour', 'Minute']]))
    # keep only the columns of interest, in display order
    .loc[:, ['timestamp', 'Tamb', 'DHI', 'DNI', 'GHI']]
    .set_index('timestamp')
)

solarDf.head()

Unnamed: 0_level_0,Tamb,DHI,DNI,GHI
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-09-29 00:00:00,23.1,0,0,0
2021-09-29 01:00:00,22.7,0,0,0
2021-09-29 02:00:00,22.4,0,0,0
2021-09-29 03:00:00,22.0,0,0,0
2021-09-29 04:00:00,21.7,0,0,0


In [3]:
# Build noisy copies of the Tamb and GHI series to serve as fake forecasts.

listTamb = solarDf['Tamb'].to_list()
listGhi = solarDf['GHI'].to_list()

# Pad each series with its own first 50 values (wrap-around), so that forecasts
# starting near the end of the data still have values to look ahead to.
listTamb.extend(listTamb[:50])
listGhi.extend(listGhi[:50])

# One noise sample per padded time step: additive for temperature
# (mean 0, sigma 2), multiplicative for irradiance (mean 1, sigma 0.03).
# Both padded lists have the same length, so either can drive the count.
randTamb = [rNorm(0, 2) for _ in listTamb]
randGhi = [rNorm(1, 0.03) for _ in listTamb]

# Apply the random differences; int() truncates the perturbed value toward zero.
listTamb2 = [int(actual + noise) for actual, noise in zip(listTamb, randTamb)]
listGhi2 = [int(actual * noise) for actual, noise in zip(listGhi, randGhi)]

# Error of each fake value relative to the true one (actual - fake).
diffTamb = [actual - fake for actual, fake in zip(listTamb, listTamb2)]
diffGhi = [actual - fake for actual, fake in zip(listGhi, listGhi2)]


In [4]:
# Reserve empty (None-filled) placeholder columns in the dataframe.

# three generic 'd' columns, created in the order d1, d2, d3
solarDf['d1'] = solarDf['d2'] = solarDf['d3'] = None

# one forecast column per metric and look-ahead step 0..23,
# named 'fTamb_0' ... 'fTamb_23', then 'fGHI_0' ... 'fGHI_23'
for metric in ('Tamb', 'GHI'):
    for hh in range(24):
        colName = f'f{metric}_{hh}'
        solarDf[colName] = None

solarDf.head()

Unnamed: 0_level_0,Tamb,DHI,DNI,GHI,d1,d2,d3,fTamb_0,fTamb_1,fTamb_2,...,fGHI_14,fGHI_15,fGHI_16,fGHI_17,fGHI_18,fGHI_19,fGHI_20,fGHI_21,fGHI_22,fGHI_23
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-09-29 00:00:00,23.1,0,0,0,,,,,,,...,,,,,,,,,,
2021-09-29 01:00:00,22.7,0,0,0,,,,,,,...,,,,,,,,,,
2021-09-29 02:00:00,22.4,0,0,0,,,,,,,...,,,,,,,,,,
2021-09-29 03:00:00,22.0,0,0,0,,,,,,,...,,,,,,,,,,
2021-09-29 04:00:00,21.7,0,0,0,,,,,,,...,,,,,,,,,,


In [5]:
# Fill the three 'd' columns with random integer data, one fresh draw per row.
#
# Each column is generated directly, one value per index entry, instead of
# calling .apply() on solarDf['d1'] with a function that ignores its argument.
# The columns therefore do not depend on each other, nor on 'd1' already
# existing. The draws happen in the same order as before: all of d1, then d2,
# then d3.

solarDf['d1'] = [randint(0, 100) for _ in solarDf.index]     # uniform integer in [0, 100]
solarDf['d2'] = [int(rNorm(50, 12)) for _ in solarDf.index]  # normal (mean 50, sigma 12), truncated to int
solarDf['d3'] = [randint(1, 10) for _ in solarDf.index]      # uniform integer in [1, 10]

In [9]:
# Fill the forecast columns. For look-ahead step hh, the value stored at row t
# is a noisy copy of the actual value hh steps later (position t + hh in the
# padded listTamb / listGhi built in an earlier cell).
nRows = len(solarDf)

for hh in range(24):

    # draw a fresh set of noise for this look-ahead step
    # (additive for temperature, multiplicative for irradiance)
    randTamb = [rNorm(0, 2) for _ in listTamb]
    randGhi = [rNorm(1, 0.03) for _ in listTamb]

    # apply the random differences; int() truncates toward zero
    listTamb2 = [int(actual + noise) for actual, noise in zip(listTamb, randTamb)]
    listGhi2 = [int(actual * noise) for actual, noise in zip(listGhi, randGhi)]

    colTamb = f'fTamb_{hh}'
    colGhi = f'fGHI_{hh}'

    # take a window of nRows values starting hh steps ahead
    solarDf[colTamb] = np.array(listTamb2[hh:hh + nRows])
    solarDf[colGhi] = np.array(listGhi2[hh:hh + nRows])

solarDf.head()

Unnamed: 0_level_0,Tamb,DHI,DNI,GHI,d1,d2,d3,fTamb_0,fTamb_1,fTamb_2,...,fGHI_14,fGHI_15,fGHI_16,fGHI_17,fGHI_18,fGHI_19,fGHI_20,fGHI_21,fGHI_22,fGHI_23
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-09-29 00:00:00,23.1,0,0,0,94,38,9,22,23,21,...,264,125,40,0,0,0,0,0,0,0
2021-09-29 01:00:00,22.7,0,0,0,72,43,7,25,21,20,...,117,41,0,0,0,0,0,0,0,0
2021-09-29 02:00:00,22.4,0,0,0,65,65,7,23,24,21,...,38,0,0,0,0,0,0,0,0,0
2021-09-29 03:00:00,22.0,0,0,0,81,43,1,23,21,21,...,0,0,0,0,0,0,0,0,0,0
2021-09-29 04:00:00,21.7,0,0,0,77,34,5,21,19,22,...,0,0,0,0,0,0,0,0,0,0


In [10]:
# Write the dataframe, including the generated 'd' and forecast columns,
# to a separate CSV file (the timestamp index is written as the first column).
solarDf.to_csv(
    path_or_buf='/Users/nxd/Desktop/Work Files/MG Controller/forecasters/resources/data/FORECAST-solarData8760-Tucson.csv'
)