In [None]:
!pip install numpy==1.21.6
import numpy as np
import pandas as pd

In [None]:
#Read data frames

def _load_and_show(csv_name):
    """Read one CSV (first column becomes the index), print it, and return it."""
    frame = pd.read_csv(csv_name, index_col=0)
    print(frame)
    return frame


# One frame per input variable; each is echoed right after it is loaded.
df_temp = _load_and_show('surface_air_temperature.csv')

df_pressure = _load_and_show('surface_pressure.csv')

df_wind = _load_and_show('near_surface_wind_speed.csv')

df_humidity = _load_and_show('specific_humidity.csv')


In [None]:
#Make air density data frame
start_date = '2010-01-01'
end_date = '2022-01-01'
date_range = pd.date_range(start = start_date, end = end_date, freq='MS')

group_names_density = ["Area " + str(count) + " Air Density (kg m-3)" for count in range(1, 210)]

# Take the leading rows/columns that line up with the output frame as plain
# float arrays so the formula can be evaluated for every cell at once.
# (Element-wise `df.iloc[row][col] = ...` is chained assignment and is not
# guaranteed to write into the frame.)
n_rows = len(date_range)
n_cols = len(group_names_density)
pressure = df_pressure.iloc[:n_rows, :n_cols].to_numpy(dtype=float)
temperature = df_temp.iloc[:n_rows, :n_cols].to_numpy(dtype=float)
humidity = df_humidity.iloc[:n_rows, :n_cols].to_numpy(dtype=float)

GAS_CONSTANT_DRY_AIR = 286.9      # J/(kg K)
GAS_CONSTANT_WATER_VAPOR = 461.5  # J/(kg K)

#Formula for air density:
#   (pressure / (R_dry * temperature)) * (1 + humidity)
#   ---------------------------------------------------
#           1 + (humidity * R_vapor) / R_dry
density_values = (
    (pressure / (GAS_CONSTANT_DRY_AIR * temperature)) * (1 + humidity)
) / (1 + (humidity * GAS_CONSTANT_WATER_VAPOR) / GAS_CONSTANT_DRY_AIR)

# Raises ValueError if the inputs do not provide n_rows x n_cols values.
df_density = pd.DataFrame(density_values, columns = group_names_density, index = date_range)

print(df_density)

In [None]:
#Convert air density dataframe to csv
# np.nan replaces the np.NaN alias, which NumPy 2.0 removed; the value is identical.
df_density.to_csv("air_density.csv", na_rep=np.nan)

In [None]:
#Make wind potential data frame
start_date = '2010-01-01'
end_date = '2022-01-01'
date_range = pd.date_range(start = start_date, end = end_date, freq='MS')

group_names_potential = ["Area " + str(count) + " Power Potential(kW)" for count in range(1, 210)]

# Work on plain float arrays so the whole grid is computed in one expression.
# (Element-wise `df.iloc[row][col] = ...` is chained assignment and is not
# guaranteed to write into the frame.)
n_rows = len(date_range)
n_cols = len(group_names_potential)
air_density = df_density.iloc[:n_rows, :n_cols].to_numpy(dtype=float)
wind_speed = df_wind.iloc[:n_rows, :n_cols].to_numpy(dtype=float)

# Area swept by the blades: pi * r^2 with pi approximated as 22/7 and an
# average turbine diameter of 127.5.
swept_area = (22/7) * ((127.5/2) * (127.5/2))

#Formula for wind potential (kW) = 0.5 * Air Density(kg m-3) * Swept Area * Wind Speed(m s-1) ^ 3 * 0.001
potential_values = (0.5 * air_density * swept_area * (wind_speed * wind_speed * wind_speed)) * 0.001

# Raises ValueError if the inputs do not provide n_rows x n_cols values.
df_potential = pd.DataFrame(potential_values, columns = group_names_potential, index = date_range)

print(df_potential)

In [None]:
#Convert wind potential dataframe to csv
# np.nan replaces the np.NaN alias, which NumPy 2.0 removed; the value is identical.
df_potential.to_csv("wind_potential.csv", na_rep=np.nan)

In [None]:
#Run our regression model for Temperature
#
# A single multi-output Ridge model maps the values of all core areas in one
# month to the values of the same areas in the following month. The training
# rows never change, so the model is fitted once and then rolled forward,
# feeding every forecast back in as the next input.

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error

# Core areas are the columns without any missing observation.
core_areas = df_temp.columns[df_temp.notna().all()].tolist()
target_areas = [area + ' Target' for area in core_areas]

core_temp = df_temp[core_areas].reset_index(drop=True)

#Create Target Area Columns
# Each target column holds the next row's value of its core area, so the
# last row has NaN targets. All columns are added in one concat.
next_month = core_temp.shift(-1)
next_month.columns = target_areas
core_temp = pd.concat([core_temp, next_month], axis=1)

#Create training and test set
# .loc slices by label and includes both end points, so row 120 belongs to
# both sets.
# NOTE(review): the overlap is kept so the printed error stays comparable
# with previously recorded runs - confirm it is intended.
train = core_temp.loc[:120]
test = core_temp.loc[120:]

#Fit our model
reg = Ridge(alpha=.1)
reg.fit(train[core_areas], train[target_areas])

#Score the observed test rows that have a known next-month value
predictions = reg.predict(test[core_areas])
print("Mean absolute error for our existing data: ", mean_absolute_error(test[target_areas].iloc[:-1], predictions[:-1]))

#Forecast 120 further months, one step at a time
forecast_rows = []
latest = core_temp[core_areas].iloc[[-1]]
for month in range(0, 120):
    next_values = reg.predict(latest)[0]
    forecast_rows.append(next_values)
    # Keep the column names so the model sees the same features it was fitted on.
    latest = pd.DataFrame([next_values], columns=core_areas)
    print('At month ', month)

# Append every forecast in one concat (DataFrame.append no longer exists in
# pandas 2.x), then rebuild the target columns for the extended frame.
core_temp = pd.concat([core_temp[core_areas], pd.DataFrame(forecast_rows, columns=core_areas)], ignore_index=True)
next_month = core_temp.shift(-1)
next_month.columns = target_areas
core_temp = pd.concat([core_temp, next_month], axis=1)


In [None]:
# Show the temperature frame built by the regression cell (core-area columns followed by their ' Target' columns).
print(core_temp)

In [None]:
# Label every row with a month-start date, beginning 2010-01-01, one per row.
date_range = pd.date_range(start = '2010-01-01', periods = core_temp.index.size, freq='MS')

core_temp.index = date_range

#Save future values
#First error value in test: 1.5945740732440523
# np.nan replaces the np.NaN alias, which NumPy 2.0 removed; the value is identical.
core_temp.to_csv("prediction_temperature.csv", na_rep=np.nan)

In [None]:
#Run our regression model for Pressure
#
# A single multi-output Ridge model maps the values of all core areas in one
# month to the values of the same areas in the following month. The training
# rows never change, so the model is fitted once and then rolled forward,
# feeding every forecast back in as the next input.

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error

# Core areas are the columns without any missing observation.
core_areas = df_pressure.columns[df_pressure.notna().all()].tolist()
target_areas = [area + ' Target' for area in core_areas]

core_pressure = df_pressure[core_areas].reset_index(drop=True)

#Create Target Area Columns
# Each target column holds the next row's value of its core area, so the
# last row has NaN targets. All columns are added in one concat.
next_month = core_pressure.shift(-1)
next_month.columns = target_areas
core_pressure = pd.concat([core_pressure, next_month], axis=1)

#Create training and test set
# .loc slices by label and includes both end points, so row 120 belongs to
# both sets.
# NOTE(review): the overlap is kept so the printed error stays comparable
# with previously recorded runs - confirm it is intended.
train = core_pressure.loc[:120]
test = core_pressure.loc[120:]

#Fit our model
reg = Ridge(alpha=.1)
reg.fit(train[core_areas], train[target_areas])

#Score the observed test rows that have a known next-month value
predictions = reg.predict(test[core_areas])
print("Mean absolute error for our existing data: ", mean_absolute_error(test[target_areas].iloc[:-1], predictions[:-1]))

#Forecast 120 further months, one step at a time
forecast_rows = []
latest = core_pressure[core_areas].iloc[[-1]]
for month in range(0, 120):
    next_values = reg.predict(latest)[0]
    forecast_rows.append(next_values)
    # Keep the column names so the model sees the same features it was fitted on.
    latest = pd.DataFrame([next_values], columns=core_areas)
    print('At month ', month)

# Append every forecast in one concat (DataFrame.append no longer exists in
# pandas 2.x), then rebuild the target columns for the extended frame.
core_pressure = pd.concat([core_pressure[core_areas], pd.DataFrame(forecast_rows, columns=core_areas)], ignore_index=True)
next_month = core_pressure.shift(-1)
next_month.columns = target_areas
core_pressure = pd.concat([core_pressure, next_month], axis=1)


In [None]:
# Show the pressure frame built by the regression cell (core-area columns followed by their ' Target' columns).
print(core_pressure)

In [None]:
# Label every row with a month-start date, beginning 2010-01-01, one per row.
date_range = pd.date_range(start = '2010-01-01', periods = core_pressure.index.size, freq='MS')

core_pressure.index = date_range

#Save future values
#First error value in test: 202.04864330877325 
# np.nan replaces the np.NaN alias, which NumPy 2.0 removed; the value is identical.
core_pressure.to_csv("prediction_pressure.csv", na_rep=np.nan)

In [None]:
#Run our regression model for Wind Speed
#
# A single multi-output Ridge model maps the values of all core areas in one
# month to the values of the same areas in the following month. The training
# rows never change, so the model is fitted once and then rolled forward,
# feeding every forecast back in as the next input.

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error

# Core areas are the columns without any missing observation.
core_areas = df_wind.columns[df_wind.notna().all()].tolist()
target_areas = [area + ' Target' for area in core_areas]

core_wind = df_wind[core_areas].reset_index(drop=True)

#Create Target Area Columns
# Each target column holds the next row's value of its core area, so the
# last row has NaN targets. All columns are added in one concat.
next_month = core_wind.shift(-1)
next_month.columns = target_areas
core_wind = pd.concat([core_wind, next_month], axis=1)

#Create training and test set
# .loc slices by label and includes both end points, so row 120 belongs to
# both sets.
# NOTE(review): the overlap is kept so the printed error stays comparable
# with previously recorded runs - confirm it is intended.
train = core_wind.loc[:120]
test = core_wind.loc[120:]

#Fit our model
reg = Ridge(alpha=.1)
reg.fit(train[core_areas], train[target_areas])

#Score the observed test rows that have a known next-month value
predictions = reg.predict(test[core_areas])
print("Mean absolute error for our existing data: ", mean_absolute_error(test[target_areas].iloc[:-1], predictions[:-1]))

#Forecast 120 further months, one step at a time
forecast_rows = []
latest = core_wind[core_areas].iloc[[-1]]
for month in range(0, 120):
    next_values = reg.predict(latest)[0]
    forecast_rows.append(next_values)
    # Keep the column names so the model sees the same features it was fitted on.
    latest = pd.DataFrame([next_values], columns=core_areas)
    print('At month ', month)

# Append every forecast in one concat (DataFrame.append no longer exists in
# pandas 2.x), then rebuild the target columns for the extended frame.
core_wind = pd.concat([core_wind[core_areas], pd.DataFrame(forecast_rows, columns=core_areas)], ignore_index=True)
next_month = core_wind.shift(-1)
next_month.columns = target_areas
core_wind = pd.concat([core_wind, next_month], axis=1)

In [None]:
# Show the wind-speed frame built by the regression cell (core-area columns followed by their ' Target' columns).
print(core_wind)

In [None]:
# Label every row with a month-start date, beginning 2010-01-01, one per row.
date_range = pd.date_range(start = '2010-01-01', periods = core_wind.index.size, freq='MS')

core_wind.index = date_range

#Save future values
#First error value in test: 0.33157827060066336
# np.nan replaces the np.NaN alias, which NumPy 2.0 removed; the value is identical.
core_wind.to_csv("prediction_wind.csv", na_rep=np.nan)

In [23]:
#Run our regression model for Specific Humidity
#
# A single multi-output Ridge model maps the values of all core areas in one
# month to the values of the same areas in the following month. The training
# rows never change, so the model is fitted once and then rolled forward,
# feeding every forecast back in as the next input.

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error

# Core areas are the columns without any missing observation.
core_areas = df_humidity.columns[df_humidity.notna().all()].tolist()
target_areas = [area + ' Target' for area in core_areas]

core_humidity = df_humidity[core_areas].reset_index(drop=True)

#Create Target Area Columns
# Each target column holds the next row's value of its core area, so the
# last row has NaN targets. All columns are added in one concat.
next_month = core_humidity.shift(-1)
next_month.columns = target_areas
core_humidity = pd.concat([core_humidity, next_month], axis=1)

#Create training and test set
# .loc slices by label and includes both end points, so row 120 belongs to
# both sets.
# NOTE(review): the overlap is kept so the printed error stays comparable
# with previously recorded runs - confirm it is intended.
train = core_humidity.loc[:120]
test = core_humidity.loc[120:]

#Fit our model
reg = Ridge(alpha=.1)
reg.fit(train[core_areas], train[target_areas])

#Score the observed test rows that have a known next-month value
predictions = reg.predict(test[core_areas])
print("Mean absolute error for our existing data: ", mean_absolute_error(test[target_areas].iloc[:-1], predictions[:-1]))

#Forecast 120 further months, one step at a time
forecast_rows = []
latest = core_humidity[core_areas].iloc[[-1]]
for month in range(0, 120):
    next_values = reg.predict(latest)[0]
    forecast_rows.append(next_values)
    # Keep the column names so the model sees the same features it was fitted on.
    latest = pd.DataFrame([next_values], columns=core_areas)
    print('At month ', month)

# Append every forecast in one concat (DataFrame.append no longer exists in
# pandas 2.x), then rebuild the target columns for the extended frame.
core_humidity = pd.concat([core_humidity[core_areas], pd.DataFrame(forecast_rows, columns=core_areas)], ignore_index=True)
next_month = core_humidity.shift(-1)
next_month.columns = target_areas
core_humidity = pd.concat([core_humidity, next_month], axis=1)


Mean absolute error for our existing data:  0.0018293480892456271
At month  0
At month  1
At month  2
At month  3
At month  4
At month  5
At month  6
At month  7
At month  8
At month  9
At month  10
At month  11
At month  12
At month  13
At month  14
At month  15
At month  16
At month  17
At month  18
At month  19
At month  20
At month  21
At month  22
At month  23
At month  24
At month  25
At month  26
At month  27
At month  28
At month  29
At month  30
At month  31
At month  32
At month  33
At month  34
At month  35
At month  36
At month  37
At month  38
At month  39
At month  40
At month  41
At month  42
At month  43
At month  44
At month  45
At month  46
At month  47
At month  48
At month  49
At month  50
At month  51
At month  52
At month  53
At month  54
At month  55
At month  56
At month  57
At month  58
At month  59
At month  60
At month  61
At month  62
At month  63
At month  64
At month  65
At month  66
At month  67
At month  68
At month  69
At month  70
At month  71
At month

KeyboardInterrupt: 

In [None]:
# Show the specific-humidity frame built by the regression cell (core-area columns followed by their ' Target' columns).
print(core_humidity)

In [None]:
# Label every row with a month-start date, beginning 2010-01-01, one per row.
date_range = pd.date_range(start = '2010-01-01', periods = core_humidity.index.size, freq='MS')

core_humidity.index = date_range

#Save future values
#First error value in test: 0.0018293480892456271
# np.nan replaces the np.NaN alias, which NumPy 2.0 removed; the value is identical.
core_humidity.to_csv("prediction_humidity.csv", na_rep=np.nan)

In [None]:
#Make prediction air density dataset

# Output column names are derived from the humidity core-area names.
group_names_density = [
    area.removesuffix(" Specific Humidity (kg kg-1)") + " Air Density (kg m-3)"
    for area in core_areas
]

# Work on plain float arrays so the whole grid is computed in one expression.
# (Element-wise `df.iloc[row][col] = ...` is chained assignment and is not
# guaranteed to write into the frame.)
# NOTE(review): the three forecast frames are combined purely by column
# position; this assumes each variable kept the same core areas in the same
# order - confirm, or align by area name.
n_rows = len(date_range)
n_cols = len(group_names_density)
pressure = core_pressure.iloc[:n_rows, :n_cols].to_numpy(dtype=float)
temperature = core_temp.iloc[:n_rows, :n_cols].to_numpy(dtype=float)
humidity = core_humidity.iloc[:n_rows, :n_cols].to_numpy(dtype=float)

GAS_CONSTANT_DRY_AIR = 286.9      # J/(kg K)
GAS_CONSTANT_WATER_VAPOR = 461.5  # J/(kg K)

#Formula for air density:
#   (pressure / (R_dry * temperature)) * (1 + humidity)
#   ---------------------------------------------------
#           1 + (humidity * R_vapor) / R_dry
density_values = (
    (pressure / (GAS_CONSTANT_DRY_AIR * temperature)) * (1 + humidity)
) / (1 + (humidity * GAS_CONSTANT_WATER_VAPOR) / GAS_CONSTANT_DRY_AIR)

# Raises ValueError if the inputs do not provide n_rows x n_cols values.
df_density_pred = pd.DataFrame(density_values, columns = group_names_density, index = date_range)

print(df_density_pred)

In [None]:
#Convert predicted air density dataframe to csv
# np.nan replaces the np.NaN alias, which NumPy 2.0 removed; the value is identical.
df_density_pred.to_csv("prediction_air_density.csv", na_rep=np.nan)

In [None]:
#Make wind potential data frame

# Output column names are derived from the humidity core-area names.
group_names_potential = [
    area.removesuffix(" Specific Humidity (kg kg-1)") + " Power Potential(kW)"
    for area in core_areas
]

# Work on plain float arrays so the whole grid is computed in one expression.
# (Element-wise `df.iloc[row][col] = ...` is chained assignment and is not
# guaranteed to write into the frame.)
# NOTE(review): density and wind columns are paired purely by position; this
# assumes both frames list the same areas in the same order - confirm.
n_rows = len(date_range)
n_cols = len(group_names_potential)
air_density = df_density_pred.iloc[:n_rows, :n_cols].to_numpy(dtype=float)
wind_speed = core_wind.iloc[:n_rows, :n_cols].to_numpy(dtype=float)

# Area swept by the blades: pi * r^2 with pi approximated as 22/7 and an
# average turbine diameter of 127.5.
swept_area = (22/7) * ((127.5/2) * (127.5/2))

#Formula for wind potential (kW) = 0.5 * Air Density(kg m-3) * Swept Area * Wind Speed(m s-1) ^ 3 * 0.001
potential_values = 0.5 * air_density * swept_area * (wind_speed * wind_speed * wind_speed) * 0.001

# Raises ValueError if the inputs do not provide n_rows x n_cols values.
df_potential_pred = pd.DataFrame(potential_values, columns = group_names_potential, index = date_range)

print(df_potential_pred)

In [None]:
#Convert predicted wind potential dataframe to csv
# Missing values are written the same way as in the other exported CSV files.
df_potential_pred.to_csv("prediction_potential.csv", na_rep=np.nan)