In [1]:
import math
import os
import time

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shapely
import shapely.speedups
from geopandas import GeoSeries



In [2]:
# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)
# NOTE(review): shapely.speedups is deprecated in Shapely 2.x — confirm the
# installed version still needs this call.
shapely.speedups.enable()
# Get shape file of Contiguous US
df = gpd.read_file('CartographicBoundries/US_State/cb_2018_us_state_500k.shp')
# Drop rows by index label. Presumably these are the non-contiguous states and
# territories — TODO confirm the labels against the shapefile's row order.
df = df.drop([37,38,44,45,13,27,42])
# Merge the remaining geometries into a single row.
contUSdf = df.dissolve()
# ax = contUSdf.plot(color='white', edgecolor='black')
# Bounding box of the dissolved geometry; find_boundary() reads its columns
# positionally as minx, miny, maxx, maxy.
bound = pd.DataFrame(contUSdf.bounds)

In [3]:
# Return the bounding box (minx, miny, maxx, maxy) of the polygon as a tuple
def find_boundary(index):
    """Return the bounding box held in row ``index`` of the global ``bound`` frame.

    The four values are read positionally from columns 0 to 3 and returned as
    the tuple ``(minx, miny, maxx, maxy)``.
    """
    return tuple(bound.iat[index, column] for column in range(4))

In [4]:
def multiples(n):
    """Return every positive divisor of ``n`` in ascending order.

    Despite the name, the values returned are the divisors (factors) of
    ``n``: each ``i`` in ``1..n`` for which ``n % i == 0``.

    Parameters
    ----------
    n : int
        A positive integer.

    Returns
    -------
    list of int
        The divisors of ``n``, starting at 1 and ending at ``n``.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1, for which the divisor list is undefined
        here.
    """
    if n < 1:
        raise ValueError("multiples() requires a positive integer, got %r" % (n,))
    # Iterate the range directly; no intermediate list of 1..n is needed.
    return [i for i in range(1, n + 1) if n % i == 0]

In [5]:
# Find the number of subdivisions to make along each axis.
# Uses the middle two elements of the list returned by multiples()
# (defined above).
def num_of_divisions(area):
    """Pick the two middle entries of ``multiples(area)`` as grid dimensions.

    With an odd number of entries the single middle value is used for both
    dimensions; with an even number the two central values are used, the
    later one as the width count and the earlier one as the height count.

    Returns
    -------
    tuple
        ``(w_n, h_n)`` - the number of divisions along width and height.
    """
    divisors = multiples(area)
    count = len(divisors)
    mid = count // 2
    width_n = divisors[mid]
    # Odd count: reuse the middle value; even count: take its left neighbour.
    height_n = divisors[mid] if count % 2 == 1 else divisors[mid - 1]
    return width_n, height_n

In [6]:
def len_of_div(bounds, width_n, height_n):
    """Return the size of one grid cell inside a bounding box.

    Parameters
    ----------
    bounds : sequence
        Bounding box indexed as ``(minx, miny, maxx, maxy)``.
    width_n : int
        Number of divisions along the x axis.
    height_n : int
        Number of divisions along the y axis.

    Returns
    -------
    tuple
        ``(cell_width, cell_height)`` in the same units as ``bounds``.
    """
    # width subdivision
    width = bounds[2] - bounds[0]
    length_of_each_div_w = width / width_n
    # height subdivision
    height = bounds[3] - bounds[1]
    length_of_each_div_h = height / height_n
    return length_of_each_div_w, length_of_each_div_h

In [7]:
def coord_of_div(len_div, bounds, num_of_divs):
    """Return the centre coordinates of the grid cells along each axis.

    The first centre on an axis sits half a cell in from the lower bound;
    each following centre is one full cell further along.

    Parameters
    ----------
    len_div : sequence
        ``(cell_width, cell_height)``.
    bounds : sequence
        Bounding box; only ``bounds[0]`` (minx) and ``bounds[1]`` (miny) are read.
    num_of_divs : sequence
        ``(number of cells along x, number of cells along y)``.

    Returns
    -------
    tuple of list
        ``(x_centres, y_centres)``.
    """
    step_w, step_h = len_div[0], len_div[1]
    # formula for first point = a + (1/2)length
    first_w = bounds[0] + (0.5 * step_w)
    first_h = bounds[1] + (0.5 * step_h)
    w = [first_w + (k * step_w) for k in range(num_of_divs[0])]
    h = [first_h + (k * step_h) for k in range(num_of_divs[1])]
    return w, h

In [8]:
def average(data, col):
    """Return the arithmetic mean of column ``col`` of ``data``.

    ``col`` is converted to ``str`` before the lookup and every cell is
    converted to ``float`` before averaging.
    """
    values = [float(cell) for cell in data[str(col)].tolist()]
    return sum(values) / len(values)

In [9]:
def point_dataframe(longitude, latitude):
    """Build the grid of (lon, lat) points and keep those inside the US polygon.

    Every combination of a value from ``longitude`` with a value from
    ``latitude`` becomes one point (longitude varies slowest). The points are
    then filtered with a point-in-polygon test against the geometry in row 0
    of the global ``contUSdf``.

    Parameters
    ----------
    longitude : iterable of float
        x coordinates of the grid columns.
    latitude : iterable of float
        y coordinates of the grid rows.

    Returns
    -------
    geopandas.GeoDataFrame
        Rows of the full grid (columns ``lon``, ``lat``, ``geometry``) whose
        point lies within the polygon; the original row index is preserved.
    """
    # Collect all rows first and create the frame once. Appending one row at
    # a time copies the whole frame on every step, and DataFrame.append was
    # removed in pandas 2.0.
    records = [{'lon': x, 'lat': y} for x in longitude for y in latitude]
    point = pd.DataFrame(records, columns=['lon', 'lat'])

    # gpd.points_from_xy() takes x (longitude) before y (latitude).
    gdf = gpd.GeoDataFrame(point,
            geometry = gpd.points_from_xy(point.lon, point.lat))

    print(gdf)

    ## Check if point is in main polygon
    # pip = 'point in polygon'
    pip_mask = gdf.within(contUSdf.loc[0, 'geometry'])
    pip_data = gdf.loc[pip_mask]

    return pip_data

In [10]:
def split(list,numSplit):
    """Divide ``list`` into ``numSplit`` consecutive chunks via ``np.array_split``.

    Returns the list of NumPy arrays produced by ``np.array_split``.
    """
    return np.array_split(list, numSplit)

In [11]:
# Use the NSRDB API to get weather data for each of the points, as yearly averages.
# Future note: plan on getting monthly averages as well.
def get_data_source1(list_lat, list_lon):
    """Download NSRDB PSM3 data for each point and return per-point averages.

    One CSV request is made to the NREL developer API for every
    ``(list_lat[i], list_lon[i])`` pair (year 2020, 60-minute interval), and
    each requested attribute is averaged over all rows of the response.

    Parameters
    ----------
    list_lat : sequence of float
        Latitudes of the points. One request is made per element.
    list_lon : sequence of float
        Longitudes of the points, paired with ``list_lat`` by position.

    Returns
    -------
    pandas.DataFrame
        One row per point with the columns ``lat``, ``lon``, ``GHI``, ``DHI``,
        ``DNI``, ``Wind Speed``, ``Temperature``, ``Pressure`` and
        ``Relative Humidity``.

    Notes
    -----
    The API key is read from the ``NSRDB_API_KEY`` environment variable when
    it is set. Progress is printed after every request. Any error raised by
    a request propagates and discards the rows collected so far.
    """
    columns = ['lat', 'lon', 'GHI', 'DHI', 'DNI',
               'Wind Speed', 'Temperature', 'Pressure',
               'Relative Humidity']

    # The request settings are the same for every point, so build them once.
    # All values are strings; spaces must be written as '+'.

    # NOTE(review): a key committed to a notebook should be treated as leaked.
    # Set NSRDB_API_KEY in the environment; the literal below is kept only so
    # existing runs keep working and should be rotated and removed.
    api_key = os.environ.get('NSRDB_API_KEY',
                             'D17M1VkGJXM8039FxMliZ1Zkiia0hlaglICc3S7C')
    # Attributes to extract, separated by commas.
    attributes = 'ghi,dhi,dni,wind_speed,air_temperature,surface_pressure,relative_humidity'
    # Year of data.
    year = '2020'
    # 'true' returns leap day data if present, 'false' does not.
    leap_year = 'false'
    # Time interval in minutes. Valid intervals are 30 & 60.
    interval = '60'
    # 'true' uses UTC, 'false' uses the local time zone of the data.
    # NOTE: to use the NSRDB data in SAM, UTC must be 'false'.
    utc = 'true'
    your_name = 'Shrey+Poshiya'
    reason_for_use = 'personal+project'
    your_affiliation = 'Santa+Fe+Preparatory+School'
    your_email = 'shreyposh@gmail.com'
    mailing_list = 'false'

    url_template = (
        'https://developer.nrel.gov/api/solar/nsrdb_psm3_download.csv'
        '?wkt=POINT({lon}%20{lat})&names={year}&leap_day={leap}'
        '&interval={interval}&utc={utc}&full_name={name}&email={email}'
        '&affiliation={affiliation}&mailing_list={mailing_list}'
        '&reason={reason}&api_key={api}&attributes={attr}'
    )

    # Collect plain dicts and build the frame once at the end.
    # DataFrame.append copied the whole frame per row and was removed in
    # pandas 2.0.
    records = []
    total = len(list_lat)
    for i in range(total):
        latitude = list_lat[i]
        longitude = list_lon[i]
        url = url_template.format(year=year, lat=latitude, lon=longitude,
                                  leap=leap_year, interval=interval, utc=utc,
                                  name=your_name, email=your_email,
                                  mailing_list=mailing_list,
                                  affiliation=your_affiliation,
                                  reason=reason_for_use, api=api_key,
                                  attr=attributes)
        # Skip the first two lines of the response so that the third line is
        # used as the column header of the data.
        info = pd.read_csv(url, skiprows=2)

        records.append({'lat': latitude, 'lon': longitude,
                        'GHI': average(info, 'GHI'),
                        'DHI': average(info, 'DHI'),
                        'DNI': average(info, 'DNI'),
                        'Wind Speed': average(info, 'Wind Speed'),
                        'Temperature': average(info, 'Temperature'),
                        'Pressure': average(info, 'Pressure'),
                        'Relative Humidity': average(info, 'Relative Humidity')})
        # Track Progress
        print("Progress:", (i+1), "/", total, "\n", latitude, ",", longitude)

    # Passing the columns keeps their order and yields an empty, correctly
    # labelled frame when there are no points.
    return pd.DataFrame(records, columns=columns)


In [12]:
# Bounding box (minx, miny, maxx, maxy) from row 0 of `bound`, as a list.
l_bounds = list(find_boundary(0))

In [13]:
# Grid dimensions for 10000 cells: the middle divisor of 10000 is 100, so
# this yields a 100 x 100 grid.
num_of_div = list(num_of_divisions(10000))

In [14]:
# Width and height of one grid cell, in the same units as the bounding box.
len_of_each_div = list(len_of_div(l_bounds, num_of_div[0], num_of_div[1]))

In [15]:
# Centre coordinates of the grid cells along each axis: (x values, y values).
coords = coord_of_div(len_of_each_div, l_bounds, num_of_div)

In [16]:
# coords is (x centres, y centres): y values are latitudes, x values are longitudes.
lat = coords[1]
lon = coords[0]

In [17]:
# Build every (lon, lat) grid point and keep only those within the dissolved
# polygon; the function also prints the full, unfiltered grid.
points = point_dataframe(lon,lat)

             lon        lat                     geometry
0    -124.474002  24.647402  POINT (-124.47400 24.64740)
1    -124.474002  24.896015  POINT (-124.47400 24.89601)
2    -124.474002  25.144628  POINT (-124.47400 25.14463)
3    -124.474002  25.393240  POINT (-124.47400 25.39324)
4    -124.474002  25.641853  POINT (-124.47400 25.64185)
...          ...        ...                          ...
9995  -67.238961  48.265601   POINT (-67.23896 48.26560)
9996  -67.238961  48.514214   POINT (-67.23896 48.51421)
9997  -67.238961  48.762826   POINT (-67.23896 48.76283)
9998  -67.238961  49.011439   POINT (-67.23896 49.01144)
9999  -67.238961  49.260052   POINT (-67.23896 49.26005)

[10000 rows x 3 columns]


In [18]:
# Coordinates of the retained points as plain lists, paired by position.
l_lat = points["lat"].tolist()
l_lon = points["lon"].tolist()

In [19]:
# Break both coordinate lists into 10 chunks so the downloads can be run one
# batch at a time; the same split is applied to both so pairs stay aligned.
latSplit = split(l_lat,10)
lonSplit = split(l_lon,10)

In [38]:
# predData1 = get_data_source1(list(latSplit[0]), list(lonSplit[0]))
# predData1.to_csv('PredData_1.csv')

# predData2 = get_data_source1(list(latSplit[1]),list(lonSplit[1]))
# predData2.to_csv('PredData_2.csv')

# predData3 = get_data_source1(list(latSplit[2]),list(lonSplit[2]))
# predData3.to_csv('PredData_3.csv')

# predData4 = get_data_source1(list(latSplit[3]),list(lonSplit[3]))
# predData4.to_csv('PredData_4.csv')

# predData5 = get_data_source1(list(latSplit[4]),list(lonSplit[4]))
# predData5.to_csv('PredData_5.csv')

# predData6 = get_data_source2(list(latSplit[5]),list(lonSplit[5]))
# predData6.to_csv('PredData_6.csv')

# predData7 = get_data_source1(list(latSplit[6]),list(lonSplit[6]))
# predData7.to_csv('PredData_7.csv')

# predData8 = get_data_source1(list(latSplit[7]),list(lonSplit[7]))
# predData8.to_csv('PredData_8.csv')

# lat_SS = split(list(latSplit[8]),2)
# lon_SS = split(list(lonSplit[8]),2)
# predData9 = get_data_source1(list(lat_SS[0]),list(lon_SS[0]))
# # predData9 = get_data_source1(list(latSplit[8]),list(lonSplit[8]))
# predData9.to_csv('PredData_9.csv')

# lat_SS = split(list(latSplit[8]),2)
# lon_SS = split(list(lonSplit[8]),2)
# predData10 = get_data_source1(list(lat_SS[1]),list(lon_SS[1]))
# predData10.to_csv('PredData_10.csv')

# Download the second half of chunk 9 (the last of the 10 chunks) and save it.
# NOTE(review): the output is numbered 12, presumably because chunks 8 and 9
# were each fetched as two halves — confirm against the other PredData_*.csv files.
lat_SS = split(list(latSplit[9]),2)
lon_SS = split(list(lonSplit[9]),2)
predData12 = get_data_source1(list(lat_SS[1]),list(lon_SS[1]))
predData12.to_csv('PredData_12.csv')




Progress: 1 / 284 
 42.54751095 , -77.067200275
Progress: 2 / 284 
 42.796123570000006 , -77.067200275
Progress: 3 / 284 
 43.04473619 , -77.067200275
Progress: 4 / 284 
 34.84051973 , -76.48906854500001
Progress: 5 / 284 
 35.58635759 , -76.48906854500001
Progress: 6 / 284 
 35.83497021 , -76.48906854500001
Progress: 7 / 284 
 36.08358283 , -76.48906854500001
Progress: 8 / 284 
 36.33219545 , -76.48906854500001
Progress: 9 / 284 
 36.58080807 , -76.48906854500001
Progress: 10 / 284 
 36.82942069 , -76.48906854500001
Progress: 11 / 284 
 37.07803331 , -76.48906854500001
Progress: 12 / 284 
 37.32664593 , -76.48906854500001
Progress: 13 / 284 
 37.57525855 , -76.48906854500001
Progress: 14 / 284 
 37.823871170000004 , -76.48906854500001
Progress: 15 / 284 
 38.32109641 , -76.48906854500001
Progress: 16 / 284 
 39.066934270000004 , -76.48906854500001
Progress: 17 / 284 
 39.31554689 , -76.48906854500001
Progress: 18 / 284 
 39.56415951 , -76.48906854500001
Progress: 19 / 284 
 39.8127721

Progress: 160 / 284 
 43.04473619 , -73.02027816500001
Progress: 161 / 284 
 43.29334881 , -73.02027816500001
Progress: 162 / 284 
 43.54196143 , -73.02027816500001
Progress: 163 / 284 
 43.790574050000004 , -73.02027816500001
Progress: 164 / 284 
 44.03918667 , -73.02027816500001
Progress: 165 / 284 
 44.28779929 , -73.02027816500001
Progress: 166 / 284 
 44.53641191 , -73.02027816500001
Progress: 167 / 284 
 44.78502453 , -73.02027816500001
Progress: 168 / 284 
 41.05583523 , -72.442146435
Progress: 169 / 284 
 41.30444785 , -72.442146435
Progress: 170 / 284 
 41.553060470000005 , -72.442146435
Progress: 171 / 284 
 41.80167309 , -72.442146435
Progress: 172 / 284 
 42.05028571 , -72.442146435
Progress: 173 / 284 
 42.29889833 , -72.442146435
Progress: 174 / 284 
 42.54751095 , -72.442146435
Progress: 175 / 284 
 42.796123570000006 , -72.442146435
Progress: 176 / 284 
 43.04473619 , -72.442146435
Progress: 177 / 284 
 43.29334881 , -72.442146435
Progress: 178 / 284 
 43.54196143 , -72