In [1]:
import os
import time

import geopandas as gpd
import pandas as pd
import psycopg2
import requests
from shapely.geometry import Point
from sqlalchemy import create_engine, text
from sqlalchemy.exc import SQLAlchemyError

In [2]:
#Destination file for the exported station temperature table.
#Set the TEMPERATURE_CSV_PATH environment variable to write somewhere else;
#when it is not set, the original hard-coded location is used.
temp_csv_path = os.environ.get('TEMPERATURE_CSV_PATH', r'C:\Users\tjjoh\OneDrive\Desktop\GIS 5572\Lab2\temperature.csv')

In [3]:
#NOAA Climate Data Online (CDO) v2 endpoint used to list stations
url = r'https://www.ncei.noaa.gov/cdo-web/api/v2/stations'
params = {
    'locationid': 'FIPS:27', #Minnesota is 27
    'datasetid': 'GHCND', #Global Historical Climatology Network Daily
    'limit': 1000 #Maximum results per request
}

#The API token is a secret, so it is read from the NOAA_CDO_TOKEN environment
#variable instead of being committed with the notebook.
headers = {'token': os.environ.get('NOAA_CDO_TOKEN', '')} #API token
if not headers['token']:
    print('NOAA_CDO_TOKEN is not set; set it before calling the NOAA API.')

#Filled by the pagination cell with one dict per station
stations = []

In [4]:
#Get a list of all stations in Minnesota
#Each call returns at most params['limit'] records, so pages are requested
#(advancing 'offset') until a short or empty page shows the list is complete.
offset = 0 #Pagination will be necessary to retrieve all the data

while True:
    params['offset'] = offset #set the offset parameter

    try:
        #timeout keeps a stalled connection from hanging the notebook forever
        response = requests.get(url, params = params, headers = headers, timeout = 60) #make API request
    except requests.RequestException as error:
        #no response object exists when the request itself fails, so report the exception
        print(f'Error retriving data: {error}')
        break

    if not response.ok: #HTTP error status: stop instead of treating the body as data
        print(f'Error retriving data: {response.status_code}')
        break

    try:
        data = response.json() #transform to json
    except ValueError: #body was not valid JSON
        print(f'Error retriving data: {response.status_code}')
        break

    page = data.get('results') if isinstance(data, dict) else None
    if not page: #no (more) results
        break

    stations.extend(page)

    if len(page) < params['limit']: #check if all data has been retrieved
        break

    offset = offset + params['limit'] #update offset

In [5]:
#Point the request settings at the CDO 'data' endpoint.
#Both url and params replace the values used for the station listing.
url = r'https://www.ncei.noaa.gov/cdo-web/api/v2/data'
params = dict(
    datasetid = 'NORMAL_MLY', #presumably the monthly climate normals dataset; verify against the API docs
    startdate = '2010-01-01',
    enddate = '2010-12-01',
    units = 'standard',
    limit = 1000,
)

In [6]:
#Download the 'MLY-TAVG-NORMAL' values for every station and collect the
#stations that return a complete set (4 station fields + 12 values = 16 items).
#Rows are gathered in a list and turned into a DataFrame once at the end,
#which avoids re-copying the whole table on every iteration.
temperature_rows = []

for i in range(2189, len(stations)): #the first 2188 give errors
    attempt = 1 #create a counter for how many time to attempt to retrieve data
    max_attempts = 5 #maximum number of attempts
    success = False #becomes True once this station needs no further attempts

    while attempt <= max_attempts and not success: #try each station up to max_attempts times
        retry_reason = None #set when this attempt should be repeated

        try:
            params['stationid'] = stations[i]['id'] #add station id to url parameters
            #timeout keeps a stalled connection from hanging the loop
            response = requests.get(url, params = params, headers = headers, timeout = 60) #retrieve data

            if response.status_code == 503: #service unavailable: worth retrying
                retry_reason = '503'
            elif response.status_code != 200: #any other non-OK status: skip this station
                print(f'{i} Error: {response.status_code}')
                success = True
            else:
                data = response.json() #convert to json

                temp = [stations[i]['id'], stations[i]['latitude'], stations[i]['longitude'], stations[i]['name']] #extract station id, location, and name
                #a response without 'results' simply contributes no values
                #values are kept in the order the API returns them; the later column
                #naming assumes January..December -- TODO confirm the API ordering
                temp.extend(record['value'] for record in data.get('results', [])
                            if record['datatype'] == 'MLY-TAVG-NORMAL') #find desired dataset

                if len(temp) == 16: #check to see if temp is the proper length
                    temperature_rows.append(temp) #keep the station if the proper data exists
                    print(f'{i} Success')
                else:
                    print(f'{i} Error: temp_length = {len(temp)}')

                success = True #api call was succesful

        except (KeyError, TypeError, ValueError, AttributeError):
            #malformed JSON or an unexpected record layout: skip this station
            print(f'{i} Error: unknown')
            success = True
        except requests.RequestException as error:
            #connection problems and timeouts are treated like a 503 and retried
            retry_reason = str(error)

        if retry_reason is not None:
            attempt += 1
            if attempt <= max_attempts: #if attempts remain, try api request again
                print(f'{i} Error: {retry_reason}, Retrying...')
            else: #if no attempts remain, skip point
                print(f'{i} Error: {retry_reason}, Failed to retrieve data.')

        time.sleep(0.5) #pause between requests

temperature = pd.DataFrame(temperature_rows) #one row per station with complete data

2189 Success
2190 Success
2191 Success
2192 Success
2193 Error: temp_length = 4
2194 Error: temp_length = 4
2195 Error: temp_length = 4
2196 Error: temp_length = 4
2197 Error: temp_length = 4
2198 Error: temp_length = 4
2199 Error: temp_length = 4
2200 Success
2201 Success
2202 Error: temp_length = 4
2203 Success
2204 Success
2205 Error: temp_length = 4
2206 Success
2207 Success
2208 Error: temp_length = 4
2209 Success
2210 Error: temp_length = 4
2211 Error: temp_length = 4
2212 Error: temp_length = 4
2213 Error: temp_length = 4
2214 Success
2215 Error: temp_length = 4
2216 Success
2217 Success
2218 Success
2219 Error: temp_length = 4
2220 Error: temp_length = 4
2221 Error: temp_length = 4
2222 Error: temp_length = 4
2223 Error: temp_length = 4
2224 Success
2225 Error: temp_length = 4
2226 Error: temp_length = 4
2227 Error: temp_length = 4
2228 Error: temp_length = 4
2229 Error: temp_length = 4
2230 Error: temp_length = 4
2231 Success
2232 Error: 503, Retrying...
2232 Success
2233 Erro

In [7]:
#Name the 16 positional columns: four station fields followed by the twelve months
temperature.columns = (
    ['ID', 'Latitude', 'Longitude', 'Name']
    + ['January', 'February', 'March', 'April', 'May', 'June']
    + ['July', 'August', 'September', 'October', 'November', 'December']
)

In [8]:
# Build one shapely Point per station; each pair is passed as (longitude, latitude)
geometry = list(map(Point, zip(temperature['Longitude'], temperature['Latitude'])))

# Wrap the station table in a GeoDataFrame that uses those points as its geometry
temperature_gdf = gpd.GeoDataFrame(temperature, geometry = geometry)

# Tag the geometry with WGS84 (EPSG:4326); as the last expression of the cell,
# the value returned by this call is what the notebook displays
temperature_gdf.set_crs(epsg = 4326, inplace = True)

Unnamed: 0,ID,Latitude,Longitude,Name,January,February,March,April,May,June,July,August,September,October,November,December,geometry
0,GHCND:USC00210018,47.299100,-96.516100,"ADA, MN US",6.8,11.8,25.7,43.1,55.8,65.3,69.8,68.4,58.1,44.2,27.5,12.4,POINT (-96.51610 47.29910)
1,GHCND:USC00210050,48.300500,-95.981600,"AGASSIZ REFUGE, MN US",5.9,11.7,25.7,42.9,56.3,65.3,69.9,67.9,58.0,44.0,26.6,10.8,POINT (-95.98160 48.30050)
2,GHCND:USC00210059,46.525700,-93.667400,"AITKIN 2 E, MN US",9.9,14.7,26.8,41.8,53.4,62.8,67.5,65.2,56.3,44.2,29.3,15.2,POINT (-93.66740 46.52570)
3,GHCND:USC00210075,43.606400,-93.301900,"ALBERT LEA 3 SE, MN US",14.1,18.9,31.2,45.8,58.0,68.0,71.9,69.5,60.9,47.9,33.2,18.5,POINT (-93.30190 43.60640)
4,GHCND:USC00210157,43.883500,-94.166500,"AMBOY, MN US",15.7,20.8,33.1,47.5,60.1,69.8,73.0,70.3,62.4,49.9,34.4,19.7,POINT (-94.16650 43.88350)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
188,GHCND:USW00094960,45.062220,-93.351070,"MINNEAPOLIS CRYSTAL AIRPORT, MN US",15.2,20.3,32.1,46.8,58.4,68.2,72.7,71.3,61.2,48.4,33.2,19.5,POINT (-93.35107 45.06222)
189,GHCND:USW00094961,48.726060,-94.612160,"BAUDETTE INTERNATIONAL AIRPORT, MN US",5.4,10.7,23.7,40.2,52.5,61.8,66.4,64.3,54.7,41.9,25.9,10.3,POINT (-94.61216 48.72606)
190,GHCND:USW00094963,44.832140,-93.470510,"MINNEAPOLIS FLYING CLOUD AIRPORT, MN US",15.4,20.3,32.2,47.2,59.3,69.0,73.4,70.9,61.9,48.9,33.9,19.5,POINT (-93.47051 44.83214)
191,GHCND:USW00094967,46.899670,-95.066820,"PARK RAPIDS MUNICIPAL AIRPORT, MN US",6.8,12.4,25.7,41.2,53.8,62.6,67.6,65.7,56.0,42.6,26.3,11.6,POINT (-95.06682 46.89967)


In [9]:
#Save the station table (geometry column included) as CSV, without the index
temperature_gdf.to_csv(path_or_buf = temp_csv_path, index = False)

In [10]:
#Optional shortcut, left disabled: rebuild temperature_gdf from the CSV at
#temp_csv_path instead of re-running the API download. Uncomment the three
#lines below to use it.
#temperature = pd.read_csv(temp_csv_path)
#temperature['geometry'] = [Point(xy) for xy in zip(temperature['Longitude'], temperature['Latitude'])]
#temperature_gdf = gpd.GeoDataFrame(temperature, geometry = 'geometry', crs = 'EPSG:4326')

In [11]:
#Bounding box used as a plausibility check on station coordinates
min_latitude, max_latitude = 43, 49.5
min_longitude, max_longitude = -97.5, -89

#Rows with a coordinate strictly outside the box
invalid_geom = temperature_gdf[
    temperature_gdf['Latitude'].lt(min_latitude)
    | temperature_gdf['Latitude'].gt(max_latitude)
    | temperature_gdf['Longitude'].lt(min_longitude)
    | temperature_gdf['Longitude'].gt(max_longitude)
]

if not invalid_geom.empty:
    #Report the offending rows, then keep only rows inside the box (edges included)
    print('Invalid points:')
    print(invalid_geom)
    temperature_gdf = temperature_gdf[
        temperature_gdf['Latitude'].between(min_latitude, max_latitude)
        & temperature_gdf['Longitude'].between(min_longitude, max_longitude)
    ]
else:
    print('All geometry is valid')

All geometry is valid


In [12]:
#Sanity check on the seasonal pattern: drop any station where some cold-month
#value is greater than or equal to some warm-month value.
warm_months = ['May', 'June', 'July', 'August', 'September']
cold_months = ['December', 'January', 'February', 'March']

#"any cold month >= any warm month" is the same as "warmest cold month >=
#coolest warm month", which is computed column-wise instead of row by row.
#Missing values are skipped by max/min, matching the pairwise comparison
#(a comparison with NaN is never True). Also works on an empty table.
warmest_cold_month = temperature_gdf[cold_months].max(axis = 1)
coolest_warm_month = temperature_gdf[warm_months].min(axis = 1)
invalid_temps = warmest_cold_month >= coolest_warm_month #boolean Series, True = implausible station

temperature_gdf = temperature_gdf[~invalid_temps]

In [13]:
#Create a new database
#root database
db_string = 'postgresql+psycopg2://<user>:<password>@34.133.43.30:5432/postgres'

#create a SQLAlchemy engine
#AUTOCOMMIT is required because PostgreSQL refuses to run CREATE DATABASE
#inside a transaction block
engine = create_engine(db_string, isolation_level = 'AUTOCOMMIT')

#name of new database
new_db_name = 'lab2'

#SQL query to create the database
create_db_query = text(f"CREATE DATABASE {new_db_name};")

#SQL query to check whether the database is already there, so that
#"already exists" is only reported when it is actually true
db_exists_query = text("SELECT 1 FROM pg_database WHERE datname = :name")

try:
    with engine.connect() as connection:
        already_exists = connection.execute(db_exists_query, {'name': new_db_name}).scalar() is not None
        if already_exists:
            print(f"Database '{new_db_name}' already exists.")
        else:
            # Create the new database
            connection.execute(create_db_query)
            print(f"Database '{new_db_name}' created successfully.")
except SQLAlchemyError as error:
    #connection, authentication and permission failures are reported as what they are
    print(f"Could not create database '{new_db_name}': {error}")

Database 'lab2' already exists.


In [14]:
#Destination table for the station temperatures
table_name = "mn_temperature"

#Open an engine on the new database (replaces the engine bound to 'postgres')
connection_string = f'postgresql://<user>:<password>@34.133.43.30:5432/{new_db_name}'
engine = create_engine(connection_string)

#One-time PostGIS setup, kept for reference (left disabled):
#enable_postgis_query = text("""
#CREATE EXTENSION postgis;
#CREATE EXTENSION postgis_topology;
#""")  #adding PostGIS extensions

#with engine.connect() as new_connection:
#    #enable PostGIS
#    new_connection.execution_options(autocommit=True).execute(enable_postgis_query)
#    print(f"PostGIS enabled in database '{new_db_name}'.")

#Write the GeoDataFrame to PostGIS, replacing the table if it is already present
temperature_gdf.to_postgis(name = table_name, con = engine, if_exists = "replace", index = False)

print(f"GeoDataFrame successfully pushed to the PostGIS table '{table_name}'.")

GeoDataFrame successfully pushed to the PostGIS table 'mn_temperature'.
