In [7]:
''' This will get only the data from the weather api and load it to the local MongoDB instance '''

import os
import json
import time
# from urllib.parse import quote

from pyowm import OWM
from pyowm.weatherapi25.forecast import Forecast
from pyowm.exceptions.api_response_error import NotFoundError
from pyowm.exceptions.api_call_error import APICallTimeoutError, APIInvalidSSLCertificateError

from pymongo import MongoClient
from pymongo.collection import Collection, ReturnDocument
from pymongo.database import Database
from pymongo.errors import ConnectionFailure, InvalidDocument, DuplicateKeyError, OperationFailure

from config import OWM_API_key as key, port, user, password, socket_path


owm = OWM(key)  # the owm objects for the separate api keys
port = 27017
host = 'localhost'
# password = quote(password)    # url encode the password for the mongodb uri
# uri = "mongodb+srv://%s:%s@%s" % (user, password, socket_path)
# print(uri)

In [5]:
def read_list_from_file(filename):
    """ Read the zip codes list from the csv file.
        
    :param filename: the name of the file
    :type filename: sting
    """
    with open(filename, "r") as z_list:
        return z_list.read().strip().split(',')

def get_data_from_weather_api(owm, zipcode=None, coords=None):
    ''' Handle the API call errors for weatehr and forecast type calls.

    :param owm: the OWM API object
    :type owm: pyowm.OWM
    :param zipcode: the zipcode reference for the API call
    :type zipcode: string
    :param coords: the latitude and longitude coordinates reference for the API call
    :type coords: 2-tuple

    returns: the API data
    '''
    result = None
    tries = 1
    while result is None and tries <= 3:
        try:
            if coords:
                print(f'inside get_data_from... coords={coords} and type={type(coords)}')
                result = owm.three_hours_forecast_at_coords(**coords)
            elif zipcode:
                print(f'inside get_data_from... zip={zipcode} and type={type(zipcode)}')
                result = owm.weather_at_zip_code(zipcode, 'us')
        except APIInvalidSSLCertificateError:
            loc = zipcode or 'lat: {}, lon: {}'.format(str(coords[0]), str(coords[1]))
            print(f'SSL error with {loc} on attempt {tries} ...trying again')
        except APICallTimeoutError:
            loc = zipcode or 'lat: {}, lon: {}'.format(str(coords[0]), str(coords[1]))
            print(f'Timeout error with {loc} on attempt {tries}... waiting 1 second then trying again')
            time.sleep(1)
        tries += 1
    return result

def get_current_weather(code=None, coords=None):
    ''' Get the current weather for the given zipcode or coordinates.

    :param code: the zip code to find weather data about
    :type code: string
    :param coords: the coordinates for the data you want
    :type coords: 2-tuple

    :return: the raw weather object
    :type: json
    '''
    global owm

    result = get_data_from_weather_api(owm, zipcode=code)
    current = json.loads(result.to_JSON()) # the current weather for the given zipcode
    if code:
        current['zipcode'] = code
    if coords:
        current['coords'] = coords
    current['instant'] = 10800*(current['Weather']['reference_time']//10800 + 1)
    return current

def five_day(code=None, coords=None):
    ''' Get each weather forecast for the corrosponding coordinates
    
    :param coords: the latitude and longitude for which that that weather is being forecasted
    :type coords: tuple containing the latitude and logitude for the forecast

    :return five_day: the five day, every three hours, forecast for the zip code
    :type five_day: dict
    '''
    global owm

    Forecast = get_data_from_weather_api(owm, coords=coords).get_forecast()
    forecast = json.loads(Forecast.to_JSON())
    if code:
        forecast['zipcode'] = code
    if coords:
        forecast['coordinates'] = coords
    forecast['instant'] = 10800*(forecast['reception_time']//10800 + 1)
    forecast.pop('Location')
    forecast.pop('interval')
    return forecast

def load(data, client, database, collection):
    ''' Load data to specified database collection. Also checks for a preexisting document with the same instant and zipcode, and updates
    it in the case that there was already one there.

    :param data: the dictionary created from the api calls
    :type data: dict
    :param client: a MongoClient instance
    :type client: pymongo.MongoClient
    :param database: the database to be used
    :type database: str
    :param collection: the database collection to be used
    :type collection: str
    '''
### you need to find out what is in the raw data so that you can find what you want to use ###
    print(type(data))
    filters = {'zipcode':data['zipcode'], 'instant':data['instant']}
    updates = {'$set': data} # use only the weather object from the current weather created from the API call
    try:
        db = Database(client, database)
        col = Collection(db, collection)
        # check to see if there is a document that fits the parameters. If there is, update it, if there isn't, upsert it
        updates = col.find_one_and_update(filters, updates,  upsert=True, return_document=ReturnDocument.AFTER)
#         print(f'inserted {updates}')
        #         loaded = col.update_many(updates)
    except DuplicateKeyError:
        return(f'DuplicateKeyError, could not insert data into {name}.')

if __name__ == '__main__':
    # Try block to deal with the switching back anc forth between computers with different directory names
    try:
        directory = os.path.join(os.environ['HOME'], 'data', 'forcast-forcast')
        filename = os.path.join(directory, 'ETL', 'Extract', 'resources', 'success_zipsNC.csv')
        codes = read_list_from_file(filename)
    except FileNotFoundError:
        directory = os.path.join(os.environ['HOME'], 'data', 'forecast-forecast')
        filename = os.path.join(directory, 'ETL', 'Extract', 'resources', 'success_zipsNC.csv')
        codes = read_list_from_file(filename)
    codes = read_list_from_file(filename)
    num_zips = len(codes)
    n = 0 #For keeping track of the number of API calls made; it has to be limited to a maximum of 60/minute.
    print(f'task began at {time.localtime()}')
    local_client = MongoClient(host=host, port=port)
    for code in codes[:69]:
        start_time = time.now()
        current = get_current_weather(code)
        n+=1
        load(current, local_client, 'test', 'observed')
        coords = current['Location']['coordinates']
        forecasts = five_day(code, coords=coords)
        load(forecasts, local_client, 'test', 'forecasted')
        n+=1
    local_client.close()
    print(f'task ended at {time.localtime()} and processed like {n/2} zipcodes')

caught filenotfounderror, trying forcast-forcast
Got it
task began at time.struct_time(tm_year=2020, tm_mon=3, tm_mday=14, tm_hour=16, tm_min=57, tm_sec=42, tm_wday=5, tm_yday=74, tm_isdst=1)
processing the 0th
inside get_data_from... zip=27006 and type=<class 'str'>
<class 'dict'>
inside get_data_from... coords={'lon': -80.45, 'lat': 36.01} and type=<class 'dict'>
<class 'dict'>
task ended at time.struct_time(tm_year=2020, tm_mon=3, tm_mday=14, tm_hour=16, tm_min=57, tm_sec=42, tm_wday=5, tm_yday=74, tm_isdst=1) and processed like 1 zipcodes


In [12]:
start_time = time.time()
time.sleep(3)
time.time() - start_time

3.005368232727051

In [None]:
# def sort_casts(forecasts, code, client=None):
#     ''' Take the array of forecasts from the five day forecast and sort them into the documents of the instants collection.
        
#     :param forecasts: the forecasts from five_day()-  They come in a list of 40, one for each of every thrid hour, starting 
#         at 0:00, over five days
#     :type forecasts: list- expecting a list of forecasts
#     :param code: the zipcode
#     :type code: string
#     :param client: the mongodb client
#     :type client: MongoClient
#     '''
# #     global host
# #     global port

# #     client = MongoClient(host=host, port=port)
# #     db = client.OWM
#     db = client.test
#     col = db.instant
#     # update each forecast and insert it to the instant document with the matching instant_time and zipcode
#     for forecast in forecasts:
#         # now find the document that has that code and that ref_time
#         # This should find a single instant specified by zip and the forecast ref_time and append the forecast to the forecasts object
#         filter_by_zip_and_inst = {'zipcode': code, 'instant': forecast['instant']}
#         filters = filter_by_zip_and_inst
#         add_forecast_to_instant = {'$push': {'forecasts': forecast}} # append the forecast object to the forecasts list
#         updates = add_forecast_to_instant
#         updated = col.find_one_and_update(filters, updates, upsert=True, return_document=ReturnDocument.AFTER)
        
# def load_to_remote(data, database, collection):
#     ''' Add the observed weather to the corrosponding instant document and load it to the remote database 
        
#     :param data: the dictionary created from the api calls
#     :type data: dict
#     :param database: the datase that the data is supposed to go to
#     :type database: pymong.database.Databse
#     :param collection: the database collection to be used
#     :type collection: pymongo.collection.Collection
#     '''
# ### you need to find out what is in the raw data so that you can find what you want to use ### 
#     filters = {'zipcode':data['zipcode'], 'instant':data['instant']}
#     updates = {'$set': data} # use only the weather object from the current weather created from the API call
#     try:
#         # check to see if there is a document that fits the parameters. If there is, update it, if there isn't, upsert it
#         updates = col.find_one_and_update(filters, updates,  upsert=True, return_document=ReturnDocument.AFTER)
#         col = Collection(database, collection)
#         loaded = col.update_many(updates)
#     except DuplicateKeyError:
#         return(f'DuplicateKeyError, could not insert data into {name}.')
