In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
# datetime for dt conversion
from datetime import datetime

# Import API key
from config import weather_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
cities_data_file = "../Resources/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [3]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []
countries_list = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    country = citipy.nearest_city(lat_lng[0], lat_lng[1]).country_code
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)
        countries_list.append(country)


# Print the city count to confirm sufficient count
len(cities)

606

In [4]:
cities

['vaini',
 'pavda',
 'saint-augustin',
 'mar del plata',
 'clyde river',
 'half moon bay',
 'barrow',
 'bilibino',
 'norman wells',
 'busselton',
 'saskylakh',
 'tambul',
 'ribeira grande',
 'zyryanka',
 'sandwick',
 'bambous virieux',
 'sterling',
 'taolanaro',
 'albany',
 'leningradskiy',
 'verkhniy mamon',
 'cockburn town',
 'rikitea',
 'tasiilaq',
 'whitehorse',
 'isangel',
 'kodiak',
 'ust-kan',
 'fortuna',
 'bumba',
 'kardla',
 'lompoc',
 'east london',
 'carnarvon',
 'arraial do cabo',
 'hermanus',
 'tiksi',
 'adrar',
 'mataura',
 'puerto ayora',
 'nemuro',
 'tukrah',
 'faanui',
 'tsihombe',
 'ushuaia',
 'mount isa',
 'pirenopolis',
 'mincivan',
 'bredasdorp',
 'vostok',
 'koulamoutou',
 'hithadhoo',
 'chuy',
 'thompson',
 'cherskiy',
 'alepe',
 'lagoa',
 'port alfred',
 'necochea',
 'belushya guba',
 'ponta delgada',
 'torbay',
 'pringsewu',
 'hobart',
 'kapaa',
 'pevek',
 'poum',
 'bluff',
 'asau',
 'butaritari',
 'tarakan',
 'lolua',
 'batemans bay',
 'nuuk',
 'castro',
 'atu

In [5]:
countries_list

['to',
 'ru',
 'ca',
 'ar',
 'ca',
 'us',
 'us',
 'ru',
 'ca',
 'au',
 'ru',
 'sd',
 'pt',
 'ru',
 'gb',
 'mu',
 'us',
 'mg',
 'au',
 'ru',
 'ru',
 'bs',
 'pf',
 'gl',
 'ca',
 'vu',
 'us',
 'ru',
 'us',
 'cd',
 'ee',
 'us',
 'za',
 'au',
 'br',
 'za',
 'ru',
 'dz',
 'pf',
 'ec',
 'jp',
 'ly',
 'pf',
 'mg',
 'ar',
 'au',
 'br',
 'az',
 'za',
 'ru',
 'ga',
 'mv',
 'uy',
 'ca',
 'ru',
 'ci',
 'pt',
 'za',
 'ar',
 'ru',
 'pt',
 'ca',
 'id',
 'au',
 'us',
 'ru',
 'nc',
 'nz',
 'tv',
 'ki',
 'id',
 'tv',
 'au',
 'gl',
 'cl',
 'pf',
 'cv',
 'ir',
 'gl',
 'cd',
 'br',
 'sj',
 'sh',
 'vn',
 'es',
 'ca',
 'cd',
 'ru',
 'au',
 'th',
 'ma',
 'za',
 'us',
 'nz',
 'nc',
 'br',
 'in',
 'br',
 'ml',
 'id',
 'ru',
 'us',
 'gw',
 'ru',
 'tr',
 'br',
 'ru',
 'gl',
 'br',
 'ca',
 'gl',
 'ca',
 'gf',
 'sa',
 'cn',
 'ru',
 'ca',
 'cv',
 'jp',
 'ca',
 'za',
 'mm',
 'cn',
 'mx',
 'ru',
 'pf',
 'cv',
 'us',
 'py',
 'us',
 'sd',
 'ru',
 'ru',
 'sd',
 'gb',
 'in',
 'mx',
 'gl',
 'sc',
 'us',
 'jm',
 'br',
 'au',

In [6]:
# Combine the two lists into a list of tuples using the zip function
city_country = zip(cities,countries_list)        
        
list(city_country)

[('vaini', 'to'),
 ('pavda', 'ru'),
 ('saint-augustin', 'ca'),
 ('mar del plata', 'ar'),
 ('clyde river', 'ca'),
 ('half moon bay', 'us'),
 ('barrow', 'us'),
 ('bilibino', 'ru'),
 ('norman wells', 'ca'),
 ('busselton', 'au'),
 ('saskylakh', 'ru'),
 ('tambul', 'sd'),
 ('ribeira grande', 'pt'),
 ('zyryanka', 'ru'),
 ('sandwick', 'gb'),
 ('bambous virieux', 'mu'),
 ('sterling', 'us'),
 ('taolanaro', 'mg'),
 ('albany', 'au'),
 ('leningradskiy', 'ru'),
 ('verkhniy mamon', 'ru'),
 ('cockburn town', 'bs'),
 ('rikitea', 'pf'),
 ('tasiilaq', 'gl'),
 ('whitehorse', 'ca'),
 ('isangel', 'vu'),
 ('kodiak', 'us'),
 ('ust-kan', 'ru'),
 ('fortuna', 'us'),
 ('bumba', 'cd'),
 ('kardla', 'ee'),
 ('lompoc', 'us'),
 ('east london', 'za'),
 ('carnarvon', 'au'),
 ('arraial do cabo', 'br'),
 ('hermanus', 'za'),
 ('tiksi', 'ru'),
 ('adrar', 'dz'),
 ('mataura', 'pf'),
 ('puerto ayora', 'ec'),
 ('nemuro', 'jp'),
 ('tukrah', 'ly'),
 ('faanui', 'pf'),
 ('tsihombe', 'mg'),
 ('ushuaia', 'ar'),
 ('mount isa', 'au'),


In [7]:
# Save config information.
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "metric"

# Build partial query URL
query_url = f"{url}appid={weather_key}&units={units}&q="

# See what the links lay out looks like
city_url_test = f"{url}appid={weather_key}&units={units}&q=mataura"
city_country_url_test = f"{url}appid={weather_key}&units={units}&q=jamestown"
city_country_url_test = f"{url}appid={weather_key}&units={units}&q=tumbes,pe"

# print(city_country_url_test)



In [8]:
# Open file to write results of API calls - success or not into a txt file, as well as all data pulled into a csv
success_city_list = open('weatherAPI_printlog.txt', 'w')
city_data_outcome = open('weatherAPI_data.csv', 'w')

In [28]:

# set up lists to hold reponse info
# in ['coord']
lon = []
lat = []
# in ['weather']
weather_id = []
main_weather = []
weather_description = []
weather_icon = []
# in ['base']
base_base = []
# in ['main]
main_temp = []
main_feels_like = []
main_temp_min = []
main_temp_max = []
main_pressure = []
main_humidity = []
# in ['visibility']
visibility_visibility = []
# in ['wind']
wind_speed = []
wind_deg = []
# in ['clouds']
clouds_all = []
# in ['dt']
dt_weather_data = []
# in ['sys']
sys_type = []
sys_id = []
sys_country = []
sys_sunrise = []
sys_sunset = []
# in ['timezone']
timezone_timezone = []
# in ['id']
city_id = []
# in ['name']
city_name = []
# in ['cod']
cod_cod = []

number = []

# Loop through the list of cities and perform a request for data on each
for city in cities:
    response = requests.get(query_url + city).json()

    # to get the number of city in for loop
    number.append(city)
    number_of_city = len(number)
    try:
        # first dict
        lon.append(response['coord']['lon'])
        lon_only = response['coord']['lon']
        lat.append(response['coord']['lat'])
        lat_only = response['coord']['lat']


        # second dict
        # weather_id.append(response['weather'][0]['id'])
        # main_weather.append(response['weather'][0]['main'])
        # weather_description.append(response['weather'][0]['description'])
        # weather_icon.append(response['weather'][0]['icon'])

        # third dict
        # base_base.append(response['base'])

        # fourth dict
        main_temp.append(response['main']['temp'])
        main_temp_only = response['main']['temp']
        # main_feels_like.append(response['main']['feels_like'])
        main_temp_min.append(response['main']['temp_min'])
        main_temp_min_only = response['main']['temp_min']
        main_temp_max.append(response['main']['temp_max'])
        main_temp_max_only = response['main']['temp_max']
        # main_pressure.append(response['main']['pressure'])
        main_humidity.append(response['main']['humidity'])
        main_humidity_only = response['main']['humidity']

        # fifth dict
        # visibility_visibility.append(response['visibility'])

        # sixth dict
        wind_speed.append(response['wind']['speed'])
        wind_speed_only = response['wind']['speed']
        # wind_deg.append(response['wind']['deg'])

        # seventh dict
        clouds_all.append(response['clouds']['all'])
        clouds_all_only = response['clouds']['all']
        # eight dict
        dt_weather_date = (datetime.fromtimestamp(response['dt'])).strftime('%m/%d/%y')
        dt_weather_data.append(dt_weather_date)

        # ninth dict
        # sys_type.append(response['sys']['type'])
        # sys_id.append(response['sys']['id'])
        sys_country.append(response['sys']['country'])
        sys_country_only = response['sys']['country']
        # sys_sunrise.append(response['sys']['sunrise'])
        # sys_sunset.append(response['sys']['sunset'])

        # tenth dict
        # timezone_timezone.append(response['timezone'])

        # eleventh dict
        city_id.append(response['id'])
        city_id_only = int(response['id'])

        # twelveth dict
        city_name.append(response['name'])
        city_name_only = response['name']

        # thirteen dict
        # cod_cod.append(response['cod'])
        
        
            # Output line to terminal to mark progress and store it to a print log text file
        print(f"{number_of_city}. {city_name_only} with city ID:{city_id_only} was found in the Weathermap API\n")
        success_city_list.write(f"Success found: {city_name_only}  City ID:{city_id_only} \n")
        
        # Output all retreived values to a csv file

        city_data_outcome.write(f"The outcome data of {city_name_only}, {sys_country_only}: Lat & Lon --> {lat_only},{lon_only} Weather Date --> {dt_weather_date} Weather --> Temperature, Max Temp and Min Temp: {main_temp_only} & {main_temp_max_only} & {main_temp_min_only} Humidity: {main_humidity_only} Clouds: {clouds_all_only} Wind Speed: {wind_speed_only}\n")
        # city_data_outcome.write(f"{city_id},{city_name},{sys_country},{lat},{lon},{dt_weather_date},\
        #     {temperature},{humidity_val},{cloudiness_val},{windSpeed}\n")


    # Handle exceptions for a character that is not available 
    except:
        success_city_list.write(f"{number_of_city}. {city} was not found. \n")
        print(" - " * 10)
        print(f"{number_of_city}. {city} was not found" )
        print(" - " * 10)

        # Append null values so lists are all the same length (later we will remove nulls from dataframe)
        city_id.append(None)
        city_name.append(None)
        sys_country.append(None)
        lat.append(None)
        lon.append(None)
        dt_weather_data.append(None)
        main_temp.append(None)
        main_temp_max.append(None)
        main_temp_min.append(None)
        main_humidity.append(None)
        clouds_all.append(None)
        wind_speed.append(None)
        #keep code going
        pass


# After getting data in a for loop... we want to make sure our text file's do not change... basically convert our text files to raw data
success_city_list.close()
city_data_outcome.close()


1. Vaini with city ID:4032243 was found in the Weathermap API

 -  -  -  -  -  -  -  -  -  - 
2. pavda was not found
 -  -  -  -  -  -  -  -  -  - 
3. Saint-Augustin with city ID:6137462 was found in the Weathermap API

4. Mar del Plata with city ID:3430863 was found in the Weathermap API

5. Clyde River with city ID:5924351 was found in the Weathermap API

6. Half Moon Bay with city ID:5354943 was found in the Weathermap API

7. Barrow with city ID:5880054 was found in the Weathermap API

8. Bilibino with city ID:2126682 was found in the Weathermap API

9. Norman Wells with city ID:6089245 was found in the Weathermap API

10. Busselton with city ID:2075265 was found in the Weathermap API

11. Saskylakh with city ID:2017155 was found in the Weathermap API

 -  -  -  -  -  -  -  -  -  - 
12. tambul was not found
 -  -  -  -  -  -  -  -  -  - 
13. Ribeira Grande with city ID:3372707 was found in the Weathermap API

14. Zyryanka with city ID:2119283 was found in the Weathermap API

15. Sa

KeyboardInterrupt: 

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [29]:
# create a dictionary from the lists that we created above
weather_dict = {
    "city_id": city_id,
    "city_name": city_name,
    "country": sys_country,
    "latitude": lat,
    "longitude": lon,
    "weather_date": dt_weather_date,
    "temp": main_temp,
    "temp max": main_temp_max,
    "temp min": main_temp_min, 
    "humidity": main_humidity,
    "cloudiness": clouds_all,
    "wind_speed": wind_speed
}

# Use the dictionary to create a dataframe named weather_data
weather_data = pd.DataFrame(weather_dict)
weather_data.head()

Unnamed: 0,city_id,city_name,country,latitude,longitude,weather_date,temp,temp max,temp min,humidity,cloudiness,wind_speed
0,4032243.0,Vaini,TO,-21.2,-175.2,11/11/21,29.09,29.09,29.09,79.0,75.0,3.6
1,,,,,,11/11/21,,,,,,
2,6137462.0,Saint-Augustin,CA,51.226,-58.6502,11/11/21,-1.23,-1.23,-1.23,73.0,11.0,5.8
3,3430863.0,Mar del Plata,AR,-38.0023,-57.5575,11/11/21,17.1,17.27,16.01,81.0,0.0,8.05
4,5924351.0,Clyde River,CA,70.4692,-68.5914,11/11/21,-17.2,-17.2,-17.2,95.0,71.0,3.91


In [30]:
weather_data.count()

city_id         169
city_name       169
country         169
latitude        169
longitude       169
weather_date    187
temp            169
temp max        169
temp min        169
humidity        169
cloudiness      169
wind_speed      169
dtype: int64

In [31]:
weather_data_clean = weather_data.dropna()
weather_data_clean.count()

city_id         169
city_name       169
country         169
latitude        169
longitude       169
weather_date    169
temp            169
temp max        169
temp min        169
humidity        169
cloudiness      169
wind_speed      169
dtype: int64