# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [255]:
# Select matplotlib's "notebook" backend for every figure drawn below
%matplotlib notebook

In [256]:
# Dependencies and Setup
# Standard library
import os
import time
from datetime import datetime

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

# Import Openweather
import openweathermapy.core as owm

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# progress bar
from tqdm import tqdm_notebook

# Import API key
from api_keys import api_key

# Output file (CSV) that receives the retrieved city weather table
output_data_file = "output_data/cities.csv"
# Output images, one per plot: latitude vs temperature, humidity, cloudiness, wind speed
fig_file1 = "plot/lat_temp.png"
fig_file2 = "plot/lat_humidity.png"
fig_file3 = "plot/lat_cloud.png"
fig_file4 = "plot/lat_wind.png"

# Range of latitudes and longitudes as (min, max) tuples
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [257]:
# Draw 1500 uniformly random (lat, lng) pairs over the configured ranges
# (lat_range / lng_range from the setup cell, instead of repeating the literals)
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each pair, keeping only the first occurrence.
# `seen_cities` gives constant-time membership tests; `cities` keeps the order.
cities = []
seen_cities = set()
for rand_lat, rand_lng in lat_lngs:
    city = citipy.nearest_city(rand_lat, rand_lng).city_name
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

616

In [258]:
# Peek at the first 15 city names
cities[:15]

['barentsburg',
 'barma',
 'pevek',
 'isangel',
 'attawapiskat',
 'port elizabeth',
 'busselton',
 'jardim',
 'east london',
 'sentyabrskiy',
 'ushuaia',
 'lagoa',
 'hilo',
 'panzhihua',
 'mahebourg']

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [259]:
# Wrap the city names in a one-column DataFrame for the retrieval loop.
# The retrieval accumulators (city_weather, counters, ...) are initialised in
# the retrieval cell itself, so they are not duplicated here.
cities_df = pd.DataFrame(cities, columns=['city'])

cities_df.head()


Unnamed: 0,city
0,barentsburg
1,barma
2,pevek
3,isangel
4,attawapiskat


In [260]:
# Accumulators filled by the retrieval loop
city_weather = []            # raw API response (dict) for every city found
store_index  = []            # not used by the loop; kept so the name stays defined
store_cityName = []          # not used by the loop; kept so the name stays defined
store_city_not_found = []    # names of the cities whose lookup failed
count_city_not_found = 0     # count cities not found
count_city_found = 0         # count cities found

# Query parameters forwarded to every API call.
# NOTE(review): `settings` was not defined anywhere in the notebook (it only
# existed as leftover kernel state). Metric units are assumed because the
# recorded responses hold Celsius-scale temperatures -- confirm.
settings = {"units": "metric", "appid": api_key}

# Cities are processed in sets of `step`, pausing between sets to avoid pulling
# too much data from the API and getting the key blocked
ncity = len(cities)            # total number of cities
step  = 50                     # number of cities per set
nloop = int(ncity/step) + 1    # total number of sets (informational only)
iset = 0                       # zero-based index of the current set

print("Beginning Data Retrieval ")
print("-----------------------------")

for index, city in tqdm_notebook(list(enumerate(cities_df['city']))):  # progress bar
    # A new set starts at every multiple of `step` after the very first record.
    # The previous test `index > step & 0 == (index%step)` used bitwise `&`,
    # which binds tighter than the comparisons and only worked by accident.
    if index > 0 and index % step == 0:
        iset += 1
        time.sleep(15)   # Pause to avoid pulling too much data

    try:
        response = owm.get_current(city, **settings)
    except Exception:
        # Best effort: log the failed lookup and carry on with the next city.
        # `Exception` (not a bare except) lets Ctrl-C interrupt the long loop.
        store_city_not_found.append(city)
        count_city_not_found += 1
        print(f"{city} not found")
    else:
        city_weather.append(response)
        count_city_found += 1
        # The record number is the position inside the current set (0-based)
        print(f"Processing Record {index % step} of set {iset+1} | {city}")

# Print ending message
print("-----------------------------")
print("Data Retrieval Complete")
print("-----------------------------")


HBox(children=(IntProgress(value=0, max=616), HTML(value='')))

Beginning Data Retrieval 
-----------------------------
barentsburg not found
Processing Record 1 of set 1 | barma
Processing Record 2 of set 1 | pevek
Processing Record 3 of set 1 | isangel
attawapiskat not found
Processing Record 5 of set 1 | port elizabeth
Processing Record 6 of set 1 | busselton
Processing Record 7 of set 1 | jardim
Processing Record 8 of set 1 | east london
sentyabrskiy not found
Processing Record 10 of set 1 | ushuaia
Processing Record 11 of set 1 | lagoa
Processing Record 12 of set 1 | hilo
Processing Record 13 of set 1 | panzhihua
Processing Record 14 of set 1 | mahebourg
Processing Record 15 of set 1 | hearst
Processing Record 16 of set 1 | urucara
Processing Record 17 of set 1 | caravelas
Processing Record 18 of set 1 | san patricio
Processing Record 19 of set 1 | thompson
Processing Record 20 of set 1 | bourail
Processing Record 21 of set 1 | carutapera
Processing Record 22 of set 1 | ostersund
Processing Record 23 of set 1 | tuktoyaktuk
Processing Record 24

Processing Record 8 of set 5 | bubaque
Processing Record 9 of set 5 | key west
Processing Record 10 of set 5 | kodiak
Processing Record 11 of set 5 | paso de los toros
Processing Record 12 of set 5 | loandjili
Processing Record 13 of set 5 | hami
Processing Record 14 of set 5 | tiznit
Processing Record 15 of set 5 | saldanha
taolanaro not found
Processing Record 17 of set 5 | neepawa
Processing Record 18 of set 5 | buriti dos lopes
Processing Record 19 of set 5 | boa vista
tokzar not found
Processing Record 21 of set 5 | ancud
Processing Record 22 of set 5 | quang ngai
Processing Record 23 of set 5 | paamiut
arrecife not found
Processing Record 25 of set 5 | teguldet
liminangcong not found
Processing Record 27 of set 5 | candelaria
galiwinku not found
Processing Record 29 of set 5 | tuatapere
Processing Record 30 of set 5 | rosario
Processing Record 31 of set 5 | iiyama
Processing Record 32 of set 5 | mogadishu
Processing Record 33 of set 5 | santiago de cao
Processing Record 34 of set

Processing Record 21 of set 9 | sao felix do xingu
Processing Record 22 of set 9 | saint-francois
Processing Record 23 of set 9 | ewa beach
Processing Record 24 of set 9 | raduzhnyy
palkaka not found
Processing Record 26 of set 9 | pochutla
Processing Record 27 of set 9 | husavik
Processing Record 28 of set 9 | manzanillo
chardara not found
Processing Record 30 of set 9 | uvalde
Processing Record 31 of set 9 | myitkyina
Processing Record 32 of set 9 | uchiza
acarau not found
Processing Record 34 of set 9 | nizhniy kuranakh
Processing Record 35 of set 9 | zhanaozen
Processing Record 36 of set 9 | lander
Processing Record 37 of set 9 | isilkul
Processing Record 38 of set 9 | olavarria
Processing Record 39 of set 9 | wasilla
Processing Record 40 of set 9 | harindanga
Processing Record 41 of set 9 | gari
Processing Record 42 of set 9 | minab
Processing Record 43 of set 9 | tooele
Processing Record 44 of set 9 | pacific grove
Processing Record 45 of set 9 | grand gaube
sakakah not found
Pro

In [261]:
# Number of cities for which the API call succeeded
count_city_found

546

In [262]:
# Number of cities whose API call raised an error (logged as "not found")
count_city_not_found

70

In [263]:
# Number of non-null entries per column of cities_df (one row per candidate city)
cities_df.count()

city    616
dtype: int64

In [264]:
# Sanity check: found + not found should add up to the number of candidate
# cities, i.e. len(cities)
count_city_found + count_city_not_found

616

In [265]:
# Inspect the first stored API response (a nested dict)
city_weather[0]

{'coord': {'lon': -11.33, 'lat': 8.35},
 'weather': [{'id': 804,
   'main': 'Clouds',
   'description': 'overcast clouds',
   'icon': '04n'}],
 'base': 'stations',
 'main': {'temp': 20.88,
  'pressure': 1012.99,
  'humidity': 99,
  'temp_min': 20.88,
  'temp_max': 20.88,
  'sea_level': 1012.99,
  'grnd_level': 972.02},
 'wind': {'speed': 0.99, 'deg': 262.76},
 'clouds': {'all': 99},
 'dt': 1563765137,
 'sys': {'message': 0.0052,
  'country': 'SL',
  'sunrise': 1563777331,
  'sunset': 1563822458},
 'timezone': 0,
 'id': 2410312,
 'name': 'Barma',
 'cod': 200}

In [266]:
# Inspect the last stored API response
city_weather[-1]

{'coord': {'lon': 139.9, 'lat': 69.3},
 'weather': [{'id': 804,
   'main': 'Clouds',
   'description': 'overcast clouds',
   'icon': '04d'}],
 'base': 'stations',
 'main': {'temp': 1.58,
  'pressure': 1021.31,
  'humidity': 92,
  'temp_min': 1.58,
  'temp_max': 1.58,
  'sea_level': 1021.31,
  'grnd_level': 956.09},
 'wind': {'speed': 3.19, 'deg': 320.105},
 'clouds': {'all': 100},
 'dt': 1563765433,
 'sys': {'message': 0.0075, 'country': 'RU', 'sunrise': 0, 'sunset': 0},
 'timezone': 36000,
 'id': 2028164,
 'name': 'Deputatskiy',
 'cod': 200}

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [267]:
# Extract the plotted fields from every stored API response.
# Iterating over `city_weather` itself (rather than range(count_city_found))
# keeps the lists in step with the data even if the counter and the list
# ever get out of sync, e.g. after re-running cells out of order.
lat         = [record["coord"]["lat"] for record in city_weather]
long        = [record["coord"]["lon"] for record in city_weather]
temperature = [record["main"]["temp_max"] for record in city_weather]  # max. temperature
humidity    = [record["main"]["humidity"] for record in city_weather]
cloudiness  = [record["clouds"]["all"] for record in city_weather]
country     = [record["sys"]["country"] for record in city_weather]
wind_speed  = [record["wind"]["speed"] for record in city_weather]
dt          = [record["dt"] for record in city_weather]                # Unix timestamp
city_name   = [record["name"] for record in city_weather]
print("done")

done


In [268]:
# Assemble the extracted lists into one table; the dict order fixes the
# column order of the resulting DataFrame
weather_columns = {
    'City': city_name,
    'Cloudiness': cloudiness,
    'Country': country,
    'Date': dt,
    'Humidity': humidity,
    "Lat": lat,
    "Lon": long,
    "Max temp": temperature,
    "Wind speed": wind_speed,
}
weather_data_df = pd.DataFrame(weather_columns)

weather_data_df.head()

Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Lat,Lon,Max temp,Wind speed
0,Barma,99,SL,1563765137,99,8.35,-11.33,20.88,0.99
1,Pevek,100,RU,1563765137,95,69.7,170.27,0.88,9.63
2,Isangel,75,VU,1563765138,53,-19.55,169.27,24.0,5.1
3,Port Elizabeth,1,US,1563765138,79,39.31,-74.98,32.0,2.1
4,Busselton,100,AU,1563765138,45,-33.64,115.35,15.56,7.66


In [269]:
# Save as a CSV file, without the pandas index but with the header.
# The output folder is created first so the export also works when
# "output_data/" does not exist yet.
os.makedirs(os.path.dirname(output_data_file) or ".", exist_ok=True)
weather_data_df.to_csv(output_data_file, index=False, header=True)

### Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

#### Latitude vs. Temperature Plot

In [270]:
# Convert the Unix timestamps in `dt` to UTC 'YYYY-MM-DD HH:MM:SS' strings.
# pd.to_datetime handles the whole list at once and replaces the deprecated
# datetime.utcfromtimestamp; the result is still a plain list of strings.
dt_format = pd.to_datetime(dt, unit="s").strftime('%Y-%m-%d %H:%M:%S').tolist()

In [271]:
# Keep only the date part (Year-month-day) of every timestamp string

# Series form gives access to the vectorised .str methods; the explicit object
# dtype keeps .str usable even when the list is empty
dt_format = pd.Series(dt_format, dtype="object")

# Split once for the whole Series (the previous loop re-split the entire
# Series on every iteration) and take the piece before the space
date = dt_format.str.split(" ").str[0].tolist()

In [272]:
# Reduce the per-city dates to their distinct values and use the first one
# as the analysis day shown in the plot titles
date = pd.Series(date)
day = date.drop_duplicates().iloc[0]
day

'2019-07-22'

In [273]:
# Plot inputs: latitude on the x axis, maximum temperature on the y axis
x, y = weather_data_df["Lat"], weather_data_df["Max temp"]

In [274]:
# Lowest and highest maximum temperature across all cities
max_temp_column = weather_data_df["Max temp"]
min_temp, max_temp = max_temp_column.min(), max_temp_column.max()

In [275]:
# Temperature axis limits: snap each extreme to the nearest multiple of ten,
# then push it outward by a fixed margin
margin_temp = 5  # margin value
axis_temp_min = round(min_temp / 10) * 10 - margin_temp
axis_temp_max = round(max_temp / 10) * 10 + margin_temp

In [276]:
# Figure 1: maximum temperature against latitude, drawn on the figure's own axes
fig1 = plt.figure(1)
ax1 = fig1.gca()
ax1.scatter(x, y, marker="o", facecolors="lightblue", edgecolors="black", alpha=0.75)
ax1.set_xlim(-90, 90)  # range of lat [-90:90]
ax1.set_ylim(axis_temp_min, axis_temp_max)

ax1.set_xlabel("Latitude (deg)")
# The recorded API responses hold metric values, so the unit is Celsius
# (the label previously claimed Fahrenheit)
ax1.set_ylabel("Max. temperature (°C)")
ax1.grid()

ax1.set_title(f"City Latitude vs Max Temperature ({day})")
fig1.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

In [277]:
# Save figure 1 through its own handle: plt.savefig() writes whichever figure
# pyplot considers current, which may no longer be fig1 once plt.show() has
# run in an earlier cell. The output folder is created first if needed.
os.makedirs(os.path.dirname(fig_file1) or ".", exist_ok=True)
fig1.savefig(fig_file1)

In [278]:
# Close figure 1 now that it has been saved
plt.close(fig1)

#### Latitude vs. Humidity Plot

In [279]:
# y axis for the next plot: humidity of each city (x still holds the latitudes)
y = weather_data_df["Humidity"]

In [280]:
# Lowest and highest humidity across all cities
humidity_column = weather_data_df["Humidity"]
min_humidity, max_humidity = humidity_column.min(), humidity_column.max()

In [281]:
# Humidity axis limits: the bottom is pinned at 0 (can't be negative), the top
# is the maximum snapped to the nearest multiple of ten plus a fixed margin
margin_temp = 5  # margin value
axis_humidity_min = 0
axis_humidity_max = round(max_humidity / 10) * 10 + margin_temp

In [282]:
# Figure 2: humidity against latitude, drawn on the figure's own axes
fig2 = plt.figure(2)
ax2 = fig2.gca()

ax2.scatter(x, y, marker="o", facecolors="lightblue", edgecolors="black", alpha=0.75)
ax2.set_xlim(-90, 90)  # range of lat [-90:90]
ax2.set_ylim(axis_humidity_min, axis_humidity_max)

ax2.set_xlabel("Latitude (deg)")
ax2.set_ylabel("Humidity (%)")
ax2.grid()

ax2.set_title(f"City Latitude vs Humidity ({day})")
fig2.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

In [283]:
# Save figure 2 through its own handle (plt.savefig() would write whichever
# figure pyplot considers current), creating the output folder if needed,
# then close it
os.makedirs(os.path.dirname(fig_file2) or ".", exist_ok=True)
fig2.savefig(fig_file2)
plt.close(fig2)

#### Latitude vs. Cloudiness Plot

In [284]:
# y axis for the next plot: cloudiness of each city (x still holds the latitudes)
y = weather_data_df["Cloudiness"]

In [285]:
# Lowest and highest cloudiness across all cities
cloud_column = weather_data_df["Cloudiness"]
min_cloud, max_cloud = cloud_column.min(), cloud_column.max()

In [286]:
# Cloudiness axis limits: the bottom is pinned at 0 (can't be negative), the
# top is the maximum snapped to the nearest multiple of ten plus a fixed margin
margin_temp = 5  # margin value
axis_cloud_min = 0
axis_cloud_max = round(max_cloud / 10) * 10 + margin_temp

In [287]:
# Figure 3: cloudiness against latitude, drawn on the figure's own axes
fig3 = plt.figure(3)
ax3 = fig3.gca()

ax3.scatter(x, y, marker="o", facecolors="lightblue", edgecolors="black", alpha=0.75)
ax3.set_xlim(-90, 90)  # range of lat [-90:90]
ax3.set_ylim(axis_cloud_min, axis_cloud_max)

ax3.set_xlabel("Latitude (deg)")
ax3.set_ylabel("Cloudiness (%)")
ax3.grid()

ax3.set_title(f"City Latitude vs Cloudiness ({day})")
fig3.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

In [288]:
# Save figure 3 through its own handle (plt.savefig() would write whichever
# figure pyplot considers current), creating the output folder if needed,
# then close it
os.makedirs(os.path.dirname(fig_file3) or ".", exist_ok=True)
fig3.savefig(fig_file3)
plt.close(fig3)

#### Latitude vs. Wind Speed Plot

In [289]:
# y axis for the next plot: wind speed of each city (x still holds the latitudes)
y = weather_data_df["Wind speed"]

In [290]:
# Lowest and highest wind speed across all cities
wind_column = weather_data_df["Wind speed"]
min_wind, max_wind = wind_column.min(), wind_column.max()

In [294]:
# Determine the min/max of the wind speed axis.
# Rounding to the nearest multiple of ten can land BELOW the maximum (13 -> 10,
# 4 -> 0), which would clip the fastest points or collapse the axis, so the
# margin is added on top exactly as for the other three plots.
margin_temp = 5  # margin value
axis_wind_max = round(max_wind/10)*10 + margin_temp
axis_wind_min = 0 # can't be negative

In [296]:
# Figure 4: wind speed against latitude, drawn on the figure's own axes
fig4 = plt.figure(4)
ax4 = fig4.gca()

ax4.scatter(x, y, marker="o", facecolors="lightblue", edgecolors="black", alpha=0.75)
ax4.set_xlim(-90, 90)  # range of lat [-90:90]
ax4.set_ylim(axis_wind_min, axis_wind_max)

ax4.set_xlabel("Latitude (deg)")
# The recorded API responses hold metric values, so wind speed is in metres
# per second (the label previously claimed mph)
ax4.set_ylabel("Wind speed (m/s)")
ax4.grid()

ax4.set_title(f"City Latitude vs Wind speed ({day})")
fig4.tight_layout()
plt.show()

<IPython.core.display.Javascript object>

In [297]:
# Save figure 4 through its own handle (plt.savefig() would write whichever
# figure pyplot considers current), creating the output folder if needed,
# then close it
os.makedirs(os.path.dirname(fig_file4) or ".", exist_ok=True)
fig4.savefig(fig_file4)
plt.close(fig4)