# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import os
import random
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import scipy.stats as st
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
# NOTE(review): this path is not referenced by any cell visible in this
# notebook; the final export cell writes to "Output/weather.csv" instead.
# Confirm which location is intended.
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes: (low, high) bounds for the uniform random
# draws used when generating candidate coordinates
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [2]:
# `cities` keeps the unique city names in first-seen order; `seen_cities`
# mirrors it as a set so the duplicate check is O(1) instead of a list scan.
cities = []
seen_cities = set()

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
# Materialise the pairs: a bare zip() is a one-shot iterator and would be left
# empty after the loop below, which is confusing when inspecting `lat_lngs`.
lat_lngs = list(zip(lats, lngs))

# Identify nearest city for each lat, lng combination
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

608

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Running count of cities retrieved so far; only used to number the log lines
city_tracker = 1

# Parallel result lists: index i of every list describes the same city.
# A later cell reads these names to build the DataFrame.
max_temp = []
humidity_list = []
cloudiness_results = []
wind_speed = []
cities_list = []
country_list = []
latitude = []
longitude = []
date = []

# Same order as the fields of `record` built inside the loop
result_lists = (
    max_temp,
    humidity_list,
    cloudiness_results,
    wind_speed,
    cities_list,
    country_list,
    latitude,
    longitude,
    date,
)

# HTTPS so the API key in the query string is not sent in clear text
base_url = "https://api.openweathermap.org/data/2.5/weather"

print("Begin")

# One request per candidate city
for city in cities:
    # Passing `params` lets requests URL-encode city names that contain
    # spaces or punctuation instead of splicing them raw into the URL.
    query = {"q": city, "appid": weather_api_key, "units": "imperial"}
    try:
        response = requests.get(base_url, params=query, timeout=10).json()
    except (requests.exceptions.RequestException, ValueError) as exc:
        # Network failure, timeout, or a body that is not JSON: skip this
        # city only, and say why instead of reporting it as "not found".
        print(f"Request failed for {city} ({exc}), next result")
        continue

    try:
        # Read every field BEFORE appending anything: if one key is missing,
        # nothing is stored, so the parallel lists cannot end up with
        # different lengths (which would break the DataFrame construction).
        record = (
            response["main"]["temp_max"],
            response["main"]["humidity"],
            response["clouds"]["all"],
            response["wind"]["speed"],
            response["name"],
            response["sys"]["country"],
            response["coord"]["lat"],
            response["coord"]["lon"],
            response["dt"],
        )
    except (KeyError, TypeError):
        # The response lacks the expected fields (e.g. the city is unknown)
        print("Unable to locate city, next result")
        continue

    for bucket, value in zip(result_lists, record):
        bucket.append(value)

    print(f"Result # {city_tracker} | {city}")
    city_tracker += 1

print("End")

Begin
Result # 1 | tuktoyaktuk
Result # 2 | mataura
Result # 3 | punta arenas
Result # 4 | hobart
Result # 5 | albany
Result # 6 | waipawa
Result # 7 | griffith
Result # 8 | esperance
Result # 9 | broome
Unable to locate city, next result
Result # 10 | bluff
Result # 11 | tucuman
Result # 12 | srikakulam
Result # 13 | la ronge
Result # 14 | georgetown
Result # 15 | severo-kurilsk
Result # 16 | yellowknife
Result # 17 | fairbanks
Result # 18 | abu samrah
Result # 19 | sao filipe
Result # 20 | ribeira grande
Result # 21 | busselton
Result # 22 | vaini
Result # 23 | jamestown
Result # 24 | butaritari
Result # 25 | arlit
Result # 26 | maldonado
Result # 27 | lima
Result # 28 | ugoofaaru
Result # 29 | okahandja
Unable to locate city, next result
Result # 30 | ushuaia
Result # 31 | taoudenni
Result # 32 | port alfred
Result # 33 | clyde river
Result # 34 | atuona
Result # 35 | soe
Unable to locate city, next result
Result # 36 | barrow
Result # 37 | hermanus
Result # 38 | naze
Result # 39 | 

Result # 286 | sovetskiy
Result # 287 | olafsvik
Result # 288 | touros
Result # 289 | nome
Result # 290 | khatanga
Result # 291 | tucuma
Result # 292 | quatre cocos
Result # 293 | sandnessjoen
Result # 294 | sumbe
Result # 295 | kropotkin
Result # 296 | namibe
Result # 297 | vallenar
Result # 298 | madang
Result # 299 | acapulco
Result # 300 | bredasdorp
Result # 301 | tautira
Unable to locate city, next result
Result # 302 | yongchang
Result # 303 | beringovskiy
Result # 304 | oistins
Result # 305 | baindur
Result # 306 | mendeleyevo
Result # 307 | ballina
Result # 308 | kidal
Result # 309 | ilulissat
Result # 310 | callaway
Result # 311 | barmer
Unable to locate city, next result
Result # 312 | arraial do cabo
Result # 313 | joshimath
Result # 314 | sassandra
Result # 315 | sechura
Result # 316 | darnah
Result # 317 | topchikha
Result # 318 | uralets
Result # 319 | carauari
Result # 320 | isla vista
Result # 321 | adrar
Result # 322 | ubinskoye
Unable to locate city, next result
Resu

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Map each output column to the list collected by the API loop, in the
# column order the DataFrame should have.
Results = dict(
    [
        ("City", cities_list),
        ("Country", country_list),
        ("Latitude", latitude),
        ("Longitude", longitude),
        ("Date", date),
        ("Cloudiness", cloudiness_results),
        ("Humidity", humidity_list),
        ("Max Temp", max_temp),
        ("Wind Speed", wind_speed),
    ]
)

weather_api_df = pd.DataFrame(Results)

# Non-null count per column
weather_api_df.count()

In [None]:
# Preview the first rows of the assembled weather DataFrame
weather_api_df.head()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Summary statistics per numeric column; the "max" row for Humidity shows
# whether any city reports a value above 100%.
weather_api_df.describe()

In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


In [None]:
# Extract relevant fields from the data frame


# Export the City_Data into a csv


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Max temperature against latitude for every retrieved city.
plt.scatter(
    weather_api_df["Latitude"],
    weather_api_df["Max Temp"],
    marker="o",
    facecolor="lightblue",
    edgecolor="black",
)
# "%x" stamps the title with the current date in the locale's format
plt.title("City Latitude vs. Max Temperature (%s)" % time.strftime("%x"))
plt.xlabel("Latitude")
# Label typo fixed ("Temprature" -> "Temperature")
plt.ylabel("Max Temperature (F)")
plt.grid()
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Humidity against latitude for every retrieved city, drawn through the
# explicit figure/axes interface.
fig, ax = plt.subplots()
ax.scatter(
    weather_api_df["Latitude"],
    weather_api_df["Humidity"],
    marker="o",
    facecolor="lightblue",
    edgecolor="black",
)
# "%x" stamps the title with the current date in the locale's format
ax.set_title("City Latitude vs. Humidity (%s)" % time.strftime("%x"))
ax.set_xlabel("Latitude")
ax.set_ylabel("Humidity (%)")
ax.grid()
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Cloudiness against latitude for every retrieved city, drawn through the
# explicit figure/axes interface.
fig, ax = plt.subplots()
ax.scatter(
    weather_api_df["Latitude"],
    weather_api_df["Cloudiness"],
    marker="o",
    facecolor="lightblue",
    edgecolor="black",
)
# "%x" stamps the title with the current date in the locale's format
ax.set_title("City Latitude vs. Cloudiness (%s)" % time.strftime("%x"))
ax.set_xlabel("Latitude")
ax.set_ylabel("Cloudiness (%)")
ax.grid()
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Wind speed against latitude for every retrieved city, drawn through the
# explicit figure/axes interface.
fig, ax = plt.subplots()
ax.scatter(
    weather_api_df["Latitude"],
    weather_api_df["Wind Speed"],
    marker="o",
    facecolor="lightblue",
    edgecolor="black",
)
# "%x" stamps the title with the current date in the locale's format
ax.set_title("City Latitude vs. Wind Speed (%s)" % time.strftime("%x"))
ax.set_xlabel("Latitude")
ax.set_ylabel("Wind Speed (mph)")
ax.grid()
plt.show()

## Linear Regression

In [None]:
# OPTIONAL: Create a function to create Linear Regression plots

In [None]:
# Northern Hemisphere DataFrame: rows whose Latitude is zero or positive
# (the equator itself is counted as northern).
is_northern = weather_api_df["Latitude"] >= 0
northern_lat_df = weather_api_df[is_northern]
northern_lat_df.head()

In [None]:
# Southern Hemisphere DataFrame: rows whose Latitude is strictly negative
is_southern = weather_api_df["Latitude"] < 0
southern_lat_df = weather_api_df[is_southern]
southern_lat_df.head()

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: linear regression of max temperature on latitude.
Latitude = northern_lat_df["Latitude"]
Max_Temp = northern_lat_df["Max Temp"]

# pearsonr returns (r, p-value); only r is reported below
correlation = st.pearsonr(Latitude, Max_Temp)
(slope, intercept, rvalue, pvalue, stderr) = linregress(Latitude, Max_Temp)
regress_values = Latitude * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.title("Max Temp vs Latitude")
plt.xlabel("Latitude")
plt.ylabel("Max Temp (F)")
# Default uniform marker size. The earlier `s=np.arange(0, 80)` was unrelated
# to the data: current Matplotlib rejects a size array whose length differs
# from the number of points, and older releases cycle it (sizes from 0 up).
plt.scatter(Latitude, Max_Temp, marker="o", facecolors="lightblue",
            edgecolors="black", alpha=0.75)
plt.plot(Latitude, regress_values, "r-")
# Axes-fraction coordinates keep the equation inside the plot whatever the
# data range is; a fixed data-space point such as (0, 50) can fall outside.
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: linear regression of max temperature on latitude.
Latitude = southern_lat_df["Latitude"]
Max_Temp = southern_lat_df["Max Temp"]

# pearsonr returns (r, p-value); only r is reported below
correlation = st.pearsonr(Latitude, Max_Temp)
(slope, intercept, rvalue, pvalue, stderr) = linregress(Latitude, Max_Temp)
regress_values = Latitude * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.title("Max Temp vs Latitude")
plt.xlabel("Latitude")
plt.ylabel("Max Temp (F)")
# Default uniform marker size. The earlier `s=np.arange(0, 80)` was unrelated
# to the data: current Matplotlib rejects a size array whose length differs
# from the number of points, and older releases cycle it (sizes from 0 up).
plt.scatter(Latitude, Max_Temp, marker="o", facecolors="lightblue",
            edgecolors="black", alpha=0.75)
plt.plot(Latitude, regress_values, "r-")
# Axes-fraction coordinates keep the equation inside the plot; with negative
# latitudes a data-space anchor at x=0 sits on the right-hand edge.
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: linear regression of humidity on latitude.
Latitude = northern_lat_df["Latitude"]
Humidity = northern_lat_df["Humidity"]

# pearsonr returns (r, p-value); only r is reported below
correlation = st.pearsonr(Latitude, Humidity)
(slope, intercept, rvalue, pvalue, stderr) = linregress(Latitude, Humidity)
regress_values = Latitude * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.title("Humidity vs Latitude")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
# Default uniform marker size. The earlier `s=np.arange(0, 80)` was unrelated
# to the data: current Matplotlib rejects a size array whose length differs
# from the number of points, and older releases cycle it (sizes from 0 up).
plt.scatter(Latitude, Humidity, marker="o", facecolors="lightblue",
            edgecolors="black", alpha=0.75)
plt.plot(Latitude, regress_values, "r-")
# Axes-fraction coordinates keep the equation inside the plot whatever the
# data range is; a fixed data-space point such as (0, 50) can fall outside.
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: linear regression of humidity on latitude.
Latitude = southern_lat_df["Latitude"]
Humidity = southern_lat_df["Humidity"]

# pearsonr returns (r, p-value); only r is reported below
correlation = st.pearsonr(Latitude, Humidity)
(slope, intercept, rvalue, pvalue, stderr) = linregress(Latitude, Humidity)
regress_values = Latitude * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.title("Humidity vs Latitude")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
# Default uniform marker size. The earlier `s=np.arange(0, 80)` was unrelated
# to the data: current Matplotlib rejects a size array whose length differs
# from the number of points, and older releases cycle it (sizes from 0 up).
plt.scatter(Latitude, Humidity, marker="o", facecolors="lightblue",
            edgecolors="black", alpha=0.75)
plt.plot(Latitude, regress_values, "r-")
# Axes-fraction coordinates keep the equation inside the plot; with negative
# latitudes a data-space anchor at x=0 sits on the right-hand edge.
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: linear regression of cloudiness on latitude.
Latitude = northern_lat_df["Latitude"]
Cloudiness = northern_lat_df["Cloudiness"]

# pearsonr returns (r, p-value); only r is reported below
correlation = st.pearsonr(Latitude, Cloudiness)
(slope, intercept, rvalue, pvalue, stderr) = linregress(Latitude, Cloudiness)
regress_values = Latitude * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.title("Cloudiness vs Latitude")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
# Default uniform marker size. The earlier `s=np.arange(0, 80)` was unrelated
# to the data: current Matplotlib rejects a size array whose length differs
# from the number of points, and older releases cycle it (sizes from 0 up).
plt.scatter(Latitude, Cloudiness, marker="o", facecolors="lightblue",
            edgecolors="black", alpha=0.75)
plt.plot(Latitude, regress_values, "r-")
# Axes-fraction coordinates keep the equation inside the plot whatever the
# data range is; a fixed data-space point such as (0, 50) can fall outside.
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: linear regression of cloudiness on latitude.
Latitude = southern_lat_df["Latitude"]
Cloudiness = southern_lat_df["Cloudiness"]

# pearsonr returns (r, p-value); only r is reported below
correlation = st.pearsonr(Latitude, Cloudiness)
(slope, intercept, rvalue, pvalue, stderr) = linregress(Latitude, Cloudiness)
regress_values = Latitude * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.title("Cloudiness vs Latitude")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
# Default uniform marker size. The earlier `s=np.arange(0, 80)` was unrelated
# to the data: current Matplotlib rejects a size array whose length differs
# from the number of points, and older releases cycle it (sizes from 0 up).
plt.scatter(Latitude, Cloudiness, marker="o", facecolors="lightblue",
            edgecolors="black", alpha=0.75)
plt.plot(Latitude, regress_values, "r-")
# Axes-fraction coordinates keep the equation inside the plot; with negative
# latitudes a data-space anchor at x=0 sits on the right-hand edge.
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: linear regression of wind speed on latitude.
Latitude = northern_lat_df["Latitude"]
Wind_Speed = northern_lat_df["Wind Speed"]

# pearsonr returns (r, p-value); only r is reported below
correlation = st.pearsonr(Latitude, Wind_Speed)
(slope, intercept, rvalue, pvalue, stderr) = linregress(Latitude, Wind_Speed)
regress_values = Latitude * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.title("Wind Speed vs Latitude")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
# Default uniform marker size. The earlier `s=np.arange(0, 80)` was unrelated
# to the data: current Matplotlib rejects a size array whose length differs
# from the number of points, and older releases cycle it (sizes from 0 up).
plt.scatter(Latitude, Wind_Speed, marker="o", facecolors="lightblue",
            edgecolors="black", alpha=0.75)
plt.plot(Latitude, regress_values, "r-")
# Axes-fraction coordinates keep the equation inside the plot; a data-space
# anchor at y=50 would lie outside the axes whenever wind speeds stay below it.
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: linear regression of wind speed on latitude.
Latitude = southern_lat_df["Latitude"]
Wind_Speed = southern_lat_df["Wind Speed"]

# pearsonr returns (r, p-value); only r is reported below
correlation = st.pearsonr(Latitude, Wind_Speed)
(slope, intercept, rvalue, pvalue, stderr) = linregress(Latitude, Wind_Speed)
regress_values = Latitude * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.title("Wind Speed vs Latitude")
plt.xlabel("Latitude")
# Label fixed from "Wind_Speed (mph)" so it matches the northern-hemisphere plot
plt.ylabel("Wind Speed (mph)")
# Default uniform marker size. The earlier `s=np.arange(0, 80)` was unrelated
# to the data: current Matplotlib rejects a size array whose length differs
# from the number of points, and older releases cycle it (sizes from 0 up).
plt.scatter(Latitude, Wind_Speed, marker="o", facecolors="lightblue",
            edgecolors="black", alpha=0.75)
plt.plot(Latitude, regress_values, "r-")
# Axes-fraction coordinates keep the equation inside the plot; a data-space
# anchor at (0, 50) sits on the right-hand edge for negative latitudes and
# outside the axes whenever wind speeds stay below 50.
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")
print(f"The correlation between both factors is {round(correlation[0],2)}")
print(f"The r-squared is: {rvalue**2}")
plt.show()

In [None]:
# Persist the collected weather data. The target folder is created first
# because to_csv does not create missing directories, so the export would
# otherwise fail when "Output/" does not exist yet.
os.makedirs("Output", exist_ok=True)
weather_api_df.to_csv("Output/weather.csv", index=False, header=True)
# To reload the export later: pd.read_csv("Output/weather.csv", encoding="utf-8")