# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [4]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key
from api_keys import api_key
from api_keys import g_key


# Output File (CSV)
output_data_file = "cities.csv"

# Range of latitudes and longitudes (degrees) used when sampling random coordinates
lat_range = (-90, 90)
lng_range = (-180, 180)

In [5]:
# citipy is a package; the nearest_city() lookup lives in its inner `citipy`
# module, so import that module rather than the bare package. With a plain
# `import citipy`, the later `citipy.nearest_city(...)` call raises AttributeError.
from citipy import citipy

In [None]:
#url= f"http://api.openweathermap.org/data/2.5/weather?units=Imperial&appid={weather_api_key}&q="
#citipy = url + "lompoc" + "&APPID=" + weather_api_key
#citipy
#from citipy import citipy




## Generate Cities List

In [None]:
# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination.
# dict.fromkeys keeps only the first occurrence of each city name while
# preserving insertion order, which gives the same list as the original
# "append if not already present" loop without rescanning the list each time.
cities = list(
    dict.fromkeys(
        citipy.nearest_city(lat_value, lng_value).city_name
        for lat_value, lng_value in lat_lngs
    )
)

# Print the city count to confirm sufficient count
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Log counters and per-city result columns. Every list below grows in lockstep
# (one entry per successfully retrieved city) so they can be combined into a
# DataFrame afterwards.
x=1        # record number within the current set; reset once it reaches 51
count=1    # current set number
city_df=[]
lat=[]
long=[]
Max_Temp=[]
Humidity=[]
Cloudiness=[]
Wind_Speed=[]
Country=[]
Date=[]


#Base line information
url= f"http://api.openweathermap.org/data/2.5/weather?units=Imperial&appid={weather_api_key}&q="
city_responses= [ ]

#Beginning Print
print("Beginning Data Retrieval")
print("------------------------")

#for loop to make an API request for each city.
for city in cities:
    if x >= 51:
        count+= 1
        x=1
    try:
        # A timeout keeps one stalled connection from hanging the whole run.
        city_data=requests.get(url + city, timeout=10).json()
        # Read every field BEFORE touching the result lists: if any key is
        # missing the city is skipped as a whole, so the lists can never end
        # up with different lengths.
        record=(
            city_data['name'],
            city_data['coord']['lat'],
            city_data['coord']['lon'],
            city_data['main']['temp_max'],
            city_data['main']["humidity"],
            city_data["clouds"]['all'],
            city_data["wind"]['speed'],
            city_data['sys']['country'],
            city_data['dt'],
        )
    except (KeyError, TypeError, ValueError, requests.exceptions.RequestException):
        # Only data/network failures are skipped; KeyboardInterrupt is no
        # longer swallowed, so the loop can be interrupted from the kernel.
        print("City not found. Skipping... ")
        continue

    columns=(city_df, lat, long, Max_Temp, Humidity, Cloudiness, Wind_Speed, Country, Date)
    for column, value in zip(columns, record):
        column.append(value)
    city_responses.append(city_data)
    print(f"Processing Record {x} of set {count}|{city}")
    x+=1

#Final Print
print("------------------------")
print("Data Retrieval Complete ")
print("------------------------")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Assemble the per-city lists into a single DataFrame, one column per list
column_names = ["City", "Lat", "Lng", "Max Temp", "Humidity",
                "Cloudiness", "Wind Speed", "Country", "Date"]
column_values = [city_df, lat, long, Max_Temp, Humidity,
                 Cloudiness, Wind_Speed, Country, Date]
main_df = pd.DataFrame(dict(zip(column_names, column_values)))

# Preview the first five rows
main_df.head(5)


In [None]:

# Count the non-null entries in each column; equal counts across all columns
# confirm that no column is missing values for any city.

main_df.count()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Print the highest humidity reading so the "> 100%" check is visible in the
# cell output (a bare expression in the middle of a cell is never displayed).
print(f"Max humidity: {main_df['Humidity'].max()}")

# Keep only the rows whose humidity is at most 100%
main_df=main_df[main_df["Humidity"]<= 100]

main_df.describe()

In [None]:
# Export the city data to the CSV path held in output_data_file,
# labelling the index column "city_id".

main_df.to_csv(output_data_file, index_label= "city_id")

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
#New (x,y) Variables
latitude=main_df["Lat"]
Max_T=main_df["Max Temp"]

# Stamp the title with the date the notebook is actually run; the weather data
# is fetched live, so a hard-coded date goes stale on every re-run.
analysis_date=time.strftime("%m/%d/%Y")

#Making Scatter Plot
fig, ax = plt.subplots()
ax.scatter(latitude, Max_T, marker="o", facecolors="lightskyblue", edgecolors="black")

#Formatting
ax.grid()
ax.set_title(f"City Latitude vs Max Temperature {analysis_date}")
ax.set_xlabel("City Latitude")
# The API request uses units=Imperial, so temperatures are in Fahrenheit
ax.set_ylabel("Max Temperature (F)")

#Saving the graph to a PNG
fig.savefig("Fig1.png")
plt.show()

## Latitude vs. Humidity Plot

In [None]:
#New (y) variable
Humidity_data=main_df["Humidity"]

# Stamp the title with the date the notebook is actually run; the weather data
# is fetched live, so a hard-coded date goes stale on every re-run.
analysis_date=time.strftime("%m/%d/%Y")

#Making Scatter Plot
# Latitude is read straight from main_df so this cell does not rely on a
# variable left behind by an earlier plotting cell.
fig, ax = plt.subplots()
ax.scatter(main_df["Lat"], Humidity_data, marker="o", facecolors="lightskyblue", edgecolors="black")

#Formatting
ax.grid()
ax.set_title(f"City Latitude vs Humidity {analysis_date}")
ax.set_xlabel("City Latitude")
ax.set_ylabel("Humidity (%)")

#Saving the graph to a PNG
fig.savefig("Fig2.png")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
#New (y) variable
Cloud_data=main_df["Cloudiness"]

# Stamp the title with the date the notebook is actually run; the weather data
# is fetched live, so a hard-coded date goes stale on every re-run.
analysis_date=time.strftime("%m/%d/%Y")

#Making Scatter Plot
# Latitude is read straight from main_df so this cell does not rely on a
# variable left behind by an earlier plotting cell.
fig, ax = plt.subplots()
ax.scatter(main_df["Lat"], Cloud_data, marker="o", facecolors="lightskyblue", edgecolors="black")

#Formatting
ax.grid()
ax.set_title(f"City Latitude vs Cloudiness {analysis_date}")
ax.set_xlabel("City Latitude")
ax.set_ylabel("Cloudiness (%)")

#Saving the graph to a PNG
fig.savefig("Fig3.png")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
#New (y) variable
Wind_data=main_df["Wind Speed"]

# Stamp the title with the date the notebook is actually run; the weather data
# is fetched live, so a hard-coded date goes stale on every re-run.
analysis_date=time.strftime("%m/%d/%Y")

#Making Scatter Plot
# Latitude is read straight from main_df so this cell does not rely on a
# variable left behind by an earlier plotting cell.
fig, ax = plt.subplots()
ax.scatter(main_df["Lat"], Wind_data, marker="o", facecolors="lightskyblue", edgecolors="black")

#Formatting
ax.grid()
ax.set_title(f"City Latitude vs Wind Speed {analysis_date}")
ax.set_xlabel("City Latitude")
ax.set_ylabel("Wind Speed (mph)")

#Saving the graph to a PNG
fig.savefig("Fig4.png")
plt.show()

## Linear Regression

In [None]:
# Create Northern and Southern Hemisphere DataFrames.
# Latitude 0 is assigned to the northern frame so that a city lying exactly on
# the equator is not silently dropped from both.
Northern_df=main_df[main_df["Lat"]>=0]
Southern_df=main_df[main_df["Lat"]<0]

In [None]:
# Shared helper for the hemisphere regression plots below
def plot_linear_regression(x,y,title,text_coordinates):
    """Scatter ``y`` against ``x``, overlay the least-squares line, and print r-squared.

    Args:
        x: Values for the x-axis (the axis is labelled "Latitude").
        y: Values regressed on ``x``.
        title: Text used as the y-axis label (despite the name, it is not
            applied as the figure title).
        text_coordinates: ``(x, y)`` position passed to ``plt.annotate`` for
            the line-equation text.

    Returns:
        None. The figure is shown with ``plt.show()`` and the r-squared value
        is printed afterwards.
    """
    slope, intercept, rvalue, _pvalue, _stderr = linregress(x, y)
    regress_values = x*slope + intercept

    # Render the intercept with an explicit sign so a negative value reads
    # "y=2.0x - 3.5" rather than "y=2.0x + -3.5".
    sign = "+" if intercept >= 0 else "-"
    line_eq = f"y={round(slope,2)}x {sign} {round(abs(intercept),2)}"

    plt.scatter(x, y, marker="o", facecolors="lightskyblue", edgecolors="black")
    plt.plot(x, regress_values, color='red')
    plt.annotate(line_eq, text_coordinates, color='red', fontsize= 14)
    plt.xlabel("Latitude")
    plt.ylabel(title)
    plt.show()
    print(f"The r-squared is: {rvalue**2}")

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: regress max temperature on latitude
plot_linear_regression(
    x=Northern_df["Lat"],
    y=Northern_df["Max Temp"],
    title="Max Temp",
    text_coordinates=(10, 20),
)

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: regress max temperature on latitude
plot_linear_regression(
    x=Southern_df["Lat"],
    y=Southern_df["Max Temp"],
    title="Max Temp",
    text_coordinates=(-55, 85),
)

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# The third argument is the y-axis label; it must name the plotted column
# (humidity), not "Max Temp".
plot_linear_regression(Northern_df["Lat"], Northern_df["Humidity"], "Humidity (%)",(50,12))

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# The third argument is the y-axis label; it must name the plotted column
# (humidity), not "Max Temp".
plot_linear_regression(Southern_df["Lat"], Southern_df["Humidity"], "Humidity (%)",(-55,20))

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# The third argument is the y-axis label; it must name the plotted column
# (cloudiness), not "Max Temp".
plot_linear_regression(Northern_df["Lat"], Northern_df["Cloudiness"], "Cloudiness (%)",(50,33))

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# The third argument is the y-axis label; it must name the plotted column
# (cloudiness), not "Max Temp".
plot_linear_regression(Southern_df["Lat"], Southern_df["Cloudiness"], "Cloudiness (%)",(-56,25))

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# The third argument is the y-axis label; it must name the plotted column
# (wind speed), not "Max Temp".
plot_linear_regression(Northern_df["Lat"], Northern_df["Wind Speed"], "Wind Speed (mph)",(5,30))

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# The third argument is the y-axis label; it must name the plotted column
# (wind speed), not "Max Temp".
plot_linear_regression(Southern_df["Lat"], Southern_df["Wind Speed"], "Wind Speed (mph)",(-56,13))