# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from config import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_csv/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [2]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
cities


['qaanaaq',
 'kavieng',
 'galveston',
 'roald',
 'kaoma',
 'albany',
 'busselton',
 'mar del plata',
 'nemuro',
 'hermanus',
 'hamilton',
 'grindavik',
 'jalu',
 'ilulissat',
 'hilo',
 'ushuaia',
 'belushya guba',
 'hobart',
 'vardo',
 'dikson',
 'yaan',
 'vaini',
 'meulaboh',
 'mataura',
 'avarua',
 'punta arenas',
 'ardakan',
 'chuy',
 'selikhino',
 'bilma',
 'nikolskoye',
 'port elizabeth',
 'saint-philippe',
 'barentsburg',
 'bluff',
 'bethel',
 'bairiki',
 'bredasdorp',
 'seoul',
 'mys shmidta',
 'faanui',
 'kaeo',
 'torbay',
 'santa cruz',
 'northam',
 'ponta do sol',
 'los angeles',
 'rikitea',
 'atuona',
 'elko',
 'fortuna',
 'hithadhoo',
 'rafaela',
 'shenjiamen',
 'san patricio',
 'sioux lookout',
 'uwayl',
 'santa rosa',
 'kavaratti',
 'saint george',
 'pochutla',
 'tasiilaq',
 'carnarvon',
 'bengkulu',
 'yulara',
 'nusaybin',
 'klaksvik',
 'tashigang',
 'tymovskoye',
 'chokurdakh',
 'sinnamary',
 'poyarkovo',
 'longlac',
 'biak',
 'vila',
 'kapaa',
 'sao filipe',
 'saleaula

In [3]:
print(len(cities))

615


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [None]:
#url - found on https://openweathermap.org/current
url = "http://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key


#create lists
city_name = []
lat = []
long = []
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

#Set loop
i = 1
for city in cities:
    time.sleep(1)
    city=city
    query_url = url + "&q=" + city.replace(" ","+")
    response = requests.get(query_url).json()
    
#Extracting data. Missing data is handled by try and except  
    try:
        
        country.append(response["sys"]["country"])
        date.append(response["dt"])
        max_temp.append(response["main"]['temp_max'])
        humidity.append(response["main"]['humidity'])
        lat.append(response["coord"]["lat"])  
        long.append(response["coord"]["lon"])
        wind_speed.append(response["wind"]["speed"])
        city_name.append(response["name"])
        cloudiness.append(response["clouds"])
        
        print(f"Processing record {i}|{city}")
        i+=1
        
    except (KeyError):
        print("City not found. Skipping.")

Processing record 1|qaanaaq
Processing record 2|kavieng
Processing record 3|galveston
Processing record 4|roald
Processing record 5|kaoma
Processing record 6|albany
Processing record 7|busselton
Processing record 8|mar del plata
Processing record 9|nemuro
Processing record 10|hermanus
Processing record 11|hamilton
Processing record 12|grindavik
Processing record 13|jalu
Processing record 14|ilulissat
Processing record 15|hilo
Processing record 16|ushuaia
City not found. Skipping.
Processing record 17|hobart
Processing record 18|vardo
Processing record 19|dikson
Processing record 20|yaan
Processing record 21|vaini
Processing record 22|meulaboh
Processing record 23|mataura
Processing record 24|avarua
Processing record 25|punta arenas
Processing record 26|ardakan
Processing record 27|chuy
Processing record 28|selikhino
Processing record 29|bilma
Processing record 30|nikolskoye
Processing record 31|port elizabeth
Processing record 32|saint-philippe
City not found. Skipping.
Processing reco

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
#Create Dataframe


weather_df = {
    "City":city_name,
    "Lat":lat,
    "Long":long,
    "Max Temp":max_temp,
    "Humidity":humidity,
    "Cloudiness":cloudiness,
    "Wind Speed":wind_speed,
    "Country":country,
    "Date":date,
}


#Display DF
weather_df =pd.DataFrame(weather_df)
weather_df



In [None]:
#Cleann Cloudiness column
clean_cloudiness_list=[]

for cloudiness in list(weather_df["Cloudiness"].values):
    clean_cloudiness_list.append(int(dict(cloudiness)["all"]))
    
weather_df["Cloudiness"]=clean_cloudiness_list
weather_df

In [None]:
weather_df= weather_df.rename_axis('City ID')

#Exporting data to csv
weather_df.to_csv("output_csv/city.csv")

In [None]:
humidity

In [None]:

print("After inspecting the data, the maximum humidity value is 100%, therefore there are no values greater than 100% in our data")


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
#Create Scatter plot for Latitude v Temp
plt.scatter(weather_df["Lat"], weather_df["Max Temp"], facecolor = "blue", edgecolor = "red")

#Title
plt.title ("Latitude v Max Temp")

#Set Labels
plt.xlabel("City Latitude")
plt.ylabel("Max Temp")

#Set Grid, IMO it makes it easier to see the measurements for analysis
plt.grid (linestyle='-', linewidth=.5, alpha = .5)

#Save as png
plt.savefig("Images/Latitude v Max Temp.png")
           
    

## Latitude vs. Humidity Plot

In [None]:
#Create Scatter plot for Latitude v Humidity
plt.scatter(weather_df["Lat"], weather_df["Humidity"], facecolor = "lightblue", edgecolor = "black")

#Title
plt.title ("Latitude v Humidity")

#Set Labels
plt.xlabel("City Latitude")
plt.ylabel("Humidity in %")

#Set Grid, IMO it makes it easier to see the measurements for analysis
plt.grid (linestyle='-', linewidth=.5, alpha = .5)

#Save as png
plt.savefig("Images/Latitude v Humidity.png")

## Latitude vs. Cloudiness Plot

In [None]:
# Create Latitude vs. Cloudiness Plot scatter plot
plt.scatter(weather_df["Lat"], weather_df["Humidity"], facecolor = "lightblue", edgecolor = "blue")
#Title
plt.title ("Latitude v Cloudiness")

#Set Labels
plt.xlabel("City Latitude")
plt.ylabel("Cloudiness in  Percent")

#Set Grid, IMO it makes it easier to see the measurements for analysis
plt.grid (linestyle='-', linewidth=.5, alpha = .5)

#Save as png
plt.savefig("Images/Latitude v Cloudiness.png")

## Latitude vs. Wind Speed Plot

In [None]:
# Create Latitude vs. Cloudiness Plot scatter plot
plt.scatter(weather_df["Lat"], weather_df["Wind Speed"], facecolor = "black", edgecolor = "red")
#Title
plt.title ("Latitude v Wind Speed")

#Set Labels
plt.xlabel("City Latitude")
plt.ylabel("Wind Speed (mph)")

#Set Grid, IMO it makes it easier to see the measurements for analysis
plt.grid (linestyle='-', linewidth=.5, alpha = .5)

#Save as png
plt.savefig("Images/Latitude v Wind Speed.png")

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Create Northern and Southern Hemisphere DataFrames
Northern_hemisphere = weather_df.loc[weather_df["Lat"] >= 0]
Southern_hemisphere = weather_df.loc[weather_df["Lat"] < 0]

In [None]:
# Define function for creating linear agression and scatter plot
# Add the linear regression equation and line to plot
def linear_regression(x_values,y_values,eq_coord):
    (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
    regress_values = x_values * slope + intercept
    
    
    #Equation
    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
    
    
    plt.scatter(x_values,y_values)
    plt.plot(x_values,regress_values,"r-")
    plt.annotate(line_eq,(eq_coord[0],eq_coord[1]),fontsize=15,color="red")
    plt.xlabel("Latitude")
    print(f"The r-squared is: {rvalue}")
    plt.show()

In [None]:
###Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
#Define X and Y
x_values = Northern_hemisphere["Lat"]
y_values = Northern_hemisphere["Max Temp"]

#Label and diplay
plt.ylabel("Max Temp (F)")
print(linear_regression(x_values,y_values,[10,40]))

In [None]:
#Print Correlation
print(f'There is a negative correlation.')

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
#Define X and Y
x_values = Southern_hemisphere["Lat"]
y_values = Southern_hemisphere["Max Temp"]


#Label and diplay
plt.ylabel("Max Temp")
print(linear_regression(x_values,y_values,[-50,90]))



In [None]:
#Print Correlation
print(f'There is a positive correlation.')

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
#Define X and Y
x_values = Northern_hemisphere["Lat"]
y_values = Northern_hemisphere["Humidity"]


#Label and diplay
plt.ylabel("Humidity")
print(linear_regression(x_values,y_values))


In [None]:
#Print Correlation
print(f'There is a weak positive correlation.')

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
#Define X and Y
x_values = Southern_hemisphere["Lat"]
y_values = Southern_hemisphere["Humidity"]
plt.ylim(0, y_values.max()+100)
plt.ylabel("Humidity")

#Label and diplay
plt.ylabel("Humidity")
print(linear_regression(x_values,y_values))


In [None]:
#Print Correlation
print(f'There is a little if any correlation.')

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
#Define X and Y
x_values = Northern_hemisphere["Lat"]
y_values = Northern_hemisphere["Cloudiness"]


#Label and diplay
plt.ylabel("Cloudiness (")
print(linear_regression(x_values,y_values))


In [None]:
#Print Correlation
print(f'There is a  correlation.')

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
#Define X and Y
x_values = Southern_hemisphere["Lat"]
y_values = Southern_hemisphere["Cloudiness"]

#Label and diplay
plt.ylabel("Cloudiness")
print(linear_regression(x_values,y_values))

In [None]:
#Print Correlation
print(f'There is a  correlation.')

###  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
#Define X and Y
x_values = Northern_hemisphere["Lat"]
y_values = Northern_hemisphere["Wind Speed"]

#Label and diplay
plt.ylabel("Wind Speed")
print(linear_regression(x_values,y_values))

In [None]:
#Print Correlation
print(f'There is a weak positive correlation.')

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
#Define X and Y
x_values = Southern_hemisphere["Lat"]
y_values = Southern_hemisphere["Wind Speed"]

#Label and diplay
plt.ylabel("Wind Speed")
print(linear_regression(x_values,y_values))

In [None]:
#Print Correlation
print(f'There is a weak negative correlation.')