# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
import datetime
from scipy.stats import linregress

# Import API key

from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
# NOTE(review): this path is defined here but is not referenced by any other
# cell visible in this notebook — confirm whether the CSV export should use it.
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
# (lower, upper) bounds passed to np.random.uniform when sampling coordinates.
lat_range = (-90, 90)
lng_range = (-180, 180)

In [2]:
# For reference: load a previously saved cities CSV and display it.
# NOTE(review): this reads '../output_data/cities.csv', while output_data_file
# above points to 'output_data/cities.csv' — confirm which location is intended.
dataset_df = pd.read_csv('../output_data/cities.csv')
dataset_df

Unnamed: 0,City_ID,City,Cloudiness,Country,Date,Humidity,Lat,Lng,Max Temp,Wind Speed
0,0,ostrovnoy,2,RU,1558378754,72,68.05,39.51,37.50,7.16
1,1,mahebourg,75,MU,1558378503,74,-20.41,57.70,78.80,11.41
2,2,qaanaaq,25,GL,1558378755,73,77.48,-69.36,22.20,2.37
3,3,zhuhai,0,CN,1558378755,39,40.71,112.04,44.60,4.47
4,4,cape town,20,ZA,1558378755,76,-33.93,18.42,55.99,8.05
...,...,...,...,...,...,...,...,...,...,...
543,543,yabrud,0,SY,1558378840,32,33.97,36.66,80.60,8.05
544,544,paraiso,5,MX,1558378898,4,24.01,-104.61,84.20,16.11
545,545,veraval,0,FR,1558378898,62,49.65,0.71,61.00,8.05
546,546,novyy urgal,100,RU,1558378899,93,51.07,132.56,49.74,2.68


## Generate Cities List

In [None]:
# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
# Materialised as a list so the pairs can be inspected or re-used after the
# lookup below (a bare zip object would be exhausted after one pass).
lat_lngs = list(zip(lats, lngs))

# Identify the nearest city for each lat, lng combination and keep each city
# name once. dict.fromkeys preserves first-seen order and de-duplicates with
# hash lookups instead of scanning a growing list for every coordinate pair.
cities = list(
    dict.fromkeys(
        citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
    )
)

# Print the city count to confirm sufficient count
len(cities)

## Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# OpenWeatherMap "current weather" endpoint. Query parameters are passed via
# `params` so that requests URL-encodes them (city names may contain spaces)
# and joins them with the correct separators.
url = "https://api.openweathermap.org/data/2.5/weather"

# Log bookkeeping: cities are reported as records 1..50 within numbered sets.
count_record = 0
set_num = 1

# One dict per city the API could resolve; turned into w_df after the loop.
# Building the frame once from records keeps numeric columns numeric, which
# the plotting and regression cells rely on.
weather_records = []

print("Beginning Data Retrieval")

for city in cities:
    # Advance the record counter, rolling over to a new set after 50 records.
    count_record = count_record + 1
    if count_record > 50:
        count_record = 1
        set_num = set_num + 1
    print(f"Processing Record {count_record} of Set {set_num} | {city}")

    # units=imperial asks the API for Fahrenheit and miles/hour directly, so
    # no manual Kelvin conversion is needed.
    params = {"q": city, "appid": weather_api_key, "units": "imperial"}

    try:
        response = requests.get(url, params=params, timeout=10)

        # The API answers 404 when it does not know the city name.
        if response.status_code == 404:
            print(f"City not found here {city}")
            continue

        # Any other HTTP error (bad key, rate limit, ...) is reported below
        # rather than being mislabelled as a missing city.
        response.raise_for_status()
        weather_res = response.json()

        weather_records.append({
            "City": city,
            "Country": weather_res["sys"]["country"],
            "Date": datetime.datetime.fromtimestamp(weather_res["dt"]),
            "Latitude": weather_res["coord"]["lat"],
            "Longitude": weather_res["coord"]["lon"],
            "Cloudiness": weather_res["clouds"]["all"],
            "Humidity": weather_res["main"]["humidity"],
            "Max Temperature": weather_res["main"]["temp_max"],
            "Wind Speed": weather_res["wind"]["speed"],
        })
    except (requests.RequestException, KeyError, ValueError) as err:
        # Network failure, HTTP error, malformed JSON, or a missing field:
        # skip this city but say why, instead of silently swallowing it.
        print(f"Skipping {city}: {err!r}")

print("Data Retrieval Complete")

# Only cities with a complete weather record are kept. The explicit column
# list fixes the column order and keeps the frame well-formed even if no
# city could be retrieved.
w_df = pd.DataFrame(
    weather_records,
    columns=[
        "City",
        "Country",
        "Date",
        "Latitude",
        "Longitude",
        "Cloudiness",
        "Humidity",
        "Max Temperature",
        "Wind Speed",
    ],
)

## Convert Raw Data to DataFrame
* Save as csv

In [None]:
# Display the first 10 rows of the weather DataFrame.
w_df.head(10)

In [None]:
# Write the weather DataFrame to "w_df.csv" in the current working directory.
# NOTE(review): the output_data_file constant defined in the setup cell is not
# used here — confirm which output location is intended.
w_df.to_csv("w_df.csv")

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

### Latitude vs. Temperature Plot

In [None]:
#Latitude vs. Temperature Plot

# Date of analysis, shown in the plot title
t = time.strftime("%m/%d/%Y")

# Scatter of every city's latitude against its maximum temperature
plt.scatter(w_df["Latitude"], w_df["Max Temperature"],facecolors="green")

# Labels
plt.xlabel("City Latitude")
plt.ylabel("Max Temperature")
plt.title(f"City Latitude vs Max Temperature({t}) ")

# Layout adjustment and saving must happen before plt.show(): once the figure
# has been rendered, later pyplot calls no longer affect it.
plt.tight_layout()
plt.savefig("latitude_vs_max_temperature.png")
plt.show()

### Latitude vs. Humidity Plot

In [None]:
#Latitude vs. Humidity Plot

# Date of analysis, shown in the plot title
t = time.strftime("%m/%d/%Y")

# Scatter of every city's latitude against its humidity
plt.scatter(w_df["Latitude"], w_df["Humidity"],facecolors="green")

# Labels
plt.xlabel("City Latitude")
plt.ylabel("Humidity")
plt.title(f"City Latitude vs Humidity({t}) ")

# Layout adjustment and saving must happen before plt.show(): once the figure
# has been rendered, later pyplot calls no longer affect it.
plt.tight_layout()
plt.savefig("latitude_vs_humidity.png")
plt.show()


### Latitude vs. Cloudiness Plot

In [None]:
#Latitude vs. Cloudiness Plot

# Date of analysis, shown in the plot title
t = time.strftime("%m/%d/%Y")

# Scatter of every city's latitude against its cloudiness
plt.scatter(w_df["Latitude"], w_df["Cloudiness"],facecolors="green")

# Labels
plt.xlabel("City Latitude")
plt.ylabel("Cloudiness")
plt.title(f"City Latitude vs Cloudiness({t}) ")

# Layout adjustment and saving must happen before plt.show(): once the figure
# has been rendered, later pyplot calls no longer affect it.
plt.tight_layout()
plt.savefig("latitude_vs_cloudiness.png")
plt.show()

### Latitude vs. Wind Speed Plot

In [None]:
#Latitude vs. Wind Speed Plot

# Date of analysis, shown in the plot title
t = time.strftime("%m/%d/%Y")

# Scatter of every city's latitude against its wind speed
plt.scatter(w_df["Latitude"], w_df["Wind Speed"],facecolors="green")

# Labels
plt.xlabel("City Latitude")
plt.ylabel("Wind Speed")
plt.title(f"City Latitude vs Wind Speed({t}) ")

# Layout adjustment and saving must happen before plt.show(): once the figure
# has been rendered, later pyplot calls no longer affect it.
plt.tight_layout()
plt.savefig("latitude_vs_wind_speed.png")
plt.show()


## Linear Regression

### Write linear regression plot function (optional)

In [3]:
#linear regression equation is:
#regression_value = x_value * slope + intercept

##line equation:
#line_equation_value = "y = " + str(slope) + "x + " + str(intercept)

In [None]:
# Northern hemisphere: rows of w_df whose latitude is greater than 0.
# The comparison alone only yields a boolean mask; it has to be applied to
# w_df to obtain a DataFrame that still has the weather columns.
northernhem_df = w_df.loc[w_df["Latitude"] > 0]

In [None]:
# Southern hemisphere: rows of w_df whose latitude is less than 0.
# The comparison alone only yields a boolean mask; it has to be applied to
# w_df to obtain a DataFrame that still has the weather columns.
southernhem_df = w_df.loc[w_df["Latitude"] < 0]

###  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
#Northern Hemisphere - Max Temp vs. Latitude Linear Regression

x_latitude = northernhem_df['Latitude']
y_temperature = northernhem_df['Max Temperature']

# Least-squares fit of max temperature against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_latitude, y_temperature)

# Fitted values along the regression line
regress_val = x_latitude * slope + intercept

# Line equation, rounded for display on the plot
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Scatter of the data with the fitted line and its equation
plt.scatter(x_latitude,y_temperature)
plt.plot(x_latitude,regress_val,"r-")
plt.annotate(line_equation, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")

# Labels
plt.title('Northern Hemisphere- Max temp vs Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Max temperature')

# rvalue is the correlation coefficient r; square it to report r-squared
print(f"The r-squared value here is {rvalue ** 2}")

# tight_layout must run before plt.show(); afterwards it no longer affects
# the rendered figure.
plt.tight_layout()
plt.show()


###  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
#S Hemisphere - Max Temp vs. Latitude Linear Regression

x_latitude = southernhem_df['Latitude']
y_temperature = southernhem_df['Max Temperature']

# Least-squares fit of max temperature against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_latitude, y_temperature)

# Fitted values along the regression line
regress_val = x_latitude * slope + intercept

# Line equation, rounded for display on the plot
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Scatter of the data with the fitted line and its equation
plt.scatter(x_latitude,y_temperature)
plt.plot(x_latitude,regress_val,"r-")
plt.annotate(line_equation, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")

# Labels
plt.title('Southern Hemisphere- Max temp vs Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Max temperature')

# rvalue is the correlation coefficient r; square it to report r-squared
print(f"The r-squared value here is {rvalue ** 2}")

# tight_layout must run before plt.show(); afterwards it no longer affects
# the rendered figure.
plt.tight_layout()
plt.show()



###  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
#Northern Hemisphere - Humidity vs. Latitude Linear Regression

x_latitude = northernhem_df['Latitude']
y_hum = northernhem_df['Humidity']

# Least-squares fit of humidity against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_latitude, y_hum)

# Fitted values along the regression line
regress_val = x_latitude * slope + intercept

# Line equation, rounded for display on the plot
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Scatter of the data with the fitted line and its equation
plt.scatter(x_latitude,y_hum)
plt.plot(x_latitude,regress_val,"r-")
plt.annotate(line_equation, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")

# Labels
plt.title('Northern Hemisphere- Humidity vs Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Humidity')

# rvalue is the correlation coefficient r; square it to report r-squared
print(f"The r-squared value here is {rvalue ** 2}")

# tight_layout must run before plt.show(); afterwards it no longer affects
# the rendered figure.
plt.tight_layout()
plt.show()



###  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
#Southern Hemisphere - Humidity vs. Latitude Linear Regression

x_latitude = southernhem_df['Latitude']
y_hum = southernhem_df['Humidity']

# Least-squares fit of humidity against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_latitude, y_hum)

# Fitted values along the regression line
regress_val = x_latitude * slope + intercept

# Line equation, rounded for display on the plot
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Scatter of the data with the fitted line and its equation
plt.scatter(x_latitude,y_hum)
plt.plot(x_latitude,regress_val,"r-")
plt.annotate(line_equation, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")

# Labels
plt.title('Southern Hemisphere- Humidity vs Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Humidity')

# rvalue is the correlation coefficient r; square it to report r-squared
print(f"The r-squared value here is {rvalue ** 2}")

# tight_layout must run before plt.show(); afterwards it no longer affects
# the rendered figure.
plt.tight_layout()
plt.show()

###  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
#Northern Hemisphere - Cloudiness vs. Latitude Linear Regression

x_latitude = northernhem_df['Latitude']
y_c = northernhem_df['Cloudiness']

# Least-squares fit of cloudiness against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_latitude, y_c)

# Fitted values along the regression line
regress_val = x_latitude * slope + intercept

# Line equation, rounded for display on the plot
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Scatter of the data with the fitted line and its equation
plt.scatter(x_latitude,y_c)
plt.plot(x_latitude,regress_val,"r-")
plt.annotate(line_equation, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")

# Labels
plt.title('Northern Hemisphere- Cloudiness vs Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')

# rvalue is the correlation coefficient r; square it to report r-squared
print(f"The r-squared value here is {rvalue ** 2}")

# tight_layout must run before plt.show(); afterwards it no longer affects
# the rendered figure.
plt.tight_layout()
plt.show()

###  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
#S Hemisphere - Cloudiness vs. Latitude Linear Regression

x_latitude = southernhem_df['Latitude']
y_c = southernhem_df['Cloudiness']

# Least-squares fit of cloudiness against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_latitude, y_c)

# Fitted values along the regression line
regress_val = x_latitude * slope + intercept

# Line equation, rounded for display on the plot
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Scatter of the data with the fitted line and its equation
plt.scatter(x_latitude,y_c)
plt.plot(x_latitude,regress_val,"r-")
plt.annotate(line_equation, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")

# Labels
plt.title('Southern Hemisphere- Cloudiness vs Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')

# rvalue is the correlation coefficient r; square it to report r-squared
print(f"The r-squared value here is {rvalue ** 2}")

# tight_layout must run before plt.show(); afterwards it no longer affects
# the rendered figure.
plt.tight_layout()
plt.show()

###  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
#Northern Hemisphere - Wind Speed vs. Latitude Linear Regression

x_latitude = northernhem_df['Latitude']
y_s = northernhem_df['Wind Speed']

# Least-squares fit of wind speed against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_latitude, y_s)

# Fitted values along the regression line
regress_val = x_latitude * slope + intercept

# Line equation, rounded for display on the plot
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Scatter of the data with the fitted line and its equation
plt.scatter(x_latitude,y_s)
plt.plot(x_latitude,regress_val,"r-")
plt.annotate(line_equation, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")

# Labels
plt.title('Northern Hemisphere- Wind Speed vs Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')

# rvalue is the correlation coefficient r; square it to report r-squared
print(f"The r-squared value here is {rvalue ** 2}")

# tight_layout must run before plt.show(); afterwards it no longer affects
# the rendered figure.
plt.tight_layout()
plt.show()

###  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
#Southern Hemisphere - Wind Speed vs. Latitude Linear Regression

x_latitude = southernhem_df['Latitude']
y_s = southernhem_df['Wind Speed']

# Least-squares fit of wind speed against latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_latitude, y_s)

# Fitted values along the regression line
regress_val = x_latitude * slope + intercept

# Line equation, rounded for display on the plot
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

# Scatter of the data with the fitted line and its equation
plt.scatter(x_latitude,y_s)
plt.plot(x_latitude,regress_val,"r-")
plt.annotate(line_equation, xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red")

# Labels
plt.title('Southern Hemisphere- Wind Speed vs Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')

# rvalue is the correlation coefficient r; square it to report r-squared
print(f"The r-squared value here is {rvalue ** 2}")

# tight_layout must run before plt.show(); afterwards it no longer affects
# the rendered figure.
plt.tight_layout()
plt.show()

In [None]:
#Observations:
#The code would not run properly, so I could not make observations about the data plots.
