# WeatherPy

---

## Starter Code to Generate Random Geographic Coordinates and a List of Cities

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import the OpenWeatherMap API key
from api_keys import weather_api_key

# Import citipy to determine the cities based on latitude and longitude
from citipy import citipy


### Generate the Cities List by Using the `citipy` Library

In [None]:
# Names of the unique cities found so far, kept in discovery order
cities = []

# Companion set used only for duplicate checks: set membership is O(1),
# whereas `city not in cities` on the list rescans it for every candidate
seen_cities = set()

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # Keep only the first occurrence of each city name
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
print(f"Number of cities in the list: {len(cities)}")


---

## Requirement 1: Create Plots to Showcase the Relationship Between Weather Variables and Latitude

### Use the OpenWeatherMap API to retrieve weather data from the cities list generated in the starter code

In [None]:
# Set the API base URL. The per-city query string is passed through `params`
# below, so requests URL-encodes city names and no stale city is baked in here.
url = "https://api.openweathermap.org/data/2.5/weather"

# Seconds to wait for a response before giving up on a city, so one stalled
# connection cannot hang the whole retrieval loop
request_timeout = 10

# Define an empty list to fetch the weather data for each city
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list to fetch weather data
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes
    if i % 50 == 0 and i >= 50:
        set_count += 1
        # Restart at 1 so every set is numbered 1-50, like the first set
        record_count = 1

    # Query parameters for this city (metric units are requested)
    query_params = {"q": city, "appid": weather_api_key, "units": "metric"}

    # Log the record and set numbers (the URL is not logged: it carries the API key)
    print(f"Processing Record {record_count} of Set {set_count} | {city}")

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # Parse the JSON and retrieve data
        response = requests.get(url, params=query_params, timeout=request_timeout)
        city_weather = response.json()

        # Parse out latitude, longitude, max temp, humidity, cloudiness,
        # wind speed, country, and date. A response without these keys
        # raises KeyError, which is handled below as "city not found".
        city_data.append({"City": city,
                          "Lat": city_weather["coord"]["lat"],
                          "Lng": city_weather["coord"]["lon"],
                          "Max Temp": city_weather["main"]["temp_max"],
                          "Humidity": city_weather["main"]["humidity"],
                          "Cloudiness": city_weather["clouds"]["all"],
                          "Wind Speed": city_weather["wind"]["speed"],
                          "Country": city_weather["sys"]["country"],
                          "Date": city_weather["dt"]})

    # If the response lacks the expected fields, skip the city
    except KeyError:
        print("City not found. Skipping...")

    # Network failures and unparsable bodies also skip the city. Only the
    # exception type is printed because the message may embed the request URL
    # (and therefore the API key).
    except (ValueError, requests.exceptions.RequestException) as err:
        print(f"Request failed ({type(err).__name__}). Skipping...")

    # pause to avoid rate limiting
    time.sleep(1)

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")


In [None]:
# Build a DataFrame from the list of per-city weather dictionaries
city_data_df = pd.DataFrame(data=city_data)

# Show the record count for every column
city_data_df.count()


In [None]:
# Preview the first five rows of the retrieved weather data
city_data_df.head(n=5)


In [None]:
# Export the City_Data into a csv
import os

# to_csv does not create missing folders, so make sure the output folder exists
os.makedirs("output_data", exist_ok=True)
city_data_df.to_csv("output_data/cities.csv", index_label="City_ID")


In [None]:
# Read saved data.
# keep_default_na=False stops pandas from turning literal strings such as the
# country code "NA" (Namibia) or a city called "nan" into missing values;
# na_values=[""] still treats genuinely empty fields as missing.
city_data_df = pd.read_csv(
    "output_data/cities.csv",
    index_col="City_ID",
    keep_default_na=False,
    na_values=[""],
)

# Display sample data
city_data_df.head()


### Create the Scatter Plots Requested

#### Latitude Vs. Temperature

In [None]:
from datetime import datetime

# Build scatter plot for latitude vs. temperature
plt.scatter(city_data_df["Lat"], city_data_df["Max Temp"], edgecolor="black", linewidths=1, marker="o", alpha=0.8)

# Date stamp for the plot title; the later scatter-plot cells reuse `date`
date = datetime.now().strftime("%Y-%m-%d")

# Incorporate the other graph properties
# (the plotted variable is the max temperature, so "Max" belongs with "Temperature")
plt.title(f"City Latitude vs. Max Temperature ({date})")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (C)")
plt.grid(True)

# Save the figure
plt.savefig("output_data/Fig1.png")

# Show plot
plt.show()


#### Latitude Vs. Humidity

In [None]:
# Scatter humidity against latitude, one marker per city
plt.scatter(
    x=city_data_df["Lat"],
    y=city_data_df["Humidity"],
    marker="o",
    linewidths=1,
    edgecolor="black",
    alpha=0.8,
)

# Title (stamped with the `date` string), axis labels and grid
plt.title(f"City Latitude vs. Humidity ({date})")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.grid(True)

# Write the figure to disk, then display it
plt.savefig("output_data/Fig2.png")
plt.show()


#### Latitude Vs. Cloudiness

In [None]:
# Scatter cloudiness against latitude, one marker per city
plt.scatter(
    x=city_data_df["Lat"],
    y=city_data_df["Cloudiness"],
    marker="o",
    linewidths=1,
    edgecolor="black",
    alpha=0.8,
)

# Title (stamped with the `date` string), axis labels and grid
plt.title(f"City Latitude vs. Cloudiness ({date})")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.grid(True)

# Write the figure to disk, then display it
plt.savefig("output_data/Fig3.png")
plt.show()


#### Latitude vs. Wind Speed Plot

In [None]:
# Scatter wind speed against latitude, one marker per city
plt.scatter(
    x=city_data_df["Lat"],
    y=city_data_df["Wind Speed"],
    marker="o",
    linewidths=1,
    edgecolor="black",
    alpha=0.8,
)

# Title (stamped with the `date` string), axis labels and grid
plt.title(f"City Latitude vs. Wind Speed ({date})")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (m/s)")
plt.grid(True)

# Write the figure to disk, then display it
plt.savefig("output_data/Fig4.png")
plt.show()


---

## Requirement 2: Compute Linear Regression for Each Relationship


In [None]:
# Define a function to create Linear Regression plots
def create_linear_regression_plot(x, y, x_label, y_label, title, output_file):
    """
    Draw a scatter plot of y against x together with its fitted regression line.

    The fitted equation is annotated in the lower-left corner of the axes, the
    figure is saved to ``output_file`` and shown, and the r-value returned by
    ``linregress`` is printed afterwards.

    Parameters:
    x (array-like): The data for the x-axis (e.g. a pd.Series or a list).
    y (array-like): The data for the y-axis; same length as x.
    x_label (str): The label for the x-axis.
    y_label (str): The label for the y-axis.
    title (str): The title of the plot.
    output_file (str): The file path to save the plot.

    Returns:
    None

    Raises:
    ValueError: If fewer than two (x, y) pairs remain after discarding pairs
        that contain NaN, because no line can be fitted.
    """
    # Work on plain float arrays so lists are accepted as well as Series
    x_values = np.asarray(x, dtype=float)
    y_values = np.asarray(y, dtype=float)

    # A NaN in either input would propagate into the fitted slope/intercept,
    # so discard incomplete pairs before fitting
    complete = ~(np.isnan(x_values) | np.isnan(y_values))
    x_values = x_values[complete]
    y_values = y_values[complete]
    if x_values.size < 2:
        raise ValueError(
            "At least two complete (x, y) points are required for a linear regression."
        )

    # Perform linear regression
    slope, intercept, r_value, p_value, std_err = linregress(x_values, y_values)

    # Create scatter plot
    plt.scatter(x_values, y_values, edgecolor="black", linewidths=1, marker="o",
                alpha=0.8, label="Data Points")

    # Plot the linear regression line
    plt.plot(x_values, slope * x_values + intercept, color="red")

    # Render the intercept with an explicit sign so a negative value reads
    # "y = 0.50x - 3.20" rather than "y = 0.50x + -3.20"
    sign = "-" if intercept < 0 else "+"
    equation = f"Linear equation: y = {slope:.2f}x {sign} {abs(intercept):.2f}"

    # Add the linear equation to the plot, anchored to the lower-left corner of
    # the axes; the semi-transparent white box keeps it legible over markers
    props = dict(boxstyle='round', facecolor='white', alpha=0.8)
    plt.gca().annotate(equation,
                       xy=(0.05, 0.05), xycoords="axes fraction", fontsize=12, color="red",
                       horizontalalignment="left", verticalalignment="bottom", bbox=props)

    # Add labels and title
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend()

    # Disable the grid
    plt.grid(False)

    # Save the figure
    plt.savefig(output_file)

    # Show the plot
    plt.show()

    # Print the r-value separately
    print(f"The r-value is: {r_value:.2f}")
    

In [None]:
# Northern Hemisphere subset: every city with Latitude >= 0
northern_hemi_df = city_data_df.loc[city_data_df["Lat"] >= 0]

# Preview the first five rows
northern_hemi_df.head(n=5)


In [None]:
# Southern Hemisphere subset: every city with Latitude < 0
southern_hemi_df = city_data_df.loc[city_data_df["Lat"] < 0]

# Preview the first five rows
southern_hemi_df.head(n=5)


###  Temperature vs. Latitude Linear Regression Plot

In [None]:
# Linear regression on Northern Hemisphere
# Filter the data for the Northern Hemisphere (Latitude >= 0)
northern_hemisphere_df = city_data_df[city_data_df["Lat"] >= 0]

# Max temperature vs. latitude. The title names the hemisphere so this figure
# can be told apart from its Southern Hemisphere counterpart, and the axis
# label carries the unit (the data was requested in metric units).
create_linear_regression_plot(
    northern_hemisphere_df["Lat"],
    northern_hemisphere_df["Max Temp"],
    "Latitude",
    "Max Temperature (C)",
    "Northern Hemisphere: Latitude vs. Max Temperature",
    "output_data/Northern_Hemisphere_Temperature.png"
)


In [None]:
# Linear regression on Southern Hemisphere
# Filter the data for the Southern Hemisphere (Latitude < 0)
southern_hemisphere_df = city_data_df[city_data_df["Lat"] < 0]

# Max temperature vs. latitude. The title names the hemisphere so this figure
# can be told apart from its Northern Hemisphere counterpart, and the axis
# label carries the unit (the data was requested in metric units).
create_linear_regression_plot(
    southern_hemisphere_df["Lat"],
    southern_hemisphere_df["Max Temp"],
    "Latitude",
    "Max Temperature (C)",
    "Southern Hemisphere: Latitude vs. Max Temperature",
    "output_data/Southern_Hemisphere_Temperature.png"
)


**Discussion about the linear relationship:** 
The linear regression analysis for both the Northern and Southern Hemispheres highlights the relationship between latitude and maximum temperature:

Northern Hemisphere:
The regression analysis shows a negative correlation between latitude and maximum temperature. As we move away from the equator (latitude increases), temperatures tend to decrease. This is consistent with the expected behavior, as higher latitudes (closer to the poles) are generally cooler due to receiving less direct sunlight.
The slope of the regression line indicates that temperature decreases at a steady rate with increasing latitude, reflecting the broad climate gradient from the equator to the poles.

Southern Hemisphere:
Unlike the Northern Hemisphere, the Southern Hemisphere shows a positive correlation between latitude and temperature, because southern latitudes are negative: as latitude increases toward 0 (the equator), temperatures rise. However, since the Southern Hemisphere is largely covered by oceans, the temperature variations might not be as drastic as in the Northern Hemisphere.
As latitude decreases (moving closer to the South Pole), temperatures drop, which reflects cooler conditions typical in southern latitudes.
Overall, the analysis demonstrates a clear inverse relationship between distance from the equator (absolute latitude) and maximum temperature in both hemispheres, with temperatures declining as one moves further from the equator. These findings align with global climate patterns driven by the angle of solar radiation and distance from the equator.

### Humidity vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere
# Filter the data for the Northern Hemisphere (Latitude >= 0)
northern_hemisphere_df = city_data_df[city_data_df["Lat"] >= 0]

# Linear regression of humidity vs. latitude in the Northern Hemisphere.
# The title names the hemisphere so the two humidity figures are not
# indistinguishable, and the axis label carries the unit.
create_linear_regression_plot(
    northern_hemisphere_df["Lat"],
    northern_hemisphere_df["Humidity"],
    "Latitude",
    "Humidity (%)",
    "Northern Hemisphere: Latitude vs. Humidity",
    "output_data/Northern_Hemisphere_Humidity.png"
)


In [None]:
# Southern Hemisphere
# Filter the data for the Southern Hemisphere (Latitude < 0)
southern_hemisphere_df = city_data_df[city_data_df["Lat"] < 0]

# Linear regression of humidity vs. latitude in the Southern Hemisphere.
# The title names the hemisphere so the two humidity figures are not
# indistinguishable, and the axis label carries the unit.
create_linear_regression_plot(
    southern_hemisphere_df["Lat"],
    southern_hemisphere_df["Humidity"],
    "Latitude",
    "Humidity (%)",
    "Southern Hemisphere: Latitude vs. Humidity",
    "output_data/Southern_Hemisphere_Humidity.png"
)


**Discussion about the linear relationship:** 
The linear regression analysis for humidity versus latitude in both the Northern and Southern Hemispheres provides insights into how humidity varies with latitude:

Northern Hemisphere:
The regression analysis for the Northern Hemisphere shows a weak positive correlation between latitude and humidity. This means that as latitude increases (moving away from the equator), there is a slight increase in humidity. This trend might be due to increased moisture availability in higher latitudes, where temperate and subarctic regions often experience higher relative humidity, especially near bodies of water.
The relationship is not very strong, suggesting that factors other than latitude, such as proximity to large bodies of water or local climate conditions, might play a significant role in determining humidity levels.

Southern Hemisphere:
In the Southern Hemisphere, the linear regression reveals a more noticeable relationship between latitude and humidity. Generally, there is a weak to moderate positive correlation, indicating that as latitude increases (moving toward the equator, since southern latitudes are negative), humidity tends to increase as well.
This pattern could be influenced by the larger expanse of oceanic areas in the Southern Hemisphere, which contribute to higher humidity levels in coastal and tropical regions. Additionally, the relatively smaller landmass compared to the Northern Hemisphere might lead to more uniform humidity distribution influenced by oceanic conditions.
In summary, while both hemispheres show some degree of correlation between latitude and humidity, the relationships are relatively weak. This suggests that latitude alone is not a dominant factor in determining humidity levels, and other regional climate factors, such as proximity to oceans, prevailing wind patterns, and local geographic features, also play significant roles.

### Cloudiness vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere
# Keep only the rows whose latitude is at or above the equator
northern_hemisphere_df = city_data_df.loc[city_data_df["Lat"] >= 0]

# Linear regression of cloudiness vs. latitude in the Northern Hemisphere
create_linear_regression_plot(
    x=northern_hemisphere_df["Lat"],
    y=northern_hemisphere_df["Cloudiness"],
    x_label="Latitude",
    y_label="Cloudiness (%)",
    title="Northern Hemisphere: Latitude vs. Cloudiness",
    output_file="output_data/Northern_Hemisphere_Cloudiness.png",
)


In [None]:
# Southern Hemisphere
# Keep only the rows whose latitude is below the equator
southern_hemisphere_df = city_data_df.loc[city_data_df["Lat"] < 0]

# Linear regression of cloudiness vs. latitude in the Southern Hemisphere
create_linear_regression_plot(
    x=southern_hemisphere_df["Lat"],
    y=southern_hemisphere_df["Cloudiness"],
    x_label="Latitude",
    y_label="Cloudiness (%)",
    title="Southern Hemisphere: Latitude vs. Cloudiness",
    output_file="output_data/Southern_Hemisphere_Cloudiness.png",
)


**Discussion about the linear relationship:** 

Northern Hemisphere:
The linear regression analysis for cloudiness versus latitude in the Northern Hemisphere might reveal a specific pattern or trend. Generally, cloudiness can be influenced by several factors such as geographic location, proximity to water bodies, and prevailing weather systems.
If the plot shows a trend where cloudiness increases or decreases with latitude, it may indicate how cloud cover is influenced by seasonal changes or climatic zones. For instance, higher latitudes might experience different cloud patterns due to varying temperatures and atmospheric conditions compared to lower latitudes.

Southern Hemisphere:
Similarly, in the Southern Hemisphere, the relationship between latitude and cloudiness will provide insights into how cloud cover varies with latitude. The Southern Hemisphere's cloudiness may be influenced by its vast oceanic expanses and its distinct climatic zones, which can affect cloud formation and distribution.
A clear trend might be observed in the plot, showing whether cloudiness increases or decreases as you move towards the poles or the equator. This can reveal patterns related to ocean currents, atmospheric pressure systems, and seasonal variations.

In summary, analyzing the linear relationship between cloudiness and latitude in both hemispheres can provide valuable insights into how cloud cover varies with geographical position, while also highlighting the influence of other climatic and meteorological factors.

### Wind Speed vs. Latitude Linear Regression Plot

In [None]:
# Northern Hemisphere
# Filter the data for the Northern Hemisphere (Latitude >= 0)
northern_hemisphere_df = city_data_df[city_data_df["Lat"] >= 0]

# Linear regression of wind speed vs. latitude in the Northern Hemisphere.
# The weather data is requested with units=metric, so wind speed is in m/s
# (matching the latitude vs. wind speed scatter plot), not mph.
create_linear_regression_plot(
    northern_hemisphere_df["Lat"],
    northern_hemisphere_df["Wind Speed"],
    "Latitude",
    "Wind Speed (m/s)",
    "Northern Hemisphere: Latitude vs. Wind Speed",
    "output_data/Northern_Hemisphere_Wind_Speed.png"
)


In [None]:
# Southern Hemisphere
# Filter the data for the Southern Hemisphere (Latitude < 0)
southern_hemisphere_df = city_data_df[city_data_df["Lat"] < 0]

# Linear regression of wind speed vs. latitude in the Southern Hemisphere.
# The weather data is requested with units=metric, so wind speed is in m/s
# (matching the latitude vs. wind speed scatter plot), not mph.
create_linear_regression_plot(
    southern_hemisphere_df["Lat"],
    southern_hemisphere_df["Wind Speed"],
    "Latitude",
    "Wind Speed (m/s)",
    "Southern Hemisphere: Latitude vs. Wind Speed",
    "output_data/Southern_Hemisphere_Wind_Speed.png"
)


**Discussion about the linear relationship:** 

Northern Hemisphere:
Trend Observation: The linear regression plot of wind speed versus latitude in the Northern Hemisphere may exhibit certain patterns. Typically, wind speeds can be influenced by atmospheric pressure gradients, geographic features, and seasonal weather patterns.
Possible Findings: If the plot shows a clear trend where wind speeds increase or decrease with latitude, this might be related to the influence of large-scale weather systems such as the jet stream, which tends to shift with seasons and latitudinal changes. For example, mid-latitude regions might experience higher wind speeds due to stronger pressure gradients associated with storm systems.

Southern Hemisphere:
Trend Observation: The relationship between latitude and wind speed in the Southern Hemisphere might present different trends compared to the Northern Hemisphere. The Southern Hemisphere is dominated by oceanic expanses, which can affect wind patterns differently.
Possible Findings: If the plot shows a noticeable trend, it might reflect the impact of large-scale oceanic and atmospheric circulation patterns such as the Southern Hemisphere's westerlies, which can influence wind speeds in different latitudinal zones. The Southern Ocean, in particular, might contribute to higher wind speeds in higher latitudes.

In summary, analyzing the linear relationship between wind speed and latitude in both hemispheres can help identify regional patterns and factors influencing wind speeds. It provides insights into how wind patterns vary with geographic location and the interplay of atmospheric conditions across different latitudes.
