In [1]:
# Dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests as req
from pprint import pprint
import os, json, csv, time, random, datetime, time 
from config import api_key
from citipy import citipy
import openweathermapy as ow


In [2]:
# Assign the Cities' file to a variable 
cities_file = os.path.join(".", "Resources", "cities.csv")

### Create a list of cities for analysis

In [3]:
# Create lists with the complete latitudes and longitudes to better analysis accuracy. 
lat = [y for y in range (-88, 93, 5)]
lng = [x for x in range (-178, 188, 10)]

# Store the cities in a collection/set. This way we be sure we won't have any duplication.
collection = set()
# Line terminator to append to each city
lineterminator = '\r\n'
# Looping combining Latitudes and Longitudes to get cities 
for y in lat:
    for x in lng:
        # Get the nearest city of current latitude and longitude
        rand_y = random.uniform( (y-2), (y+2) )
        rand_x = random.uniform( (x-2), (x+2) )
        city = citipy.nearest_city(y,x)
        collection.add(f"{city.city_name},{city.country_code}{lineterminator}")
#         print(f"City: {city.city_name}, Country:{city.country_code}, coord: {y},{x}")

# Preview total of cities 
print(f"Total of distinct cities: {len(collection)}")

Total of distinct cities: 598


In [4]:
# Save the selected cities in a csv file.
with open(cities_file, 'w') as writeFile:
    # Write the header
    writeFile.write(f"City,Country{lineterminator}")
    # Write the cities
    writeFile.writelines(collection)
        

In [5]:
# Read the cities file
cities = pd.read_csv(cities_file, sep=',') # columns=["city","country"], 
# Convert the selected cities to a DataFrame
df_cities = pd.DataFrame(cities)
# Preview the DataFrame
df_cities.head()


Unnamed: 0,City,Country
0,grand gaube,mu
1,aklavik,ca
2,hobart,au
3,port-gentil,ga
4,kazalinsk,kz


In [17]:
# Create the new fields for city.
df_cities["Cloudiness"] = ""
df_cities["Date"] = ""
df_cities["Humidity"] = 0
df_cities["Lat"] = 0.0
df_cities["Lng"] = 0.0
df_cities["Max Temp"] = 0.0
df_cities["Wind Speed"] = 0.0

# Set units to get the temperature in fahrenheit
units = "imperial"

# Set the default URL.
url = f"http://api.openweathermap.org/data/2.5/weather?APPID={api_key}&units={units}"

0         grand gaube
1             aklavik
2              hobart
3         port-gentil
4           kazalinsk
5                jalu
6           geraldton
7               bumba
8          butaritari
9            balykshi
10              berga
11              lodja
12              floro
13        new norfolk
14               faya
15         kingsville
16       saint george
17            lazarev
18         buseresere
19              lolua
20           shizunai
21       attawapiskat
22         pospelikha
23      san cristobal
24         sumbawanga
25         wulanhaote
26        mys shmidta
27              yirol
28                sur
29             laguna
            ...      
568           norfolk
569             pisco
570            cheney
571             bitam
572             medea
573           amparai
574            burgeo
575    kulhudhuffushi
576        oudtshoorn
577      sungai udang
578            jiexiu
579             sucua
580      san patricio
581              jadu
582       

In [None]:

# Looping to fill new fields. 
count = 0 

print("-----------------------------")
print("Beginning Data Retrieval")
print("-----------------------------")       

for index, row in df_cities.iterrows(): 
#     if count == 5:
#         break
    # Add a one second interval between queries to stay within API query limits
    #####################################################
    time.sleep(1) #####################################################
    #####################################################
    count += 1
    # Build query URL and print log
    city_country = f"{row['City']},{row['Country']}" 
    # Append the current city to the URL
    query_url = f"{url}&q={city_country}"
    print(f"Processing Record {count} - ({city_country.replace(',','/')})")
    print(query_url)
    
    try:
        # Run the request
        temp = req.get(query_url).json()
        # Fill fields
        df_cities.loc[index, 'Cloudiness'] = temp['clouds']['all']
        df_cities.loc[index, 'Date'] = datetime.datetime.fromtimestamp( int(temp['dt']) ).strftime('%Y-%m-%d %H:%M:%S')
        df_cities.loc[index, 'Humidity'] = temp['main']['humidity']
        df_cities.loc[index, 'Lat'] = temp['coord']["lat"]
        df_cities.loc[index, 'Lng'] = temp['coord']["lon"]
        df_cities.loc[index, 'Max Temp'] = temp['main']['temp_max']
        df_cities.loc[index, 'Wind Speed'] = temp['wind']['speed']
    except Exception as e:    
        print(f"We've got ther error message [{e}] when processing city={city_country} - {temp}.")
        # Lets remove the current city from DF.
        df_cities.drop([index],inplace=True)

    
print("-----------------------------")
print("Data Retrieval Complete")
print("-----------------------------")       
        
        
# Remove any City missing data
df_cities.dropna(how="any", inplace=True)  
# Reindex DF
df_cities.reset_index(drop=True, inplace=True)   
# Preview total cities
print(f"Total cities remaining: {len(df_cities)}")


In [None]:
# Preview new fields filled
df_cities.head()

In [None]:
df_cities.info()

In [None]:
# Set Latitude for x axis
x_axis = df_cities["Lat"].tolist()
# Set Max Temperature for y axis
y_axis = df_cities["Max Temp"].tolist()

# Set Line Trend
# z = np.polyfit(x_axis, y_axis, 1)
# p = np.poly1d(z)
# plt.plot(x_axis,p(x_axis),"g--")

plt.grid(True)

plt.scatter(x_axis, y_axis, marker="o",color="purple")


# Set x and y limits
plt.xlim(-80, 100)
# plt.ylim(-100, 150)

# Set Title 
# Get the analysis date and format it
analisys_date = df_cities.loc[0:0]["Date"][0]
analisys_date = analisys_date[0:10]
dt = datetime.datetime.strptime(analisys_date, '%Y-%m-%d')
analisys_date = dt.strftime("%B %d, %Y")
plt.title(f"City Latitude vs Max Temperature ({analisys_date})")

# Set labels
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")

# Save the Figure
# plt.savefig("tumor_response_to_treatment.png")

# Show the Figure
plt.tight_layout()
plt.show()




