In [1]:
import json
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy.stats import linregress

from config_weather import weather_api_key

In [2]:
pip install citipy

Note: you may need to restart the kernel to use updated packages.


In [3]:
from citipy import citipy

In [4]:
# Path of the CSV file the collected weather records are written to
output_data_file = 'output_data/cities.csv'

# (min, max) bounds for the random latitude / longitude samples
lat_range, lng_range = (-90, 90), (-180, 180)

In [5]:
# Collect the distinct names of the cities nearest to 1500 random coordinates.
# NOTE(review): no random seed is set, so the sampled coordinates (and therefore
# the city list) differ on every run.
cities = []
seen_cities = set()  # names already added; `cities` keeps first-seen order

# Draw the random latitudes / longitudes and pair them up
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for each coordinate pair
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # Only the first occurrence of a city name is kept
    if city in seen_cities:
        continue
    seen_cities.add(city)
    cities.append(city)

# Show the city count to confirm it is large enough
len(cities)

652

In [6]:
# Probe request for a single city: fetch one response and pretty-print it so the
# JSON paths of the fields to collect can be read off the output.
# Fields wanted: City, Lat, Lng, Max Temp, Humidity, Cloudiness, Wind Speed, Country, Date

# HTTPS, because the API key travels in the query string.
# The trailing '?' is kept so code that appends a query string to `url` still works.
url = 'https://api.openweathermap.org/data/2.5/weather?'
units = 'imperial'

# Progress counters for the retrieval log: record number within the current
# set, and set number
num_city = 0
set_num = 1

name_of_city = cities[0]

# Pass the query as `params` so requests URL-encodes the city name (spaces,
# '&', '#', non-ASCII characters) instead of splicing it into the URL by hand.
# The timeout makes a stalled connection raise instead of hanging the cell.
weather_response = requests.get(
    url,
    params={'appid': weather_api_key, 'q': name_of_city, 'units': units},
    timeout=10,
)
weather_json = weather_response.json()
print(json.dumps(weather_json, indent=4, sort_keys=True))

{
    "base": "stations",
    "clouds": {
        "all": 1
    },
    "cod": 200,
    "coord": {
        "lat": 41.2835,
        "lon": -70.0995
    },
    "dt": 1611640971,
    "id": 4944903,
    "main": {
        "feels_like": 19.58,
        "humidity": 64,
        "pressure": 1018,
        "temp": 31.03,
        "temp_max": 32,
        "temp_min": 30.2
    },
    "name": "Nantucket",
    "sys": {
        "country": "US",
        "id": 3218,
        "sunrise": 1611662208,
        "sunset": 1611697738,
        "type": 1
    },
    "timezone": -18000,
    "visibility": 10000,
    "weather": [
        {
            "description": "clear sky",
            "icon": "01n",
            "id": 800,
            "main": "Clear"
        }
    ],
    "wind": {
        "deg": 360,
        "speed": 11.5
    }
}


In [7]:
print('Beginning Data Retrieval')
print('-----------------------------')

# Result columns: position i of every list describes the same city
cities_names = []
list_Lat = []
list_Lng = []
max_temps = []
list_of_humidity = []
cloudiness = []
wind_speeds = []
countries = []
dates = []

# Reset the progress counters here so that re-running this cell restarts the
# log at "Record 0 of Set 1" instead of continuing from an earlier run.
num_city = 0
set_num = 1
RECORDS_PER_SET = 50  # log lines are grouped into sets of this many records

# ANSI escape codes render " not " in bold in the skip message
bold = '\033[1m not \033[0m'

# Query the API once per city and keep the fields of every successful lookup
for city in cities:
    name_of_city = city
    try:
        # `params` lets requests URL-encode the city name; the timeout stops a
        # stalled connection from blocking the whole loop.
        weather_response = requests.get(
            url,
            params={'appid': weather_api_key, 'q': name_of_city, 'units': units},
            timeout=10,
        )
        weather_json = weather_response.json()

        # Read every field before appending anything, so a response with a
        # missing key cannot leave the lists with unequal lengths.
        Lat = weather_json['coord']['lat']
        Lng = weather_json['coord']['lon']
        temp = weather_json['main']['temp_max']
        humidity = weather_json['main']['humidity']
        clouds = weather_json['clouds']['all']
        wind = weather_json['wind']['speed']
        country = weather_json['sys']['country']
        date = weather_json['dt']
    except (requests.exceptions.RequestException, ValueError, KeyError):
        # Network failure, non-JSON body, or a response without the expected
        # keys: skip this city. Unlike a bare `except:`, this does not catch
        # KeyboardInterrupt, so the loop can still be interrupted.
        print(f'City {bold} found. Skipping...')
    else:
        cities_names.append(name_of_city)
        list_Lat.append(Lat)
        list_Lng.append(Lng)
        max_temps.append(temp)
        list_of_humidity.append(humidity)
        cloudiness.append(clouds)
        wind_speeds.append(wind)
        countries.append(country)
        dates.append(date)

        print(f'Processing Record {num_city} of Set {set_num} |{city}')

        # Advance the record counter and roll over into the next set
        num_city += 1
        if num_city >= RECORDS_PER_SET:
            num_city = 0
            set_num += 1

Beginning Data Retrieval
-----------------------------
Processing Record 0 of Set 1 |nantucket
Processing Record 1 of Set 1 |dire
Processing Record 2 of Set 1 |qaanaaq
Processing Record 3 of Set 1 |batagay-alyta
Processing Record 4 of Set 1 |bonthe
Processing Record 5 of Set 1 |badhni kalan
Processing Record 6 of Set 1 |ust-kuyga
Processing Record 7 of Set 1 |tiksi
Processing Record 8 of Set 1 |albany
Processing Record 9 of Set 1 |beidao
Processing Record 10 of Set 1 |porto novo
Processing Record 11 of Set 1 |port-gentil
Processing Record 12 of Set 1 |guerrero negro
Processing Record 13 of Set 1 |douglas
Processing Record 14 of Set 1 |mizan teferi
Processing Record 15 of Set 1 |katsuura
Processing Record 16 of Set 1 |orumiyeh
Processing Record 17 of Set 1 |kodiak
Processing Record 18 of Set 1 |sai buri
Processing Record 19 of Set 1 |high level
Processing Record 20 of Set 1 |caravelas
Processing Record 21 of Set 1 |smithers
Processing Record 22 of Set 1 |saldanha
Processing Record 23 of

City [1m not [0m found. Skipping...
Processing Record 38 of Set 4 |tautira
Processing Record 39 of Set 4 |requena
Processing Record 40 of Set 4 |marovoay
Processing Record 41 of Set 4 |gat
Processing Record 42 of Set 4 |nelson bay
Processing Record 43 of Set 4 |denpasar
Processing Record 44 of Set 4 |mareeba
Processing Record 45 of Set 4 |merauke
Processing Record 46 of Set 4 |ardakan
Processing Record 47 of Set 4 |buraydah
Processing Record 48 of Set 4 |sao goncalo do amarante
City [1m not [0m found. Skipping...
City [1m not [0m found. Skipping...
Processing Record 49 of Set 4 |vicosa
City [1m not [0m found. Skipping...
Processing Record 0 of Set 5 |karaton
Processing Record 1 of Set 5 |cap malheureux
Processing Record 2 of Set 5 |codrington
Processing Record 3 of Set 5 |kruisfontein
Processing Record 4 of Set 5 |flinders
Processing Record 5 of Set 5 |cidreira
Processing Record 6 of Set 5 |faanui
Processing Record 7 of Set 5 |mehamn
Processing Record 8 of Set 5 |thompson
Proce

Processing Record 23 of Set 8 |mascote
Processing Record 24 of Set 8 |naples
Processing Record 25 of Set 8 |san patricio
Processing Record 26 of Set 8 |gijon
Processing Record 27 of Set 8 |betsiamites
Processing Record 28 of Set 8 |bucak
Processing Record 29 of Set 8 |vilhena
Processing Record 30 of Set 8 |dapaong
Processing Record 31 of Set 8 |lasa
Processing Record 32 of Set 8 |davidson
Processing Record 33 of Set 8 |pisco
Processing Record 34 of Set 8 |magadan
Processing Record 35 of Set 8 |hami
City [1m not [0m found. Skipping...
Processing Record 36 of Set 8 |ulaanbaatar
Processing Record 37 of Set 8 |inuvik
City [1m not [0m found. Skipping...
Processing Record 38 of Set 8 |gweta
Processing Record 39 of Set 8 |ampanihy
Processing Record 40 of Set 8 |touba
Processing Record 41 of Set 8 |ulaangom
Processing Record 42 of Set 8 |husavik
Processing Record 43 of Set 8 |zhanakorgan
Processing Record 44 of Set 8 |prince rupert
Processing Record 45 of Set 8 |baillif
Processing Record 4

Processing Record 11 of Set 12 |melbourne
Processing Record 12 of Set 12 |great yarmouth
Processing Record 13 of Set 12 |kiunga
Processing Record 14 of Set 12 |mazagao
Processing Record 15 of Set 12 |tual
Processing Record 16 of Set 12 |ballina
Processing Record 17 of Set 12 |samarinda
Processing Record 18 of Set 12 |kanigiri
Processing Record 19 of Set 12 |itaobim
Processing Record 20 of Set 12 |indore
Processing Record 21 of Set 12 |caborca
Processing Record 22 of Set 12 |srednekolymsk
Processing Record 23 of Set 12 |cabo rojo
Processing Record 24 of Set 12 |paita
City [1m not [0m found. Skipping...
Processing Record 25 of Set 12 |klyuchi
Processing Record 26 of Set 12 |ambilobe
Processing Record 27 of Set 12 |komsomolskiy
Processing Record 28 of Set 12 |udachnyy
City [1m not [0m found. Skipping...
Processing Record 29 of Set 12 |corner brook
Processing Record 30 of Set 12 |pyskowice
Processing Record 31 of Set 12 |conde
Processing Record 32 of Set 12 |zhaoyang
Processing Record 

In [22]:
# Pair each output column name with the list collected for it
column_names = ['City', 'Lat', 'Lng', 'Max_Temp', 'Humidity',
                'Cloudiness', 'Wind_Speed', 'Country', 'Date']
column_values = [cities_names, list_Lat, list_Lng, max_temps, list_of_humidity,
                 cloudiness, wind_speeds, countries, dates]
data = dict(zip(column_names, column_values))

# Build the DataFrame, one row per city
weather_df = pd.DataFrame(data)

# Name the index so it gets a header when the frame is written to CSV
weather_df.index.name = 'City_ID'
weather_df

Unnamed: 0_level_0,City,Lat,Lng,Max_Temp,Humidity,Cloudiness,Wind_Speed,Country,Date
City_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,nantucket,41.2835,-70.0995,32.00,64,1,11.50,US,1611640971
1,dire,12.2833,-10.9667,62.96,26,4,1.72,ML,1611641196
2,qaanaaq,77.4840,-69.3632,5.05,65,0,7.85,GL,1611641196
3,batagay-alyta,67.8006,130.4114,-38.15,80,87,0.87,RU,1611641196
4,bonthe,7.5264,-12.5050,73.83,91,15,3.60,SL,1611641196
...,...,...,...,...,...,...,...,...,...
588,naze,28.3667,129.4833,68.00,83,75,4.61,JP,1611641096
589,katubao,5.8892,124.8294,85.55,63,43,4.65,PH,1611641372
590,abhar,36.1468,49.2180,33.01,79,83,7.76,IR,1611641372
591,grindavik,63.8424,-22.4338,28.40,59,90,28.77,IS,1611641373


In [23]:
# Write the weather DataFrame to CSV; index=True emits the index as the first
# column and header=True emits the column names.
# to_csv does not create missing directories, so make sure the destination
# folder exists first (a no-op when it is already there).
Path(output_data_file).parent.mkdir(parents=True, exist_ok=True)
weather_df.to_csv(output_data_file, index=True, header=True)

In [45]:
# Summary statistics (count, mean, std, min, 25%, 50%, 75%, max) of the
# numeric weather columns.
summary_columns = ['Lat', 'Lng', 'Max_Temp', 'Humidity', 'Cloudiness', 'Wind_Speed', 'Date']

# Every column except 'City' (kept under its original name)
adjusted_df = weather_df.loc[:, weather_df.columns != 'City']

# Restrict to the numeric columns before aggregating: 'Country' holds strings,
# and mean()/std()/quantile() raise TypeError on it in pandas >= 2.0.
# describe() yields exactly the eight rows listed above, in that order.
compiled_weather_df = adjusted_df[summary_columns].describe()
compiled_weather_df

Unnamed: 0,Lat,Lng,Max_Temp,Humidity,Cloudiness,Wind_Speed,Date
count,593.0,593.0,593.0,593.0,593.0,593.0,593.0
mean,20.772793,18.504095,49.20489,74.63575,56.548061,7.864688,1611641000.0
std,32.8534,89.353321,33.423553,19.69134,38.218674,5.860994,114.3166
min,-54.8,-175.2,-49.72,12.0,0.0,0.47,1611641000.0
25%,-5.8092,-57.5575,28.63,66.0,20.0,3.44,1611641000.0
50%,25.6,21.7833,62.6,79.0,71.0,6.17,1611641000.0
75%,47.8667,97.2,74.82,89.0,93.0,10.83,1611641000.0
max,78.2186,179.3167,100.99,100.0,100.0,34.78,1611641000.0
