# Project 1. Weather Forecasting API Data Collection
### by: Rosa Adib Rad

In [1]:
!pip install requests
!pip install lxml



In [2]:
import random
import time
import datetime
import requests
from bs4 import BeautifulSoup
import re
import numpy as np
import pandas as pd

## First: Making a list of capital cities
By crawling the data of capital cities from [this website](https://geographyfieldwork.com/WorldCapitalCities.htm)

In [3]:
url_cities = "https://geographyfieldwork.com/WorldCapitalCities.htm"
api_key = "put your API here"

response = requests.get(url=url_cities)
response.status_code

200

In [4]:
soup = BeautifulSoup(response.text, "lxml")
selector_cities = "td"
elements = soup.select(selector_cities)

In [5]:
def corrected(text):
    return text.split(",")[0].split("(")[0].split("[")[0].strip()
    
city_name = []
country_name = []

for i in range(2,len(elements)-2):
    if i%2 == 0:
        country_name.append(corrected(elements[i].text))
    else:
        city_name.append(corrected(elements[i].text))

# fixing some problematic capital_city names
city_name[91] = "Tarawa"
city_name.pop(122)
country_name.pop(122)
city_name[186] = "Kyiv"

print(city_name)
print(country_name)

['Kabul', 'Tirana', 'Algiers', 'Andorra la Vella', 'Luanda', "Saint John's", 'Buenos Aires', 'Yerevan', 'Canberra', 'Vienna', 'Baku', 'Nassau', 'Manama', 'Dhaka', 'Bridgetown', 'Minsk', 'Brussels', 'Belmopan', 'Porto Novo', 'Thimphu', 'La Paz', 'Sarajevo', 'Gaborone', 'Brasilia', 'Bandar Seri Begawan', 'Sofia', 'Ouagadougou', 'Gitega', 'Phnom Penh', 'Yaounde', 'Ottawa', 'Praia', 'Bangui', "N'Djamena", 'Santiago', 'Beijing', 'Bogota', 'Moroni', 'Kinshasa', 'Brazzaville', 'San Jose', 'Yamoussoukro', 'Zagreb', 'Havana', 'Nicosia', 'Prague', 'Copenhagen', 'Djibouti', 'Roseau', 'Santo Domingo', 'Dili', 'Quito', 'Cairo', 'San Salvador', 'London', 'Malabo', 'Asmara', 'Tallinn', 'Mbabane', 'Addis Ababa', 'Palikir', 'Suva', 'Helsinki', 'Paris', 'Libreville', 'Banjul', 'Tbilisi', 'Berlin', 'Accra', 'Athens', "Saint George's", 'Guatemala City', 'Conakry', 'Bissau', 'Georgetown', 'Port au Prince', 'Tegucigalpa', 'Budapest', 'Reykjavik', 'New Delhi', 'Jakarta', 'Tehran', 'Baghdad', 'Dublin', 'Jerus

In [6]:
df = pd.DataFrame({'Country': country_name, 'Capital City': city_name})
df

Unnamed: 0,Country,Capital City
0,Afghanistan,Kabul
1,Albania,Tirana
2,Algeria,Algiers
3,Andorra,Andorra la Vella
4,Angola,Luanda
...,...,...
195,Vietnam,Hanoi
196,Wales,Cardiff
197,Yemen,Sana'a
198,Zambia,Lusaka


## Second: Finding lat & lon with Geocoding 
By using the [Geocoding API](https://openweathermap.org/api/geocoding-api)

In [7]:
lat_array = []
lon_array = []

url_geo = "http://api.openweathermap.org/geo/1.0/direct"
headers = {
        'Content-Type': 'application/json',
        'accept': 'application/json',
    }
for i in range(len(city_name)):
    parameters_geo = {"q": city_name[i], "appid":api_key}
    
    result = requests.get(url=url_geo, params=parameters_geo, headers=headers)
    
    lat = result.json()[0]["lat"]
    lon = result.json()[0]["lon"]
    
    lat_array.append(lat)
    lon_array.append(lon)


In [8]:
df.insert(2, "lat", lat_array, True)
df.insert(3, "lon", lon_array, True)
df

Unnamed: 0,Country,Capital City,lat,lon
0,Afghanistan,Kabul,34.526011,69.177684
1,Albania,Tirana,41.330514,19.825563
2,Algeria,Algiers,36.775361,3.060188
3,Andorra,Andorra la Vella,42.506939,1.521247
4,Angola,Luanda,-8.827270,13.243951
...,...,...,...,...
195,Vietnam,Hanoi,21.029450,105.854444
196,Wales,Cardiff,51.481655,-3.179193
197,Yemen,Sana'a,15.353857,44.205884
198,Zambia,Lusaka,-15.416412,28.282479


In [9]:
#df.to_csv('cityNames_and_LatLon.csv', index=False)

# Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('city_names.xlsx', engine='openpyxl')

# Convert the dataframe to an XlsxWriter Excel object.
df.to_excel(writer, sheet_name='Sheet1')

# Close the Pandas Excel writer and output the Excel file.
writer.close()

## Third: Gattering the current weather of each capital city
Using the [Current Weather API](https://openweathermap.org/current)

In [10]:
url_current = "https://api.openweathermap.org/data/2.5/weather"
headers = {'Content-Type': 'application/json','accept': 'application/json',}

current_data = []

for i in range(len(city_name)):
    parameters_current = {"lat": lat_array[i] , "lon": lon_array[i], "appid" : api_key}
    result2 = requests.get(url=url_current, params=parameters_current, headers=headers)
    time.sleep(1)

    weather = result2.json()["weather"][0]["main"]
    description= result2.json()["weather"][0]["description"]
    temp = result2.json()["main"]["temp"]-273.15
    temp_min = result2.json()["main"]["temp_min"]-273.15
    temp_max = result2.json()["main"]["temp_max"]-273.15
    pressure = result2.json()["main"]["pressure"]
    humidity = result2.json()["main"]["humidity"]
    wind = result2.json()["wind"]
    clouds = result2.json()["clouds"]
    
    try:
        rain = result2.json()["rain"]
    except:
        rain = "nan"

    try:
        visibility = result2.json()["visibility"]
    except:
        visibility = "nan"
    

    current_data.append({'weather':weather, 'description':description,'temp':temp, 'temp_min':temp_min,
               'temp_max':temp_max, 'pressure':pressure, 'humidity':humidity, 'visibility':visibility, 'wind':wind, 'rain':rain, 'clouds':clouds})

current_Weather_df = pd.DataFrame(current_data)

In [11]:
current_Weather_df = pd.concat([df, current_Weather_df], axis=1)
current_Weather_df

Unnamed: 0,Country,Capital City,lat,lon,weather,description,temp,temp_min,temp_max,pressure,humidity,visibility,wind,rain,clouds
0,Afghanistan,Kabul,34.526011,69.177684,Clear,clear sky,7.71,7.71,7.71,1019,74,10000,"{'speed': 0.92, 'deg': 349, 'gust': 0.71}",,{'all': 0}
1,Albania,Tirana,41.330514,19.825563,Clear,clear sky,15.47,15.47,15.47,1012,72,10000,"{'speed': 0.51, 'deg': 0}",,{'all': 0}
2,Algeria,Algiers,36.775361,3.060188,Clouds,few clouds,13.84,13.84,13.84,1018,94,6000,"{'speed': 2.06, 'deg': 220}",,{'all': 20}
3,Andorra,Andorra la Vella,42.506939,1.521247,Clouds,broken clouds,8.47,8.23,9.47,1019,93,10000,"{'speed': 3.62, 'deg': 10, 'gust': 4.69}",,{'all': 69}
4,Angola,Luanda,-8.827270,13.243951,Clouds,broken clouds,27.02,27.02,27.02,1010,91,9000,"{'speed': 4.63, 'deg': 170}",,{'all': 75}
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,Vietnam,Hanoi,21.029450,105.854444,Clouds,few clouds,25.98,25.98,25.98,1010,77,10000,"{'speed': 5.22, 'deg': 159, 'gust': 7.51}",,{'all': 19}
196,Wales,Cardiff,51.481655,-3.179193,Clouds,broken clouds,8.50,7.48,9.71,1012,78,10000,"{'speed': 9.26, 'deg': 290}",,{'all': 75}
197,Yemen,Sana'a,15.353857,44.205884,Clouds,scattered clouds,18.18,18.18,18.18,1013,73,10000,"{'speed': 2.72, 'deg': 209, 'gust': 3.96}",,{'all': 39}
198,Zambia,Lusaka,-15.416412,28.282479,Clouds,broken clouds,16.74,16.74,16.74,1018,76,10000,"{'speed': 3.8, 'deg': 84, 'gust': 11.09}",,{'all': 80}


In [12]:
writer = pd.ExcelWriter('current_Weather.xlsx', engine='openpyxl')
current_Weather_df.to_excel(writer, sheet_name='Sheet1')
writer.close()

## Fourth: Gattering the 3-hour forecast of each capital city and save the data in a seperate dataframe
Using the [3-hour Forecast API](https://openweathermap.org/forecast5)

In [13]:
url_3hours = "https://api.openweathermap.org/data/2.5/forecast"
headers = {'Content-Type': 'application/json','accept': 'application/json',}

weather_3hour = [[],[],[],[],[]]
for i in range(200):
    parameters_3hours = {"lat": lat_array[i] , "lon": lon_array[i], "appid" : api_key}
    result3 = requests.get(url=url_3hours, params=parameters_3hours, headers=headers)
    time.sleep(1)
        
    for k in range(5):
        weather = result3.json()['list'][k]['weather'][0]['main']
        description= result3.json()['list'][k]['weather'][0]['description']
        temp_min = result3.json()['list'][k]['main']['temp_min']-273.15
        temp_max = result3.json()['list'][k]['main']['temp_max']-273.15
        pressure = result3.json()['list'][k]['main']['pressure']
        humidity = result3.json()['list'][k]['main']['humidity']
        wind = result3.json()['list'][k]['wind']
        clouds = result3.json()['list'][k]['clouds']
        date = result3.json()['list'][k]['dt_txt']
        
        try:
            rain = result3.json()["rain"]
        except:
            rain = "nan"
        
        try:
            visibility = result3.json()["visibility"]
        except:
            visibility = "nan"
        
        weather_3hour[k].append({'date':date, 'weather':weather, 'description':description,'temp_min':temp_min,'temp_max':temp_max,
                                'pressure':pressure,'humidity':humidity,'visibility':visibility,'wind':wind, 'rain':rain, 'clouds':clouds})
            
weather_3hour_1 = pd.DataFrame(weather_3hour[0])
weather_3hour_2 = pd.DataFrame(weather_3hour[1])
weather_3hour_3 = pd.DataFrame(weather_3hour[2])
weather_3hour_4 = pd.DataFrame(weather_3hour[3])
weather_3hour_5 = pd.DataFrame(weather_3hour[4])

In [14]:
weather_3hour_1

Unnamed: 0,date,weather,description,temp_min,temp_max,pressure,humidity,visibility,wind,rain,clouds
0,2024-04-16 03:00:00,Clear,clear sky,8.51,10.01,1018,68,,"{'speed': 0.6, 'deg': 7, 'gust': 0.53}",,{'all': 0}
1,2024-04-16 03:00:00,Clouds,scattered clouds,15.67,18.07,1012,64,,"{'speed': 2.78, 'deg': 126, 'gust': 2.6}",,{'all': 27}
2,2024-04-16 03:00:00,Clouds,few clouds,14.37,15.42,1018,90,,"{'speed': 2.56, 'deg': 238, 'gust': 3.71}",,{'all': 18}
3,2024-04-16 03:00:00,Clouds,broken clouds,2.91,6.32,1019,89,,"{'speed': 3.69, 'deg': 14, 'gust': 4.62}",,{'all': 54}
4,2024-04-16 03:00:00,Clouds,broken clouds,25.35,26.46,1010,92,,"{'speed': 3.01, 'deg': 135, 'gust': 4.09}",,{'all': 76}
...,...,...,...,...,...,...,...,...,...,...,...
195,2024-04-16 03:00:00,Clouds,broken clouds,27.37,30.14,1010,78,,"{'speed': 4.34, 'deg': 166, 'gust': 5.77}",,{'all': 53}
196,2024-04-16 03:00:00,Clouds,broken clouds,8.50,8.55,1013,76,,"{'speed': 8.67, 'deg': 307, 'gust': 15.22}",,{'all': 73}
197,2024-04-16 03:00:00,Clouds,scattered clouds,17.47,18.05,1013,73,,"{'speed': 1.89, 'deg': 229, 'gust': 3.11}",,{'all': 25}
198,2024-04-16 03:00:00,Clouds,scattered clouds,15.72,16.53,1018,77,,"{'speed': 3.79, 'deg': 86, 'gust': 10.76}",,{'all': 45}


In [15]:
weather_3hour_2

Unnamed: 0,date,weather,description,temp_min,temp_max,pressure,humidity,visibility,wind,rain,clouds
0,2024-04-16 06:00:00,Clear,clear sky,11.78,13.79,1019,52,,"{'speed': 0.36, 'deg': 63, 'gust': 0.72}",,{'all': 1}
1,2024-04-16 06:00:00,Clouds,broken clouds,18.21,20.08,1011,57,,"{'speed': 0.82, 'deg': 133, 'gust': 1.62}",,{'all': 61}
2,2024-04-16 06:00:00,Clouds,few clouds,14.90,15.43,1017,82,,"{'speed': 2.35, 'deg': 241, 'gust': 3.1}",,{'all': 11}
3,2024-04-16 06:00:00,Clouds,scattered clouds,2.90,4.61,1018,82,,"{'speed': 3.67, 'deg': 12, 'gust': 4.82}",,{'all': 35}
4,2024-04-16 06:00:00,Clouds,broken clouds,25.58,26.06,1011,90,,"{'speed': 3.56, 'deg': 135, 'gust': 4.57}",,{'all': 83}
...,...,...,...,...,...,...,...,...,...,...,...
195,2024-04-16 06:00:00,Clouds,scattered clouds,32.15,35.23,1007,58,,"{'speed': 6.18, 'deg': 163, 'gust': 6.6}",,{'all': 45}
196,2024-04-16 06:00:00,Clouds,broken clouds,8.54,8.57,1014,74,,"{'speed': 7.96, 'deg': 311, 'gust': 12.62}",,{'all': 71}
197,2024-04-16 06:00:00,Clouds,scattered clouds,20.01,20.85,1014,59,,"{'speed': 4.26, 'deg': 222, 'gust': 6.14}",,{'all': 42}
198,2024-04-16 06:00:00,Clouds,scattered clouds,17.62,17.96,1019,76,,"{'speed': 4.7, 'deg': 82, 'gust': 9.93}",,{'all': 37}


In [16]:
weather_3hour_3

Unnamed: 0,date,weather,description,temp_min,temp_max,pressure,humidity,visibility,wind,rain,clouds
0,2024-04-16 09:00:00,Clear,clear sky,16.97,16.97,1016,33,,"{'speed': 1.51, 'deg': 64, 'gust': 1.54}",,{'all': 4}
1,2024-04-16 09:00:00,Clouds,overcast clouds,21.39,21.39,1010,46,,"{'speed': 2.63, 'deg': 122, 'gust': 3.53}",,{'all': 100}
2,2024-04-16 09:00:00,Clear,clear sky,19.23,19.23,1017,60,,"{'speed': 2.74, 'deg': 262, 'gust': 3.11}",,{'all': 0}
3,2024-04-16 09:00:00,Clear,clear sky,9.14,9.14,1015,47,,"{'speed': 3.66, 'deg': 355, 'gust': 3.84}",,{'all': 1}
4,2024-04-16 09:00:00,Clouds,scattered clouds,28.65,28.65,1012,76,,"{'speed': 1.53, 'deg': 144, 'gust': 2.02}",,{'all': 45}
...,...,...,...,...,...,...,...,...,...,...,...
195,2024-04-16 09:00:00,Clear,clear sky,33.00,33.00,1004,47,,"{'speed': 6.02, 'deg': 154, 'gust': 5.64}",,{'all': 8}
196,2024-04-16 09:00:00,Clouds,scattered clouds,9.43,9.43,1017,72,,"{'speed': 7.84, 'deg': 319, 'gust': 11.29}",,{'all': 41}
197,2024-04-16 09:00:00,Clouds,scattered clouds,24.11,24.11,1013,40,,"{'speed': 6.59, 'deg': 229, 'gust': 7.03}",,{'all': 31}
198,2024-04-16 09:00:00,Clear,clear sky,23.86,23.86,1018,49,,"{'speed': 4.79, 'deg': 86, 'gust': 6.7}",,{'all': 5}


In [17]:
weather_3hour_4

Unnamed: 0,date,weather,description,temp_min,temp_max,pressure,humidity,visibility,wind,rain,clouds
0,2024-04-16 12:00:00,Clouds,scattered clouds,17.46,17.46,1015,32,,"{'speed': 2.07, 'deg': 47, 'gust': 1.7}",,{'all': 50}
1,2024-04-16 12:00:00,Clouds,overcast clouds,20.44,20.44,1010,59,,"{'speed': 2.15, 'deg': 152, 'gust': 3.93}",,{'all': 100}
2,2024-04-16 12:00:00,Clear,clear sky,21.19,21.19,1016,49,,"{'speed': 2.93, 'deg': 326, 'gust': 3.14}",,{'all': 0}
3,2024-04-16 12:00:00,Clear,clear sky,12.00,12.00,1013,39,,"{'speed': 3.26, 'deg': 327, 'gust': 3.83}",,{'all': 1}
4,2024-04-16 12:00:00,Rain,light rain,28.58,28.58,1009,77,,"{'speed': 5.05, 'deg': 271, 'gust': 4.46}",,{'all': 44}
...,...,...,...,...,...,...,...,...,...,...,...
195,2024-04-16 12:00:00,Clouds,scattered clouds,25.85,25.85,1005,75,,"{'speed': 3.1, 'deg': 181, 'gust': 8.26}",,{'all': 40}
196,2024-04-16 12:00:00,Clouds,scattered clouds,10.94,10.94,1017,59,,"{'speed': 7.41, 'deg': 320, 'gust': 10.04}",,{'all': 39}
197,2024-04-16 12:00:00,Clouds,broken clouds,25.02,25.02,1011,35,,"{'speed': 6.79, 'deg': 244, 'gust': 6.18}",,{'all': 64}
198,2024-04-16 12:00:00,Clear,clear sky,25.93,25.93,1014,44,,"{'speed': 4.22, 'deg': 90, 'gust': 4.6}",,{'all': 6}


In [18]:
weather_3hour_5 

Unnamed: 0,date,weather,description,temp_min,temp_max,pressure,humidity,visibility,wind,rain,clouds
0,2024-04-16 15:00:00,Clouds,scattered clouds,15.40,15.40,1016,42,,"{'speed': 1.65, 'deg': 352, 'gust': 1.6}",,{'all': 39}
1,2024-04-16 15:00:00,Rain,light rain,16.57,16.57,1009,80,,"{'speed': 5.53, 'deg': 189, 'gust': 11.54}",,{'all': 100}
2,2024-04-16 15:00:00,Clear,clear sky,21.10,21.10,1014,49,,"{'speed': 2.88, 'deg': 347, 'gust': 3.63}",,{'all': 0}
3,2024-04-16 15:00:00,Clear,clear sky,10.45,10.45,1012,45,,"{'speed': 3.77, 'deg': 333, 'gust': 4.07}",,{'all': 5}
4,2024-04-16 15:00:00,Rain,light rain,28.59,28.59,1007,78,,"{'speed': 5.01, 'deg': 240, 'gust': 5.25}",,{'all': 36}
...,...,...,...,...,...,...,...,...,...,...,...
195,2024-04-16 15:00:00,Clouds,few clouds,25.02,25.02,1007,78,,"{'speed': 5.56, 'deg': 138, 'gust': 11.86}",,{'all': 17}
196,2024-04-16 15:00:00,Clouds,few clouds,11.26,11.26,1017,58,,"{'speed': 8.12, 'deg': 324, 'gust': 10.18}",,{'all': 14}
197,2024-04-16 15:00:00,Clouds,overcast clouds,22.99,22.99,1012,40,,"{'speed': 4.93, 'deg': 241, 'gust': 4.9}",,{'all': 97}
198,2024-04-16 15:00:00,Clouds,scattered clouds,22.65,22.65,1015,57,,"{'speed': 3.85, 'deg': 97, 'gust': 6.03}",,{'all': 47}


In [19]:
writer = pd.ExcelWriter('weather_hour1.xlsx', engine='openpyxl')
pd.concat([df, weather_3hour_1], axis=1).to_excel(writer, sheet_name='Sheet1')
writer.close()

writer = pd.ExcelWriter('weather_hour2.xlsx', engine='openpyxl')
pd.concat([df, weather_3hour_2], axis=1).to_excel(writer, sheet_name='Sheet1')
writer.close()

writer = pd.ExcelWriter('weather_hour3.xlsx', engine='openpyxl')
pd.concat([df, weather_3hour_3], axis=1).to_excel(writer, sheet_name='Sheet1')
writer.close()

writer = pd.ExcelWriter('weather_hour4.xlsx', engine='openpyxl')
pd.concat([df, weather_3hour_4], axis=1).to_excel(writer, sheet_name='Sheet1')
writer.close()

writer = pd.ExcelWriter('weather_hour5.xlsx', engine='openpyxl')
pd.concat([df, weather_3hour_5], axis=1).to_excel(writer, sheet_name='Sheet1')
writer.close()

## Fifth(Optional): Gattering hourly forecasting air pollution data of Tehran for 4 days
Using the [Air Pollution API](https://openweathermap.org/api/air-pollution)  
Gathering basic air quality factor values as well as the overall air quality label ranging from 1 to 5 (1 for Good and 5 for Very Poor)  

In [20]:
# finding the lat and lon of Tehran

Teh_ID = city_name.index('Tehran')
Teh_lat = lat_array[Teh_ID]
Teh_lon = lon_array[Teh_ID]
print(Teh_ID , Teh_lat , Teh_lon)

81 35.6892523 51.3896004


In [None]:
url_AP = "http://api.openweathermap.org/data/2.5/air_pollution/forecast"
headers = {'Content-Type': 'application/json', 'accept': 'application/json'}

parameters_AP= {"lat": Teh_lat , "lon": Teh_lon, "appid" : api_key}
res_AP= requests.get(url=url_AP, params=parameters_AP, headers=headers)

recoredsLen_AP = len(res_AP.json()['list'])

In [None]:
pollution_Data = []
for i in range(recoredsLen_AP):

    date = res_AP.json()['list'][i]['dt']
    # converting unix timestamp string to readable date
    date = datetime.datetime.fromtimestamp(date).strftime('%Y-%m-%d %H:%M:%S')
    
    CO= res_AP.json()['list'][i]['components']['no']
    NO = res_AP.json()['list'][i]['components']['no2']
    NO2= res_AP.json()['list'][i]['components']['o3']
    O3 = res_AP.json()['list'][i]['components']['so2']
    SO2 = res_AP.json()['list'][i]['components']['pm2_5']
    PM25 = res_AP.json()['list'][i]['components']['pm10']
    PM10 = res_AP.json()['list'][i]['components']['nh3']
    NH3 = res_AP.json()['list'][i]['components']['nh3']
    overall = res_AP.json()['list'][i]['main']['aqi']

    pollution_Data.append({'date':date, 'CO':CO, 'NO':NO, 'NO2':NO2,'O3':O3,'SO2':SO2,'PM(2.5)':PM25,'PM(10)':PM10,'NH3':NH3,'Overall Air Quality':overall})

airPollution_df = pd.DataFrame(pollution_Data)
airPollution_df

In [None]:
writer = pd.ExcelWriter('airPollution_tehran.xlsx', engine='openpyxl')
airPollution_df.to_excel(writer, sheet_name='Sheet1')
writer.close()