<a href="https://colab.research.google.com/github/tellosofia/Gans_Data-Engineering/blob/main/Weather_data_function_city.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Weather is a **huge factor** when it comes to scooter usage. On very cold days and, especially, on rainy days, hardly anyone wants to use a scooter. Thankfully, there is an API that provides free weather forecasts: OpenWeather.

OpenWeather has many APIs that you can use. Once your API key arrives, we recommend using the **5-day forecast**. But if you feel confident and wish to explore, you are of course free to choose another one of the APIs.

It’s easy to make an API call using code like below:
`f"http://api.openweathermap.org/data/2.5/forecast?q={city}&appid={API_key}&units=metric"`

Obviously, city and API_key are variables that you will have already defined. Once you’ve made this API call, you’ll be able to explore the received information using the requests library and pandas.


In [None]:
# import the necessary libraries

import os
from datetime import datetime

import pandas as pd
import pytz
import requests

In [None]:
# Cities to request forecasts for. Kept under its own name because `city` is
# reused further down as a per-row column list; sharing the name would make
# this cell fetch nothing when it is re-run after that cell.
city_names = ['A Coruña', 'Santiago de Compostela', 'Vigo', 'Pontevedra']

# Read the key from the environment when available; otherwise replace the
# placeholder with your own key.
API_key = os.environ.get("OPENWEATHER_API_KEY", "your_API_key")

weather = []  # one parsed JSON response per city, in the same order as city_names

for city_name in city_names:
  response = requests.get(
      "http://api.openweathermap.org/data/2.5/forecast",
      # let requests build and URL-encode the query string (spaces, 'ñ', ...)
      params={"q": city_name, "appid": API_key, "units": "metric"},
      timeout=30,
  )
  response.raise_for_status()  # fail here on a bad key / unknown city, not later with a KeyError
  weather.append(response.json())

list(weather[0])  # top-level keys of the first response (avoids dumping every raw forecast)

In [None]:
weather[0]['city'] # explore the JSON structure: the 'city' entry of the first response

{'id': 3119841,
 'name': 'A Coruña',
 'coord': {'lat': 43.3713, 'lon': -8.396},
 'country': 'ES',
 'population': 246056,
 'timezone': 7200,
 'sunrise': 1690867374,
 'sunset': 1690919809}

In [None]:
# making trials to get the data
# 'rain' is looked up with a fallback of 0 for entries that do not carry it,
# so the cell always displays a number instead of raising.
weather[0]['list'][0].get('rain', {}).get('3h', 0)
# other fields worth trying:
# weather[0]['list'][0]['clouds']['all']
# weather[0]['list'][0]['main']['humidity']
# weather[0]['list'][0]['wind']['speed']

In [None]:
weather[0]['list'][0]['wind']['speed'] # another trial: wind speed of the first forecast entry of the first response

5.28

In [None]:
# One list per output column; every forecast entry appends exactly one value
# to each list, so all lists end up with the same length.
country = []
city = []
population = []
date = []
temp_min = []
temp_max = []
id = []  # NOTE: shadows the builtin `id`; name kept because the DataFrame cell reads it
weather_type = []
weather_desc = []
rain_P = []
rain = []
snow = []
clouds = []
humidity = []
wind = []

for forecast in weather:            # one API response per city
  place = forecast['city']          # city-level metadata, identical for all its entries
  for step in forecast['list']:     # one entry per forecast time step
    outlook = step['weather'][0]    # first weather condition of the entry

    country.append(place['country'])
    city.append(place['name'])
    population.append(place['population'])
    date.append(step['dt_txt'])
    temp_min.append(step['main']['temp_min'])
    temp_max.append(step['main']['temp_max'])
    id.append(outlook['id'])
    weather_type.append(outlook['main'])
    weather_desc.append(outlook['description'])
    rain_P.append(step['pop'])      # probability of precipitation
    # 'rain' / 'snow' may be missing from an entry: default to 0.
    # Both are read from the *current* entry (snow used to be read from the
    # very first entry of the first city for every row).
    rain.append(step.get('rain', {}).get('3h', 0))
    snow.append(step.get('snow', {}).get('3h', 0))
    clouds.append(step['clouds']['all'])
    humidity.append(step['main']['humidity'])
    wind.append(step['wind']['speed'])

In [None]:
# Assemble the DataFrame: the per-column lists are zipped into one tuple per
# forecast entry, and the tuples become the rows.
# NOTE(review): 'coutry' looks like a typo for 'country'; left unchanged here
# because the displayed outputs use this column name.
column_names = [
    'coutry', 'city', 'population', 'date', 'min_temp', 'max_temp', 'w_id', 'w_type',
    'w_desc', 'rain_P', 'rain_mm', 'snow_mm', 'clouds', 'humidity', 'wind',
]
wt = list(zip(
    country, city, population, date, temp_min, temp_max, id, weather_type,
    weather_desc, rain_P, rain, snow, clouds, humidity, wind,
))
weather_df = pd.DataFrame(data=wt, columns=column_names)

In [None]:
weather_df # display the DataFrame assembled in the previous cell

Unnamed: 0,coutry,city,population,date,min_temp,max_temp,w_id,w_type,w_desc,rain_P,rain_mm,snow_mm,clouds,humidity,wind
0,ES,A Coruña,246056,2023-08-03 15:00:00,20.00,20.09,802,Clouds,scattered clouds,0.00,0.00,0,30,66,5.28
1,ES,A Coruña,246056,2023-08-03 18:00:00,18.71,19.19,500,Rain,light rain,0.20,0.12,0,20,71,5.21
2,ES,A Coruña,246056,2023-08-03 21:00:00,17.00,17.00,500,Rain,light rain,0.23,0.30,0,29,82,4.69
3,ES,A Coruña,246056,2023-08-04 00:00:00,16.21,16.21,500,Rain,light rain,0.30,0.23,0,33,84,4.19
4,ES,A Coruña,246056,2023-08-04 03:00:00,16.39,16.39,500,Rain,light rain,0.22,0.13,0,60,82,3.93
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155,ES,Pontevedra,81576,2023-08-08 00:00:00,20.98,20.98,800,Clear,clear sky,0.00,0.00,0,0,59,2.98
156,ES,Pontevedra,81576,2023-08-08 03:00:00,19.74,19.74,800,Clear,clear sky,0.00,0.00,0,0,65,2.45
157,ES,Pontevedra,81576,2023-08-08 06:00:00,19.54,19.54,800,Clear,clear sky,0.00,0.00,0,0,67,1.94
158,ES,Pontevedra,81576,2023-08-08 09:00:00,29.23,29.23,800,Clear,clear sky,0.00,0.00,0,0,38,0.44


In [None]:
def weather_per_city(cities, api_key=None):
  """Fetch the OpenWeather forecast for each city and return it as one DataFrame.

  Parameters
  ----------
  cities : list of str
      City names, each sent as the ``q`` parameter of the forecast endpoint.
  api_key : str, optional
      OpenWeather API key. When omitted, the ``OPENWEATHER_API_KEY``
      environment variable is used, falling back to the ``"your_API_key"``
      placeholder.

  Returns
  -------
  pandas.DataFrame
      One row per city and forecast entry with the columns ``coutry``,
      ``city``, ``population``, ``date_time``, ``min_temp``, ``max_temp``,
      ``w_id``, ``w_type``, ``w_desc``, ``rain_P``, ``rain_mm``, ``snow_mm``,
      ``clouds``, ``humidity`` and ``wind``. An empty ``cities`` list yields
      an empty frame with these columns.

  Raises
  ------
  requests.HTTPError
      If the API answers with an error status (e.g. invalid key, unknown city).
  """
  if api_key is None:
    api_key = os.environ.get("OPENWEATHER_API_KEY", "your_API_key")

  # NOTE(review): 'coutry' looks like a typo for 'country'; kept so existing
  # consumers of this frame keep working.
  columns = ['coutry', 'city', 'population', 'date_time', 'min_temp', 'max_temp', 'w_id', 'w_type',
             'w_desc', 'rain_P', 'rain_mm', 'snow_mm', 'clouds', 'humidity', 'wind']

  rows = []  # one tuple per (city, forecast entry), in `columns` order
  for city_name in cities:
    response = requests.get(
        "http://api.openweathermap.org/data/2.5/forecast",
        # let requests build and URL-encode the query string
        params={"q": city_name, "appid": api_key, "units": "metric"},
        timeout=30,
    )
    response.raise_for_status()  # surface API errors here instead of a later KeyError
    forecast = response.json()

    place = forecast['city']  # city-level metadata shared by all entries
    for step in forecast['list']:
      outlook = step['weather'][0]  # first weather condition of the entry
      rows.append((
          place['country'],
          place['name'],
          place['population'],
          step['dt_txt'],
          step['main']['temp_min'],
          step['main']['temp_max'],
          outlook['id'],
          outlook['main'],
          outlook['description'],
          step['pop'],
          # 'rain' / 'snow' may be missing from an entry: default to 0, and
          # read both from the current entry (not from the first one).
          step.get('rain', {}).get('3h', 0),
          step.get('snow', {}).get('3h', 0),
          step['clouds']['all'],
          step['main']['humidity'],
          step['wind']['speed'],
      ))

  return pd.DataFrame(rows, columns=columns)

In [None]:
cities = ['Santiago de Compostela', 'Berlin'] # list of city names handed to weather_per_city in the next cell

In [None]:
weathers = weather_per_city(cities) # call the function and keep the returned DataFrame for inspection

In [None]:
weathers.head(1) # first row of the DataFrame returned by weather_per_city

Unnamed: 0,coutry,city,population,date_time,min_temp,max_temp,w_id,w_type,w_desc,rain_P,rain_mm,snow_mm,clouds,humidity,wind
0,ES,Santiago de Compostela,95092,2023-08-03 15:00:00,21.05,21.98,802,Clouds,scattered clouds,0.0,0.0,0,40,51,3.98


In [None]:
weathers.info() # column dtypes and non-null counts of the function's output

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 80 entries, 0 to 79
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   coutry      80 non-null     object 
 1   city        80 non-null     object 
 2   population  80 non-null     int64  
 3   date_time   80 non-null     object 
 4   min_temp    80 non-null     float64
 5   max_temp    80 non-null     float64
 6   w_id        80 non-null     int64  
 7   w_type      80 non-null     object 
 8   w_desc      80 non-null     object 
 9   rain_P      80 non-null     float64
 10  rain_mm     80 non-null     float64
 11  snow_mm     80 non-null     int64  
 12  clouds      80 non-null     int64  
 13  humidity    80 non-null     int64  
 14  wind        80 non-null     float64
dtypes: float64(5), int64(5), object(5)
memory usage: 9.5+ KB


# Other Solution

In [None]:
def get_weather_norm(cities, api_key=None, tz_name='Europe/Berlin'):
  """Fetch the OpenWeather forecast for each city, flattened with ``pd.json_normalize``.

  Parameters
  ----------
  cities : list of str
      City names, each sent as the ``q`` parameter of the forecast endpoint.
  api_key : str, optional
      OpenWeather API key. When omitted, the ``OPENWEATHER_API_KEY``
      environment variable is used, falling back to the ``'your_API_key'``
      placeholder.
  tz_name : str, optional
      Timezone name used for the ``information_retrieved_at`` timestamp.

  Returns
  -------
  pandas.DataFrame
      One row per city and forecast entry with the columns ``city``,
      ``country``, ``forecast_time``, ``outlook``, ``detailed_outlook``,
      ``temperature``, ``temperature_feels_like``, ``clouds``, ``rain``,
      ``snow``, ``wind_speed``, ``wind_deg``, ``humidity``, ``pressure`` and
      ``information_retrieved_at``, in that order. ``rain`` / ``snow`` are NaN
      for entries that do not carry them. An empty ``cities`` list yields an
      empty frame with these columns.

  Raises
  ------
  requests.HTTPError
      If the API answers with an error status (e.g. invalid key, unknown city).
  """
  if api_key is None:
    api_key = os.environ.get('OPENWEATHER_API_KEY', 'your_API_key')

  # final column selection and order
  columns = ['city', 'country', 'forecast_time', 'outlook', 'detailed_outlook',
             'temperature', 'temperature_feels_like', 'clouds', 'rain', 'snow',
             'wind_speed', 'wind_deg', 'humidity', 'pressure',
             'information_retrieved_at']

  # json_normalize names nested meta fields '<parent>.<child>'; map them to readable names
  renames = {'main': 'outlook',
             'description': 'detailed_outlook',
             'dt_txt': 'forecast_time',
             'main.temp': 'temperature',
             'main.feels_like': 'temperature_feels_like',
             'clouds.all': 'clouds',
             'rain.3h': 'rain',
             'snow.3h': 'snow',
             'wind.speed': 'wind_speed',
             'wind.deg': 'wind_deg',
             'main.humidity': 'humidity',
             'main.pressure': 'pressure'}

  # The timestamp is taken inside the function so it reflects the actual call
  # time and does not depend on a global defined in some other cell.
  retrieved_at = datetime.now(pytz.timezone(tz_name)).strftime("%d/%m/%Y %H:%M:%S")

  frames = []  # one normalized DataFrame per city
  for city in cities:
    response = requests.get(
        "http://api.openweathermap.org/data/2.5/forecast",
        # let requests build and URL-encode the query string
        params={"q": city, "appid": api_key, "units": "metric"},
        timeout=30,
    )
    response.raise_for_status()  # surface API errors here instead of a later KeyError
    forecast = response.json()

    frame = (
        pd.json_normalize(forecast['list'],
                          record_path=['weather'],  # one row per weather condition
                          meta=['dt_txt', ['main', 'temp'], ['main', 'feels_like'], ['clouds', 'all'],
                                ['rain', '3h'], ['snow', '3h'], ['wind', 'speed'], ['wind', 'deg'],
                                ['main', 'humidity'], ['main', 'pressure']],
                          errors='ignore')  # missing meta keys (e.g. no rain) become NaN
        .rename(columns=renames)
        .assign(city=forecast['city']['name'],
                country=forecast['city']['country'],
                information_retrieved_at=retrieved_at)
    )
    # Keep only the wanted columns, in the intended order; this also drops
    # the 'id' and 'icon' columns of the weather records.
    frames.append(frame[columns])

  if not frames:
    # pd.concat raises on an empty list; return an empty, well-formed frame instead
    return pd.DataFrame(columns=columns)
  return pd.concat(frames, ignore_index=True)

In [None]:
# Timezone in which the timestamp should be expressed (pick your own)
tz = pytz.timezone('Europe/Berlin')
# Current moment as a timezone-aware datetime in that zone
now = datetime.now(tz=tz)

now  # show the resulting timestamp

datetime.datetime(2023, 8, 1, 21, 13, 10, 862807, tzinfo=<DstTzInfo 'Europe/Berlin' CEST+2:00:00 DST>)

In [None]:
get_weather_norm(['Berlin', 'London'])  # call the function for two cities; the returned DataFrame is displayed as the cell output

Unnamed: 0,city,country,outlook,detailed_outlook,forecast_time,temperature,temperature_feels_like,clouds,rain,snow,wind_speed,wind_deg,humidity,pressure,information_retrieved_at
0,Berlin,DE,Clear,clear sky,2023-08-01 21:00:00,17.32,17.09,5,,,4.58,240,76,999,01/08/2023 21:13:10
1,Berlin,DE,Clouds,few clouds,2023-08-02 00:00:00,16.37,16.07,11,,,5.38,239,77,1000,01/08/2023 21:13:10
2,Berlin,DE,Clear,clear sky,2023-08-02 03:00:00,14.6,14.2,8,,,4.57,232,80,1002,01/08/2023 21:13:10
3,Berlin,DE,Clouds,few clouds,2023-08-02 06:00:00,16.28,15.84,14,,,4.62,234,72,1002,01/08/2023 21:13:10
4,Berlin,DE,Clouds,scattered clouds,2023-08-02 09:00:00,21,20.54,31,,,4.46,208,53,1001,01/08/2023 21:13:10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75,London,GB,Clouds,broken clouds,2023-08-06 06:00:00,11.14,10.61,63,,,3.69,295,88,1015,01/08/2023 21:13:10
76,London,GB,Clear,clear sky,2023-08-06 09:00:00,15.5,14.75,6,,,4.63,305,63,1015,01/08/2023 21:13:10
77,London,GB,Clouds,few clouds,2023-08-06 12:00:00,18.96,18.14,12,,,4.54,294,47,1015,01/08/2023 21:13:10
78,London,GB,Clouds,broken clouds,2023-08-06 15:00:00,19.43,18.68,65,,,4.66,283,48,1014,01/08/2023 21:13:10
