In [1]:
import pandas as pd, requests

In [2]:
import os 
import logging
# Import scrapy and scrapy.crawler 
import scrapy
from scrapy.crawler import CrawlerProcess

In [3]:
import operator

In [4]:
cities = ["Mont Saint Michel","St Malo","Bayeux","Le Havre","Rouen","Paris","Amiens","Lille","Strasbourg",
          "Chateau du Haut Koenigsbourg","Colmar","Eguisheim","Besancon","Dijon","Annecy","Grenoble","Lyon",
          "Gorges du Verdon","Bormes les Mimosas","Cassis","Marseille","Aix en Provence","Avignon","Uzes","Nimes",
          "Aigues Mortes","Saintes Maries de la mer","Collioure","Carcassonne","Ariege","Toulouse","Montauban",
          "Biarritz","Bayonne","La Rochelle"]

In [5]:
cities_red = ["Mont Saint Michel","St Malo","Bayeux","Le Havre","Rouen","Paris","Amiens","Lille"]

## Retrieve citie's coordinates

In [6]:
'''
    Retrieves location data of a city in a dictionary.  The data retrieved is, for example:
      place_id': 256949255,
     'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright',
     'osm_type'
     'osm_id'
     'boundingbox': ['48.6119741', '48.637031', '-1.5495487', '-1.5094805'],
     'lat': 
     'lon': 
     'display_name': 'Le Mont-Saint-Michel, Avranches, Manche, Normandie, France métropolitaine, 50170, France',
     'place_rank': 16,
     'category': 'boundary',
     'type': 'administrative',
     'importance': 
     'icon':
    -----------------------------
    Parameters
    -----------------------------
    city: The city name
    
'''
def retrieve_one_city(city:str)->dict:
    url = f'https://nominatim.openstreetmap.org/search?country=France&format=jsonv2&class=boundary&limit=1&city={city}'
    result = requests.get(url)
    try:
        return result.json()[0]
    except IndexError as error:
        print('There is no result in the query')
        return None

In [7]:
url = "https://nominatim.openstreetmap.org/search?country=France&format=json&class=boundary"

In [8]:
def retrieve_cities(city_names)->list:
    retrieved_cities = list()
    for city in city_names:
        data = retrieve_one_city(city) #city_data is a dictionary
        city_dict = {'place': city, **data} ## ** used to unpack the elements of the dictionary
        retrieved_cities.append(city_dict)   
    return retrieved_cities

In [9]:
retrieved_cities=retrieve_cities(cities_red)

In [10]:
len(retrieved_cities)

8

In [11]:
cities_df = pd.DataFrame(retrieved_cities)

In [12]:
cities_df.columns.to_list()[1:-1]

['place_id',
 'licence',
 'osm_type',
 'osm_id',
 'boundingbox',
 'lat',
 'lon',
 'display_name',
 'place_rank',
 'category',
 'type',
 'importance']

In [125]:
cities_df.head()

Unnamed: 0,place,place_id,licence,osm_type,osm_id,boundingbox,lat,lon,display_name,place_rank,category,type,importance,icon
0,Mont Saint Michel,256949255,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,376823,"[48.6119741, 48.637031, -1.5495487, -1.5094805]",48.6355232,-1.5102571,"Le Mont-Saint-Michel, Avranches, Manche, Norma...",16,boundary,administrative,0.961274,https://nominatim.openstreetmap.org/ui/mapicon...
1,St Malo,256985223,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,905534,"[48.5979853, 48.6949736, -2.0768518, -1.9367259]",48.649518,-2.0260409,"Saint-Malo, Ille-et-Vilaine, Bretagne, France ...",16,boundary,administrative,0.786467,https://nominatim.openstreetmap.org/ui/mapicon...
2,Bayeux,256913845,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,145776,"[49.2608124, 49.2934736, -0.7275671, -0.6757378]",49.2764624,-0.7024738,"Bayeux, Calvados, Normandie, France métropolit...",16,boundary,administrative,0.7927,https://nominatim.openstreetmap.org/ui/mapicon...
3,Le Havre,256879965,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,104492,"[49.4516697, 49.5401463, 0.0667992, 0.1955556]",49.4938975,0.1079732,"Le Havre, Seine-Maritime, Normandie, France mé...",16,boundary,administrative,0.932333,https://nominatim.openstreetmap.org/ui/mapicon...
4,Rouen,304551005,"Data © OpenStreetMap contributors, ODbL 1.0. h...",relation,75628,"[49.4172001, 49.4652601, 1.0300648, 1.1521157]",49.4404591,1.0939658,"Rouen, Seine-Maritime, Normandie, France métro...",16,boundary,administrative,0.860073,https://nominatim.openstreetmap.org/ui/mapicon...


In [14]:
cities_df.to_csv('./data/cities.csv', sep='\t')

In [29]:
coords = list(zip(cities_df.lat.to_list(), cities_df.lon.to_list()))

In [30]:
coords

[('48.6355232', '-1.5102571'),
 ('48.649518', '-2.0260409'),
 ('49.2764624', '-0.7024738'),
 ('49.4938975', '0.1079732'),
 ('49.4404591', '1.0939658'),
 ('48.8566969', '2.3514616'),
 ('49.8941708', '2.2956951'),
 ('50.6365654', '3.0635282')]

## Getting the weather

In [256]:
class City:
    def __init__(self, place_id, name, coords:tuple):
        self._place_id=place_id
        self._name=name
        self._coords=coords
        self._scrapped = {}
    
    @property
    def place_id(self):
        return self._place_id
    @property
    def name(self):
        return self._name
    
    @property
    def coords(self):
        return self._coords
    @property
    def scrapped(self):
        return self._scrapped
        
    @scrapped.setter
    def scrapped(self,value:dict):
        self._scrapped=value
    
    @staticmethod
    def from_dataframe(dataframe: pd.DataFrame):
        #TODO Check first if it contains the columns we need
        cities=list()
        try:
            for index_row in range(0,dataframe.shape[0]):
                place_id = dataframe.loc[index_row, 'place_id']
                coords = (dataframe.loc[index_row, 'lat'], dataframe.loc[index_row, 'lon'])
                name = dataframe.loc[index_row, 'place']
                city = City(place_id, name, coords)
                cities.append(city)
        except KeyError as keyerr:
            print('The dataframe doesn''t have all required keys')
            return None
        else:
            return cities
        
    def __str__(self):
        return f'city:{self.name} place_id:{self.place_id} coords:{self.coords} scrapped:{self.scrapped}'

In [250]:
print(cities_list[0])

city:Mont Saint Michel place_id:256949255 coords:('48.6355232', '-1.5102571') scrapped:{}


In [171]:
def get_weather(cities:list[City]):
    api_key='662b304886186ebe717af2be19cb4764'
    units='metric'
    part='minutely,hourly'
    for city in cities:
        lat=city.coords[0]
        lon=city.coords[1]
        url = f'https://api.openweathermap.org/data/2.5/onecall?lat={lat}&lon={lon}&exclude={part}&appid={api_key}&units={units}'
        r = requests.get(url)
        city.scrapped=r.json()

In [257]:
cities_list = City.from_dataframe(cities_df)

In [149]:
place_id = '256949255'
lat=coords[0][0] #'48.6454528'
lon=coords[1][1]#'-2.015418'
part='minutely,hourly'
api_key='662b304886186ebe717af2be19cb4764'
units='metric'
one_city = City(place_id, 'Mont Saint Michel', coords[0])


In [32]:
url = f'https://api.openweathermap.org/data/2.5/onecall?lat={lat}&lon={lon}&exclude={part}&appid={api_key}&units={units}'

In [33]:
r = requests.get(url)
r

<Response [200]>

In [154]:
one_city.scrapped=r.json()

In [158]:
def add_place_to_dict(city:City):
    city.scrapped['place_id']=city.place_id
     #data['place_id'] = place_id

In [155]:
def add_place_to_list(city:City):
    '''
    Retrieves the forecast list from the scrapped data and adds the place_id to each dictionary in the list.
    '''
    for day_dict in city.scrapped['daily']: #city.scrapped['daily'] contains a list. Each element is a dict.
        day_dict['place_id']=city.place_id 
    

In [167]:
'''
    From a city containing scrapped data, it transforms the scrapped data in a pandas.Dataframe
'''
def process_city(city: City)->pd.DataFrame:
    add_place_to_dict(city)
    add_place_to_list(city)
    current_df = pd.DataFrame(city.scrapped['current'])
    forcast_df = pd.DataFrame(city.scrapped['daily'])
    return pd.concat([current_df,forcast_df], ignore_index=True)


In [None]:
get_weather(cities_list)

In [260]:
df_list = list()
for city in cities_list:
    df_list.append(process_city(city))    

In [261]:
pd.concat(df_list, ignore_index=True)

Unnamed: 0,dt,sunrise,sunset,temp,feels_like,pressure,humidity,dew_point,uvi,clouds,...,wind_speed,wind_deg,wind_gust,weather,moonrise,moonset,moon_phase,pop,place_id,rain
0,1630077981,1630041383,1630090749,20.11,19.53,1020,52,9.96,2.04,100,...,4.45,8,5.20,"{'id': 804, 'main': 'Clouds', 'description': '...",,,,,,
1,1630065600,1630041383,1630090749,"{'day': 20.07, 'min': 11.1, 'max': 20.33, 'nig...","{'day': 19.54, 'night': 13.49, 'eve': 19.06, '...",1021,54,10.48,3.65,90,...,7.22,34,12.08,"[{'id': 804, 'main': 'Clouds', 'description': ...",1.630099e+09,1.630060e+09,0.66,0.00,256949255.0,
2,1630152000,1630127867,1630177030,"{'day': 21.94, 'min': 10.83, 'max': 22.36, 'ni...","{'day': 21.39, 'night': 13.79, 'eve': 17.98, '...",1023,46,9.75,5.68,4,...,7.35,41,11.45,"[{'id': 800, 'main': 'Clear', 'description': '...",1.630186e+09,1.630151e+09,0.69,0.00,256949255.0,
3,1630238400,1630214351,1630263310,"{'day': 22.32, 'min': 11.37, 'max': 22.87, 'ni...","{'day': 21.7, 'night': 11.99, 'eve': 18.02, 'm...",1022,42,8.58,5.52,43,...,7.34,38,10.83,"[{'id': 802, 'main': 'Clouds', 'description': ...",1.630274e+09,1.630241e+09,0.72,0.00,256949255.0,
4,1630324800,1630300835,1630349589,"{'day': 19.63, 'min': 11.43, 'max': 21.8, 'nig...","{'day': 19.42, 'night': 15.28, 'eve': 20.57, '...",1021,68,13.33,4.84,80,...,6.92,44,11.15,"[{'id': 803, 'main': 'Clouds', 'description': ...",0.000000e+00,1.630332e+09,0.75,0.00,256949255.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,1630321200,1630299539,1630348690,"{'day': 21.31, 'min': 13.12, 'max': 21.39, 'ni...","{'day': 21.09, 'night': 13.23, 'eve': 16.63, '...",1021,61,13.45,2.57,85,...,7.03,360,11.28,"[{'id': 804, 'main': 'Clouds', 'description': ...",1.630361e+09,1.630331e+09,0.75,0.25,256873472.0,
68,1630407600,1630386031,1630434961,"{'day': 20.36, 'min': 11.86, 'max': 20.36, 'ni...","{'day': 19.97, 'night': 11.6, 'eve': 15.04, 'm...",1025,58,11.90,4.68,58,...,5.99,359,9.28,"[{'id': 803, 'main': 'Clouds', 'description': ...",0.000000e+00,1.630421e+09,0.78,0.00,256873472.0,
69,1630494000,1630472523,1630521232,"{'day': 18.46, 'min': 10.97, 'max': 18.53, 'ni...","{'day': 17.85, 'night': 10.45, 'eve': 14, 'mor...",1026,57,9.78,5.00,80,...,5.61,4,7.70,"[{'id': 803, 'main': 'Clouds', 'description': ...",1.630449e+09,1.630511e+09,0.81,0.00,256873472.0,
70,1630580400,1630559015,1630607501,"{'day': 20.73, 'min': 9.58, 'max': 22.71, 'nig...","{'day': 20.27, 'night': 14.11, 'eve': 18.88, '...",1023,54,11.12,5.00,0,...,4.25,65,8.03,"[{'id': 800, 'main': 'Clear', 'description': '...",1.630538e+09,1.630601e+09,0.85,0.02,256873472.0,


In [262]:
final_df.to_csv('./data/weather.csv', sep='\t')

In [141]:
final_df = pd.concat([current_df,forcast_df])
final_df

Unnamed: 0,dt,sunrise,sunset,temp,feels_like,pressure,humidity,dew_point,uvi,clouds,...,wind_speed,wind_deg,weather,place_id,moonrise,moonset,moon_phase,wind_gust,pop,rain
0,1630045420,1630041507,1630090873,13.86,13.31,1021,77,9.9,0.11,75,...,3.09,60,"{'id': 803, 'main': 'Clouds', 'description': '...",256949255,,,,,,
0,1630065600,1630041507,1630090873,"{'day': 18.58, 'min': 13.8, 'max': 19.04, 'nig...","{'day': 18.09, 'night': 15.4, 'eve': 18.25, 'm...",1022,61,10.82,4.12,69,...,7.84,45,"[{'id': 803, 'main': 'Clouds', 'description': ...",256949255,1630099000.0,1630061000.0,0.66,10.71,0.0,
1,1630152000,1630127991,1630177154,"{'day': 19.61, 'min': 13.82, 'max': 19.7, 'nig...","{'day': 19.14, 'night': 15.91, 'eve': 17.9, 'm...",1024,58,10.88,5.7,1,...,8.04,22,"[{'id': 800, 'main': 'Clear', 'description': '...",256949255,1630186000.0,1630151000.0,0.69,10.21,0.0,
2,1630238400,1630214475,1630263434,"{'day': 19.81, 'min': 14.27, 'max': 19.92, 'ni...","{'day': 19.31, 'night': 14.64, 'eve': 17.94, '...",1023,56,10.46,5.58,4,...,7.6,13,"[{'id': 800, 'main': 'Clear', 'description': '...",256949255,1630274000.0,1630242000.0,0.72,10.52,0.0,
3,1630324800,1630300959,1630349713,"{'day': 19.05, 'min': 14.24, 'max': 19.74, 'ni...","{'day': 18.89, 'night': 16.29, 'eve': 18.69, '...",1020,72,13.59,4.89,73,...,8.0,23,"[{'id': 500, 'main': 'Rain', 'description': 'l...",256949255,0.0,1630332000.0,0.75,9.6,0.24,0.3
4,1630411200,1630387443,1630435992,"{'day': 21.18, 'min': 15.26, 'max': 21.5, 'nig...","{'day': 21.02, 'night': 16.4, 'eve': 19.17, 'm...",1023,64,13.46,3.42,48,...,9.49,26,"[{'id': 802, 'main': 'Clouds', 'description': ...",256949255,1630362000.0,1630422000.0,0.78,14.44,0.0,
5,1630497600,1630473927,1630522270,"{'day': 21.18, 'min': 13.5, 'max': 21.72, 'nig...","{'day': 20.87, 'night': 15.75, 'eve': 18.59, '...",1022,58,11.83,4.0,0,...,9.89,38,"[{'id': 800, 'main': 'Clear', 'description': '...",256949255,1630451000.0,1630512000.0,0.81,13.48,0.0,
6,1630584000,1630560411,1630608547,"{'day': 20.97, 'min': 13.5, 'max': 21.3, 'nigh...","{'day': 20.56, 'night': 15.77, 'eve': 18.61, '...",1021,55,10.8,4.0,0,...,9.23,29,"[{'id': 800, 'main': 'Clear', 'description': '...",256949255,1630540000.0,1630601000.0,0.85,12.45,0.0,
7,1630670400,1630646895,1630694824,"{'day': 20.96, 'min': 13.38, 'max': 21.92, 'ni...","{'day': 20.5, 'night': 15.03, 'eve': 18.88, 'm...",1022,53,10.11,4.0,0,...,8.62,39,"[{'id': 800, 'main': 'Clear', 'description': '...",256949255,1630630000.0,1630690000.0,0.88,11.53,0.0,


In [43]:
import datetime

In [50]:
datetime.datetime.fromtimestamp(1630065600)

datetime.datetime(2021, 8, 27, 14, 0)