# PARTIE 1 : Données météorologiques

## Import librairies

In [2]:
import json
import os
import time

import numpy as np
import pandas as pd
import requests

## Liste des villes

Nous travaillons avec les 35 meilleures villes françaises à visiter.

In [3]:
# The 35 destinations processed by this notebook, spelled without accents.
# The order matters: the coordinate and forecast lists built later are aligned with it.
cities = [
    "Mont Saint Michel",
    "St Malo",
    "Bayeux",
    "Le Havre",
    "Rouen",
    "Paris",
    "Amiens",
    "Lille",
    "Strasbourg",
    "Chateau du Haut Koenigsbourg",
    "Colmar",
    "Eguisheim",
    "Besancon",
    "Dijon",
    "Annecy",
    "Grenoble",
    "Lyon",
    "Gorges du Verdon",
    "Bormes les Mimosas",
    "Cassis",
    "Marseille",
    "Aix en Provence",
    "Avignon",
    "Uzes",
    "Nimes",
    "Aigues Mortes",
    "Saintes Maries de la mer",
    "Collioure",
    "Carcassonne",
    "Ariege",
    "Toulouse",
    "Montauban",
    "Biarritz",
    "Bayonne",
    "La Rochelle"
]

## API Nominatim

Nous utilisons l'API Nominatim pour récupérer les données GPS (latitude et longitude) des 35 villes.

In [4]:
# Single-city smoke test of the Nominatim search endpoint.
# Nominatim's usage policy asks clients to identify themselves, hence the explicit User-Agent;
# the timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get(
    'https://nominatim.openstreetmap.org/search?city=Paris&country=France&format=json',
    headers={'User-Agent': 'top-35-cities-weather-notebook'},
    timeout=10,
)
r.raise_for_status()  # fail loudly on an HTTP error instead of inspecting an error page
r.content

b'[{"place_id":115350921,"licence":"Data \xc2\xa9 OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright","osm_type":"relation","osm_id":7444,"lat":"48.8588897","lon":"2.3200410217200766","class":"boundary","type":"administrative","place_rank":15,"importance":0.8317101715588673,"addresstype":"suburb","name":"Paris","display_name":"Paris, \xc3\x8ele-de-France, France m\xc3\xa9tropolitaine, France","boundingbox":["48.8155755","48.9021560","2.2241220","2.4697602"]},{"place_id":114827617,"licence":"Data \xc2\xa9 OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright","osm_type":"relation","osm_id":71525,"lat":"48.8534951","lon":"2.3483915","class":"boundary","type":"administrative","place_rank":12,"importance":0.8317101715588673,"addresstype":"city","name":"Paris","display_name":"Paris, \xc3\x8ele-de-France, France m\xc3\xa9tropolitaine, France","boundingbox":["48.8155755","48.9021560","2.2241220","2.4697602"]},{"place_id":114994164,"licence":"Data \xc2\xa9 OpenStreetMap co

In [5]:
# Same response, decoded from JSON into Python lists/dicts
r.json()

[{'place_id': 115350921,
  'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright',
  'osm_type': 'relation',
  'osm_id': 7444,
  'lat': '48.8588897',
  'lon': '2.3200410217200766',
  'class': 'boundary',
  'type': 'administrative',
  'place_rank': 15,
  'importance': 0.8317101715588673,
  'addresstype': 'suburb',
  'name': 'Paris',
  'display_name': 'Paris, Île-de-France, France métropolitaine, France',
  'boundingbox': ['48.8155755', '48.9021560', '2.2241220', '2.4697602']},
 {'place_id': 114827617,
  'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright',
  'osm_type': 'relation',
  'osm_id': 71525,
  'lat': '48.8534951',
  'lon': '2.3483915',
  'class': 'boundary',
  'type': 'administrative',
  'place_rank': 12,
  'importance': 0.8317101715588673,
  'addresstype': 'city',
  'name': 'Paris',
  'display_name': 'Paris, Île-de-France, France métropolitaine, France',
  'boundingbox': ['48.8155755', '48.9021560', '2.2241220', '2.4697

Notre liste de villes contient des espaces. Nous remplaçons les espaces par le caractère '+' pour les futures requêtes API.

In [6]:
# URL-friendly variant of each city name: every space becomes '+'
cities_for_api = [name.replace(' ', '+') for name in cities]
cities_for_api

['Mont+Saint+Michel',
 'St+Malo',
 'Bayeux',
 'Le+Havre',
 'Rouen',
 'Paris',
 'Amiens',
 'Lille',
 'Strasbourg',
 'Chateau+du+Haut+Koenigsbourg',
 'Colmar',
 'Eguisheim',
 'Besancon',
 'Dijon',
 'Annecy',
 'Grenoble',
 'Lyon',
 'Gorges+du+Verdon',
 'Bormes+les+Mimosas',
 'Cassis',
 'Marseille',
 'Aix+en+Provence',
 'Avignon',
 'Uzes',
 'Nimes',
 'Aigues+Mortes',
 'Saintes+Maries+de+la+mer',
 'Collioure',
 'Carcassonne',
 'Ariege',
 'Toulouse',
 'Montauban',
 'Biarritz',
 'Bayonne',
 'La+Rochelle']

In [7]:
# Fetch the GPS coordinates of every city and store them in two parallel lists
lat_list, lon_list = [], []
for city in cities_for_api:
    r = requests.get(
        f'https://nominatim.openstreetmap.org/search?city={city}&country=France&format=json',
        headers={'User-Agent': 'top-35-cities-weather-notebook'},  # identify the client, per usage policy
        timeout=10,
    )
    r.raise_for_status()
    results = r.json()  # decode the body once and reuse it for both coordinates
    if not results:
        # An empty result list would otherwise surface as an opaque IndexError
        raise ValueError(f'Nominatim returned no result for {city!r}')
    # The first match is used as the city's location
    lat_list.append(float(results[0]['lat']))
    lon_list.append(float(results[0]['lon']))
    time.sleep(1)  # stay within Nominatim's limit of one request per second

In [8]:
# Display the collected latitudes, then the collected longitudes
print(lat_list)
print(lon_list)

[48.6359541, 48.649518, 49.2764624, 49.4938975, 49.4404591, 48.8588897, 49.8941708, 50.6365654, 48.584614, 48.2495226, 48.0777517, 48.0447968, 47.2380222, 47.3215806, 45.8992348, 45.1875602, 45.7578137, 43.7496562, 43.1506968, 43.2140359, 43.2961743, 43.5298424, 43.9492493, 44.0121279, 43.8374249, 43.5661521, 43.4515922, 42.52505, 43.2130358, 42.9927428, 43.6044622, 44.0175835, 43.4832523, 43.4945144, 46.1591126]
[-1.511459954959514, -2.0260409, -0.7024738, 0.1079732, 1.0939658, 2.3200410217200766, 2.2956951, 3.0635282, 7.7507127, 7.3454923, 7.3579641, 7.3079618, 6.0243622, 5.0414701, 6.1288847, 5.7357819, 4.8320114, 6.3285616, 6.3419285, 5.5396318, 5.3699525, 5.4474738, 4.8059012, 4.4196718, 4.3600687, 4.19154, 4.4277202, 3.0831554, 2.3491069, 1.6124975, 1.4442469, 1.3549991, -1.5592776, -1.4736657, -1.1520434]


In [9]:
# Check the type of the stored coordinates (converted with float() when collected)
type(lat_list[0])

float

## API Openweather

Nous utilisons l'API Openweather pour récupérer les données météorologiques des 35 villes.

In [10]:
# Using this API requires an API key.
# Set the OPENWEATHER_API_KEY environment variable before starting the kernel;
# reading the key from the environment keeps it out of the saved notebook.
# When the variable is not set, the key is the empty string.
API_KEY = os.environ.get('OPENWEATHER_API_KEY', '')

## Météo actuelle

In [11]:
# Smoke test on Paris: look the city up by name instead of relying on a hard-coded position
paris_idx = cities.index('Paris')
lat_paris = lat_list[paris_idx]
lon_paris = lon_list[paris_idx]

# Current weather for Paris (metric units, French descriptions)
r = requests.get(
    f'https://api.openweathermap.org/data/2.5/weather?lat={lat_paris}&lon={lon_paris}&appid={API_KEY}&units=metric&lang=fr',
    timeout=10,  # do not hang forever on a stalled connection
)
print(r.content)
r.json()

b'{"coord":{"lon":2.32,"lat":48.8589},"weather":[{"id":501,"main":"Rain","description":"pluie mod\xc3\xa9r\xc3\xa9e","icon":"10n"},{"id":701,"main":"Mist","description":"brume","icon":"50n"}],"base":"stations","main":{"temp":10.84,"feels_like":10.49,"temp_min":10.19,"temp_max":11.32,"pressure":1005,"humidity":96},"visibility":5000,"wind":{"speed":7.2,"deg":210},"rain":{"1h":1.33},"clouds":{"all":100},"dt":1708556337,"sys":{"type":2,"id":2012208,"country":"FR","sunrise":1708498176,"sunset":1708535974},"timezone":3600,"id":6545270,"name":"Quartier du Palais-Royal","cod":200}'


{'coord': {'lon': 2.32, 'lat': 48.8589},
 'weather': [{'id': 501,
   'main': 'Rain',
   'description': 'pluie modérée',
   'icon': '10n'},
  {'id': 701, 'main': 'Mist', 'description': 'brume', 'icon': '50n'}],
 'base': 'stations',
 'main': {'temp': 10.84,
  'feels_like': 10.49,
  'temp_min': 10.19,
  'temp_max': 11.32,
  'pressure': 1005,
  'humidity': 96},
 'visibility': 5000,
 'wind': {'speed': 7.2, 'deg': 210},
 'rain': {'1h': 1.33},
 'clouds': {'all': 100},
 'dt': 1708556337,
 'sys': {'type': 2,
  'id': 2012208,
  'country': 'FR',
  'sunrise': 1708498176,
  'sunset': 1708535974},
 'timezone': 3600,
 'id': 6545270,
 'name': 'Quartier du Palais-Royal',
 'cod': 200}

## Prévisions

Openweather donne gratuitement les prévisions météorologiques pour les 5 prochains jours, et par tranche de 3 heures.

In [12]:
# Weather forecast for the next 5 days, in 3-hour steps
r = requests.get(
    f'https://api.openweathermap.org/data/2.5/forecast?lat={lat_paris}&lon={lon_paris}&appid={API_KEY}&units=metric&lang=fr',
    timeout=10,  # do not hang forever on a stalled connection
)
print(r.content)
r.json()

b'{"cod":"200","message":0,"cnt":40,"list":[{"dt":1708560000,"main":{"temp":10.84,"feels_like":10.49,"temp_min":10.84,"temp_max":11.33,"pressure":1005,"sea_level":1005,"grnd_level":1000,"humidity":96,"temp_kf":-0.49},"weather":[{"id":500,"main":"Rain","description":"l\xc3\xa9g\xc3\xa8re pluie","icon":"10n"}],"clouds":{"all":100},"wind":{"speed":5.47,"deg":218,"gust":13.5},"visibility":10000,"pop":1,"rain":{"3h":2.75},"sys":{"pod":"n"},"dt_txt":"2024-02-22 00:00:00"},{"dt":1708570800,"main":{"temp":11.16,"feels_like":10.81,"temp_min":11.16,"temp_max":11.8,"pressure":1004,"sea_level":1004,"grnd_level":997,"humidity":95,"temp_kf":-0.64},"weather":[{"id":500,"main":"Rain","description":"l\xc3\xa9g\xc3\xa8re pluie","icon":"10n"}],"clouds":{"all":100},"wind":{"speed":4.36,"deg":226,"gust":11.49},"visibility":10000,"pop":1,"rain":{"3h":1.7},"sys":{"pod":"n"},"dt_txt":"2024-02-22 03:00:00"},{"dt":1708581600,"main":{"temp":11.79,"feels_like":11.45,"temp_min":11.79,"temp_max":12.26,"pressure":10

{'cod': '200',
 'message': 0,
 'cnt': 40,
 'list': [{'dt': 1708560000,
   'main': {'temp': 10.84,
    'feels_like': 10.49,
    'temp_min': 10.84,
    'temp_max': 11.33,
    'pressure': 1005,
    'sea_level': 1005,
    'grnd_level': 1000,
    'humidity': 96,
    'temp_kf': -0.49},
   'weather': [{'id': 500,
     'main': 'Rain',
     'description': 'légère pluie',
     'icon': '10n'}],
   'clouds': {'all': 100},
   'wind': {'speed': 5.47, 'deg': 218, 'gust': 13.5},
   'visibility': 10000,
   'pop': 1,
   'rain': {'3h': 2.75},
   'sys': {'pod': 'n'},
   'dt_txt': '2024-02-22 00:00:00'},
  {'dt': 1708570800,
   'main': {'temp': 11.16,
    'feels_like': 10.81,
    'temp_min': 11.16,
    'temp_max': 11.8,
    'pressure': 1004,
    'sea_level': 1004,
    'grnd_level': 997,
    'humidity': 95,
    'temp_kf': -0.64},
   'weather': [{'id': 500,
     'main': 'Rain',
     'description': 'légère pluie',
     'icon': '10n'}],
   'clouds': {'all': 100},
   'wind': {'speed': 4.36, 'deg': 226, 'gust': 

Le résultat de la requête contient 40 éléments et non 5 parce que les prévisions sont données par tranche de 3 heures.

Afin de récupérer la prévision de demain (soit dans 24 heures) nous devons sélectionner l'élément à l'index 7 :   
--> 24 heures = 3 heures x 8 (auquel nous retirons 1 car l'index commence à 0)

Voici les index qui permettent de récupérer les prévisions pour les 5 prochains jours :

- Prévision dans 1 jour :  7 

- Prévisions dans 2 jours : 15

- Prévisions dans 3 jours : 23

- Prévisions dans 4 jours : 31

- Prévisions dans 5 jours : 39

In [13]:
# Check that the entry at index 39 is indeed the forecast 5 days ahead
r.json()['list'][39]

{'dt': 1708981200,
 'main': {'temp': 6.88,
  'feels_like': 3.87,
  'temp_min': 6.88,
  'temp_max': 6.88,
  'pressure': 994,
  'sea_level': 994,
  'grnd_level': 989,
  'humidity': 86,
  'temp_kf': 0},
 'weather': [{'id': 500,
   'main': 'Rain',
   'description': 'légère pluie',
   'icon': '10n'}],
 'clouds': {'all': 100},
 'wind': {'speed': 4.6, 'deg': 77, 'gust': 7.92},
 'visibility': 10000,
 'pop': 0.91,
 'rain': {'3h': 0.66},
 'sys': {'pod': 'n'},
 'dt_txt': '2024-02-26 21:00:00'}

## Sélection et récupération des données

Nous identifions 7 données météorologiques intéressantes pour notre projet : 

1. *list.main.temp* : la température

2. *list.weather.main* : la description générale (nuageux, pluie, etc.)

3. *list.clouds.all* : le taux de nuages

4. *list.wind.speed* : la vitesse du vent

5. *list.pop* : la probabilité de pleuvoir

6. *list.rain.3h* : le volume de précipitations cumulées pendant les 3 dernières heures

7. *list.main.humidity* : le taux d'humidité

In [19]:
# Pull the selected fields out of the forecast slot at index 7 (about one day ahead).
# The response body is decoded once and reused for every field.
forecast_slot = r.json()['list'][7]

temp = forecast_slot['main']['temp']
weather = forecast_slot['weather'][0]['main']
description = forecast_slot['weather'][0]['description']
clouds = forecast_slot['clouds']['all']
wind = forecast_slot['wind']['speed']
pop = forecast_slot['pop']
try:
    rain = forecast_slot['rain']['3h']
except KeyError:
    # Some slots carry no 'rain' entry; fall back to 0 so that `rain` is always defined,
    # matching the default used by extractWeatherForecast.
    print('No rain data for selected time')
    rain = 0
humidity = forecast_slot['main']['humidity']

In [20]:
# One empty list per (metric, day) pair; each list will receive one value per city.
# Every name is bound to its own distinct list object.

temp_d1, weather_d1, clouds_d1, pop_d1, rain_d1, wind_d1, humidity_d1 = ([] for _ in range(7))
temp_d2, weather_d2, clouds_d2, pop_d2, rain_d2, wind_d2, humidity_d2 = ([] for _ in range(7))
temp_d3, weather_d3, clouds_d3, pop_d3, rain_d3, wind_d3, humidity_d3 = ([] for _ in range(7))
temp_d4, weather_d4, clouds_d4, pop_d4, rain_d4, wind_d4, humidity_d4 = ([] for _ in range(7))
temp_d5, weather_d5, clouds_d5, pop_d5, rain_d5, wind_d5, humidity_d5 = ([] for _ in range(7))

In [21]:
# Function that collects the selected forecast fields from the OpenWeather API

def extractWeatherForecast(temp, weather, clouds, wind, humidity, pop, rain, fi): # fi : forecast index
    '''Append one forecast slot per city to the given result lists.

    For every (lat, lon) pair of the module-level ``lat_list`` / ``lon_list``, the
    OpenWeather forecast endpoint is queried (authenticated with the module-level
    ``API_KEY``) and the values of the slot at position ``fi`` are appended to the
    lists passed in.

    Args:
        temp, weather, clouds, wind, humidity, pop, rain: lists extended in place,
            one value per city; they receive the slot's ``main.temp``,
            ``weather[0].main``, ``clouds.all``, ``wind.speed``, ``main.humidity``,
            ``pop`` and ``rain['3h']`` (0 when the slot has no rain entry).
        fi: index of the forecast slot to read in the response's ``list`` array.

    Raises:
        KeyError / IndexError: when a response does not contain the expected fields
            or has no slot at index ``fi``. Nothing is appended for that city, so
            the seven lists always keep equal lengths.
    '''
    for lat, lon in zip(lat_list, lon_list):
        r = requests.get(
            f'https://api.openweathermap.org/data/2.5/forecast?lat={lat}&lon={lon}&appid={API_KEY}&units=metric&lang=fr',
            timeout=10,  # do not hang forever on a stalled connection
        )
        slot = r.json()['list'][fi]  # decode the body once instead of once per field

        # Read every field BEFORE touching the output lists: if one is missing,
        # the exception leaves all lists untouched for this city.
        slot_temp = slot['main']['temp']
        slot_weather = slot['weather'][0]['main']
        slot_clouds = slot['clouds']['all']
        slot_wind = slot['wind']['speed']
        slot_humidity = slot['main']['humidity']
        slot_pop = slot['pop']
        # Only a missing rain entry is defaulted; other errors are no longer swallowed
        slot_rain = slot.get('rain', {}).get('3h', 0)

        temp.append(slot_temp)
        weather.append(slot_weather)
        clouds.append(slot_clouds)
        wind.append(slot_wind)
        humidity.append(slot_humidity)
        pop.append(slot_pop)
        rain.append(slot_rain)

In [22]:
# Run the extraction once per forecast day.
# Each entry pairs a forecast index (one slot every 8 steps of 3 hours, starting at 7)
# with that day's output lists, in the order the function expects.

forecast_targets = [
    (7, (temp_d1, weather_d1, clouds_d1, wind_d1, humidity_d1, pop_d1, rain_d1)),
    (15, (temp_d2, weather_d2, clouds_d2, wind_d2, humidity_d2, pop_d2, rain_d2)),
    (23, (temp_d3, weather_d3, clouds_d3, wind_d3, humidity_d3, pop_d3, rain_d3)),
    (31, (temp_d4, weather_d4, clouds_d4, wind_d4, humidity_d4, pop_d4, rain_d4)),
    (39, (temp_d5, weather_d5, clouds_d5, wind_d5, humidity_d5, pop_d5, rain_d5)),
]
for forecast_index, day_lists in forecast_targets:
    extractWeatherForecast(*day_lists, fi=forecast_index)

In [23]:
# Sanity check: print the temperatures collected for each of the next 5 days

for day_temps in (temp_d1, temp_d2, temp_d3, temp_d4, temp_d5):
    print(day_temps)

[5.38, 5.78, 4.95, 6.73, 4.92, 6.48, 6.04, 6.05, 8.56, 4.53, 7.9, 7.75, 6.35, 5.88, 7.46, 9.01, 8.67, 7.28, 13.11, 13.84, 13.96, 12.02, 12.02, 10.21, 10.96, 12.29, 12.68, 10.36, 7.63, 6.21, 7.28, 7.48, 9.47, 9.23, 8.87]
[2.99, 4.28, 2.65, 4.76, 2.52, 5.09, 2.92, 3.84, 5.18, 1.77, 5.18, 5.22, 3.96, 2.82, 3.39, 4.77, 5.3, 0.05, 9.16, 10.15, 10.31, 6.9, 6.78, 5.91, 6.84, 8.62, 9.11, 7.67, 6.18, 4.27, 5.45, 4.58, 7.5, 7.01, 5.97]
[5.53, 6.57, 4.61, 7.54, 4.39, 5.95, 4.36, 5.09, 3.25, -0.91, 2.34, 2.3, 3.36, 3.82, 0.94, 2.86, 6.2, -0.15, 8.34, 9.17, 9.32, 5.86, 5.99, 5.64, 6.98, 8.73, 8.79, 7.13, 7.01, 4.97, 5.84, 5.44, 8.46, 8.18, 8.88]
[7.67, 7.79, 7.9, 9.25, 8.97, 9.97, 9.06, 4.44, 6.36, 2.09, 5.07, 4.81, 9.14, 6.88, 4.19, 6.64, 7.36, 2.88, 11.09, 12.11, 12.35, 8.04, 9.35, 8.34, 9.39, 11.42, 12.67, 8.63, 6.33, 8.87, 7.58, 8.6, 11.43, 11.25, 9.5]
[6.77, 7.2, 7.48, 7.98, 6.79, 6.88, 7.32, 7.82, 6.06, 2.69, 6.32, 6.49, 5.96, 6.02, 3.79, 5.9, 7.02, 2.05, 9.7, 10.09, 10.26, 7.38, 8.01, 7.17, 

## Création dataframe Pandas

In [24]:
# Dictionary of column name -> list of values, used later to build the dataframe

city_dict = {}

In [25]:
# Add the city id, city name, latitude and longitude columns to the dictionary

# Ids run from 1 to the number of cities, so the column always matches the list length
city_dict['city_id'] = list(range(1, len(cities) + 1))
city_dict['city'] = cities
city_dict['lat_city'] = lat_list
city_dict['lon_city'] = lon_list

In [26]:
# Add the weather data to the dictionary, one '<metric>_d<day>' column per list.
# Metrics are inserted in this order, each with days 1 to 5.

forecast_columns = {
    'temp': (temp_d1, temp_d2, temp_d3, temp_d4, temp_d5),
    'weather': (weather_d1, weather_d2, weather_d3, weather_d4, weather_d5),
    'clouds': (clouds_d1, clouds_d2, clouds_d3, clouds_d4, clouds_d5),
    'wind': (wind_d1, wind_d2, wind_d3, wind_d4, wind_d5),
    'humidity': (humidity_d1, humidity_d2, humidity_d3, humidity_d4, humidity_d5),
    'pop': (pop_d1, pop_d2, pop_d3, pop_d4, pop_d5),
    'rain': (rain_d1, rain_d2, rain_d3, rain_d4, rain_d5),
}
for metric, per_day_values in forecast_columns.items():
    for day, values in enumerate(per_day_values, start=1):
        city_dict[f'{metric}_d{day}'] = values

In [27]:
# Build the dataframe; from_dict is a classmethod, so it is called on the class itself
# rather than on a throwaway empty DataFrame instance.
dataset = pd.DataFrame.from_dict(city_dict)

In [28]:
# (number of rows, number of columns) of the dataframe
dataset.shape

(35, 39)

## Résultats

In [29]:
# Show every column when displaying wide dataframes
pd.set_option('display.max_columns', None)
# from_dict is a classmethod: call it on the class, not on an empty instance
dataset = pd.DataFrame.from_dict(city_dict)
dataset.head(35)

Unnamed: 0,city_id,city,lat_city,lon_city,temp_d1,temp_d2,temp_d3,temp_d4,temp_d5,weather_d1,weather_d2,weather_d3,weather_d4,weather_d5,clouds_d1,clouds_d2,clouds_d3,clouds_d4,clouds_d5,wind_d1,wind_d2,wind_d3,wind_d4,wind_d5,humidity_d1,humidity_d2,humidity_d3,humidity_d4,humidity_d5,pop_d1,pop_d2,pop_d3,pop_d4,pop_d5,rain_d1,rain_d2,rain_d3,rain_d4,rain_d5
0,1,Mont Saint Michel,48.635954,-1.51146,5.38,2.99,5.53,7.67,6.77,Rain,Rain,Rain,Rain,Rain,47,83,99,84,100,10.57,5.66,5.03,8.37,8.91,83,87,88,84,93,0.59,0.47,0.28,0.39,0.91,0.39,0.57,0.32,0.12,0.83
1,2,St Malo,48.649518,-2.026041,5.78,4.28,6.57,7.79,7.2,Rain,Rain,Rain,Clouds,Rain,63,75,100,82,100,10.8,6.68,6.31,8.65,11.82,84,80,83,83,90,0.68,0.48,0.23,0.35,0.99,0.37,0.29,0.18,0.0,1.78
2,3,Bayeux,49.276462,-0.702474,4.95,2.65,4.61,7.9,7.48,Rain,Rain,Rain,Rain,Rain,53,64,40,99,100,8.77,5.51,5.41,7.7,7.57,87,90,91,84,92,0.72,0.63,0.33,0.45,0.95,0.71,0.34,0.12,0.26,2.73
3,4,Le Havre,49.493898,0.107973,6.73,4.76,7.54,9.25,7.98,Rain,Rain,Rain,Rain,Rain,47,82,42,100,100,11.59,7.87,7.39,9.38,8.88,77,80,82,82,89,0.77,0.54,0.57,0.61,0.74,1.12,0.61,0.98,0.53,0.79
4,5,Rouen,49.440459,1.093966,4.92,2.52,4.39,8.97,6.79,Rain,Clouds,Rain,Rain,Rain,70,49,82,100,100,8.45,4.77,5.38,8.2,4.84,80,86,93,87,92,0.93,0.18,0.4,0.76,0.37,0.82,0.0,0.22,1.7,0.21
5,6,Paris,48.85889,2.320041,6.48,5.09,5.95,9.97,6.88,Rain,Clouds,Rain,Rain,Rain,100,91,78,100,100,7.24,4.13,4.92,8.1,4.6,70,72,81,79,86,0.92,0.0,0.55,0.94,0.91,0.5,0.0,0.25,4.93,0.66
6,7,Amiens,49.894171,2.295695,6.04,2.92,4.36,9.06,7.32,Rain,Clouds,Rain,Rain,Rain,96,50,93,100,100,8.57,5.67,5.25,6.58,5.03,78,84,92,88,92,0.92,0.07,0.41,0.99,0.72,1.07,0.0,0.29,2.04,0.36
7,8,Lille,50.636565,3.063528,6.05,3.84,5.09,4.44,7.82,Rain,Rain,Rain,Rain,Rain,96,73,100,100,100,8.12,5.92,6.35,7.96,6.31,71,78,85,92,89,0.46,0.29,0.51,1.0,0.75,0.15,0.13,0.32,6.68,0.4
8,9,Strasbourg,48.584614,7.750713,8.56,5.18,3.25,6.36,6.06,Rain,Clouds,Clear,Rain,Clouds,100,84,3,100,100,7.51,5.39,2.86,4.22,0.74,71,79,82,81,91,1.0,0.21,0.0,0.81,0.03,1.54,0.0,0.0,0.85,0.0
9,10,Chateau du Haut Koenigsbourg,48.249523,7.345492,4.53,1.77,-0.91,2.09,2.69,Rain,Clouds,Clear,Rain,Clouds,100,88,7,100,100,7.08,2.9,1.85,3.72,1.05,78,84,88,91,91,1.0,0.37,0.0,0.98,0.19,2.98,0.0,0.0,1.32,0.0


In [30]:
# Summary statistics for every column, numeric and non-numeric alike
dataset.describe(include='all')

Unnamed: 0,city_id,city,lat_city,lon_city,temp_d1,temp_d2,temp_d3,temp_d4,temp_d5,weather_d1,weather_d2,weather_d3,weather_d4,weather_d5,clouds_d1,clouds_d2,clouds_d3,clouds_d4,clouds_d5,wind_d1,wind_d2,wind_d3,wind_d4,wind_d5,humidity_d1,humidity_d2,humidity_d3,humidity_d4,humidity_d5,pop_d1,pop_d2,pop_d3,pop_d4,pop_d5,rain_d1,rain_d2,rain_d3,rain_d4,rain_d5
count,35.0,35,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35,35,35,35,35,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0,35.0
unique,,35,,,,,,,,2,3,3,2,2,,,,,,,,,,,,,,,,,,,,,,,,,
top,,Mont Saint Michel,,,,,,,,Rain,Clouds,Rain,Rain,Rain,,,,,,,,,,,,,,,,,,,,,,,,,
freq,,1,,,,,,,,34,19,15,29,23,,,,,,,,,,,,,,,,,,,,,,,,,
mean,18.0,,45.842672,3.401638,8.495429,5.402857,5.508,8.212,7.128286,,,,,,90.371429,77.942857,60.171429,95.742857,98.0,6.876571,4.983143,4.303429,6.262,5.279429,80.085714,79.057143,80.171429,86.742857,81.714286,0.820571,0.366571,0.229714,0.727714,0.454286,1.676,0.65,0.275714,1.442286,0.496857
std,10.246951,,2.588229,2.95055,2.720422,2.386132,2.632504,2.614947,1.932145,,,,,,20.578524,25.83311,38.891618,10.404589,4.789695,2.89444,1.735079,1.764229,3.427132,2.746507,9.838016,9.857642,10.592688,5.797276,10.637258,0.209438,0.356717,0.292539,0.304622,0.306578,1.924202,1.740054,0.548116,1.527216,0.657396
min,1.0,,42.52505,-2.026041,4.53,0.05,-0.91,2.09,2.05,,,,,,17.0,15.0,1.0,56.0,76.0,0.62,1.99,1.55,1.76,0.74,62.0,60.0,58.0,75.0,63.0,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,9.5,,43.512178,1.399623,6.28,3.9,4.09,6.76,6.04,,,,,,97.5,68.0,18.5,100.0,99.0,5.735,4.01,2.715,3.785,3.345,71.0,71.5,70.5,83.0,72.0,0.705,0.0,0.0,0.565,0.215,0.44,0.0,0.0,0.2,0.0
50%,18.0,,45.18756,4.360069,7.75,5.18,5.84,8.6,7.2,,,,,,100.0,88.0,71.0,100.0,100.0,7.17,5.15,4.91,5.21,5.3,81.0,81.0,83.0,87.0,86.0,0.91,0.34,0.0,0.83,0.39,0.99,0.0,0.0,1.15,0.21
75%,26.5,,48.417068,5.637707,10.285,6.87,7.335,9.445,8.005,,,,,,100.0,98.5,98.5,100.0,100.0,8.67,6.16,5.5,8.15,7.03,87.0,86.5,88.0,91.5,91.0,1.0,0.57,0.495,0.985,0.715,1.885,0.315,0.32,2.175,0.81


## Enregistrement fichier csv

In [None]:
# Save to csv file
# Top 35 French cities with GPS and weather data
os.makedirs('./src', exist_ok=True)  # to_csv does not create missing directories
dataset.to_csv('./src/top_35_cities.csv', index=False)

In [3]:
# Reload the dataset from the csv file written above
dataset = pd.read_csv('./src/top_35_cities.csv')

## Simplification du dataframe

Nous simplifions le dataframe obtenu en calculant les valeurs moyennes pour les 5 prochains jours.

In [31]:
# Compute the mean values over the next 5 days

# Numeric metrics: row-wise mean of the five '<metric>_d<day>' columns
for metric in ('temp', 'clouds', 'wind', 'humidity', 'pop'):
    dataset[f'avg_{metric}'] = dataset.filter(like=f'{metric}_d').mean(axis=1)
dataset['avg_rain'] = dataset.filter(like='rain_d').mean(axis=1)

# Categorical metric: first column of the row-wise mode of the daily weather labels
daily_weather = dataset.filter(like='weather_d')
dataset['avg_weather'] = daily_weather.mode(axis=1).iloc[:,0]

In [32]:
# Keep only the identity columns and the 5-day averages, as an independent copy
lite_columns = [
    'city_id', 'city', 'lat_city', 'lon_city',
    'avg_weather', 'avg_temp', 'avg_clouds', 'avg_wind',
    'avg_humidity', 'avg_rain', 'avg_pop',
]
dataset_lite = dataset.loc[:, lite_columns].copy()
dataset_lite.head(35)

Unnamed: 0,city_id,city,lat_city,lon_city,avg_weather,avg_temp,avg_clouds,avg_wind,avg_humidity,avg_rain,avg_pop
0,1,Mont Saint Michel,48.635954,-1.51146,Rain,5.668,82.6,7.708,87.0,0.446,0.528
1,2,St Malo,48.649518,-2.026041,Rain,6.324,84.0,8.852,84.0,0.524,0.546
2,3,Bayeux,49.276462,-0.702474,Rain,5.518,71.2,6.992,88.8,0.832,0.616
3,4,Le Havre,49.493898,0.107973,Rain,7.252,74.2,9.022,82.0,0.806,0.646
4,5,Rouen,49.440459,1.093966,Rain,5.518,80.2,6.328,87.6,0.59,0.528
5,6,Paris,48.85889,2.320041,Rain,6.874,93.8,5.798,77.6,1.268,0.664
6,7,Amiens,49.894171,2.295695,Rain,5.94,87.8,6.22,86.8,0.752,0.622
7,8,Lille,50.636565,3.063528,Rain,5.448,93.8,6.932,83.0,1.536,0.602
8,9,Strasbourg,48.584614,7.750713,Clouds,5.882,77.4,4.144,80.8,0.478,0.41
9,10,Chateau du Haut Koenigsbourg,48.249523,7.345492,Clouds,2.034,79.0,3.32,86.4,0.86,0.508


In [33]:
# Summary statistics for every column of the simplified dataframe
dataset_lite.describe(include='all')

Unnamed: 0,city_id,city,lat_city,lon_city,avg_weather,avg_temp,avg_clouds,avg_wind,avg_humidity,avg_rain,avg_pop
count,35.0,35,35.0,35.0,35,35.0,35.0,35.0,35.0,35.0,35.0
unique,,35,,,2,,,,,,
top,,Mont Saint Michel,,,Rain,,,,,,
freq,,1,,,25,,,,,,
mean,18.0,,45.842672,3.401638,,6.949314,84.445714,5.540914,81.554286,0.908171,0.519771
std,10.246951,,2.588229,2.95055,,2.216783,11.825803,1.989231,6.888197,0.698629,0.183944
min,1.0,,42.52505,-2.026041,,2.034,63.0,2.128,68.0,0.118,0.194
25%,9.5,,43.512178,1.399623,,5.593,76.3,3.996,75.8,0.417,0.36
50%,18.0,,45.18756,4.360069,,6.434,84.6,5.646,82.4,0.752,0.528
75%,26.5,,48.417068,5.637707,,8.394,94.0,6.947,86.6,1.213,0.638


In [None]:
# Save to csv file
# Top 35 French cities with GPS and weather data, lite version
os.makedirs('./src', exist_ok=True)  # to_csv does not create missing directories
dataset_lite.to_csv('./src/top_35_cities_lite.csv', index=False)

In [8]:
# Reload the simplified dataset from the csv file written above
dataset_lite = pd.read_csv('./src/top_35_cities_lite.csv')

## Les 5 meilleures destinations

### Villes supérieures à la moyenne

Nous nous intéressons aux villes dont les données météorologiques sont meilleures que la moyenne.

In [14]:
# Boolean masks built from the column means:
# temperature must be at or above its mean, every other metric at or below its mean

min_temp = dataset_lite['avg_temp'].ge(dataset_lite['avg_temp'].mean())
max_wind = dataset_lite['avg_wind'].le(dataset_lite['avg_wind'].mean())
max_clouds = dataset_lite['avg_clouds'].le(dataset_lite['avg_clouds'].mean())
max_rain = dataset_lite['avg_rain'].le(dataset_lite['avg_rain'].mean())
max_pop = dataset_lite['avg_pop'].le(dataset_lite['avg_pop'].mean())
max_humidity = dataset_lite['avg_humidity'].le(dataset_lite['avg_humidity'].mean())

In [15]:
# Keep the cities that satisfy every criterion at once, renumbered from 0
all_criteria = min_temp & max_clouds & max_rain & max_pop & max_humidity & max_wind
best_destinations_lite = dataset_lite.loc[all_criteria].reset_index(drop=True)
print(len(best_destinations_lite))
best_destinations_lite

4


Unnamed: 0,city_id,city,lat_city,lon_city,avg_weather,avg_temp,avg_clouds,avg_wind,avg_humidity,avg_rain,avg_pop
0,19,Bormes les Mimosas,43.150697,6.341928,Rain,10.28,63.6,5.37,75.8,0.486,0.388
1,20,Cassis,43.214036,5.539632,Rain,11.072,64.8,5.528,75.2,0.348,0.32
2,22,Aix en Provence,43.529842,5.447474,Clouds,8.04,64.2,3.224,79.0,0.176,0.332
3,28,Collioure,42.52505,3.083155,Clouds,8.358,69.8,3.846,77.4,0.242,0.224


Cette méthode est trop restrictive : nous n'obtenons que 4 villes au moment où ce notebook est édité (février 2024, d'après les dates des prévisions affichées plus haut).

Par conséquent nous tentons une méthode différente.

### Ordonner les villes

Nous choisissons d'ordonner les villes par rapport à ces critères et dans cet ordre : 

1. la vitesse du vent

2. la probabilité de pleuvoir

3. le taux de nuages

4. le taux d'humidité

5. la température

In [19]:
# Rank the cities: the first four criteria ascending (lower is better),
# temperature descending (higher is better) as the last tie-breaker
sort_columns = ['avg_wind', 'avg_pop', 'avg_clouds', 'avg_humidity', 'avg_temp']
sort_ascending = [True, True, True, True, False]
dataset_sorted = dataset_lite.sort_values(by=sort_columns, ascending=sort_ascending)
dataset_sorted.head(5)

Unnamed: 0,city_id,city,lat_city,lon_city,avg_weather,avg_temp,avg_clouds,avg_wind,avg_humidity,avg_rain,avg_pop
17,18,Gorges du Verdon,43.749656,6.328562,Rain,2.422,70.6,2.128,93.2,0.442,0.442
15,16,Grenoble,45.18756,5.735782,Rain,5.836,84.6,2.262,92.4,0.874,0.734
14,15,Annecy,45.899235,6.128885,Rain,3.954,93.0,2.506,95.6,2.152,0.768
29,30,Ariege,42.992743,1.612497,Rain,5.834,96.6,2.742,86.0,1.088,0.63
21,22,Aix en Provence,43.529842,5.447474,Clouds,8.04,64.2,3.224,79.0,0.176,0.332


Les 5 premières villes qui ressortent sont concentrées géographiquement. 

Cela semble logique puisque les villes partageant des données météorologiques proches sont susceptibles d'être géographiquement proches.

### Varier les régions représentées

Pour apporter de la variété dans les villes sélectionnées, nous ajoutons le critère de latitude.  

--> Nous construisons 5 intervalles de latitudes et choisissons la meilleure ville de chaque intervalle.

In [40]:
# Latitude quantiles at 0, 0.2, 0.4, 0.6, 0.8 and 1.0: the edges of 5 latitude bands
lat_interval = [
    np.quantile(dataset_sorted['lat_city'], tenth / 10.0)
    for tenth in range(0, 11, 2)
]
lat_interval

[42.52505,
 43.476920279999995,
 43.90451954,
 46.59067644,
 48.63866688,
 50.6365654]

In [41]:
# Pick the best-ranked city of each latitude band.
# dataset_sorted is already ordered by the ranking criteria, so the first row of a band is its best city.
best = []
latitudes = dataset_sorted['lat_city']
for i in range(len(lat_interval)-1):
    lower, upper = lat_interval[i], lat_interval[i+1]
    # Bands are (lower, upper], and the first band also includes its lower edge.
    # Strict bounds on both sides would leave out any city sitting exactly on an edge,
    # in particular the cities with the minimum and maximum latitude.
    above_lower = (latitudes >= lower) if i == 0 else (latitudes > lower)
    in_band = above_lower & (latitudes <= upper)
    city = dataset_sorted[in_band].iloc[0]['city']
    best.append(city)
best

['Ariege',
 'Gorges du Verdon',
 'Grenoble',
 'Chateau du Haut Koenigsbourg',
 'Paris']

In [None]:
# Keep the rows of the selected cities; isin works for any number of names in `best`
best_destinations_lite = dataset_lite[dataset_lite['city'].isin(best)]
# The sorted frame is only displayed: best_destinations_lite itself keeps dataset_lite's row order
best_destinations_lite.sort_values(['avg_wind', 'avg_pop', 'avg_clouds', 'avg_humidity', 'avg_temp'], 
                                        ascending=[True, True, True, True, False])

### Enregistrement fichiers csv

In [None]:
# Save to csv file
# Top 5 French destinations
os.makedirs('./src', exist_ok=True)  # to_csv does not create missing directories
best_destinations_lite.to_csv('./src/top_5_destinations.csv', index=False)

In [3]:
# Reload the top 5 destinations from the csv file written above
best_destinations_lite = pd.read_csv('./src/top_5_destinations.csv')

## Visualisation sur carte

In [4]:
# Interactive map of the selected destinations: one marker per city,
# coloured and sized by its average temperature.
import plotly.express as px
# NOTE(review): marker sizes are expected to be non-negative; a negative avg_temp
# (cold period) would presumably be rejected by plotly — confirm before relying on `size`.
fig = px.scatter_mapbox(best_destinations_lite, 
                        lat = "lat_city", lon = "lon_city", 
                        color = "avg_temp",
                        size = "avg_temp",
                        hover_name = "city",
                        hover_data = {"lat_city": ":.2f", "lon_city": ":.2f", "avg_temp": ":.0f"},
                        mapbox_style = "carto-positron",
                        zoom = 4,
                        center = {'lat':46.232192999999995,'lon':2.209666999999996},
                        color_continuous_scale=px.colors.sequential.Bluered)
# Centre the figure title horizontally
fig.update_layout(title_text = "Top 5 destinations in France", title_x = 0.5)
fig.show()