# Creating a weather API

The website will need an API that can return a list of destinations in the specified CCAAs (Spain's autonomous communities), or in the whole country, for a given date, based on the predicted weather.

It must meet the following criteria:

- Takes a date and derives the week of the year.
- Returns the 5 best destinations for that date based on meteorological data.
- Filters the 5 destinations by the specified CCAAs.
- Spreads results between all selected CCAAs.


This whole project has been made with scalability in mind, and this API is no exception. The whole notebook can simply be re-run after changing the *towns* dataframe to add new locations, and it will generate a new *weekly_weather.csv* ready to feed into our API.

# Assigning a weather station to each destination

Since most destination towns don't have a weather station, we'll simply assign each of them the closest one (for the moment). This is done by creating a new dataframe with one row per town ID, holding the weekly index values of the nearest weather station.

In [1]:
import pandas as pd
import haversine as hs
import folium
import random
import datetime

In [2]:
#Importing our towns dataframe: one row per destination town.
#Its 'ID', 'ccaa' and 'coords' columns are the ones copied into the weather dataframe further down.

towns = pd.read_csv('towns_2807_155.csv')

In [3]:
towns.head()

Unnamed: 0,ID,municipality,ccaa,province,municipality_inhabitants,geographic_area,radius,routes_number,routes_ids,mountain_passes_ids,coords,coords_MDB
0,884,Barcelona,Cataluña,Barcelona,1664182,100.7644,5.663411,3,"[1247, 6787, 10190]","[388, 315, 323, 822]","(41.38424664,2.17634927)","[2.17634927,41.38424664]"
1,7257,València,Comunitat Valenciana,Valencia,800215,139.2687,6.658115,3,"[486, 1460, 9283]","[238, 533, 906]","(39.47534441,-0.37565717)","[-0.37565717,39.47534441]"
2,4613,Murcia,Región de Murcia,Murcia,459403,885.1149,16.785117,6,"[691, 1691, 6089, 6099, 6769, 8083]","[11, 787, 1055, 978, 1001, 1012, 1103]","(37.98436361,-1.1285408)","[-1.1285408,37.98436361]"
3,151,Alicante,Comunitat Valenciana,Alicante,337482,201.265845,8.004046,9,"[301, 484, 678, 1239, 1974, 9425, 9503, 9550, ...","[644, 956, 534, 918, 429, 601, 626, 643, 802, ...","(38.34548705,-0.4831832)","[-0.4831832,38.34548705]"
4,2076,Córdoba,Andalucía,Córdoba,326039,1254.9326,19.986408,3,"[2620, 2642, 7498]","[944, 925, 964]","(37.87954225,-4.78032455)","[-4.78032455,37.87954225]"


In [4]:
#Importing our weather station data: one row per station, with the weekly values in
#columns w1..w53 plus the station 'name' and its 'coords'.

meteo = pd.read_csv('meteo.csv')

In [5]:
meteo.head()

Unnamed: 0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,...,w46,w47,w48,w49,w50,w51,w52,w53,name,coords
0,7.6,7.6,7.2,7.2,7.6,7.6,7.8,7.8,7.6,8.0,...,7.8,7.8,7.2,7.6,7.6,7.8,7.8,7.8,0002I,"(40.95806,0.87139)"
1,7.0,7.0,7.0,7.0,7.0,7.0,7.2,7.6,7.4,7.4,...,7.2,7.6,7.2,7.0,7.2,7.6,7.2,7.0,0009X,"(41.21389,0.96333)"
2,7.6,7.0,7.4,7.2,7.2,7.2,7.6,7.6,7.2,7.6,...,8.4,7.8,7.4,7.6,7.4,7.6,7.4,6.8,0016A,"(41.14972,1.17889)"
3,7.8,7.8,7.8,7.8,7.8,7.8,7.8,7.8,7.8,8.2,...,8.6,8.2,7.8,7.8,7.8,7.8,7.8,7.8,0042Y,"(41.12389,1.24917)"
4,7.0,7.0,7.0,5.6,6.8,6.8,7.2,7.2,6.8,7.2,...,7.6,6.6,7.2,7.2,6.8,7.2,7.0,6.8,0061X,"(41.41694,1.51917)"


In [6]:
#Creating a new, empty dataframe with the same columns as meteo.
#It has no rows yet; they appear once the town columns are filled in.

df = pd.DataFrame(columns=meteo.columns)

In [7]:
df.head()

Unnamed: 0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,...,w46,w47,w48,w49,w50,w51,w52,w53,name,coords


In [8]:
#Adding the columns that will hold town ID, CCAA and coords ('coords' already exists and is simply reset).

for new_col in ('town_ID', 'CCAA_ID', 'coords'):
    df[new_col] = None

In [9]:
#Let's populate the town_ID column by using a list.
#df has no rows at this point, so this assignment is what gives it one row per town.

df['town_ID'] = towns['ID'].tolist()

In [10]:
#Doing the same with CCAA: for now the column holds the CCAA *name* taken from towns['ccaa'].

df['CCAA_ID'] = towns['ccaa'].tolist()

In [11]:
#Performing the same procedure with the coordinates (kept as the "(lat,lon)" strings found in towns).

df['coords'] = towns['coords'].tolist()

In [12]:
df.head()

Unnamed: 0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,...,w48,w49,w50,w51,w52,w53,name,coords,town_ID,CCAA_ID
0,,,,,,,,,,,...,,,,,,,,"(41.38424664,2.17634927)",884,Cataluña
1,,,,,,,,,,,...,,,,,,,,"(39.47534441,-0.37565717)",7257,Comunitat Valenciana
2,,,,,,,,,,,...,,,,,,,,"(37.98436361,-1.1285408)",4613,Región de Murcia
3,,,,,,,,,,,...,,,,,,,,"(38.34548705,-0.4831832)",151,Comunitat Valenciana
4,,,,,,,,,,,...,,,,,,,,"(37.87954225,-4.78032455)",2076,Andalucía


In [14]:
df['CCAA_ID'].value_counts()

Andalucía               23
Comunitat Valenciana    21
País Vasco              19
Cantabria               10
Navarra                 10
Cataluña                 9
Asturias                 9
Aragón                   9
Galicia                  7
La Rioja                 5
Comunidad de Madrid      4
Región de Murcia         3
Extremadura              1
Castilla-La Mancha       1
Name: CCAA_ID, dtype: int64

In [15]:
#Our next step will be turning each CCAA name into its ID (still a string here; it is cast to int later on).
#
#This is done with a single vectorised lookup instead of a row-by-row loop. The loop used the
#chained assignment df['CCAA_ID'].iloc[i] = ..., which raises SettingWithCopyWarning and does
#not write back to df at all when pandas' copy-on-write mode is enabled.

ccaa_name_to_id = {
    'Andalucía': '1',
    'Aragón': '2',
    'Asturias': '3',
    'Cantabria': '6',
    'Castilla y León': '7',
    'Castilla-La Mancha': '8',
    'Cataluña': '9',
    'Comunitat Valenciana': '10',
    'Extremadura': '11',
    'Galicia': '12',
    'Comunidad de Madrid': '13',
    'Región de Murcia': '14',
    'Navarra': '15',
    'País Vasco': '16',
    'La Rioja': '17',
}

#Names missing from the table are left untouched, which is what the if/elif chain did as well.
df['CCAA_ID'] = df['CCAA_ID'].map(lambda name: ccaa_name_to_id.get(name, name))

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [16]:
#Creating a dictionary of all CCAA and its ID.
#The names are listed in ID order, so each ID is just the (1-based) position, stored as a string.

_ccaa_names_in_id_order = [
    'Andalucía',
    'Aragón',
    'Asturias',
    'Illes Balears',
    'Canarias',
    'Cantabria',
    'Castilla y León',
    'Castilla-La Mancha',
    'Cataluña',
    'Comunitat Valenciana',
    'Extremadura',
    'Galicia',
    'Comunidad de Madrid',
    'Región de Murcia',
    'Navarra',
    'País Vasco',
    'La Rioja',
]

dict_ccaa = {
    ccaa_name: str(ccaa_id)
    for ccaa_id, ccaa_name in enumerate(_ccaa_names_in_id_order, start=1)
}

In [17]:
#Dropping 'name' from both dataframes (in place), we won't really use it.

for frame in (meteo, df):
    frame.drop(columns='name', inplace=True)

In [18]:
meteo.head()

Unnamed: 0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,...,w45,w46,w47,w48,w49,w50,w51,w52,w53,coords
0,7.6,7.6,7.2,7.2,7.6,7.6,7.8,7.8,7.6,8.0,...,8.4,7.8,7.8,7.2,7.6,7.6,7.8,7.8,7.8,"(40.95806,0.87139)"
1,7.0,7.0,7.0,7.0,7.0,7.0,7.2,7.6,7.4,7.4,...,7.4,7.2,7.6,7.2,7.0,7.2,7.6,7.2,7.0,"(41.21389,0.96333)"
2,7.6,7.0,7.4,7.2,7.2,7.2,7.6,7.6,7.2,7.6,...,8.0,8.4,7.8,7.4,7.6,7.4,7.6,7.4,6.8,"(41.14972,1.17889)"
3,7.8,7.8,7.8,7.8,7.8,7.8,7.8,7.8,7.8,8.2,...,9.0,8.6,8.2,7.8,7.8,7.8,7.8,7.8,7.8,"(41.12389,1.24917)"
4,7.0,7.0,7.0,5.6,6.8,6.8,7.2,7.2,6.8,7.2,...,7.4,7.6,6.6,7.2,7.2,6.8,7.2,7.0,6.8,"(41.41694,1.51917)"


## Looking for the nearest weather station

Now that we have the coordinates of both our destinations and all weather stations it's just a matter of crossing the two.

For some reason I couldn't go on without exporting and re-importing my dataframes: **Pandas** simply wouldn't recognize the columns. I also had to fill in the NaNs and change the column names. I still don't know why.

In [19]:
#Filling missing values with 0; the week columns are overwritten with real station values further down.

df = df.fillna(0)

In [20]:
#Changing column names: the labels at positions 1..52 become 'w2'..'w53' in both dataframes.
#
#The labels are rebuilt in a plain list and assigned back in one go. Writing into
#frame.columns.values[i] mutates the array behind an Index in place; an Index is meant to be
#immutable, so pandas is not told about the change (possibly the reason the columns were
#"not recognized" afterwards - to be confirmed).

for frame in (df, meteo):
    labels = list(frame.columns)
    for i in range(1,53):
        labels[i] = 'w' + str(i+1)
    frame.columns = labels

In [21]:
#Saving the dataframes and re-opening them.
#
#The cleaned station table goes to its own file. Writing it back to 'meteo.csv' overwrote the
#raw input with a version that no longer has the 'name' column, so re-running the notebook
#from the top failed at the drop('name') step.

meteo.to_csv('meteo_clean.csv', index=False)
meteo = pd.read_csv('meteo_clean.csv')

df.to_csv('df.csv', index=False)
df = pd.read_csv('df.csv')

In [22]:
#Assigning the closest weather station to each town.

import ast #literal_eval parses the "(lat, lon)" strings without executing them as code (eval would).

week_cols = ['w' + str(p) for p in range(1, 54)] #All week columns, w1..w53.

#Parsing every coordinate string once, instead of once per town/station pair.
town_coords = [ast.literal_eval(c) for c in towns['coords']]
station_coords = [ast.literal_eval(c) for c in meteo['coords']]

#For each town (in row order), the row position in meteo of its closest station.
nearest = []
for town in town_coords:
    distances = [hs.haversine(town, station) for station in station_coords] #Haversine distance to every station.
    #The first station wins on ties. There is no distance cap any more: the old 2000 limit left
    #a placeholder index behind (and crashed) whenever no station was inside it.
    nearest.append(distances.index(min(distances)))

#Copying each station's whole block of weekly values into its town's row in one assignment.
#df has one row per town, in the same order as towns, so the rows line up by position.
#This also avoids the chained df[col].iloc[i] = ... assignment and its SettingWithCopyWarning.
df[week_cols] = meteo[week_cols].iloc[nearest].to_numpy()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


In [23]:
df.head()

Unnamed: 0,w1,w2,w3,w4,w5,w6,w7,w8,w9,w10,...,w47,w48,w49,w50,w51,w52,w53,coords,town_ID,CCAA_ID
0,7.8,7.4,7.8,7.8,7.8,7.8,7.8,7.8,7.8,8.2,...,8.2,7.8,7.8,7.8,7.8,7.8,7.8,"(41.38424664,2.17634927)",884,9
1,8.2,7.8,7.8,8.2,8.2,7.8,7.8,7.8,8.6,9.0,...,8.2,8.2,7.8,8.2,8.2,8.2,7.8,"(39.47534441,-0.37565717)",7257,10
2,8.2,7.8,7.8,8.0,8.4,8.4,8.6,8.6,8.8,8.8,...,9.0,8.2,8.2,7.8,8.2,8.2,7.6,"(37.98436361,-1.1285408)",4613,14
3,7.8,7.8,7.8,8.2,8.2,8.2,7.8,7.8,8.6,9.0,...,8.6,8.2,8.2,8.2,8.2,8.2,7.8,"(38.34548705,-0.4831832)",151,10
4,7.6,7.8,7.8,7.8,7.6,7.6,8.0,8.6,8.4,8.8,...,7.8,8.2,8.2,7.6,7.8,7.8,7.4,"(37.87954225,-4.78032455)",2076,1


In [24]:
#Typecasting CCAA_ID to int, so it can be compared against integer CCAA IDs when filtering.

df["CCAA_ID"] = df["CCAA_ID"].astype(int)

In [25]:
#Exporting our results (without the index); this is the file the API loads.

df.to_csv('weekly_weather.csv', index=False)

# Mapping all weather stations

We can easily use **Folium** to map all our weather stations on a map and visually inspect it.

In [26]:
import ast #literal_eval parses the "(lat, lon)" strings without executing them as code.

#Initiating our map. It is called stations_map rather than map, so that Python's built-in
#map() is not shadowed for the rest of the notebook.
stations_map = folium.Map(location=[40.4167, -3.70325], zoom_start=6, tiles="OpenStreetMap")

#One marker per weather station.
for station_coords_text in meteo['coords']:
    folium.Marker(ast.literal_eval(station_coords_text)).add_to(stations_map)

stations_map #Displaying the map.

# Creating the API call response

It's finally time to create our API response, keeping in mind that it must meet the criteria listed at the top of the notebook.

In [74]:
#Showing week 28 next to the town and CCAA IDs, best CWI first.

df.sort_values(by='w28', ascending=False)[['w28', 'town_ID', 'CCAA_ID']]

Unnamed: 0,w28,town_ID,CCAA_ID
172,9.6,5067,3
73,9.6,3165,16
54,9.6,3219,16
139,9.4,5834,6
131,9.4,4251,12
...,...,...,...
37,6.6,3603,1
183,6.2,2731,1
78,6.2,2771,1
96,6.0,2747,1


In [118]:
#Filtering by CCAA: ID of the top-ranked town of CCAA 6 for week 1.

in_ccaa = df[df['CCAA_ID'] == 6]
in_ccaa.sort_values('w1', ascending=False)['town_ID'].tolist()[0]

5832

In [23]:
#Defining our final function:

def API_response(month, day, ccaa_list, year=2021, data=None, n_results=5):
    """
    Pick the best destinations for the week that contains the given date.

    Input:
        month, day: the date; it is converted to its ISO week number, which selects column 'w<week>'.
        ccaa_list: CCAA IDs (ints) to choose from. Repeated IDs are counted once.
        year: year used to build the date (defaults to 2021, the value that used to be hard-coded).
        data: dataframe with the week columns plus 'town_ID' and 'CCAA_ID'.
              Defaults to the notebook-level df.
        n_results: maximum number of destinations to return (defaults to 5).

    Output: a list of up to n_results town IDs, with no repeated values.
        - No CCAA: an empty list.
        - One CCAA: its best towns, best first.
        - 15 CCAAs (treated as the whole country): the best towns overall, best first.
        - Anything else: the results are spread between the selected CCAAs. The CCAAs are put
          in a random order and each one contributes its best town in turn, then its second
          best, and so on until n_results towns have been collected.

    Raises ValueError if year/month/day is not a valid date.
    """
    weather = df if data is None else data
    week_col = 'w' + str(datetime.date(year, month, day).isocalendar()[1]) #Obtaining the week column.

    selected = list(dict.fromkeys(ccaa_list)) #Dropping repeated CCAA IDs, keeping their order.
    if not selected: #Nothing to choose from (random.choice used to raise IndexError here).
        return []

    ranked = weather.sort_values(week_col, ascending=False) #Best weather first.

    if len(selected) == 15: #All of Spain has been selected: the absolute best, no matter which CCAA.
        return ranked['town_ID'].head(n_results).tolist()
    if len(selected) == 1: #Just one CCAA: its best destinations.
        return ranked.loc[ranked['CCAA_ID'] == selected[0], 'town_ID'].head(n_results).tolist()

    #One best-first queue of town IDs per selected CCAA, visited in a random CCAA order.
    #Drawing the CCAA five independent times could hit the same CCAA repeatedly, leaving other
    #CCAAs out and returning fewer than five towns even when more were available.
    queues = []
    for ccaa in random.sample(selected, len(selected)):
        towns_in_ccaa = ranked.loc[ranked['CCAA_ID'] == ccaa, 'town_ID'].tolist()
        if towns_in_ccaa:
            queues.append(towns_in_ccaa)

    #Round-robin: every CCAA's best town first, then every second best, and so on.
    response = []
    depth = 0
    while len(response) < n_results and any(depth < len(queue) for queue in queues):
        for queue in queues:
            if depth < len(queue) and queue[depth] not in response:
                response.append(queue[depth])
                if len(response) == n_results:
                    break
        depth += 1
    return response

## Testing our API function

In [180]:
#Testing with a single CCAA.
#(The call targets API_response, the function defined above; API_response2 is not defined in this notebook.)

API_response(6,15,[17])

[4158, 4120]

In [182]:
#Testing all CCAAs (15):
#(The call targets API_response, the function defined above; API_response2 is not defined in this notebook.)

API_response(6,15,[1,2,3,6,7,8,9,10,11,12,13,14,15,16,17])

[884, 4416, 1642, 3805, 1161]

In [183]:
# Testing with 3 CCAA.
#(The call targets API_response, the function defined above; API_response2 is not defined in this notebook.)

API_response(6,15,[6,7,8])

[136, 5832, 5864, 3723, 5803]

# API calls and structure

For now our **API** will follow a very simple structure, returning the correct destinations once an API call has been performed with the following parameters:

**mm**: month of the year.<br>
**dd**: day of the month.<br>
**ccaa_list**: list of CCAAs to perform the search.

Here's an example of a call for the month **11**, day **15** and CCAAs **[3,2,11]**:

http://127.0.0.1:5000/api/weather?mm=11&dd=15&ccaa=[3,2,11]


The code inside our **api.py** file is as follows:

In [None]:
import flask
import pandas as pd
import datetime
import random
from flask import request, jsonify

#Weather table exported by the notebook, loaded once when the module is imported.
df = pd.read_csv('weekly_weather.csv')

#The Flask application object that the routes below are registered on.
app = flask.Flask(__name__)
#Debug mode is switched on. NOTE(review): presumably meant for local prototyping only - confirm before deploying.
app.config["DEBUG"] = True


@app.route('/', methods=['GET'])
def home():
    """Serve a minimal HTML landing page describing the API."""
    heading = '<h1>On2wheels weather API</h1>'
    blurb = '<p>A prototype weather API for destination selection based on the CWI.</p>'
    return heading + '\n' + blurb


def _parse_ccaa_list(raw):
    """Turn a query value such as '[3,2,11]' (or '3,2,11') into a list of ints; raises ValueError on anything else."""
    return [int(item) for item in raw.strip().strip('[]').split(',') if item.strip()]


def _pick_destinations(data, week_col, ccaa_list, n_results=5):
    """Return up to n_results town IDs with the best week_col value, spread between the selected CCAAs."""
    selected = list(dict.fromkeys(ccaa_list))  # repeated IDs count once, order kept
    ranked = data.sort_values(week_col, ascending=False)  # best weather first

    if len(selected) == 15:  # whole country: the absolute best, whatever the CCAA
        return ranked['town_ID'].head(n_results).tolist()
    if len(selected) == 1:  # a single CCAA: its best towns
        return ranked.loc[ranked['CCAA_ID'] == selected[0], 'town_ID'].head(n_results).tolist()

    # One best-first queue of town IDs per CCAA, visited in a random CCAA order.
    # Drawing a CCAA five independent times could pick the same one repeatedly, leaving other
    # CCAAs out and returning fewer than five towns even when more were available.
    queues = []
    for ccaa in random.sample(selected, len(selected)):
        towns_in_ccaa = ranked.loc[ranked['CCAA_ID'] == ccaa, 'town_ID'].tolist()
        if towns_in_ccaa:
            queues.append(towns_in_ccaa)

    # Round-robin: every CCAA's best town first, then every second best, and so on.
    response = []
    depth = 0
    while len(response) < n_results and any(depth < len(queue) for queue in queues):
        for queue in queues:
            if depth < len(queue) and queue[depth] not in response:
                response.append(queue[depth])
                if len(response) == n_results:
                    break
        depth += 1
    return response


@app.route('/api/weather', methods=['GET'])
def api_all():
    """
    Return the best destinations for a date as JSON: {'destination': [town_ID, ...]}.

    Query parameters:
        mm:   month (integer).
        dd:   day of the month (integer).
        ccaa: CCAA IDs to search in, e.g. [3,2,11].

    Responds with HTTP 400 and {'error': ...} when a parameter is missing or malformed,
    when mm/dd do not form a valid date, or when no CCAA is given.
    """
    try:
        month = int(request.args['mm'])
        day = int(request.args['dd'])
        # SECURITY: this value used to go through eval(), which lets any caller run arbitrary
        # Python on the server. It is now parsed strictly as a list of integers.
        ccaa_list = _parse_ccaa_list(request.args['ccaa'])
        week_col = 'w' + str(datetime.date(2021, month, day).isocalendar()[1])
    except (KeyError, ValueError):
        # Missing parameter, non-numeric value or impossible date: previously an unhandled error (HTTP 500).
        return jsonify({'error': "expected integer 'mm' and 'dd' forming a valid date, and 'ccaa' such as [3,2,11]"}), 400

    if not ccaa_list:
        return jsonify({'error': "'ccaa' must contain at least one CCAA ID"}), 400

    # The module-level df is used, so the CSV is no longer re-read on every request.
    # Restart the server after regenerating weekly_weather.csv.
    return jsonify({'destination': _pick_destinations(df, week_col, ccaa_list)})

#Start the development server only when this file is executed directly, so that importing
#the module (for example from a WSGI server or a test) does not block on app.run().
if __name__ == '__main__':
    app.run()