# Historicos GDL

In [46]:
import pandas as pd
import numpy as np

In [47]:
#importar archivo
historicos = pd.read_excel('Historicos.xls', header=6)
historicos.head()

Unnamed: 0,Local time in Guadalajara,T,Po,P,Pa,U,DD,Ff,ff10,ff3,...,Cm,Ch,VV,Td,RRR,tR,E,Tg,E',sss
0,16.02.2020 12:00,23.9,636.3,762.2,-0.1,40.0,Wind blowing from the north-northeast,1.0,,,...,"No Altocumulus, Altostratus or Nimbostratus.","Cirrus fibratus, sometimes uncinus, not progre...",15,9.5,,,Surface of ground dry (without cracks and no a...,,,
1,16.02.2020 09:00,13.1,636.4,762.7,0.9,82.0,"Calm, no wind",0.0,,,...,"No Altocumulus, Altostratus or Nimbostratus.","Cirrus fibratus, sometimes uncinus, not progre...",15,10.0,,,Surface of ground dry (without cracks and no a...,,,
2,16.02.2020 06:00,11.7,635.5,760.4,0.0,85.0,Wind blowing from the west-northwest,1.0,,,...,,,10,9.2,,,Surface of ground dry (without cracks and no a...,,,
3,16.02.2020 03:00,13.3,635.5,758.9,-0.6,76.0,"Calm, no wind",0.0,,,...,,,10,9.1,,,Surface of ground dry (without cracks and no a...,,,
4,16.02.2020 00:00,15.9,636.1,758.3,0.5,63.0,Wind blowing from the west,1.0,,,...,,,10,8.9,,,Surface of ground dry (without cracks and no a...,,,


##### Se separa el dia y la hora en dos columnas distintas, para mejor manejo de la información:

In [48]:
# CORRER UNA VEZ!
date = historicos["Local time in Guadalajara"].str.split(" ", n = 1, expand = True)
date.columns = ['Date', 'Time']
historicos.drop(columns =["Local time in Guadalajara"], inplace = True)
historicos = pd.concat([date, historicos], axis = 1)
historicos.head()

Unnamed: 0,Date,Time,T,Po,P,Pa,U,DD,Ff,ff10,...,Cm,Ch,VV,Td,RRR,tR,E,Tg,E',sss
0,16.02.2020,12:00,23.9,636.3,762.2,-0.1,40.0,Wind blowing from the north-northeast,1.0,,...,"No Altocumulus, Altostratus or Nimbostratus.","Cirrus fibratus, sometimes uncinus, not progre...",15,9.5,,,Surface of ground dry (without cracks and no a...,,,
1,16.02.2020,09:00,13.1,636.4,762.7,0.9,82.0,"Calm, no wind",0.0,,...,"No Altocumulus, Altostratus or Nimbostratus.","Cirrus fibratus, sometimes uncinus, not progre...",15,10.0,,,Surface of ground dry (without cracks and no a...,,,
2,16.02.2020,06:00,11.7,635.5,760.4,0.0,85.0,Wind blowing from the west-northwest,1.0,,...,,,10,9.2,,,Surface of ground dry (without cracks and no a...,,,
3,16.02.2020,03:00,13.3,635.5,758.9,-0.6,76.0,"Calm, no wind",0.0,,...,,,10,9.1,,,Surface of ground dry (without cracks and no a...,,,
4,16.02.2020,00:00,15.9,636.1,758.3,0.5,63.0,Wind blowing from the west,1.0,,...,,,10,8.9,,,Surface of ground dry (without cracks and no a...,,,


In [49]:
historicos.columns

Index(['Date', 'Time', 'T', 'Po', 'P', 'Pa', 'U', 'DD', 'Ff', 'ff10', 'ff3',
       'N', 'WW', 'W1', 'W2', 'Tn', 'Tx', 'Cl', 'Nh', 'H', 'Cm', 'Ch', 'VV',
       'Td', 'RRR', 'tR', 'E', 'Tg', 'E'', 'sss'],
      dtype='object')

## Nomenclatura:
* T = air temperature (degrees Celsius) at 2 metre height above the earth's surface
*	P0 =  atmospheric pressure at weather station level (millimeters of mercury)
*	P = atmospheric pressure reduced to mean sea level (millimeters of mercury)
*	U = relative humidity (%) at a height of 2 metres above the earth's surface
*	DD = mean wind direction (compass points) at a height of 10-12 metres above the earth’s surface over the 10-minute period immediately preceding the observation
*	FF = mean wind speed at a height of 10-12 metres above the earth’s surface over the 10-minute period immediately preceding the observation (meters per second)
*	FF10 = maximum gust value at a height of 10-12 metres above the earth’s surface over the 10-minute period immediately preceding the observation (meters per second)
*	FF3 = maximum gust value at a height of 10-12 metres above the earth’s surface between the periods of observations (meters per second)
*	Tn = minimum air temperature (degrees Celsius) during the past period (not exceeding 12 hours)
*	Tx = maximum air temperature (degrees Celsius) during the past period (not exceeding 12 hours)
*	Nh = amount of all the CL cloud present or, if no CL cloud is present, the amount of all the CM cloud present
*	H = height of the base of the lowest clouds (m)
*	VV = horizontal visibility (km)
*	RRR = amount of precipitation (millimeters)
*	sss = snow depth (cm)


***

In [50]:
#Interpolar datos:
historicos['T'] = historicos['T'].astype(float).interpolate(method='linear')
historicos['P'] = historicos['P'].astype(float).interpolate(method='linear')
historicos['Pa'] = historicos['Pa'].astype(float).interpolate(method='linear')
historicos['Tn'] = historicos['Tn'].astype(float).interpolate(method='linear')
historicos['Tx'] = historicos['Tx'].astype(float).interpolate(method='linear')
historicos['U'] = historicos['U'].astype(float).interpolate(method='linear')
historicos['Ff'] = round(historicos['Ff'].astype(float).interpolate(method='linear'),0)
historicos['Td'] = historicos['Td'].astype(float).interpolate(method='linear')

In [51]:
Nh = historicos['Nh']
Nh.head()

0    no clouds
1    no clouds
2          NaN
3          NaN
4          NaN
Name: Nh, dtype: object

In [52]:
# Ya tenemos los datos de la cantidad de nubes, ahora los pasaremos a porcentaje cuantitativo para poderlo utilizar
# Se utilizó de 0 a 1.00 y se promedió en caso de ser necesario 
# Esto afecta a la columna Nh 

#0 = "no clouds"
#0.05 = "10%  or less, but not 0"
#0.25 = "20–30%."
#0.40 = "40%."
#0.50 = "50%."
#0.60 = "60%."
#0.75 = "70 – 80%."
#0.95 = "90  or more, but not 100%"
#1.00 = "100%."

Nh = Nh.replace(np.nan, 0)

for i in range (len(Nh)):
    if Nh[i] == "no clouds":
        Nh.loc[i] = 0
    if Nh[i] == "10%  or less, but not 0":
        Nh.loc[i] = 0.05
    if Nh[i] == "20–30%.":
        Nh.loc[i] = 0.25
    if Nh[i] == "40%.":
        Nh.loc[i] = 0.4
    if Nh[i] == "50%.":
        Nh.loc[i] = 0.5
    if Nh[i] == "60%.":
        Nh.loc[i] = 0.6
    if Nh[i] == "70 – 80%.":
        Nh.loc[i] = 0.75
    if Nh[i] == "90  or more, but not 100%":
        Nh.loc[i] = 0.95
    if Nh[i] == "100%.":
        Nh.loc[i] = 1.00 
historicos['Nh'] = Nh

In [53]:
historicos['Nh']

0           0
1           0
2           0
3           0
4           0
5           0
6           0
7        0.05
8           0
9           0
10          0
11          0
12          0
13          0
14       0.05
15       0.05
16          0
17          0
18          0
19          0
20          0
21       0.05
22       0.05
23       0.05
24       0.75
25       0.05
26          0
27          0
28       0.25
29       0.95
         ... 
11786     0.6
11787    0.05
11788       0
11789    0.25
11790    0.05
11791     0.4
11792     0.6
11793    0.05
11794    0.05
11795    0.95
11796     0.6
11797     0.4
11798    0.25
11799     0.6
11800     0.4
11801    0.95
11802    0.95
11803    0.75
11804    0.75
11805    0.75
11806    0.75
11807     0.6
11808     0.5
11809    0.25
11810    0.25
11811     0.4
11812     0.4
11813     0.5
11814       0
11815       0
Name: Nh, Length: 11816, dtype: object