## Solución

In [5]:
# Importamos las librerías

import pandas as pd
import datetime as dt
from funpymodeling.exploratory import freq_tbl, status, profiling_num, cat_vars, num_vars

## **Ejercicio 1**

#### 1. Carga y exploración de datos

In [3]:
ruta = "https://raw.githubusercontent.com/nico-edvai/scipy-2022-salta/main/data/weatherdata--304-622.csv"
df_weather = pd.read_csv(ruta)
df_weather.head(5)

Unnamed: 0,Date,Longitude,Latitude,Elevation,Max Temperature,Min Temperature,Precipitation,Wind,Relative Humidity,Solar,Unnamed: 10
0,1/1/1979,-62.1875,-30.442301,75,34.968,18.019,0.346756,3.459731,0.539945,32.456311,
1,1/2/1979,-62.1875,-30.442301,75,32.055,21.653,10.099693,3.182952,0.810696,20.985752,
2,1/3/1979,-62.1875,-30.442301,75,32.643,19.994,0.0,3.529763,0.638956,28.406983,
3,1/4/1979,-62.1875,-30.442301,75,33.438,19.572,0.0,3.430782,0.48042,30.222842,
4,1/5/1979,-62.1875,-30.442301,75,35.729,20.306,0.0,3.149614,0.44507,30.734881,


In [4]:
status(df_weather)

Unnamed: 0,variable,q_nan,p_nan,q_zeros,p_zeros,unique,type
0,Date,0,0.0,0,0.0,12987,object
1,Longitude,0,0.0,0,0.0,1,float64
2,Latitude,0,0.0,0,0.0,1,float64
3,Elevation,0,0.0,0,0.0,1,int64
4,Max Temperature,0,0.0,0,0.0,10537,float64
5,Min Temperature,0,0.0,0,0.0,9952,float64
6,Precipitation,0,0.0,7646,0.588743,4547,float64
7,Wind,0,0.0,0,0.0,12987,float64
8,Relative Humidity,0,0.0,0,0.0,12987,float64
9,Solar,0,0.0,1,7.7e-05,12986,float64


Tenemos 11 variables, 1 de tipo categórico (Date) y el resto de tipo numérico.

#### 2. Convertimos el atributo 'Date' a tipo datetime

In [6]:
df_weather['Date'] = pd.to_datetime(df_weather['Date'])

In [8]:
df_weather.head(5)

Unnamed: 0,Date,Longitude,Latitude,Elevation,Max Temperature,Min Temperature,Precipitation,Wind,Relative Humidity,Solar,Unnamed: 10
0,1979-01-01,-62.1875,-30.442301,75,34.968,18.019,0.346756,3.459731,0.539945,32.456311,
1,1979-01-02,-62.1875,-30.442301,75,32.055,21.653,10.099693,3.182952,0.810696,20.985752,
2,1979-01-03,-62.1875,-30.442301,75,32.643,19.994,0.0,3.529763,0.638956,28.406983,
3,1979-01-04,-62.1875,-30.442301,75,33.438,19.572,0.0,3.430782,0.48042,30.222842,
4,1979-01-05,-62.1875,-30.442301,75,35.729,20.306,0.0,3.149614,0.44507,30.734881,


Vemos que ha cambiado la forma en que se reportan los registros para el atributo 'Date'.

Creamos un nuevo atributo 'Day' dado por el día de la semana al que corresponde 'Date'.

In [9]:
df_weather['Day'] = df_weather['Date'].dt.dayofweek
days = ['Lunes', 'Martes', 'Miércoles', 'Jueves', 'Viernes', 'Sábado', 'Domingo']
df_weather['Day'] = df_weather['Day'].replace([0, 1, 2, 3, 4, 5, 6], days)
df_weather.head(5)

Unnamed: 0,Date,Longitude,Latitude,Elevation,Max Temperature,Min Temperature,Precipitation,Wind,Relative Humidity,Solar,Unnamed: 10,Day
0,1979-01-01,-62.1875,-30.442301,75,34.968,18.019,0.346756,3.459731,0.539945,32.456311,,Lunes
1,1979-01-02,-62.1875,-30.442301,75,32.055,21.653,10.099693,3.182952,0.810696,20.985752,,Martes
2,1979-01-03,-62.1875,-30.442301,75,32.643,19.994,0.0,3.529763,0.638956,28.406983,,Miércoles
3,1979-01-04,-62.1875,-30.442301,75,33.438,19.572,0.0,3.430782,0.48042,30.222842,,Jueves
4,1979-01-05,-62.1875,-30.442301,75,35.729,20.306,0.0,3.149614,0.44507,30.734881,,Viernes


Calculamos la cantidad de lunes en la muestra

In [10]:
print(f"Hay en total {sum(df_weather['Day'] == 'Lunes')} lunes en la muestra.")

Hay en total 1856 lunes en la muestra.


## **Ejercicio 2**

#### 1. Calculamos la cantidad de precipitación para los días sábado y domingo

In [11]:
Prec_sab_total = sum(df_weather[df_weather['Day'] == 'Sábado']['Precipitation'])
Prec_dom_total = sum(df_weather[df_weather['Day'] == 'Domingo']['Precipitation'])

Calculamos el promedio de precipitación

In [12]:
Prec_prom = (Prec_sab_total + Prec_dom_total)/(len(df_weather['Day'] == 'Domingo') + len(df_weather['Day'] == 'Sábado'))
print(f"Promedio de precipitación durante los sábados y domingos: {Prec_prom}.")

Promedio de precipitación durante los sábados y domingos: 0.3162117222117813.


## **Ejercicio 3**

De la exploración del dataset se puede ver que las coordenadas geográficas son las mismas para todos los registros. Vamos a verificarlo.

In [13]:
Long_prom = df_weather[df_weather['Day'] == 'Miércoles']['Longitude'].mean()
print(f"Lontitud promedio durante todos los días miércoles: {Long_prom}.")

Lontitud promedio durante todos los días miércoles: -62.1875.


Este valor es el único para todo el dataset

In [14]:
df_weather['Longitude'].unique()

array([-62.1875])