In this file you will create the following dataframes and save them as CSV files in your `data/csv` folder.

* monthly medians on `temperature`, `relative humidity`, `precipitation`, and `surface pressure` for all available months
* yearly medians on `temperature`, `relative humidity`, `precipitation`, and `surface pressure` for all available years
* yearly medians on `temperature`, `relative humidity`, `precipitation`, and `surface pressure` for all available years along with harvest data (`million_60kgs_bag`, `nonbear_mill_trees`, `bear_mill_trees`, `avg_unemp_perc`) for each respective year 

In [69]:
import pandas as pd
import os  

In [70]:
# Query the current working directory. The value is neither stored nor
# displayed, because a notebook only echoes the last expression of a cell.
os.getcwd()

# Read the weather observations from the CSV file into a DataFrame
df = pd.read_csv(
    filepath_or_buffer="/Users/sa17/Desktop/projects/projects/weather_api/data/csv/data.csv"
)

# Echo the DataFrame as the cell output
df

Unnamed: 0,time,temperature,relative humidity,precipitation,surface pressure
0,2022-01-01T00:00,18.0,96,0.0,906.6
1,2022-01-01T01:00,18.2,95,0.1,907.4
2,2022-01-01T02:00,18.0,96,0.1,907.2
3,2022-01-01T03:00,17.9,97,0.3,906.5
4,2022-01-01T04:00,17.8,98,1.0,905.9
...,...,...,...,...,...
17515,2023-12-31T19:00,26.5,75,0.5,908.8
17516,2023-12-31T20:00,25.9,78,0.5,908.4
17517,2023-12-31T21:00,25.2,82,0.5,908.5
17518,2023-12-31T22:00,24.0,85,3.2,909.0


In [71]:
# Parse the `time` strings into datetime values and promote that column to the
# index, so the frame can later be resampled by calendar period
df = df.assign(time=pd.to_datetime(df["time"])).set_index("time")

# Echo the re-indexed DataFrame as the cell output
df

Unnamed: 0_level_0,temperature,relative humidity,precipitation,surface pressure
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-01 00:00:00,18.0,96,0.0,906.6
2022-01-01 01:00:00,18.2,95,0.1,907.4
2022-01-01 02:00:00,18.0,96,0.1,907.2
2022-01-01 03:00:00,17.9,97,0.3,906.5
2022-01-01 04:00:00,17.8,98,1.0,905.9
...,...,...,...,...
2023-12-31 19:00:00,26.5,75,0.5,908.8
2023-12-31 20:00:00,25.9,78,0.5,908.4
2023-12-31 21:00:00,25.2,82,0.5,908.5
2023-12-31 22:00:00,24.0,85,3.2,909.0


In [72]:
# Monthly medians on `temperature`, `relative humidity`, `precipitation`, and `surface pressure` for all available months.
# "1ME" groups the rows into calendar-month bins (month-end frequency).
# The columns are selected with a list, which is the documented way to pick
# several columns from a resampler.
monthly_medians = df.resample("1ME")[
    ["temperature", "relative humidity", "precipitation", "surface pressure"]
].median()

# Convert the DatetimeIndex to a PeriodIndex to keep only month and year
monthly_medians.index = monthly_medians.index.to_period("M")

# Rename the index to "month"
monthly_medians.rename_axis("month", inplace=True)

# Write the index as well: it is the only place the month label lives, so
# dropping it (index=False) would leave the saved rows unidentifiable.
# The file now starts with a "month" column followed by the four medians.
monthly_medians.to_csv("/Users/sa17/Desktop/projects/projects/weather_api/data/csv/monthly_medians.csv")

monthly_medians


Unnamed: 0_level_0,temperature,relative humidity,precipitation,surface pressure
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01,19.1,92.0,0.1,908.7
2022-02,19.1,92.0,0.0,909.2
2022-03,19.8,84.0,0.0,910.1
2022-04,18.45,83.0,0.0,911.3
2022-05,14.8,79.0,0.0,912.4
2022-06,15.4,73.0,0.0,913.1
2022-07,15.85,59.0,0.0,914.4
2022-08,16.7,56.0,0.0,914.0
2022-09,18.6,61.0,0.0,912.3
2022-10,19.8,72.0,0.0,910.7


In [73]:
# Yearly medians on `temperature`, `relative humidity`, `precipitation`, and `surface pressure` for all available years.
# "1YE" groups the rows into calendar-year bins (year-end frequency).
# The columns are selected with a list, which is the documented way to pick
# several columns from a resampler.
yearly_medians = df.resample("1YE")[
    ["temperature", "relative humidity", "precipitation", "surface pressure"]
].median()

# Convert the DatetimeIndex to a PeriodIndex to keep only year
yearly_medians.index = yearly_medians.index.to_period("Y")

# Rename the index to "year"
yearly_medians.rename_axis("year", inplace=True)

# Write the index as well: it is the only place the year label lives, so
# dropping it (index=False) would leave the saved rows unidentifiable.
# The file now starts with a "year" column followed by the four medians.
yearly_medians.to_csv("/Users/sa17/Desktop/projects/projects/weather_api/data/csv/yearly_medians.csv")

yearly_medians

Unnamed: 0_level_0,temperature,relative humidity,precipitation,surface pressure
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022,18.6,78.0,0.0,911.4
2023,20.7,78.0,0.0,913.4


In [75]:
# Goal of this step: yearly medians of `temperature`, `relative humidity`,
# `precipitation`, and `surface pressure` for all available years, together with
# the harvest data (`million_60kgs_bag`, `nonbear_mill_trees`, `bear_mill_trees`,
# `avg_unemp_perc`) for each respective year present for Minas Gerais.
# Start by reading the harvest CSV file into its own DataFrame.
df2 = pd.read_csv(
    filepath_or_buffer="/Users/sa17/Desktop/projects/projects/weather_api/data/csv/br_final.csv"
)

# Echo the harvest DataFrame as the cell output
df2

Unnamed: 0,country,subdivision,type,million_60kgs_bag,year,nonbear_mill_trees,bear_mill_trees,nonbear_thous_hect,bear_thous_hect,trees_hect_bear,trees_hect_nonbear,avg_unemp_perc
0,Brazil,Minas Gerais,Arabica,16.2,2002,1500.0,4465.0,495.0,2120.0,2106.0,3030.0,
1,Brazil,Espirito Santo,Arabica,2.2,2002,1500.0,4465.0,495.0,2120.0,2106.0,3030.0,
2,Brazil,Espirito Santo,Robusta,7.5,2002,1500.0,4465.0,495.0,2120.0,2106.0,3030.0,
3,Brazil,Sao Paulo,Arabica,3.2,2002,1500.0,4465.0,495.0,2120.0,2106.0,3030.0,
4,Brazil,Parana,Arabica,0.5,2002,1500.0,4465.0,495.0,2120.0,2106.0,3030.0,6.95
...,...,...,...,...,...,...,...,...,...,...,...,...
110,Brazil,Minas Gerais,Arabica,28.5,2023,1510.0,6100.0,475.0,2020.0,3020.0,3179.0,
111,Brazil,Espirito Santo,Arabica,4.4,2023,1510.0,6100.0,475.0,2020.0,3020.0,3179.0,
112,Brazil,Espirito Santo,Robusta,16.6,2023,1510.0,6100.0,475.0,2020.0,3020.0,3179.0,
113,Brazil,Sao Paulo,Arabica,5.1,2023,1510.0,6100.0,475.0,2020.0,3020.0,3179.0,
