Within this file you will create the following dataframes and save them as CSV files in your `data/csv` folder.

* monthly medians on `temperature`, `relative humidity`, `precipitation`, and `surface pressure` for all available months
* yearly medians on `temperature`, `relative humidity`, `precipitation`, and `surface pressure` for all available years
* yearly medians on `temperature`, `relative humidity`, `precipitation`, and `surface pressure` for all available years along with harvest data (`million_60kgs_bag`, `nonbear_mill_trees`, `bear_mill_trees`, `avg_unemp_perc`) for each respective year 

In [51]:
import pandas as pd
import os  # Directories 
import ast # To safely evaluate strings as Python objects

In [5]:
# Show the directory this notebook is being run from
current_dir = os.getcwd()
print(current_dir)

/Users/sa17/Desktop/projects/projects/weather_api/notebooks


In [149]:
# Read the raw export from disk
raw = pd.read_csv("/Users/sa17/Desktop/projects/projects/weather_api/data/csv/data.csv")

# Label each row with the variable it holds
raw.index = ["time", "temperature", "relative humidity", "precipitation", "surface pressure"]

# Flip the frame so that every variable becomes a column
wide = raw.T

# Every cell is a list written out as text. Iterating a DataFrame yields its
# column labels, so for each column: parse the cells with ast.literal_eval,
# then take the list stored in the first row. Building a new DataFrame from
# those lists turns each list item into its own row.
df = pd.DataFrame({
    name: wide[name].apply(ast.literal_eval).iloc[0]
    for name in wide
})

df

# Key Points:
# ast.literal_eval: safely turns a string that spells out a Python literal (e.g., a list or a dictionary) into the real object.
# .iloc[0]: picks the list held in the first row; the transposed DataFrame is expected to contain a single row.


Unnamed: 0,time,temperature,relative humidity,precipitation,surface pressure
0,2022-01-01T00:00,18.0,96,0.0,906.6
1,2022-01-01T01:00,18.2,95,0.1,907.4
2,2022-01-01T02:00,18.0,96,0.1,907.2
3,2022-01-01T03:00,17.9,97,0.3,906.5
4,2022-01-01T04:00,17.8,98,1.0,905.9
...,...,...,...,...,...
17515,2023-12-31T19:00,26.5,75,0.5,908.8
17516,2023-12-31T20:00,25.9,78,0.5,908.4
17517,2023-12-31T21:00,25.2,82,0.5,908.5
17518,2023-12-31T22:00,24.0,85,3.2,909.0


In [150]:
# Parse the `time` strings into datetime values
timestamps = pd.to_datetime(df["time"])

# Replace the `time` column with a datetime index of the same name (df is modified in place)
df.drop(columns="time", inplace=True)
df.index = pd.DatetimeIndex(timestamps, name="time")

df

Unnamed: 0_level_0,temperature,relative humidity,precipitation,surface pressure
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-01 00:00:00,18.0,96,0.0,906.6
2022-01-01 01:00:00,18.2,95,0.1,907.4
2022-01-01 02:00:00,18.0,96,0.1,907.2
2022-01-01 03:00:00,17.9,97,0.3,906.5
2022-01-01 04:00:00,17.8,98,1.0,905.9
...,...,...,...,...
2023-12-31 19:00:00,26.5,75,0.5,908.8
2023-12-31 20:00:00,25.9,78,0.5,908.4
2023-12-31 21:00:00,25.2,82,0.5,908.5
2023-12-31 22:00:00,24.0,85,3.2,909.0


In [159]:
# Monthly medians on `temperature`, `relative humidity`, `precipitation`, and `surface pressure` for all available months

# "1ME" groups the datetime index into calendar months, each labelled with the
# month's last day. The columns are selected with a list: a bare
# comma-separated subscript would build a tuple key, which is not the
# documented way to pick several columns.
monthly_medians = df.resample("1ME")[
    ["temperature", "relative humidity", "precipitation", "surface pressure"]
].median()

# TODO(review): the notebook intro asks for this frame to be saved as a csv
# under `data/csv`; nothing is written to disk here yet.

monthly_medians


Unnamed: 0_level_0,temperature,relative humidity,precipitation,surface pressure
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-31,19.1,92.0,0.1,908.7
2022-02-28,19.1,92.0,0.0,909.2
2022-03-31,19.8,84.0,0.0,910.1
2022-04-30,18.45,83.0,0.0,911.3
2022-05-31,14.8,79.0,0.0,912.4
2022-06-30,15.4,73.0,0.0,913.1
2022-07-31,15.85,59.0,0.0,914.4
2022-08-31,16.7,56.0,0.0,914.0
2022-09-30,18.6,61.0,0.0,912.3
2022-10-31,19.8,72.0,0.0,910.7


In [None]:
# Yearly medians on `temperature`, `relative humidity`, `precipitation`, and `surface pressure` for all available years


In [None]:
# Yearly medians on `temperature`, `relative humidity`, `precipitation`, and `surface pressure` for all available years along with harvest data (`million_60kgs_bag`, `nonbear_mill_trees`, `bear_mill_trees`, `avg_unemp_perc`) for each respective year present for Minas Gerais

