# Brazil Temperature Data 

### Columns present across different data sources : 
- **DateValueCET**- sets the date for specific measurement
- **TimeValueCET** - sets the hour for the specific measurement

Relavant Columns:
- **Value:** which is the actual measurement
- **Unit:** values(Celsius)
- **PowerPriceAreaCode:** values (BRAZIL_SOUTH, BRAZIL_NORTH, BRAZIL_NORTHEAST, RAZIL_SOUTHEAST_CENTRALWEST)

In [None]:
import pandas as pd 
from get_files_only import get_file_from_xdrive
# Read the Excel file
df_temp = get_file_from_xdrive("Temperature_Data.xlsx")
df_temp = df_temp.rename(columns = {"Value": "Temperature"})
df_temp = df_temp[['DateValueCET', 'TimeValueCET', 'PowerPriceAreaCode', 'Temperature','Unit']]
df_temp['DateTime'] = pd.to_datetime((df_temp['DateValueCET']).astype(str) + ' ' + (df_temp['TimeValueCET']).astype(str), format='%Y-%m-%d %H:%M:%S')
df_temp['DateTime'] = pd.to_datetime(df_temp['DateTime'], format='%Y-%m-%d %H:%M:%S')
df_temp['Month'] = df_temp['DateTime'].dt.month
df_temp = df_temp.groupby(['DateTime', 'PowerPriceAreaCode']).mean().reset_index()

display(df_temp.groupby(['Month','PowerPriceAreaCode']).describe())
#df_temp.drop(columns={'Month'})

  df_temp = df_temp.groupby(['DateTime', 'PowerPriceAreaCode']).mean().reset_index()


Unnamed: 0_level_0,Unnamed: 1_level_0,Temperature,Temperature,Temperature,Temperature,Temperature,Temperature,Temperature,Temperature
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max
Month,PowerPriceAreaCode,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
8.0,BRAZIL_NORTH,146.0,27.973823,2.803696,24.570714,25.319583,27.221667,30.770893,33.031628
8.0,BRAZIL_NORTHEAST,146.0,24.474877,1.283118,22.844524,23.241607,24.247857,25.69625,27.065952
8.0,BRAZIL_SOUTH,146.0,13.225595,4.229925,5.801667,9.871667,13.290119,15.781192,22.026279
8.0,BRAZIL_SOUTHEAST_CENTRALWEST,146.0,17.781668,3.742463,12.224524,14.724924,17.001573,20.222321,26.160465
9.0,BRAZIL_NORTH,651.0,28.516166,2.916569,24.53881,25.7675,27.72875,31.30631,33.865385
9.0,BRAZIL_NORTHEAST,651.0,24.897494,1.390063,22.700952,23.59,24.64619,26.2735,27.373077
9.0,BRAZIL_SOUTH,651.0,18.204726,3.601527,10.446905,15.511823,17.907381,20.393305,29.32093
9.0,BRAZIL_SOUTHEAST_CENTRALWEST,651.0,22.804747,3.442217,16.254524,19.903333,22.133488,25.656374,30.506744


In [19]:
import plotly.express as px

fig_1 = px.line( 
    df_temp,
    x='DateTime',
    y='Temperature',
    color='PowerPriceAreaCode',
    title=f'Temperature in Celsius Over Time by Region for 2024',
    labels={'Temperature in Celsius': 'Temperature in Celsius', 'DateTime': 'Date and Time'}
)

""" # Update layout for better visibility
fig.update_layout(
    xaxis_title='Date and Time',
    yaxis_title='Rainfall Potential (GWh)',
    legend_title='Region',
    xaxis=dict(tickformat="%Y-%m-%d %H:%M")
) """

fig_1.show()

## Daily Average Temperature

In [13]:

df_temp = df_temp.drop(columns=['Month'])
# Convert 'DateTime' to date only (without time) for daily grouping
df_temp['Date'] = df_temp['DateTime'].dt.date

# Group by 'Date' and 'PowerPriceAreaCode' to calculate the daily mean
df_daily_mean = df_temp.groupby(['Date', 'PowerPriceAreaCode']).mean()

# Merge the daily mean back to the original DataFrame on 'Date' and 'PowerPriceAreaCode'
daily_merged = pd.merge(df_temp, df_daily_mean, on=['Date', 'PowerPriceAreaCode'])

daily_merged= daily_merged.rename(columns = {'Temperature_x': 'Hourly_Temperature', 'Temperature_y': 'Daily_Average_Temperature'})

#all_values =  pd.merge(merged_df, daily_merged, on=['PowerPriceAreaCode', 'DateTime'], how='outer')
#data_2023_SE_with_temperature = all_values[(all_values['PowerPriceAreaCode'] == 'BRAZIL_SOUTHEAST_CENTRALWEST') & (all_values['DateTime'].dt.year == 2024)]


display(daily_merged.head())

  df_daily_mean = df_temp.groupby(['Date', 'PowerPriceAreaCode']).mean()


Unnamed: 0,DateTime,PowerPriceAreaCode,Hourly_Temperature,Date,Daily_Average_Temperature
0,2024-08-25 22:00:00,BRAZIL_NORTH,30.867391,2024-08-25,30.273815
1,2024-08-25 23:00:00,BRAZIL_NORTH,29.680238,2024-08-25,30.273815
2,2024-08-25 22:00:00,BRAZIL_NORTHEAST,25.994286,2024-08-25,25.65631
3,2024-08-25 23:00:00,BRAZIL_NORTHEAST,25.318333,2024-08-25,25.65631
4,2024-08-25 22:00:00,BRAZIL_SOUTH,11.598148,2024-08-25,11.176931


In [20]:
import plotly.express as px

fig_1 = px.line( 
    daily_merged,
    x='DateTime',
    y='Daily_Average_Temperature',
    color='PowerPriceAreaCode',
    title=f'Daily Average Temperature in Celsius Over Time by Region for 2024',
    labels={'Daily Average Temperature in Celsius': 'Daily Average Temperature in Celsius', 'DateTime': 'Date and Time'}
)

""" # Update layout for better visibility
fig.update_layout(
    xaxis_title='Date and Time',
    yaxis_title='Rainfall Potential (GWh)',
    legend_title='Region',
    xaxis=dict(tickformat="%Y-%m-%d %H:%M")
) """

fig_1.show()