In [1]:
import pandas as pd
import glob

In [2]:
# Collect the INMET CSV exports for RJ covering 01-01-2021 to 31-08-2021.
# glob returns matches in an arbitrary, filesystem-dependent order; sorting makes the
# load order (and therefore the head()/tail() previews further down) reproducible.
filename = sorted(glob.glob('2021 - RJ/INMET_SE_RJ_*_01-01-2021_A_31-08-2021.CSV'))

In [3]:
# Empty frame that will hold the rows of every station once the CSV files are loaded.
merged_df = pd.DataFrame()

In [4]:
# Read every matched CSV, tag its rows with the station name and combine them.
# Fail early with a clear message: with no files the column selection further down
# would otherwise fail with a hard-to-read KeyError.
if not filename:
    raise FileNotFoundError(
        "No INMET CSV files matched the glob pattern; check the data directory."
    )

frames = []
for caminho in filename:
    # Station name = third '-'-separated piece of the path, minus its last 3 characters.
    # NOTE(review): the first '-' comes from the directory name '2021 - RJ', so this
    # index breaks if the folder is renamed; it also presumes file names shaped like
    # '... - <STATION>_DD-MM-YYYY_A_...' — confirm against the actual files.
    nome_estacao = caminho.split('-')[2].strip()[:-3]

    df = pd.read_csv(caminho, sep=';', decimal=',', thousands='.')
    df['ESTACAO'] = nome_estacao
    frames.append(df)

# One concat instead of DataFrame.append inside the loop: append was removed in
# pandas 2.0 and re-copied the accumulated frame on every iteration.
# As before, each file keeps its own row index, so index labels repeat across stations.
merged_df = pd.concat(frames)

In [5]:
# Keep only the date, hour, hourly max/min temperature and station columns.
# .copy() makes df_filtered an independent frame, so the later modifications
# (NaN filling, date conversion) do not raise SettingWithCopyWarning.
df_filtered = merged_df[[
    'Data',
    'Hora UTC',
    'TEMPERATURA M?XIMA NA HORA ANT. (AUT) (?C)',
    'TEMPERATURA M?NIMA NA HORA ANT. (AUT) (?C)',
    'ESTACAO',
]].copy()

In [6]:
# Give the two temperature columns short names for the rest of the analysis.
df_filtered = df_filtered.rename(
    columns={
        'TEMPERATURA M?XIMA NA HORA ANT. (AUT) (?C)': 'TEMPERATURA MAXIMA',
        'TEMPERATURA M?NIMA NA HORA ANT. (AUT) (?C)': 'TEMPERATURA MINIMA',
    }
)

In [7]:
# Preview the first rows to check the selected and renamed columns.
df_filtered.head()

Unnamed: 0,Data,Hora UTC,TEMPERATURA MAXIMA,TEMPERATURA MINIMA,ESTACAO
0,01/01/2021,0000 UTC,24.9,24.5,MARAMBAIA
1,01/01/2021,0100 UTC,25.0,24.4,MARAMBAIA
2,01/01/2021,0200 UTC,24.4,23.8,MARAMBAIA
3,01/01/2021,0300 UTC,24.0,23.4,MARAMBAIA
4,01/01/2021,0400 UTC,23.4,22.9,MARAMBAIA


In [8]:
# Preview the last rows (end of the last file that was loaded).
df_filtered.tail()

Unnamed: 0,Data,Hora UTC,TEMPERATURA MAXIMA,TEMPERATURA MINIMA,ESTACAO
5827,31/08/2021,1900 UTC,20.2,19.6,FORTE DE COPACABANA
5828,31/08/2021,2000 UTC,20.2,19.8,FORTE DE COPACABANA
5829,31/08/2021,2100 UTC,20.4,20.0,FORTE DE COPACABANA
5830,31/08/2021,2200 UTC,20.2,19.7,FORTE DE COPACABANA
5831,31/08/2021,2300 UTC,20.1,19.0,FORTE DE COPACABANA


In [9]:
# Check that the temperature columns were parsed as numbers (decimal=',' in read_csv).
df_filtered.dtypes

Data                   object
Hora UTC               object
TEMPERATURA MAXIMA    float64
TEMPERATURA MINIMA    float64
ESTACAO                object
dtype: object

In [10]:
# Number of missing values in each column.
df_filtered.isna().sum()

Data                   0
Hora UTC               0
TEMPERATURA MAXIMA    37
TEMPERATURA MINIMA    37
ESTACAO                0
dtype: int64

In [11]:
# Fill the missing hourly maxima with the overall column mean.
# The result is assigned back instead of using fillna(inplace=True) on the selected
# column: the inplace form acts on an intermediate Series (chained assignment), which
# warns on recent pandas and does not update df_filtered under Copy-on-Write.
# NOTE(review): the mean is taken over all stations and hours; a per-station or
# interpolated fill may be more appropriate — confirm with the analysis goals.
df_filtered['TEMPERATURA MAXIMA'] = df_filtered['TEMPERATURA MAXIMA'].fillna(
    df_filtered['TEMPERATURA MAXIMA'].mean()
)

In [12]:
# Fill the missing hourly minima with the overall column mean.
# The result is assigned back instead of using fillna(inplace=True) on the selected
# column: the inplace form acts on an intermediate Series (chained assignment), which
# warns on recent pandas and does not update df_filtered under Copy-on-Write.
# NOTE(review): the mean is taken over all stations and hours; a per-station or
# interpolated fill may be more appropriate — confirm with the analysis goals.
df_filtered['TEMPERATURA MINIMA'] = df_filtered['TEMPERATURA MINIMA'].fillna(
    df_filtered['TEMPERATURA MINIMA'].mean()
)

In [13]:
# Spot-check 10 random rows after the NaN filling.
# A fixed random_state keeps the displayed sample identical across Restart & Run All.
df_filtered.sample(10, random_state=42)

Unnamed: 0,Data,Hora UTC,TEMPERATURA MAXIMA,TEMPERATURA MINIMA,ESTACAO
1976,24/03/2021,0800 UTC,22.1,21.3,VILA MILITAR
2765,26/04/2021,0500 UTC,20.4,19.8,VILA MILITAR
64,03/01/2021,1600 UTC,25.5,24.4,FORTE DE COPACABANA
1250,22/02/2021,0200 UTC,25.8,24.5,FORTE DE COPACABANA
4866,22/07/2021,1800 UTC,23.3,22.1,JACAREPAGUA
1921,22/03/2021,0100 UTC,27.0,25.0,VILA MILITAR
1322,25/02/2021,0200 UTC,25.0,23.9,VILA MILITAR
2855,29/04/2021,2300 UTC,22.9,22.3,MARAMBAIA
2889,01/05/2021,0900 UTC,21.3,20.6,FORTE DE COPACABANA
4003,16/06/2021,1900 UTC,22.3,21.9,MARAMBAIA


#### MÁXIMA TEMPERATURA REGISTRADA NO ANO

In [14]:
# Highest hourly maximum across all loaded stations.
# The loaded files cover 01-01-2021 to 31-08-2021 (see the glob pattern), not the full year.
df_filtered['TEMPERATURA MAXIMA'].max()

40.2

#### MÍNIMA TEMPERATURA REGISTRADA NO ANO

In [15]:
# Lowest hourly minimum across all loaded stations.
# The loaded files cover 01-01-2021 to 31-08-2021 (see the glob pattern), not the full year.
df_filtered['TEMPERATURA MINIMA'].min()

8.4

#### MÉDIA DE TEMPERATURA MÁXIMA POR ESTAÇÕES

In [16]:
# Average of the hourly maxima for each station.
(
    df_filtered
    .groupby('ESTACAO')['TEMPERATURA MAXIMA']
    .mean()
)

ESTACAO
FORTE DE COPACABANA    24.007214
JACAREPAGUA            23.182094
MARAMBAIA              23.749648
VILA MILITAR           24.115597
Name: TEMPERATURA MAXIMA, dtype: float64

#### MÉDIA DE TEMPERATURA MÍNIMA POR ESTAÇÕES

In [17]:
# Average of the hourly minima for each station.
(
    df_filtered
    .groupby('ESTACAO')['TEMPERATURA MINIMA']
    .mean()
)

ESTACAO
FORTE DE COPACABANA    22.997805
JACAREPAGUA            21.909162
MARAMBAIA              22.574398
VILA MILITAR           22.798726
Name: TEMPERATURA MINIMA, dtype: float64

In [18]:
# The ten rows with the highest hourly maximum, hottest first.
(
    df_filtered
    .sort_values(by='TEMPERATURA MAXIMA', ascending=False)
    .head(10)
)

Unnamed: 0,Data,Hora UTC,TEMPERATURA MAXIMA,TEMPERATURA MINIMA,ESTACAO
714,30/01/2021,1800 UTC,40.2,37.0,VILA MILITAR
713,30/01/2021,1700 UTC,40.0,37.6,VILA MILITAR
688,29/01/2021,1600 UTC,40.0,32.7,MARAMBAIA
833,04/02/2021,1700 UTC,39.8,37.9,JACAREPAGUA
712,30/01/2021,1600 UTC,39.7,32.5,MARAMBAIA
835,04/02/2021,1900 UTC,39.7,37.5,VILA MILITAR
687,29/01/2021,1500 UTC,39.6,37.4,MARAMBAIA
834,04/02/2021,1800 UTC,39.6,37.4,VILA MILITAR
833,04/02/2021,1700 UTC,39.5,30.2,MARAMBAIA
689,29/01/2021,1700 UTC,39.4,36.8,VILA MILITAR


In [19]:
# Parse the 'Data' strings (day/month/year, e.g. 31/08/2021) into datetimes so the
# .dt accessor can be used for the monthly grouping that follows.
df_filtered['Data'] = pd.to_datetime(df_filtered['Data'], format='%d/%m/%Y')

#### MÉDIA DE TEMPERATURA MÁXIMA POR MÊS

In [20]:
# Average of the hourly maxima per calendar month (all stations pooled together).
(
    df_filtered
    .groupby(df_filtered['Data'].dt.month)['TEMPERATURA MAXIMA']
    .mean()
)

Data
1    27.641062
2    26.113207
3    26.872950
4    23.954167
5    22.533199
6    21.501259
7    19.841847
8    21.811959
Name: TEMPERATURA MAXIMA, dtype: float64