In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("dados/VilaMariana03-01-23-06-04-2023.csv", delimiter=';')

df = df.replace(',', '.', regex=True)

df = df.dropna(subset=[
    'Data', 'Hora (UTC)', 'Temp. Ins. (C)', 'Temp. Max. (C)', 'Temp. Min. (C)',
    'Umi. Ins. (%)', 'Pressao Ins. (hPa)', 'Vel. Vento (m/s)', 'Dir. Vento (m/s)',
])

numeric_columns = ['Temp. Ins. (C)', 'Temp. Max. (C)', 'Temp. Min. (C)',
                   'Umi. Ins. (%)', 'Umi. Max. (%)', 'Umi. Min. (%)',
                   'Pressao Ins. (hPa)', 'Pressao Max. (hPa)',
                   'Pressao Min. (hPa)', 'Vel. Vento (m/s)',
                   'Dir. Vento (m/s)', 'Raj. Vento (m/s)', 'Chuva (mm)']

df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

df['Data'] = pd.to_datetime(df['Data'], format='%d/%m/%Y')

df['Hora (UTC)'] = pd.to_timedelta(df['Hora (UTC)'], unit='h')

df['Data'] = df['Data'] + df['Hora (UTC)']

df['Dia'] = df['Data'].dt.day
df['Mes'] = df['Data'].dt.month
df['Ano'] = df['Data'].dt.year
df['Hora'] = df['Data'].dt.hour

df = df.drop(columns=['Data', 'Hora (UTC)'])

df.head()

Unnamed: 0,Temp. Ins. (C),Temp. Max. (C),Temp. Min. (C),Umi. Ins. (%),Umi. Max. (%),Umi. Min. (%),Pto Orvalho Ins. (C),Pto Orvalho Max. (C),Pto Orvalho Min. (C),Pressao Ins. (hPa),...,Pressao Min. (hPa),Vel. Vento (m/s),Dir. Vento (m/s),Raj. Vento (m/s),Radiacao (KJ/m²),Chuva (mm),Dia,Mes,Ano,Hora
0,19.4,19.8,19.4,81.9,82.6,81.1,,,,921.6,...,920.2,0.5,0.0,1.6,,0.0,6,4,2023,0
1,18.7,19.4,18.7,83.3,83.3,81.9,,,,921.3,...,921.2,0.5,337.0,1.3,,0.0,10,4,2023,4
2,18.4,18.7,18.4,84.5,84.8,83.3,,,,921.4,...,921.1,0.0,45.0,2.4,,0.0,14,4,2023,8
3,18.1,18.4,18.1,86.8,86.8,84.5,,,,920.9,...,920.9,0.9,0.0,1.5,,0.0,18,4,2023,12
4,17.9,18.1,17.9,87.9,87.9,86.8,,,,920.8,...,,0.9,0.0,1.5,,0.0,22,4,2023,16


In [3]:
colunas_descartadas = [
    'Temp. Max. (C)', 'Temp. Min. (C)', 'Temp. Ins. (C)',
    'Umi. Max. (%)', 'Umi. Min. (%)', 'Pto Orvalho Ins. (C)',
    'Pto Orvalho Max. (C)', 'Pto Orvalho Min. (C)',
    'Pressao Max. (hPa)', 'Pressao Min. (hPa)',
    'Raj. Vento (m/s)', 'Radiacao (KJ/m²)', 'Chuva (mm)'
]

X = df.drop(colunas_descartadas, axis=1)

X.head()

Unnamed: 0,Umi. Ins. (%),Pressao Ins. (hPa),Vel. Vento (m/s),Dir. Vento (m/s),Dia,Mes,Ano,Hora
0,81.9,921.6,0.5,0.0,6,4,2023,0
1,83.3,921.3,0.5,337.0,10,4,2023,4
2,84.5,921.4,0.0,45.0,14,4,2023,8
3,86.8,920.9,0.9,0.0,18,4,2023,12
4,87.9,920.8,0.9,0.0,22,4,2023,16


In [4]:
X.shape

(8311, 8)

In [5]:
Y = df[['Temp. Max. (C)', 'Temp. Min. (C)', 'Temp. Ins. (C)']]

Y.head()

Unnamed: 0,Temp. Max. (C),Temp. Min. (C),Temp. Ins. (C)
0,19.8,19.4,19.4
1,19.4,18.7,18.7
2,18.7,18.4,18.4
3,18.4,18.1,18.1
4,18.1,17.9,17.9


In [6]:
Y.shape

(8311, 3)

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
from sklearn.linear_model import LinearRegression

In [9]:
modelo = LinearRegression()

In [10]:
x_treino, x_teste, y_treino, y_teste = train_test_split(X, Y, test_size=0.3)

In [11]:
y_treino = y_treino.astype('float64')
y_teste = y_teste.astype('float64')

In [12]:
modelo.fit(x_treino, y_treino)

In [13]:
score_percent = modelo.score(x_teste, y_teste) * 100
print(f"A precisão do modelo é de {score_percent:.2f}%")

A precisão do modelo é de 75.95%


In [14]:
modelo.predict([ [73.1, 918.1, 1.1, 180.0, 26, 2, 2024, 0] ])[0]



array([25.12552279, 23.82236131, 24.41781972])

In [15]:
import pickle

In [16]:
pickle.dump(modelo, open('modelo_regressao_linear.sav', 'wb'))