# **Dataset Marinha**

In [None]:
import pandas as pd

# Load the marine observations sample and print a quick overview of it.
marine_data = pd.read_csv('/content/Marine_CSV_sample.csv')
print(marine_data.head())
print(marine_data.describe())
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
marine_data.info()

  Identification  Latitude  Longitude  Time of Observation  \
0          D5GN6     -35.8        2.9  2015-01-12T00:00:00   
1          D5GN6     -35.9        6.9  2015-01-12T12:00:00   
2          D5GN6     -35.9        8.6  2015-01-12T18:00:00   
3          D5GN6     -35.7       11.9  2015-01-13T06:00:00   
4          D5GN6     -35.6       13.7  2015-01-13T12:00:00   

  Ice Accretion On Ship Thickness of Ice Accretion On Ship  \
0                                                            
1                                                            
2                                                            
3                                                            
4                                                            

  Rate of Ice Accretion on Ship Sea Level Pressure  \
0                                            29.83   
1                                            29.93   
2                                            30.02   
3                                     

# **Dataset Clima**

In [None]:
# Load the global daily climate summaries and print a quick overview of them.
climate_data = pd.read_csv('/content/Global_Sum_Day.csv')
print(climate_data.head())
print(climate_data.describe())
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
climate_data.info()

       STATION        DATE  LATITUDE  LONGITUDE  ELEVATION  \
0  72565003017  2018-01-01   39.8328  -104.6575     1650.2   
1  72565003017  2018-01-02   39.8328  -104.6575     1650.2   
2  72565003017  2018-01-03   39.8328  -104.6575     1650.2   
3  72565003017  2018-01-04   39.8328  -104.6575     1650.2   
4  72565003017  2018-01-05   39.8328  -104.6575     1650.2   

                                  NAME  TEMP  TEMP_ATTRIBUTES  DEWP  \
0  DENVER INTERNATIONAL AIRPORT, CO US  11.6               24   5.5   
1  DENVER INTERNATIONAL AIRPORT, CO US  21.2               24   7.3   
2  DENVER INTERNATIONAL AIRPORT, CO US  31.8               24   3.0   
3  DENVER INTERNATIONAL AIRPORT, CO US  34.6               24  11.6   
4  DENVER INTERNATIONAL AIRPORT, CO US  36.3               24  11.4   

   DEWP_ATTRIBUTES  ...  MXSPD   GUST   MAX  MAX_ATTRIBUTES   MIN  \
0               24  ...    9.9  999.9  28.0                   3.0   
1               24  ...    9.9  999.9  43.0                   

# **Dataset Precipitação Horária**

In [None]:
# Load the hourly precipitation sample and print a quick overview of it.
precipitation_data = pd.read_csv('/content/PRECIP_HLY_sample_csv.csv')
print(precipitation_data.head())
print(precipitation_data.describe())
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
precipitation_data.info()

       STATION     STATION_NAME  ELEVATION  LATITUDE  LONGITUDE  \
0  COOP:310301  ASHEVILLE NC US      682.1   35.5954   -82.5568   
1  COOP:310301  ASHEVILLE NC US      682.1   35.5954   -82.5568   
2  COOP:310301  ASHEVILLE NC US      682.1   35.5954   -82.5568   

             DATE   HPCP Measurement Flag Quality Flag  
0  20100101 00:00  99999                ]               
1  20100101 01:00      0                g               
2  20100102 06:00      1                                
       ELEVATION  LATITUDE  LONGITUDE          HPCP
count        3.0    3.0000     3.0000      3.000000
mean       682.1   35.5954   -82.5568  33333.333333
std          0.0    0.0000     0.0000  57734.160896
min        682.1   35.5954   -82.5568      0.000000
25%        682.1   35.5954   -82.5568      0.500000
50%        682.1   35.5954   -82.5568      1.000000
75%        682.1   35.5954   -82.5568  50000.000000
max        682.1   35.5954   -82.5568  99999.000000
<class 'pandas.core.frame.DataFrame

# **Criação do modelo de previsão**

**Pré-processamento dos Dados - Dados Marinhos, Climáticos e de Precipitação**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Pre-processing of the marine observations.
def preprocess_marine_data(data):
    """Return the numeric, complete-case subset of the marine observations.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw marine frame. It must contain every column listed in
        ``relevant_columns``; a missing column raises ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the relevant columns, each converted with
        ``pd.to_numeric(errors='coerce')`` (unparseable cells become NaN),
        with every row that contains a missing value removed. Row index
        labels of the surviving rows are preserved and ``data`` itself is
        not modified.
    """
    # Keep only the columns used downstream.
    relevant_columns = ['Latitude', 'Longitude', 'Sea Level Pressure', 'Air Temperature', 'Wave Height', 'Wind Speed']

    # Build a brand-new frame instead of assigning into a column slice: the
    # slice-then-assign pattern is what triggers pandas'
    # SettingWithCopyWarning, and dropna(inplace=True) on such a slice is
    # equally ambiguous.
    numeric = data[relevant_columns].apply(pd.to_numeric, errors='coerce')

    # Drop rows where any relevant column is missing or failed to convert.
    return numeric.dropna()

# Pre-processing of the daily climate summaries.
def preprocess_climate_data(data):
    """Return the numeric, complete-case subset of the climate summaries.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw climate frame. It must contain every column listed in
        ``relevant_columns``; a missing column raises ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the relevant columns, each converted with
        ``pd.to_numeric(errors='coerce')`` (unparseable cells become NaN),
        with every row that contains a missing value removed. Row index
        labels of the surviving rows are preserved and ``data`` itself is
        not modified.
    """
    # Keep only the columns used downstream.
    relevant_columns = ['LATITUDE', 'LONGITUDE', 'TEMP', 'DEWP', 'SLP', 'WDSP', 'PRCP']

    # Build a brand-new frame instead of assigning into a column slice: the
    # slice-then-assign pattern is what triggers pandas'
    # SettingWithCopyWarning, and dropna(inplace=True) on such a slice is
    # equally ambiguous.
    numeric = data[relevant_columns].apply(pd.to_numeric, errors='coerce')

    # NOTE(review): values such as 999.9 / 9999.9 look like "missing"
    # sentinels in this dataset and are NOT filtered here - confirm against
    # the data dictionary before modelling.
    # Drop rows where any relevant column is missing or failed to convert.
    return numeric.dropna()

# Pre-processing of the hourly precipitation records.
def preprocess_precipitation_data(data):
    """Return the numeric, complete-case subset of the precipitation records.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw precipitation frame. It must contain every column listed in
        ``relevant_columns``; a missing column raises ``KeyError``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the relevant columns, each converted with
        ``pd.to_numeric(errors='coerce')`` (unparseable cells become NaN),
        with every row that contains a missing value removed. Row index
        labels of the surviving rows are preserved and ``data`` itself is
        not modified.
    """
    # Keep only the columns used downstream.
    relevant_columns = ['LATITUDE', 'LONGITUDE', 'HPCP']

    # Build a brand-new frame instead of assigning into a column slice: the
    # slice-then-assign pattern is what triggers pandas'
    # SettingWithCopyWarning, and dropna(inplace=True) on such a slice is
    # equally ambiguous.
    numeric = data[relevant_columns].apply(pd.to_numeric, errors='coerce')

    # NOTE(review): an HPCP of 99999 looks like a "missing" sentinel rather
    # than a real measurement and is NOT filtered here - confirm against the
    # data dictionary before using HPCP as a training label.
    # Drop rows where any relevant column is missing or failed to convert.
    return numeric.dropna()

# Marine observations: read the raw CSV, then derive the cleaned frame.
marine_data = pd.read_csv('/content/Marine_CSV_sample.csv')
marine_data_processed = preprocess_marine_data(marine_data)

# Daily climate summaries: read the raw CSV, then derive the cleaned frame.
climate_data = pd.read_csv('/content/Global_Sum_Day.csv')
climate_data_processed = preprocess_climate_data(climate_data)

# Hourly precipitation: read the raw CSV, then derive the cleaned frame.
precipitation_data = pd.read_csv('/content/PRECIP_HLY_sample_csv.csv')
precipitation_data_processed = preprocess_precipitation_data(precipitation_data)

# Preview the first rows of every cleaned frame, one after the other, as a
# visual sanity check.
print(
    marine_data_processed.head(),
    climate_data_processed.head(),
    precipitation_data_processed.head(),
    sep='\n',
)

   Latitude  Longitude  Sea Level Pressure  Air Temperature  Wave Height  \
1     -35.9        6.9               29.93             66.2         10.0   
2     -35.9        8.6               30.02             61.2          7.0   
3     -35.7       11.9               30.14             64.6          7.0   
4     -35.6       13.7               30.12             66.7          7.0   
7     -35.2       18.6               29.98             69.3         14.0   

   Wind Speed  
1         165  
2         154  
3         118  
4         123  
7         154  
   LATITUDE  LONGITUDE  TEMP  DEWP     SLP  WDSP  PRCP
0   39.8328  -104.6575  11.6   5.5  1030.9   5.0   0.0
1   39.8328  -104.6575  21.2   7.3  1029.4   5.6   0.0
2   39.8328  -104.6575  31.8   3.0  1026.7   9.7   0.0
3   39.8328  -104.6575  34.6  11.6  1022.0   6.5   0.0
4   39.8328  -104.6575  36.3  11.4  1020.9   6.7   0.0
   LATITUDE  LONGITUDE   HPCP
0   35.5954   -82.5568  99999
1   35.5954   -82.5568      0
2   35.5954   -82.5568     

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[col] = pd.to_numeric(data[col], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[col] = pd.to_numeric(data[col], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.o

In [None]:
# Stack the three cleaned frames. Their column sets differ, so concat fills
# the columns a given source lacks with NaN; ignore_index avoids duplicated
# row labels coming from the three original indexes.
combined_data = pd.concat(
    [marine_data_processed, climate_data_processed, precipitation_data_processed],
    ignore_index=True,
)

# Only rows that actually carry the target can be used for supervised
# training. Rows contributed by the marine and climate frames have no 'HPCP'
# column, so their label is NaN after the concat and fitting on them fails.
labeled_data = combined_data.dropna(subset=['HPCP'])

# Split features and label. Feature columns that are NaN for every labelled
# row carry no information (and cannot be scaled meaningfully), so drop them.
# NOTE(review): the three sources share no join key and spell their
# coordinate columns differently ('Latitude' vs 'LATITUDE'), so stacking them
# does not yield joint features per observation - confirm how the datasets
# are meant to be aligned before trusting this model.
X = labeled_data.drop('HPCP', axis=1).dropna(axis=1, how='all')
y = labeled_data['HPCP']  # hourly precipitation is the prediction target

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise the features: fit on the training split only, then apply the
# same transform to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the random-forest regressor.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Predict on the held-out rows.
y_pred = model.predict(X_test_scaled)
