In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, accuracy_score
from google.cloud import secretmanager

from pymongo.mongo_client import MongoClient
from pymongo.server_api import ServerApi

In [3]:
def getSecret(projectId : str, secretId : str, versionId : int):
    name = f"projects/{projectId}/secrets/{secretId}/versions/{versionId}"
    client = secretmanager.SecretManagerServiceClient()
    response = client.access_secret_version(name=name)
    payload = response.payload.data.decode("UTF-8")
    return payload

In [4]:
mongoConnectionString = getSecret(projectId="rising-apricot-401312", secretId="mongoConnectionString", versionId=1)

In [5]:
import pandas as pd

# Conecte-se ao servidor MongoDB (certifique-se de que o MongoDB esteja em execução)
client = MongoClient(mongoConnectionString, server_api=ServerApi('1'))  # URL de conexão com seu servidor MongoDB

# Especifique o banco de dados e a coleção que você deseja consultar
db = client["moonquakeDb"]
collection = db["moonquake"]

# Consulta para buscar os documentos desejados
# Por exemplo, aqui estamos buscando todos os documentos na coleção
cursor = collection.find({})

# Crie uma lista de dicionários a partir dos documentos retornados
data = [document for document in cursor]

# Feche a conexão com o servidor MongoDB
client.close()

# Crie um DataFrame do Pandas a partir dos dados
df = pd.DataFrame(data)


In [11]:
import json
from bson import json_util

In [12]:
json.dumps(data,default=json_util.default)

'[{"_id": {"$oid": "652185be293b8604b6c7c0e0"}, "Timestamp": {"$date": "1971-04-17T07:00:55Z"}, "Lat": 48.0, "Long": 35.0, "Magnitude": 2.8}, {"_id": {"$oid": "652185be293b8604b6c7c0e1"}, "Timestamp": {"$date": "1971-05-20T17:25:10Z"}, "Lat": 42.0, "Long": -24.0, "Magnitude": 2.0}, {"_id": {"$oid": "652185be293b8604b6c7c0e2"}, "Timestamp": {"$date": "1971-07-11T13:24:45Z"}, "Lat": 43.0, "Long": -47.0, "Magnitude": 1.9}, {"_id": {"$oid": "652185be293b8604b6c7c0e3"}, "Timestamp": {"$date": "1972-01-02T22:29:40Z"}, "Lat": 54.0, "Long": 101.0, "Magnitude": 1.9}, {"_id": {"$oid": "652185be293b8604b6c7c0e4"}, "Timestamp": {"$date": "1972-09-17T14:35:55Z"}, "Lat": 12.0, "Long": 46.0, "Magnitude": 1.0}, {"_id": {"$oid": "652185be293b8604b6c7c0e5"}, "Timestamp": {"$date": "1972-12-06T23:08:20Z"}, "Lat": 51.0, "Long": 45.0, "Magnitude": 1.4}, {"_id": {"$oid": "652185be293b8604b6c7c0e6"}, "Timestamp": {"$date": "1972-12-09T03:50:15Z"}, "Lat": -20.0, "Long": -80.0, "Magnitude": 1.2}, {"_id": {"$oi

In [81]:
df['Date'] = df['Timestamp'].dt.date

In [82]:
df

Unnamed: 0,_id,Timestamp,Lat,Long,Magnitude,DateFloat,Date
0,652185be293b8604b6c7c0e0,1971-04-17 07:00:55,48.0,35.0,2.8,4.071966e+16,1971-04-17
1,652185be293b8604b6c7c0e1,1971-05-20 17:25:10,42.0,-24.0,2.0,4.360831e+16,1971-05-20
2,652185be293b8604b6c7c0e2,1971-07-11 13:24:45,43.0,-47.0,1.9,4.808668e+16,1971-07-11
3,652185be293b8604b6c7c0e3,1972-01-02 22:29:40,54.0,101.0,1.9,6.323938e+16,1972-01-02
4,652185be293b8604b6c7c0e4,1972-09-17 14:35:55,12.0,46.0,1.0,8.558856e+16,1972-09-17
5,652185be293b8604b6c7c0e5,1972-12-06 23:08:20,51.0,45.0,1.4,9.25313e+16,1972-12-06
6,652185be293b8604b6c7c0e6,1972-12-09 03:50:15,-20.0,-80.0,1.2,9.272102e+16,1972-12-09
7,652185be293b8604b6c7c0e7,1973-02-08 22:52:10,33.0,35.0,0.8,9.805993e+16,1973-02-08
8,652185be293b8604b6c7c0e8,1973-03-13 07:56:30,-84.0,-134.0,3.2,1.008574e+17,1973-03-13
9,652185be293b8604b6c7c0e9,1973-06-20 20:22:00,-1.0,-71.0,2.2,1.094557e+17,1973-06-20


In [47]:
df['DateFloat'] = df['Timestamp'].values.astype("float64")
df.dropna(axis=0, inplace=True)
X = df[['DateFloat', 'Lat', 'Long']]
y = df[['Magnitude']]

In [48]:
# Divida o conjunto de dados em treinamento e teste
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [49]:
# Normalização dos dados
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [50]:
# Crie uma rede neural regressora
mlp_regressor = MLPRegressor(hidden_layer_sizes=(100, 50), max_iter=1000, random_state=42)

In [51]:
# Treine o modelo
mlp_regressor.fit(X_train_scaled, y_train)

  y = column_or_1d(y, warn=True)


In [55]:
X_test_scaled

array([[-0.72166748, -2.71391301, -2.23261902],
       [-0.01729253, -1.46687756,  0.89660354],
       [-0.54739687, -0.51170148, -1.11249958],
       [ 0.98044766, -0.69743016,  1.28775636],
       [-1.94053668,  0.78839931,  0.77214583],
       [-0.11304456,  0.47000728, -0.13461753]])

In [52]:
# Faça previsões no conjunto de teste
y_pred = mlp_regressor.predict(X_test_scaled)

# Avalie o desempenho do modelo
mse = mean_squared_error(y_test, y_pred)
print(f"Erro médio quadrático (MSE): {mse}")

Erro médio quadrático (MSE): 1.1083186869139732


In [53]:
y_pred

array([1.6250944 , 1.91247933, 1.41950309, 2.65574878, 1.52501476,
       1.12119036])

In [54]:
y_test

Unnamed: 0,Magnitude
8,3.2
13,0.9
9,2.2
22,1.8
0,2.8
11,0.7


In [58]:
X_test

Unnamed: 0,DateFloat,Lat,Long
8,1.008574e+17,-84.0,-134.0
13,1.356105e+17,-37.0,42.0
9,1.094557e+17,-1.0,-71.0
22,1.84838e+17,-8.0,64.0
0,4.071966e+16,48.0,35.0
11,1.308862e+17,36.0,-16.0


In [62]:
teste = pd.DataFrame({
    'DateFloat':['2023-09-07'.astype("float64")],
    'Lat':['-84'],
    'Long':['-134']
})

AttributeError: 'str' object has no attribute 'astype'

In [74]:
df['Timestamp'][0]

Timestamp('1971-04-17 07:00:55')

In [65]:
('1971-04-17 07:00:55').astype("float64")

AttributeError: 'str' object has no attribute 'astype'

In [None]:
X_train_scaled = scaler.fit_transform(X_train)