In [2]:
import numpy as np

In [3]:
import pandas as pd

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [6]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [7]:
# Cargar los datos
data = pd.read_csv('chicago_crimes.csv')

In [8]:
# Preprocesamiento de datos
data['Date'] = pd.to_datetime(data['Date'], format='%m/%d/%Y %I:%M:%S %p')

In [9]:
# Extraer información útil de la fecha
data['HOUR'] = data['Date'].dt.hour
data['DAY_OF_WEEK'] = data['Date'].dt.dayofweek
data['MONTH'] = data['Date'].dt.month

In [10]:
# Seleccionar características y etiquetas
X = data[['HOUR', 'DAY_OF_WEEK', 'MONTH']].values
y = data['Primary Type']

In [11]:
# Codificar las etiquetas
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [12]:
# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [13]:
# Escalar características
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [14]:
# Asegurarse de que los datos estén en el formato correcto para LSTM
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

In [16]:
# Construir la red neuronal recurrente (LSTM)
model = Sequential([
    LSTM(64, input_shape=(X_train.shape[1], X_train.shape[2]), activation='relu', return_sequences=True),
    LSTM(32, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

In [17]:
# Compilar el modelo
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [18]:
# Convertir y_train y y_test a arrays de NumPy
y_train = np.array(y_train)
y_test = np.array(y_test)

In [19]:
# Ajustar el modelo
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 6ms/step - accuracy: 0.2204 - loss: 2.3903 - val_accuracy: 0.2253 - val_loss: 2.3555
Epoch 2/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 7ms/step - accuracy: 0.2287 - loss: 2.3508 - val_accuracy: 0.2309 - val_loss: 2.3448
Epoch 3/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m221s[0m 8ms/step - accuracy: 0.2297 - loss: 2.3471 - val_accuracy: 0.2312 - val_loss: 2.3430
Epoch 4/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m220s[0m 8ms/step - accuracy: 0.2307 - loss: 2.3451 - val_accuracy: 0.2308 - val_loss: 2.3421
Epoch 5/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m218s[0m 8ms/step - accuracy: 0.2301 - loss: 2.3453 - val_accuracy: 0.2308 - val_loss: 2.3410
Epoch 6/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 7ms/step - accuracy: 0.2307 - loss: 2.3432 - val_accuracy: 0.2307 - val_loss:

<keras.src.callbacks.history.History at 0x7f6101263a10>

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [3]:
# Cargar los datos
data = pd.read_csv('chicago_crimes.csv')

In [4]:
# Preprocesamiento de datos
data['Date'] = pd.to_datetime(data['Date'], format='%m/%d/%Y %I:%M:%S %p')
data = data.sort_values(by='Date')

In [5]:
# Seleccionar características y etiquetas
X = data[['Date']].values
y = data['Primary Type']

In [6]:
# Codificar las etiquetas
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [7]:
# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [8]:
# Escalar características
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
# Ajustar las dimensiones para la entrada de CNN (reshape)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

In [10]:
# Verificar la forma de X_train y X_test
print(X_train.shape)
print(X_test.shape)

(908615, 1, 1)
(227154, 1, 1)


In [12]:
# Construir la red neuronal convolucional (CNN)
model = Sequential([
    Conv1D(filters=64, kernel_size=1, activation='relu', input_shape=(X_train.shape[1], 1)),
    MaxPooling1D(pool_size=1),  # Ajustar el pool_size a 1 para evitar reducción de dimensiones negativa
    Flatten(),
    Dense(50, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

In [13]:
# Compilar el modelo
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [14]:
# Ajustar el modelo
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 3ms/step - accuracy: 0.2090 - loss: 2.4111 - val_accuracy: 0.2336 - val_loss: 2.3665
Epoch 2/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 3ms/step - accuracy: 0.2116 - loss: 2.3904 - val_accuracy: 0.2336 - val_loss: 2.3682
Epoch 3/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 3ms/step - accuracy: 0.2140 - loss: 2.3850 - val_accuracy: 0.2336 - val_loss: 2.3701
Epoch 4/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 4ms/step - accuracy: 0.2136 - loss: 2.3859 - val_accuracy: 0.2336 - val_loss: 2.3761
Epoch 5/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 3ms/step - accuracy: 0.2144 - loss: 2.3857 - val_accuracy: 0.2336 - val_loss: 2.3739
Epoch 6/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 3ms/step - accuracy: 0.2133 - loss: 2.3855 - val_accuracy: 0.2336 - val_loss: 2.37

<keras.src.callbacks.history.History at 0x7f2958d806d0>

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [3]:
# Cargar los datos
data = pd.read_csv('chicago_crimes.csv')

In [4]:
# Preprocesamiento de datos
data['Date'] = pd.to_datetime(data['Date'], format='%m/%d/%Y %I:%M:%S %p')
data = data.sort_values(by='Date')

In [5]:
# Seleccionar características y etiquetas
X = data[['Date']].values
y = data['Primary Type']

In [6]:
# Codificar las etiquetas
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [7]:
# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

In [8]:
# Escalar características
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
# Ajustar las dimensiones para la entrada de ARNN (reshape)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

In [10]:
# Verificar la forma de X_train y X_test
print(X_train.shape)
print(X_test.shape)

(908615, 1, 1)
(227154, 1, 1)


In [12]:
# Construir la red neuronal auto-recurrente (ARNN)
model = Sequential([
    SimpleRNN(64, input_shape=(X_train.shape[1], 1), activation='relu', return_sequences=True),
    SimpleRNN(32, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

In [13]:
# Compilar el modelo
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [14]:
# Convertir y_train y y_test a arrays de NumPy
y_train = np.array(y_train)
y_test = np.array(y_test)

In [15]:
# Ajustar el modelo
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 4ms/step - accuracy: 0.2094 - loss: 2.4120 - val_accuracy: 0.2336 - val_loss: 2.3649
Epoch 2/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 3ms/step - accuracy: 0.2117 - loss: 2.3904 - val_accuracy: 0.2336 - val_loss: 2.3622
Epoch 3/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 4ms/step - accuracy: 0.2133 - loss: 2.3888 - val_accuracy: 0.2336 - val_loss: 2.3660
Epoch 4/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 4ms/step - accuracy: 0.2128 - loss: 2.3866 - val_accuracy: 0.2336 - val_loss: 2.3649
Epoch 5/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 4ms/step - accuracy: 0.2147 - loss: 2.3858 - val_accuracy: 0.2336 - val_loss: 2.3686
Epoch 6/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 4ms/step - accuracy: 0.2141 - loss: 2.3856 - val_accuracy: 0.2336 - val_loss: 

<keras.src.callbacks.history.History at 0x7f2b40084510>

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.preprocessing import StandardScaler

In [3]:
# Cargar los datos
data = pd.read_csv('chicago_crimes.csv')

In [4]:
# Seleccionar características y etiquetas
X = data[['X Coordinate', 'Y Coordinate']].values
y_latitude = data['Latitude'].values
y_longitude = data['Longitude'].values

In [5]:
# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_lat_train, y_lat_test, y_long_train, y_long_test = train_test_split(
 X, y_latitude, y_longitude, test_size=0.2, random_state=42
)

In [6]:
# Escalar características
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [8]:
# Construir la red neuronal profunda (DNN)
model_lat = Sequential([
 Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
 Dense(32, activation='relu'),
 Dense(1)
])

In [9]:
model_long = Sequential([
 Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
 Dense(32, activation='relu'),
 Dense(1)
])

In [10]:
# Compilar el modelo
model_lat.compile(optimizer='adam', loss='mse')
model_long.compile(optimizer='adam', loss='mse')

In [11]:
# Ajustar el modelo
model_lat.fit(X_train, y_lat_train, epochs=20, batch_size=32, validation_data=(X_test,
y_lat_test))
model_long.fit(X_train, y_long_train, epochs=20, batch_size=32, validation_data=(X_test,
y_long_test))

Epoch 1/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 2ms/step - loss: nan - val_loss: nan
Epoch 2/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 2ms/step - loss: nan - val_loss: nan
Epoch 3/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 2ms/step - loss: nan - val_loss: nan
Epoch 4/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 2ms/step - loss: nan - val_loss: nan
Epoch 5/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 3ms/step - loss: nan - val_loss: nan
Epoch 6/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 3ms/step - loss: nan - val_loss: nan
Epoch 7/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 3ms/step - loss: nan - val_loss: nan
Epoch 8/20
[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 3ms/step - loss: nan - val_loss: nan
Epoch 9/20
[1m28395/28395[0m [32m━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7f970fa91110>

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

In [3]:
# Cargar los datos
data = pd.read_csv('chicago_crimes.csv')

In [4]:
# Preprocesamiento de datos
X_title = data['Primary Type'].values
X_location = data['Location Description'].values
y = data['Description'].values

In [5]:
# Convertir todos los valores de X_location a cadenas de texto
X_location = X_location.astype(str)

In [6]:
# Codificar etiquetas
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [7]:
# Tokenización de texto
max_len = 50
vocab_size = 10000  # Número máximo de palabras en el vocabulario
embedding_dim = 64  # Dimensión de los vectores de embedding

In [8]:
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(X_title)
X_title_sequences = tokenizer.texts_to_sequences(X_title)
X_title_padded = pad_sequences(X_title_sequences, maxlen=max_len)

In [9]:
tokenizer.fit_on_texts(X_location)
X_location_sequences = tokenizer.texts_to_sequences(X_location)
X_location_padded = pad_sequences(X_location_sequences, maxlen=max_len)

In [11]:
import numpy as np # Asegurarse de importar numpy

In [12]:
# Concatenar los features si es necesario
X_padded = np.hstack((X_title_padded, X_location_padded))

In [13]:
# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.2, random_state=42)

In [15]:
# Modelo de texto
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=X_padded.shape[1]),
    LSTM(64, return_sequences=True),
    LSTM(32),
    Dense(len(label_encoder.classes_), activation='softmax')
])

In [16]:
# Compilar el modelo
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Ajustar el modelo
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/10


2024-11-05 20:16:48.077708: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 363446000 exceeds 10% of free system memory.


[1m28395/28395[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 128ms/step - accuracy: 0.3124 - loss: 2.7382

2024-11-05 21:17:39.665659: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 90861600 exceeds 10% of free system memory.
