# Explore here

In [1]:
# Libraries
import pandas as pd
import numpy as np
import regex as re

import matplotlib.pyplot as plt
import seaborn as sns

from nltk import download
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.svm import SVC

import warnings

In [2]:
rawdata = pd.read_csv('https://raw.githubusercontent.com/4GeeksAcademy/NLP-project-tutorial/main/url_spam.csv')

rawdata.to_csv('../data/raw/url_spam_raw.csv')
rawdata.head()

Unnamed: 0,url,is_spam
0,https://briefingday.us8.list-manage.com/unsubs...,True
1,https://www.hvper.com/,True
2,https://briefingday.com/m/v4n3i4f3,True
3,https://briefingday.com/n/20200618/m#commentform,False
4,https://briefingday.com/fan,True


In [3]:
rawdata['is_spam'] = rawdata['is_spam'].astype(int) 

rawdata.head()

Unnamed: 0,url,is_spam
0,https://briefingday.us8.list-manage.com/unsubs...,1
1,https://www.hvper.com/,1
2,https://briefingday.com/m/v4n3i4f3,1
3,https://briefingday.com/n/20200618/m#commentform,0
4,https://briefingday.com/fan,1


In [4]:
rawdata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2999 entries, 0 to 2998
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   url      2999 non-null   object
 1   is_spam  2999 non-null   int32 
dtypes: int32(1), object(1)
memory usage: 35.3+ KB


In [5]:
rawdata.duplicated().sum()

dupes = rawdata[rawdata.duplicated()]

print(dupes)

                                                    url  is_spam
60    https://briefingday.us8.list-manage.com/unsubs...        1
61                               https://www.hvper.com/        1
62                   https://briefingday.com/m/v4n3i4f3        1
64                          https://briefingday.com/fan        1
113                         https://briefingday.com/fan        1
...                                                 ...      ...
2971  https://www.cnbc.com/2020/06/29/stock-market-f...        0
2972                      https://thehustle.co/account/        1
2973                              https://thehustle.co/        1
2979               https://www.bloomberg.com/tosv2.html        1
2995                      https://www.youtube.com/watch        1

[630 rows x 2 columns]


In [6]:
briefingday = rawdata[rawdata['url'].str.contains('briefingday')]
print(f"briefingday count = {len(briefingday)}")

yt = rawdata[rawdata['url'].str.contains('youtube')]
print(f"youtube count = {len(yt)}")

briefingday count = 69
youtube count = 104


In [7]:
yt_dupes = dupes[dupes['url'].str.contains('youtube')]
print(f"yt dupes count = {len(yt_dupes)}")

yt dupes count = 14


##### No quitamos los duplicados, ya que muchos duplicados realmente son links distintos y pueden aportar a la clasificación del modelo

In [8]:
https = 0
http = 0
for row in rawdata['url']:
    if str(row).startswith('https://'):
        https += 1
    elif str(row).startswith('http://'):
        http += 1
    else:
        print(row)


print(f"https:// = {https}")
print(f"http:// = {http}")
print(f"total = {https + http}")
print(f"dataset length = {len(rawdata)}")

https:// = 2945
http:// = 54
total = 2999
dataset length = 2999


In [9]:
def clean_htps(columna):
    # defino la nueva columna
    new_column = []
    # cambio y sustituyo los inicios de la columna raw
    for row in columna:
        if str(row).startswith('https://'):
            row = row.replace('https://', '')
            new_column.append(row) # guardo el resultado en la lista vacía
        elif str(row).startswith('http://'):
            row = row.replace('http://', '')
            new_column.append(row)
        else:
            new_column.append(row)

    final_column = []
    # ahora que ya no hay https, quito el www del inicio
    for row in new_column:
        if str(row).startswith('www.'):
            row = row.replace('www.', '')
            final_column.append(row)
        else: 
            final_column.append(row)


    return final_column

rawdata['url_stripped'] = clean_htps(rawdata['url'])
print(rawdata)

                                                    url  is_spam  \
0     https://briefingday.us8.list-manage.com/unsubs...        1   
1                                https://www.hvper.com/        1   
2                    https://briefingday.com/m/v4n3i4f3        1   
3      https://briefingday.com/n/20200618/m#commentform        0   
4                           https://briefingday.com/fan        1   
...                                                 ...      ...   
2994  https://www.smartcitiesworld.net/news/news/dee...        0   
2995                      https://www.youtube.com/watch        1   
2996  https://techcrunch.com/2019/07/04/an-optimisti...        0   
2997  https://www.technologyreview.com/2019/12/20/13...        0   
2998       https://www.bbc.com/news/technology-51018758        0   

                                           url_stripped  
0           briefingday.us8.list-manage.com/unsubscribe  
1                                            hvper.com/  
2        

In [10]:
def get_dot(row):
    # creo la lista para guardar los dots
    dots = []
    # divido el string en base al punto, y me quedo con lo que viene despúes del punto
    x = str(row).partition('.')[2]
    # guardo 3 caracteres despues del punto (.com)
    dots.append(str(x)[:3])
    return dots

# aplico la función a toda la columna
puntos = rawdata['url'].apply(get_dot)
print(puntos.describe())

count      2999
unique      386
top       [com]
freq        534
Name: url, dtype: object


In [11]:
print(rawdata)

                                                    url  is_spam  \
0     https://briefingday.us8.list-manage.com/unsubs...        1   
1                                https://www.hvper.com/        1   
2                    https://briefingday.com/m/v4n3i4f3        1   
3      https://briefingday.com/n/20200618/m#commentform        0   
4                           https://briefingday.com/fan        1   
...                                                 ...      ...   
2994  https://www.smartcitiesworld.net/news/news/dee...        0   
2995                      https://www.youtube.com/watch        1   
2996  https://techcrunch.com/2019/07/04/an-optimisti...        0   
2997  https://www.technologyreview.com/2019/12/20/13...        0   
2998       https://www.bbc.com/news/technology-51018758        0   

                                           url_stripped  
0           briefingday.us8.list-manage.com/unsubscribe  
1                                            hvper.com/  
2        

In [12]:
rawdata.drop('url', axis=1, inplace=True)

In [13]:
def prosesin(text):
    # quedarme con letras y espacios
    text = re.sub(r'[^a-z ]', " ", text)

    # saco espacios
    text = re.sub(r'\s+[a-zA-Z]\s+', " ", text)
    text = re.sub(r'\^[a-zA-Z]\s+', " ", text)

    # Reducir espacios en blanco múltiples a uno único
    text = re.sub(r'\s+', " ", text.lower())

    return text.split()


In [14]:
baf = [rawdata['url_stripped'].apply(prosesin)]
baf

[0       [briefingday, us, list, manage, com, unsubscribe]
 1                                            [hvper, com]
 2                                [briefingday, com, v, i]
 3                      [briefingday, com, m, commentform]
 4                                 [briefingday, com, fan]
                               ...                        
 2994    [smartcitiesworld, net, news, news, deepfake, ...
 2995                                [youtube, com, watch]
 2996    [techcrunch, com, an, optimistic, view, of, de...
 2997    [technologyreview, com, this, startup, claims,...
 2998                         [bbc, com, news, technology]
 Name: url_stripped, Length: 2999, dtype: object]

In [15]:
rawdata['url_processed'] = rawdata['url_stripped'].apply(prosesin)

rawdata.head()

Unnamed: 0,is_spam,url_stripped,url_processed
0,1,briefingday.us8.list-manage.com/unsubscribe,"[briefingday, us, list, manage, com, unsubscribe]"
1,1,hvper.com/,"[hvper, com]"
2,1,briefingday.com/m/v4n3i4f3,"[briefingday, com, v, i]"
3,0,briefingday.com/n/20200618/m#commentform,"[briefingday, com, m, commentform]"
4,1,briefingday.com/fan,"[briefingday, com, fan]"


In [16]:
rawdata.drop('url_stripped', axis=1, inplace=True)

In [17]:
rawdata.reset_index()
rawdata

Unnamed: 0,is_spam,url_processed
0,1,"[briefingday, us, list, manage, com, unsubscribe]"
1,1,"[hvper, com]"
2,1,"[briefingday, com, v, i]"
3,0,"[briefingday, com, m, commentform]"
4,1,"[briefingday, com, fan]"
...,...,...
2994,0,"[smartcitiesworld, net, news, news, deepfake, ..."
2995,1,"[youtube, com, watch]"
2996,0,"[techcrunch, com, an, optimistic, view, of, de..."
2997,0,"[technologyreview, com, this, startup, claims,..."


In [18]:
download("wordnet")
lemmatizer = WordNetLemmatizer()

download("stopwords")
stop_words = stopwords.words("english")

def lemmatize_text(words, lemmatizer = lemmatizer):
    # lematisasion
    tokens = [lemmatizer.lemmatize(word) for word in words]
    # fuera stop words
    tokens = [word for word in tokens if word not in stop_words]

    # eliminar palabras chiquitas
    tokens = [word for word in tokens if len(word) >= 3]

    return tokens





[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\34616\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\34616\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [19]:
df = rawdata

df['url_processed'] = df['url_processed'].apply(lemmatize_text)

df

Unnamed: 0,is_spam,url_processed
0,1,"[briefingday, list, manage, com, unsubscribe]"
1,1,"[hvper, com]"
2,1,"[briefingday, com]"
3,0,"[briefingday, com, commentform]"
4,1,"[briefingday, com, fan]"
...,...,...
2994,0,"[smartcitiesworld, net, news, news, deepfake, ..."
2995,1,"[youtube, com, watch]"
2996,0,"[techcrunch, com, optimistic, view, deepfakes]"
2997,0,"[technologyreview, com, startup, claim, deepfa..."


In [20]:
from sklearn.feature_extraction.text import TfidfVectorizer

token_list = df['url_processed']
token_list = [" ".join(token) for token in token_list]

vect = TfidfVectorizer(max_features = 5000, max_df = 0.6, min_df = 5)

X = vect.fit_transform(token_list).toarray()
y = df['is_spam']

X[:5]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 21)


In [22]:
model = SVC(kernel = "linear", random_state = 21)

model.fit(X_train, y_train)

In [23]:
y_pred = model.predict(X_test)
y_pred[:10]

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 1])

In [24]:
accuracy_score(y_test, y_pred)

0.9266666666666666

In [25]:
def check_for_spam():
    # pido al usuario una url
    new_url = str(input("Introduce una url para checkear si es spam: "))

    # proceso el texto de la url como antes
    url = prosesin(new_url)
    url = lemmatize_text(url, lemmatizer)
    url = " ".join(url)

    # vectorizamos la url
    vect_url = vect.transform([url]).toarray()

    # predecimos si es spam o no:
    prediction = model.predict(vect_url)

    if prediction == 0:
        print("Limpito de spam mi rey")
    elif prediction == 1:
        print("Eso huele a spam mi rey")

In [26]:
check_for_spam()

Limpito de spam mi rey


#### optimizació

In [27]:
from sklearn.model_selection import GridSearchCV

model_base = SVC()

hyperparameters = {'kernel': ["linear", "rbf", "poly"], 
                   'C': [1, 10, 100, 1000], 
                   'gamma': ["scale", "auto"], 
                   'random_state': [21]
}

grid = GridSearchCV(model_base, hyperparameters, cv=5, scoring='accuracy', n_jobs=-1)

grid.fit(X_train, y_train)
best_params = grid.best_params_
print(f"Best hyperparameters: {grid.best_params_}")

KeyboardInterrupt: 

In [None]:
# runneando la optimització

model_opt1 = SVC(**best_params)
model_opt1.fit(X_train, y_train)

y_pred_opt1 = model_opt1.predict(X_test)
y_pred_train_opt1 = model_opt1.predict(X_train)


y_pred_opt1[:10]

array([1, 0, 0, 0, 0, 0, 0, 0, 0, 1])

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report

accuracy_test = accuracy_score(y_test, y_pred_opt1)
accuracy_train = accuracy_score(y_train, y_pred_train_opt1)

f1_score_test = f1_score(y_test, y_pred_opt1)
f1_score_train = f1_score(y_train, y_pred_train_opt1)

precision_test = precision_score(y_test, y_pred_opt1)
precision_train = precision_score(y_train, y_pred_train_opt1)

recall_test = recall_score(y_test, y_pred_opt1)
recall_train = recall_score(y_train, y_pred_train_opt1)

print("Test Score: ")
print("Accuracy Test: ", accuracy_test)
print("F1 score Test: ", f1_score_test)
print("Precision Test: ", precision_test)
print("Recall Test: ", recall_test)
print("")
print("Train Score: ")
print("Accuracy Train: ", accuracy_train)
print("F1 score Train: ", f1_score_train)
print("Precision Train: ", precision_train)
print("Recall Train: ", recall_train)

print("")
print("Model 1: ")
print("")
print(classification_report(y_test, y_pred_opt1, target_names=["No Spam", "Spam"]))

Test Score: 
Accuracy Test:  0.9516666666666667
F1 score Test:  0.8953068592057761
Precision Test:  0.9117647058823529
Recall Test:  0.8794326241134752

Train Score: 
Accuracy Train:  0.9699874947894956
F1 score Train:  0.9343065693430657
Precision Train:  0.9463955637707948
Recall Train:  0.9225225225225225

Model 1: 

              precision    recall  f1-score   support

     No Spam       0.96      0.97      0.97       459
        Spam       0.91      0.88      0.90       141

    accuracy                           0.95       600
   macro avg       0.94      0.93      0.93       600
weighted avg       0.95      0.95      0.95       600



# resampling

In [28]:
from imblearn.over_sampling import SMOTE 
from collections import Counter

oversample = SMOTE(random_state=21) 
X_res, y_res = oversample.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.2, random_state=21)
counter_train = Counter(y_train)
print("counter train: ")
print(counter_train)
counter_test = Counter(y_test)
print("counter test:")
print(counter_test)

counter train: 
Counter({1: 1845, 0: 1839})
counter test:
Counter({0: 464, 1: 458})


found 0 physical cores < 1
  File "c:\Users\34616\AppData\Local\Programs\Python\Python311\Lib\site-packages\joblib\externals\loky\backend\context.py", line 282, in _count_physical_cores
    raise ValueError(f"found {cpu_count_physical} physical cores < 1")


In [29]:
# runneando la optimisasió

model_opt1 = SVC(**best_params)
model_opt1.fit(X_train, y_train)

y_pred_opt1 = model_opt1.predict(X_test)
y_pred_train_opt1 = model_opt1.predict(X_train)


y_pred_opt1[:10]

NameError: name 'best_params' is not defined

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import classification_report

accuracy_test = accuracy_score(y_test, y_pred_opt1)
accuracy_train = accuracy_score(y_train, y_pred_train_opt1)

f1_score_test = f1_score(y_test, y_pred_opt1)
f1_score_train = f1_score(y_train, y_pred_train_opt1)

precision_test = precision_score(y_test, y_pred_opt1)
precision_train = precision_score(y_train, y_pred_train_opt1)

recall_test = recall_score(y_test, y_pred_opt1)
recall_train = recall_score(y_train, y_pred_train_opt1)

print("Test Score: ")
print("Accuracy Test: ", accuracy_test)
print("F1 score Test: ", f1_score_test)
print("Precision Test: ", precision_test)
print("Recall Test: ", recall_test)
print("")
print("Train Score: ")
print("Accuracy Train: ", accuracy_train)
print("F1 score Train: ", f1_score_train)
print("Precision Train: ", precision_train)
print("Recall Train: ", recall_train)

print("")
print("Model 1: ")
print("")
print(classification_report(y_test, y_pred_opt1, target_names=["No Spam", "Spam"]))

Test Score: 
Accuracy Test:  0.9370932754880694
F1 score Test:  0.9361233480176211
Precision Test:  0.9444444444444444
Recall Test:  0.9279475982532751

Train Score: 
Accuracy Train:  0.9508686210640608
F1 score Train:  0.9499585291678186
Precision Train:  0.9695259593679458
Recall Train:  0.9311653116531166

Model 1: 

              precision    recall  f1-score   support

     No Spam       0.93      0.95      0.94       464
        Spam       0.94      0.93      0.94       458

    accuracy                           0.94       922
   macro avg       0.94      0.94      0.94       922
weighted avg       0.94      0.94      0.94       922



In [None]:
best_params

{'C': 10, 'gamma': 'scale', 'kernel': 'poly', 'random_state': 21}

### optimizació 2 amb variabels 

In [30]:
import tensorflow as tf
# Verificar si hay GPUs disponibles
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"GPUs disponibles: {len(gpus)}")
    for gpu in gpus:
        print(f"  - {gpu}")
else:
    print("No se encontraron GPUs disponibles.")
# Ejemplo simple para probar el uso de la GPU
with tf.device('/GPU:0'):
    a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
    c = tf.matmul(a, b)
    print("Resultado de la multiplicación de matrices usando GPU:")
    print(c)

No se encontraron GPUs disponibles.
Resultado de la multiplicación de matrices usando GPU:
tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)


In [None]:
model_base = SVC()

hyperparameters = {'kernel': ["linear", "rbf", "poly"],
                   'C': [10, 50, 100], 
                   'gamma': [1, 0.1, 0.01], 
                   'random_state': [21],
}

grid = GridSearchCV(model_base, hyperparameters, cv=5, scoring='accuracy', n_jobs=-1)

grid.fit(X_train, y_train)
best_params = grid.best_params_
print(f"Best hyperparameters: {grid.best_params_}")


Best hyperparameters: {'C': 50, 'gamma': 1, 'kernel': 'rbf', 'random_state': 21}


gpu: 8mins 24s
cpu: 6mins 52s

In [None]:
# runneando la optimisasió

model_opt2 = SVC(**best_params)
model_opt2.fit(X_train, y_train)

y_pred_opt2 = model_opt2.predict(X_test)
y_pred_train_opt2 = model_opt2.predict(X_train)


y_pred_opt2[:10]

array([1, 0, 1, 1, 1, 0, 0, 0, 0, 1])

In [None]:
accuracy_test = accuracy_score(y_test, y_pred_opt2)
accuracy_train = accuracy_score(y_train, y_pred_train_opt2)

f1_score_test = f1_score(y_test, y_pred_opt2)
f1_score_train = f1_score(y_train, y_pred_train_opt2)

precision_test = precision_score(y_test, y_pred_opt2)
precision_train = precision_score(y_train, y_pred_train_opt2)

recall_test = recall_score(y_test, y_pred_opt2)
recall_train = recall_score(y_train, y_pred_train_opt2)

print("Test Score: ")
print("Accuracy Test: ", accuracy_test)
print("F1 score Test: ", f1_score_test)
print("Precision Test: ", precision_test)
print("Recall Test: ", recall_test)
print("")
print("Train Score: ")
print("Accuracy Train: ", accuracy_train)
print("F1 score Train: ", f1_score_train)
print("Precision Train: ", precision_train)
print("Recall Train: ", recall_train)

print("")
print("Model 1: ")
print("")
print(classification_report(y_test, y_pred_opt2, target_names=["No Spam", "Spam"]))

Test Score: 
Accuracy Test:  0.9457700650759219
F1 score Test:  0.9460043196544277
Precision Test:  0.9358974358974359
Recall Test:  0.9563318777292577

Train Score: 
Accuracy Train:  0.9641693811074918
F1 score Train:  0.9639541234298199
Precision Train:  0.9713813979086406
Recall Train:  0.9566395663956639

Model 1: 

              precision    recall  f1-score   support

     No Spam       0.96      0.94      0.95       464
        Spam       0.94      0.96      0.95       458

    accuracy                           0.95       922
   macro avg       0.95      0.95      0.95       922
weighted avg       0.95      0.95      0.95       922



#### 3era optimització per provar el nou entorn

In [None]:
model_base = SVC()

hyperparameters = {'kernel': ["linear", "rbf", "poly"],
                   'C': [10, 50, 100, 250, 1000], 
                   'gamma': [1, 0.5, 0.1], 
                   'random_state': [21],
}

grid = GridSearchCV(model_base, hyperparameters, cv=5, scoring='accuracy', n_jobs=-1)

grid.fit(X_train, y_train)
best_params = grid.best_params_
print(f"Best hyperparameters: {grid.best_params_}")

Best hyperparameters: {'C': 1000, 'gamma': 0.1, 'kernel': 'rbf', 'random_state': 21}


cpu: 2m 36s

gpu: 2m 34s

In [None]:
# runneando la optimisasió

model_opt2 = SVC(**best_params)
model_opt2.fit(X_train, y_train)

y_pred_opt2 = model_opt2.predict(X_test)
y_pred_train_opt2 = model_opt2.predict(X_train)


y_pred_opt2[:10]

array([1, 0, 1, 1, 1, 0, 0, 0, 0, 1])

In [None]:
accuracy_test = accuracy_score(y_test, y_pred_opt2)
accuracy_train = accuracy_score(y_train, y_pred_train_opt2)

f1_score_test = f1_score(y_test, y_pred_opt2)
f1_score_train = f1_score(y_train, y_pred_train_opt2)

precision_test = precision_score(y_test, y_pred_opt2)
precision_train = precision_score(y_train, y_pred_train_opt2)

recall_test = recall_score(y_test, y_pred_opt2)
recall_train = recall_score(y_train, y_pred_train_opt2)

print("Test Score: ")
print("Accuracy Test: ", accuracy_test)
print("F1 score Test: ", f1_score_test)
print("Precision Test: ", precision_test)
print("Recall Test: ", recall_test)
print("")
print("Train Score: ")
print("Accuracy Train: ", accuracy_train)
print("F1 score Train: ", f1_score_train)
print("Precision Train: ", precision_train)
print("Recall Train: ", recall_train)

print("")
print("Model 1: ")
print("")
print(classification_report(y_test, y_pred_opt2, target_names=["No Spam", "Spam"]))

Test Score: 
Accuracy Test:  0.9479392624728851
F1 score Test:  0.948051948051948
Precision Test:  0.9399141630901288
Recall Test:  0.9563318777292577

Train Score: 
Accuracy Train:  0.9641693811074918
F1 score Train:  0.9639541234298199
Precision Train:  0.9713813979086406
Recall Train:  0.9566395663956639

Model 1: 

              precision    recall  f1-score   support

     No Spam       0.96      0.94      0.95       464
        Spam       0.94      0.96      0.95       458

    accuracy                           0.95       922
   macro avg       0.95      0.95      0.95       922
weighted avg       0.95      0.95      0.95       922



In [32]:
import sys
print(sys.executable)

c:\Users\34616\AppData\Local\Programs\Python\Python311\python.exe


In [31]:
import tensorflow as tf
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

# Verificar si hay GPUs disponibles
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"GPUs disponibles: {len(gpus)}")
    for gpu in gpus:
        print(f"  - {gpu}")
else:
    print("No se encontraron GPUs disponibles.")
# Definir una función para crear el modelo Keras
def create_model():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(10, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Crear un clasificador Keras envuelto para scikit-learn
model = KerasClassifier(build_fn=create_model, verbose=0)

# Definir los hiperparámetros para la búsqueda
param_grid = {
    'batch_size': [32, 64],
    'epochs': [10, 20]
}
# Cargar datos de ejemplo (usaremos el dataset MNIST)
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
X_train = X_train.reshape(-1, 784).astype('float32') / 255
X_test = X_test.reshape(-1, 784).astype('float32') / 255

# Crear y ejecutar GridSearchCV
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1, cv=3)
grid_result = grid.fit(X_train, y_train)
# Imprimir los resultados de la mejor configuración encontrada
print(f"Mejor: {grid_result.best_score_} usando {grid_result.best_params_}")

ModuleNotFoundError: No module named 'tensorflow.keras.wrappers'

GPU: 5m 28s