# Image: Dogs vs Cats dataset

In [None]:
!pip install -q kaggle pathlib tensorflow
import os, zipfile
import pathlib, tensorflow as tf
import pickle


[0m

In [None]:
import pathlib
import zipfile

# Unpack the image archive into the working directory.
zip_path = "deep-learning.zip"
extract_path = "deep-learning"

with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

# Dataset root inside the extracted archive, plus the loader settings
# shared by every split.
BASE_DIR = pathlib.Path("deep-learning/deep-learning")
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

_load_images = tf.keras.preprocessing.image_dataset_from_directory
_common_kwargs = dict(image_size=IMG_SIZE,
                      batch_size=BATCH_SIZE,
                      label_mode="binary")
# Training and validation are two subsets of the same "train" folder
# (80/20); both calls use the same seed.
_split_kwargs = dict(validation_split=0.2, seed=123)

train_ds = _load_images(BASE_DIR / "train", subset="training",
                        **_split_kwargs, **_common_kwargs)

val_ds = _load_images(BASE_DIR / "train", subset="validation",
                      **_split_kwargs, **_common_kwargs)

# The test set is loaded without shuffling; a later cell reads its labels
# and its predictions in two separate passes and compares them position
# by position.
test_ds = _load_images(BASE_DIR / "test", shuffle=False, **_common_kwargs)

print("Clases detectadas:", train_ds.class_names)


In [None]:
!pip install tensorflow.keras

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Baseline classifier: pixel values are scaled by 1/255, the image is
# flattened, and a single hidden dense layer feeds one sigmoid output unit.
image_model = keras.Sequential()
image_model.add(layers.Rescaling(1./255, input_shape=(224, 224, 3)))
image_model.add(layers.Flatten())
image_model.add(layers.Dense(128, activation='relu'))
image_model.add(layers.Dense(1, activation='sigmoid'))

image_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train for five epochs, reporting validation metrics after each one.
image_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=5,
)

# Final score on the held-out test split.
test_loss, test_acc = image_model.evaluate(test_ds)
print("Test accuracy:", test_acc)


In [None]:
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay
import numpy as np

# First pass over the test set: stack the label batches into one array.
y_true = np.concatenate([labels for _, labels in test_ds], axis=0)

# Second pass: sigmoid outputs, thresholded at 0.5 and flattened to 1-D
# integer class ids.
y_pred_prob = image_model.predict(test_ds)
y_pred = (y_pred_prob > 0.5).astype("int32").flatten()

print(
    classification_report(
        y_true,
        y_pred,
        target_names=test_ds.class_names,
    )
)


# Text: SMS Spam Collection

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
import pandas as pd

# Keep only the two columns used below and give them descriptive names.
df = (
    pd.read_csv("spam.csv", encoding="latin-1")[['v1', 'v2']]
    .rename(columns={'v1': 'label', 'v2': 'text'})
    .copy()
)

# TF-IDF + multinomial Naive Bayes pipeline.
# NOTE(review): this pipeline is never fitted in this notebook, and the name
# `text_model` is rebound to a Keras model in a later cell.
text_model = Pipeline(steps=[
    ('tfidf', TfidfVectorizer(stop_words='english')),
    ('clf', MultinomialNB()),
])

In [None]:

# Encode the labels as integers (ham -> 0, spam -> 1) after stripping
# surrounding whitespace, and make sure every message is a Python string.
df['label'] = (
    df['label']
    .str.strip()
    .map(dict(ham=0, spam=1))
    .astype('int32')
)
df['text'] = df['text'].astype(str)

In [None]:
from tensorflow.keras.layers import TextVectorization
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import GlobalAveragePooling1D, Dense

# Hyper-parameters of the text pipeline.
MAX_TOKENS = 10000   # vocabulary cap for the vectorizer / embedding table
MAX_LEN = 100        # every message is padded or truncated to this length
EMBED_DIM = 16       # width of each token embedding

# Hold out 20% of the messages for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

# The vocabulary is learned from the training messages only.
vectorizer = TextVectorization(
    max_tokens=MAX_TOKENS,
    output_sequence_length=MAX_LEN,
)
vectorizer.adapt(X_train)

# Raw strings -> token ids -> averaged embeddings -> dense head -> sigmoid.
text_model = Sequential()
text_model.add(vectorizer)
text_model.add(Embedding(MAX_TOKENS, EMBED_DIM))
text_model.add(GlobalAveragePooling1D())
text_model.add(Dense(64, activation='relu'))
text_model.add(Dense(1, activation='sigmoid'))

text_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# The model is fed object-dtype NumPy arrays of strings.
X_train_np = X_train.to_numpy(dtype=object)
X_test_np = X_test.to_numpy(dtype=object)

text_model.fit(
    X_train_np,
    y_train.to_numpy(),
    validation_split=0.1,
    batch_size=32,
    epochs=5,
)

loss, acc = text_model.evaluate(X_test_np, y_test.to_numpy())
print(f"Test accuracy: {acc:.3f}")

In [None]:
from sklearn.metrics import classification_report

# Spam probabilities from the model's single sigmoid output unit.
y_pred_probs = text_model.predict(X_test_np)

# Threshold at 0.5 and flatten to a 1-D int32 vector so the predictions have
# the same shape as `y_test`, the same way the image evaluation cell builds
# its `y_pred`.
y_pred_labels = (y_pred_probs > 0.5).astype("int32").flatten()

print(classification_report(y_test, y_pred_labels,
                            target_names=['ham', 'spam']))


# Regression: House Prices

In [None]:
test = pd.read_csv("test.csv")
train = pd.read_csv("train.csv")

In [None]:
train.head()

In [None]:
len(train)

In [None]:
len(train.columns)

In [None]:

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from tensorflow.keras import layers, Sequential

# Fill numeric gaps with the column means. `fillna` returns a new frame, so
# the target is popped from that new frame and `train` itself is never
# mutated: popping 'SalePrice' from `train` in place made a second run of
# this cell fail with KeyError.
# NOTE(review): the means are computed over all rows before the train/test
# split below, so held-out rows contribute to the imputation values.
X = train.fillna(train.mean(numeric_only=True))
y = X.pop('SalePrice')

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Route columns by dtype: categoricals are one-hot encoded, everything else
# is min-max scaled.
cat_cols = X_train.select_dtypes(
    include=['object', 'category']).columns.tolist()
num_cols = X_train.select_dtypes(
    exclude=['object', 'category']).columns.tolist()

# sparse_threshold=0 makes the transformer always return a dense array.
# With the default threshold the stacked output can come back as a scipy
# sparse matrix, which is not a safe input for the Keras fit call below
# (it also uses validation_split).
pre = ColumnTransformer(
    [
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat_cols),
        ('num', MinMaxScaler(), num_cols),
    ],
    sparse_threshold=0,
)

# Fit the preprocessing on the training rows only, then apply it to both.
X_train_prep = pre.fit_transform(X_train)
X_test_prep = pre.transform(X_test)
input_dim = X_train_prep.shape[1]

# Two hidden layers and a single linear output unit for the price.
regression_model = Sequential([
    layers.Dense(128, activation='relu', input_shape=(input_dim,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)
])

regression_model.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mean_absolute_error']
)

regression_model.fit(X_train_prep, y_train,
          epochs=5, batch_size=32,
          validation_split=0.1)

loss, mae = regression_model.evaluate(X_test_prep, y_test)
print(f"Test MAE: {mae:.0f}")

In [None]:
# Evaluate the regression model on the held-out split again (same call as
# at the end of the training cell) and print the mean absolute error.
loss, mae = regression_model.evaluate(
    X_test_prep,
    y_test,
)
print(f"Test MAE: {mae:.0f}")


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import math

# Predictions with the singleton dimensions squeezed away, so they can be
# compared with y_test directly.
y_pred = regression_model.predict(X_test_prep).squeeze()

# Error metrics on the held-out split; RMSE is the square root of the MSE.
r2 = r2_score(y_test, y_pred)
rmse = math.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

print(
    f"MAE:  ${mae:,.0f}",
    f"RMSE: ${rmse:,.0f}",
    f"R² score: {r2:.3f}",
    sep="\n",
)

In [None]:
# Serialise the three trained models with pickle, one file per model,
# written in the listed order.
# NOTE(review): all three are Keras models; Keras documents `model.save(...)`
# as its native persistence API. Confirm that pickle round-trips these
# models on the installed TensorFlow/Keras version.
_model_files = {
    "text_model.pkl": text_model,
    "image_model.pkl": image_model,
    "regression_model.pkl": regression_model,
}

for _path, _model in _model_files.items():
    with open(_path, "wb") as f:
        pickle.dump(_model, f)


In [None]:
# Write the output of `pip freeze` to requirements.txt, overwriting any
# existing file of that name.
!pip freeze > requirements.txt
